NLPIR分词使用说明

来源:互联网 发布:迪士尼乐园全套源码 编辑:程序博客网 时间:2024/06/04 18:43

NLPIR的授权文件有效期较短,基本上每个月需要重新授权一次;授权过期后,用最新的授权文件替换Data/NLPIR.user即可。

给出一个分词的使用实例:
1:上官网下载分词工具包

2:随便新建一个Java工程;

3:把jna.jar放到classpath下面,在eclipse中就是普通的手动添加jar包

4:新建或者直接把Data文件夹添加到工程中;

5:添加与自己系统匹配的NLPIR.dll和NLPIR.lib文件;如果是Maven工程,放到src/main/resources目录下即可,否则放到工程中新建的文件夹下。

6:根据给出的实例修改,主要就是注意一下上面的各个文件的路径的配置。

(原文此处为一张项目目录结构截图,图片未能保留。)

上图展示的是项目结构以及各个文件所在的位置。

下面的是一个使用的demo;

public class NlpirSegmentUtil {    public static Logger logger = Logger.getLogger("NlpirSegmentUtil");    List<String> dictionary_words = new ArrayList<>();    public NlpirSegmentUtil(List<String> dictionary_words) {        // TODO Auto-generated constructor stub        this.dictionary_words = dictionary_words;    }    public NlpirSegmentUtil() {    }    public interface CLibrary extends Library {        CLibrary Instance = (CLibrary) Native.loadLibrary("NLPIR", CLibrary.class);        public int NLPIR_Init(String sDataPath, int encoding, String sLicenceCode);        public String NLPIR_ParagraphProcess(String sSrc, int bPOSTagged);        public String NLPIR_GetKeyWords(String sLine, int nMaxKeyLimit, boolean bWeightOut);        public String NLPIR_GetFileKeyWords(String sLine, int nMaxKeyLimit, boolean bWeightOut);        public int NLPIR_AddUserWord(String sWord);// add by qp 2008.11.10        public int NLPIR_DelUsrWord(String sWord);// add by qp 2008.11.10        public String NLPIR_GetLastErrorMsg();        public void NLPIR_Exit();    }    public static String transString(String aidString, String ori_encoding, String new_encoding) {        try {            return new String(aidString.getBytes(ori_encoding), new_encoding);        } catch (UnsupportedEncodingException e) {            e.printStackTrace();        }        return null;    }    public List<String> getSentenceSegmentResult(List<String> sentences) {        return getSentenceSegmentResult(sentences,dictionary_words);    }    public List<String> getSentenceSegmentResult(List<String> sentences,List<String> dictionary_words) {        List<String> sentencesSeged = new ArrayList<>();        String argu = System.getProperty("user.dir");        int charset_type = 1;        int init_flag = CLibrary.Instance.NLPIR_Init(argu, charset_type, "0");        String nativeBytes = null;        if (0 == init_flag) {            nativeBytes = CLibrary.Instance.NLPIR_GetLastErrorMsg();            logger.info("初始化失败!fail 
reason is " + nativeBytes);        }        try {            for (String symptom_Name : dictionary_words) {                CLibrary.Instance.NLPIR_AddUserWord(symptom_Name);            }            for(String sInput : sentences){                nativeBytes = CLibrary.Instance.NLPIR_ParagraphProcess(sInput, 1);                logger.info("增加用户词典后分词结果为:" + nativeBytes);                sentencesSeged.add(nativeBytes);            }            CLibrary.Instance.NLPIR_Exit();        } catch (Exception ex) {            // TODO Auto-generated catch block            ex.printStackTrace();        }        return sentencesSeged;    }    public void test(String sentenceContent, List<String> symptoms) {        // String argu = SystemParas.data_dir_parent_path;        String argu = System.getProperty("user.dir");        int charset_type = 1;        int init_flag = CLibrary.Instance.NLPIR_Init(argu, charset_type, "0");        String nativeBytes = null;        if (0 == init_flag) {            nativeBytes = CLibrary.Instance.NLPIR_GetLastErrorMsg();            logger.info("初始化失败!fail reason is " + nativeBytes);        }        try {            for (String symptom_Name : symptoms) {                CLibrary.Instance.NLPIR_AddUserWord(symptom_Name);            }            String sInput = sentenceContent;            nativeBytes = CLibrary.Instance.NLPIR_ParagraphProcess(sInput, 1);            Logger.getLogger("yuan_data").info("增加用户词典后分词结果为:" + nativeBytes);            CLibrary.Instance.NLPIR_Exit();        } catch (Exception ex) {            // TODO Auto-generated catch block            ex.printStackTrace();        }    }    public static void main(String[] args) {        NlpirSegmentUtil nlpirSegmentForWordCluster = new NlpirSegmentUtil();        List<String> list = new ArrayList<>();        list.add("口干");        list.add("舌燥");        List<String> sentence = new ArrayList<>();        sentence.add("间断性口干舌燥三天。");        nlpirSegmentForWordCluster.getSentenceSegmentResult(sentence, 
list);    }}
0 0
原创粉丝点击