Apache OpenNLP使用

来源:互联网 发布:python函数传入文件名 编辑:程序博客网 时间:2024/04/27 14:09


import java.io.*;import opennlp.tools.sentdetect.SentenceDetectorME;import opennlp.tools.sentdetect.SentenceModel;import opennlp.tools.tokenize.Tokenizer;import opennlp.tools.tokenize.TokenizerME;import opennlp.tools.tokenize.TokenizerModel;import opennlp.tools.util.Span;public class Testing_openNLP {/* http://opennlp.apache.org/documentation/1.5.3/manual/opennlp.html   官方教程Apache OpenNLP Developer Documentation * openNLP 中的各种模型可以在   http://opennlp.sourceforge.net/models-1.5/   下载 * http://www.programcreek.com/2012/05/opennlp-tutorial/    this is good tutorial about openNLP tools *  * */public static void main(String[] args) {//String testString = "This isn't the greatest example sentence in the world because I've seen better.  Neither is this one.  This one's not bad, though.";String testString = "Hi. How are you?  This is      &3 $444 Mike.";String tokens[] = Token(testString);String sentences[] = sentenceSegmentation(testString);String aa = "";}//分句public static String[] sentenceSegmentation(String str){ try {InputStream modelIn = new FileInputStream("en-sent.bin");SentenceModel model = null;try {   model = new SentenceModel(modelIn);}catch (IOException e) {  e.printStackTrace();}finally {  if (modelIn != null) {    try {      modelIn.close();    }    catch (IOException e) {    }  }}SentenceDetectorME sentenceDetector = new SentenceDetectorME(model);String sentences[] = sentenceDetector.sentDetect(str);return sentences;} catch (FileNotFoundException e1) {e1.printStackTrace();return null;}}//分词public static String[] Token(String str){ try{ InputStream modelIn = new FileInputStream("en-token.bin"); TokenizerModel model = null; try {   model = new TokenizerModel(modelIn); } catch (IOException e) {   e.printStackTrace(); } finally {   if (modelIn != null) {     try {       modelIn.close();     }     catch (IOException e) {     }   } } TokenizerME tokenizer = new TokenizerME(model); String tokens[] = tokenizer.tokenize(str); //double tokenProbs[] = tokenizer.getTokenProbabilities();//must be called directly after one of the tokenize methods was called.return tokens; } catch(FileNotFoundException e){return null;} } }

0 0
原创粉丝点击