java用NLPIR对本地txt进行分词,并将分词结果写入本地

来源:互联网 发布:侠义道2武功数据 编辑:程序博客网 时间:2024/05/18 01:28

一:下载资源:

1:使用的是NLPIR-ICTCLAS2016的java接口


2:平台:win7 64位

二:Myeclipse启动工程

1:打开Myeclipse,导入项目:



      导入项目后,只有NlpirTest.java,实现分词
     另外的MyFileReader.java实现读取本地txt文档
     MyFileSave.java实现将分词结果保存到本地txt

2:修改NlpirTest.java类

需要修改2处路径:
一处为:CLibrary Instance = (CLibrary) Native.loadLibrary(
"C:\\NLPIR-ICTCLAS2016\\lib\\win64\\NLPIR", CLibrary.class);
另一处为:
String argu = "C:\\NLPIR-ICTCLAS2016";
注意编码格式为:utf-8

3:源码

NlpirTest.java:

package code;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.jar.Attributes.Name;


import utils.SystemParas;


import com.sun.jna.Library;
import com.sun.jna.Native;


public class NlpirTest {
// 定义接口CLibrary,继承自com.sun.jna.Library
public interface CLibrary extends Library {
// 定义并初始化接口的静态变量
CLibrary Instance = (CLibrary) Native.loadLibrary(
"C:\\NLPIR-ICTCLAS2016\\lib\\win64\\NLPIR", CLibrary.class);
public int NLPIR_Init(String sDataPath, int encoding,
String sLicenceCode);
public String NLPIR_ParagraphProcess(String sSrc, int bPOSTagged);


public String NLPIR_GetKeyWords(String sLine, int nMaxKeyLimit,
boolean bWeightOut);
public String NLPIR_GetFileKeyWords(String sLine, int nMaxKeyLimit,
boolean bWeightOut);
public int NLPIR_AddUserWord(String sWord);//add by qp 2008.11.10
public int NLPIR_DelUsrWord(String sWord);//add by qp 2008.11.10
public String NLPIR_GetLastErrorMsg();
public void NLPIR_Exit();
}
public static String transString(String aidString, String ori_encoding,
String new_encoding) {
try {
return new String(aidString.getBytes(ori_encoding), new_encoding);
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
}
return null;
}
public static void main(String[] args) throws Exception {
String argu = "C:\\NLPIR-ICTCLAS2016";
// String system_charset = "GBK";//GBK----0
@SuppressWarnings("unused")
String system_charset = "UTF-8";
int charset_type = 1;

int init_flag = CLibrary.Instance.NLPIR_Init(argu, charset_type, "0");
String nativeBytes = null;
String nativeByte = null;
ArrayList<String> name = new ArrayList<String>();
        ArrayList<String> classify = new ArrayList<String>();
if (0 == init_flag) {
nativeBytes = CLibrary.Instance.NLPIR_GetLastErrorMsg();
System.err.println("初始化失败!fail reason is "+nativeBytes);
return;
}
try {
nativeByte = CLibrary.Instance.NLPIR_GetFileKeyWords("C:\\专利文献全文获取_xpdf.txt", 10,false);


System.out.println("关键词提取结果是:" + nativeByte);


String file="C:\\专利文献全文获取_xpdf.txt";
String sinputt= MyFileReader.read(file);
nativeBytes = CLibrary.Instance.NLPIR_ParagraphProcess(sinputt, 1);
System.out.println("分词结果为: " + nativeBytes);
CLibrary.Instance.NLPIR_Exit();
            //以空格分离,把每个词/v分别存到数组里
String[] nativeBytesArray=nativeBytes.split(" ");
MyFileSave save=new MyFileSave();
save.Save(nativeBytesArray);
}
} catch (Exception ex) {
// TODO Auto-generated catch block
ex.printStackTrace();
}
}
}

MyFileReader.java

package code;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.Reader;


/** Utility for loading a whole UTF-8 text file into memory. */
public class MyFileReader {

    /**
     * Reads the file at {@code filePath} as UTF-8 and returns all of its lines
     * joined together with the line terminators removed.
     *
     * <p>This method never throws: problems are reported on standard output
     * and signalled to the caller with a {@code null} return value.
     *
     * @param filePath path of the text file to read
     * @return the file content without line breaks (an empty string for an
     *         empty file), or {@code null} if the path is not an existing
     *         regular file or the file cannot be read
     */
    public static String read(String filePath) {
        try {
            File file = new File(filePath);
            // isFile() is only true for an existing regular file.
            if (!file.isFile()) {
                System.out.println("找不到指定的文件");
                return null;
            }

            // try-with-resources closes the reader (and the underlying stream)
            // even when readLine() fails midway.
            try (BufferedReader bufferedReader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(file), "utf-8"))) {
                StringBuilder text = new StringBuilder();
                String lineTxt;
                while ((lineTxt = bufferedReader.readLine()) != null) {
                    text.append(lineTxt);
                }
                return text.toString();
            }
        } catch (Exception e) {
            // Broad catch kept on purpose so callers keep the "never throws"
            // contract; a partial read is discarded rather than returned.
            System.out.println("读取文件内容出错");
            e.printStackTrace();
            return null;
        }
    }
}

MyFileSave.java

package code;


import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.PrintWriter;


/** Appends segmentation tokens to a text file. */
public class MyFileSave {

    /** Output file used by {@link #Save(String[])}. */
    private static final String DEFAULT_PATH = "C:\\专利文献全文获取分词结果.txt";

    /**
     * Appends the given tokens to the default result file.
     *
     * @param a tokens to write; {@code null} writes nothing
     */
    public void Save(String[] a) {
        Save(a, DEFAULT_PATH);
    }

    /**
     * Appends the given tokens to {@code targetPath}, each followed by two
     * spaces, encoded as UTF-8. The file is created if it does not exist and
     * existing content is kept.
     *
     * <p>Failures are reported on standard error and are not thrown.
     *
     * @param a          tokens to write; {@code null} writes nothing
     * @param targetPath path of the file to append to
     */
    public void Save(String[] a, String targetPath) {
        if (a == null) {
            return;
        }
        // Explicit UTF-8 so the output does not depend on the platform default
        // charset; the second FileOutputStream argument enables append mode.
        try (PrintWriter pWriter = new PrintWriter(new java.io.OutputStreamWriter(
                new FileOutputStream(targetPath, true),
                java.nio.charset.StandardCharsets.UTF_8))) {
            for (String token : a) {
                pWriter.write(token + "  ");
            }
            pWriter.flush();
            // PrintWriter swallows IOExceptions; surface them explicitly.
            if (pWriter.checkError()) {
                System.err.println("写入分词结果出错: " + targetPath);
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }
    }
}

三:运行

运行结果展示:



完结!
0 0