java用NLPIR对本地txt进行分词,并将分词结果写入本地
来源:互联网 发布:侠义道2武功数据 编辑:程序博客网 时间:2024/05/18 01:28
一:下载资源:
1:使用的是NLPIR-ICTCLAS2016的java接口
2:平台:win7 64位
二:Myeclipse启动工程
1:打开MyEclipse,导入项目:
导入项目后,只有NlpirTest.java,实现分词
另外的MyFileReader.java实现读取本地txt文档
MyFileSave.java实现将分词结果保存到本地txt
2:修改NlpirTest.java类
需要修改2处路径:
一处为:CLibrary Instance = (CLibrary) Native.loadLibrary(
"C:\\NLPIR-ICTCLAS2016\\lib\\win64\\NLPIR", CLibrary.class);
"C:\\NLPIR-ICTCLAS2016\\lib\\win64\\NLPIR", CLibrary.class);
另一处为:
String argu = "C:\\NLPIR-ICTCLAS2016";
注意编码格式为:utf-8
3:源码
NlpirTest.java:
package code;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.jar.Attributes.Name;
import utils.SystemParas;
import com.sun.jna.Library;
import com.sun.jna.Native;
public class NlpirTest {
// 定义接口CLibrary,继承自com.sun.jna.Library
public interface CLibrary extends Library {
// 定义并初始化接口的静态变量
CLibrary Instance = (CLibrary) Native.loadLibrary(
"C:\\NLPIR-ICTCLAS2016\\lib\\win64\\NLPIR", CLibrary.class);
public int NLPIR_Init(String sDataPath, int encoding,
String sLicenceCode);
public String NLPIR_ParagraphProcess(String sSrc, int bPOSTagged);
public String NLPIR_GetKeyWords(String sLine, int nMaxKeyLimit,
boolean bWeightOut);
public String NLPIR_GetFileKeyWords(String sLine, int nMaxKeyLimit,
boolean bWeightOut);
public int NLPIR_AddUserWord(String sWord);//add by qp 2008.11.10
public int NLPIR_DelUsrWord(String sWord);//add by qp 2008.11.10
public String NLPIR_GetLastErrorMsg();
public void NLPIR_Exit();
}
public static String transString(String aidString, String ori_encoding,
String new_encoding) {
try {
return new String(aidString.getBytes(ori_encoding), new_encoding);
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
}
return null;
}
public static void main(String[] args) throws Exception {
String argu = "C:\\NLPIR-ICTCLAS2016";
// String system_charset = "GBK";//GBK----0
@SuppressWarnings("unused")
String system_charset = "UTF-8";
int charset_type = 1;
int init_flag = CLibrary.Instance.NLPIR_Init(argu, charset_type, "0");
String nativeBytes = null;
String nativeByte = null;
ArrayList<String> name = new ArrayList<String>();
ArrayList<String> classify = new ArrayList<String>();
if (0 == init_flag) {
nativeBytes = CLibrary.Instance.NLPIR_GetLastErrorMsg();
System.err.println("初始化失败!fail reason is "+nativeBytes);
return;
}
try {
nativeByte = CLibrary.Instance.NLPIR_GetFileKeyWords("C:\\专利文献全文获取_xpdf.txt", 10,false);
System.out.println("关键词提取结果是:" + nativeByte);
String file="C:\\专利文献全文获取_xpdf.txt";
String sinputt= MyFileReader.read(file);
nativeBytes = CLibrary.Instance.NLPIR_ParagraphProcess(sinputt, 1);
System.out.println("分词结果为: " + nativeBytes);
CLibrary.Instance.NLPIR_Exit();
//以空格分离,把每个词/v分别存到数组里
String[] nativeBytesArray=nativeBytes.split(" ");
MyFileSave save=new MyFileSave();
save.Save(nativeBytesArray);
}
} catch (Exception ex) {
// TODO Auto-generated catch block
ex.printStackTrace();
}
}
}
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.jar.Attributes.Name;
import utils.SystemParas;
import com.sun.jna.Library;
import com.sun.jna.Native;
public class NlpirTest {
// 定义接口CLibrary,继承自com.sun.jna.Library
public interface CLibrary extends Library {
// 定义并初始化接口的静态变量
CLibrary Instance = (CLibrary) Native.loadLibrary(
"C:\\NLPIR-ICTCLAS2016\\lib\\win64\\NLPIR", CLibrary.class);
public int NLPIR_Init(String sDataPath, int encoding,
String sLicenceCode);
public String NLPIR_ParagraphProcess(String sSrc, int bPOSTagged);
public String NLPIR_GetKeyWords(String sLine, int nMaxKeyLimit,
boolean bWeightOut);
public String NLPIR_GetFileKeyWords(String sLine, int nMaxKeyLimit,
boolean bWeightOut);
public int NLPIR_AddUserWord(String sWord);//add by qp 2008.11.10
public int NLPIR_DelUsrWord(String sWord);//add by qp 2008.11.10
public String NLPIR_GetLastErrorMsg();
public void NLPIR_Exit();
}
public static String transString(String aidString, String ori_encoding,
String new_encoding) {
try {
return new String(aidString.getBytes(ori_encoding), new_encoding);
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
}
return null;
}
public static void main(String[] args) throws Exception {
String argu = "C:\\NLPIR-ICTCLAS2016";
// String system_charset = "GBK";//GBK----0
@SuppressWarnings("unused")
String system_charset = "UTF-8";
int charset_type = 1;
int init_flag = CLibrary.Instance.NLPIR_Init(argu, charset_type, "0");
String nativeBytes = null;
String nativeByte = null;
ArrayList<String> name = new ArrayList<String>();
ArrayList<String> classify = new ArrayList<String>();
if (0 == init_flag) {
nativeBytes = CLibrary.Instance.NLPIR_GetLastErrorMsg();
System.err.println("初始化失败!fail reason is "+nativeBytes);
return;
}
try {
nativeByte = CLibrary.Instance.NLPIR_GetFileKeyWords("C:\\专利文献全文获取_xpdf.txt", 10,false);
System.out.println("关键词提取结果是:" + nativeByte);
String file="C:\\专利文献全文获取_xpdf.txt";
String sinputt= MyFileReader.read(file);
nativeBytes = CLibrary.Instance.NLPIR_ParagraphProcess(sinputt, 1);
System.out.println("分词结果为: " + nativeBytes);
CLibrary.Instance.NLPIR_Exit();
//以空格分离,把每个词/v分别存到数组里
String[] nativeBytesArray=nativeBytes.split(" ");
MyFileSave save=new MyFileSave();
save.Save(nativeBytesArray);
}
} catch (Exception ex) {
// TODO Auto-generated catch block
ex.printStackTrace();
}
}
}
MyFileReader.java
package code;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.Reader;
/**
 * Helper that loads a UTF-8 encoded text file into a single string.
 */
public class MyFileReader {

    /** Charset name used to decode the file. */
    private static final String ENCODING = "utf-8";

    /**
     * Reads the whole file at {@code filePath} as UTF-8 text.
     *
     * <p>Lines are concatenated without any separator, i.e. line breaks in
     * the file are dropped.
     *
     * @param filePath path of the file to read
     * @return the file content with line breaks removed; {@code null} when the
     *         path is not an existing regular file or the file cannot be
     *         opened; the text read so far when an error occurs mid-read
     */
    public static String read(String filePath) {
        StringBuilder content = null;
        try {
            File file = new File(filePath);
            // isFile() is already false for a path that does not exist.
            if (!file.isFile()) {
                System.out.println("找不到指定的文件");
                return null;
            }
            // try-with-resources closes the reader even when readLine() throws.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(file), ENCODING))) {
                content = new StringBuilder();
                String line;
                while ((line = reader.readLine()) != null) {
                    content.append(line);
                }
            }
        } catch (Exception e) {
            System.out.println("读取文件内容出错");
            e.printStackTrace();
        }
        return content == null ? null : content.toString();
    }
}
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.Reader;
/**
 * Helper that loads a UTF-8 encoded text file into a single string.
 */
public class MyFileReader {

    /** Charset name used to decode the file. */
    private static final String ENCODING = "utf-8";

    /**
     * Reads the whole file at {@code filePath} as UTF-8 text.
     *
     * <p>Lines are concatenated without any separator, i.e. line breaks in
     * the file are dropped.
     *
     * @param filePath path of the file to read
     * @return the file content with line breaks removed; {@code null} when the
     *         path is not an existing regular file or the file cannot be
     *         opened; the text read so far when an error occurs mid-read
     */
    public static String read(String filePath) {
        StringBuilder content = null;
        try {
            File file = new File(filePath);
            // isFile() is already false for a path that does not exist.
            if (!file.isFile()) {
                System.out.println("找不到指定的文件");
                return null;
            }
            // try-with-resources closes the reader even when readLine() throws.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(file), ENCODING))) {
                content = new StringBuilder();
                String line;
                while ((line = reader.readLine()) != null) {
                    content.append(line);
                }
            }
        } catch (Exception e) {
            System.out.println("读取文件内容出错");
            e.printStackTrace();
        }
        return content == null ? null : content.toString();
    }
}
MyFileSave.java
package code;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.PrintWriter;
/**
 * Appends space-separated tokens to a text file, encoded as UTF-8.
 */
public class MyFileSave {

    /** File written by {@link #Save(String[])}. */
    private static final String DEFAULT_OUTPUT_PATH = "C:\\专利文献全文获取分词结果.txt";

    /**
     * Appends the tokens to the default output file.
     *
     * @param a tokens to write; each one is followed by a single space
     */
    public void Save(String[] a) {
        Save(a, DEFAULT_OUTPUT_PATH);
    }

    /**
     * Appends the tokens to {@code outputPath}, creating the file if needed.
     *
     * <p>The text is written as UTF-8 regardless of the platform default
     * charset. A failure to open the file is reported by printing the stack
     * trace; a failure while writing is reported on standard error.
     *
     * @param a          tokens to write; each one is followed by a single space
     * @param outputPath file to append to
     */
    public void Save(String[] a, String outputPath) {
        // Second FileOutputStream argument: true = append instead of overwrite.
        // try-with-resources closes the stream only if it was actually opened.
        try (PrintWriter writer = new PrintWriter(new java.io.OutputStreamWriter(
                new FileOutputStream(outputPath, true),
                java.nio.charset.StandardCharsets.UTF_8))) {
            for (String token : a) {
                writer.write(token + " ");
            }
            // PrintWriter never throws on write; checkError() flushes and
            // reveals whether any write failed.
            if (writer.checkError()) {
                System.err.println("写入文件内容出错");
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }
    }
}
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.PrintWriter;
/**
 * Appends space-separated tokens to a text file, encoded as UTF-8.
 */
public class MyFileSave {

    /** File written by {@link #Save(String[])}. */
    private static final String DEFAULT_OUTPUT_PATH = "C:\\专利文献全文获取分词结果.txt";

    /**
     * Appends the tokens to the default output file.
     *
     * @param a tokens to write; each one is followed by a single space
     */
    public void Save(String[] a) {
        Save(a, DEFAULT_OUTPUT_PATH);
    }

    /**
     * Appends the tokens to {@code outputPath}, creating the file if needed.
     *
     * <p>The text is written as UTF-8 regardless of the platform default
     * charset. A failure to open the file is reported by printing the stack
     * trace; a failure while writing is reported on standard error.
     *
     * @param a          tokens to write; each one is followed by a single space
     * @param outputPath file to append to
     */
    public void Save(String[] a, String outputPath) {
        // Second FileOutputStream argument: true = append instead of overwrite.
        // try-with-resources closes the stream only if it was actually opened.
        try (PrintWriter writer = new PrintWriter(new java.io.OutputStreamWriter(
                new FileOutputStream(outputPath, true),
                java.nio.charset.StandardCharsets.UTF_8))) {
            for (String token : a) {
                writer.write(token + " ");
            }
            // PrintWriter never throws on write; checkError() flushes and
            // reveals whether any write failed.
            if (writer.checkError()) {
                System.err.println("写入文件内容出错");
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }
    }
}
四:运行
运行结果展示:
完结!
0 0
- java用NLPIR对本地txt进行分词,并将分词结果写入本地
- 使用NLPIR汉语分词工具进行中文分词(java语言)
- NLPIR中文分词 java
- 中科院分词(NLPIR) JAVA
- 使用NLPIR汉语分词系统进行分词
- 使用NLPIR 进行中文分词并标注词性
- NLPIR(ICTCLAS2016)对文本进行分词
- Python调用NLPIR/ICTCLAS进行文本分词
- 【python】使用中科院NLPIR分词工具进行mysql数据分词
- jieba分词并写入到TXT文本中
- NLPIR(ICTCLAS2015)分词工具Java开发简介
- 使用Java调用中科院分词NLPIR/ICTCLAS
- java实现NLPIR(ICTCLAS)分词
- 使用Java调用中科院分词NLPIR/ICTCLAS
- NLPIR中文分词的java接口使用方法
- 中科院分词系统(NLPIR)JAVA简易教程
- 中科院NLPIR中文分词java版
- 简单的NLPIR分词 JAVA 实现
- Java之XMemcached使用及源码详解
- centos6.5下使用yum安装mysql
- 设计模式-工厂方法
- 结果集的数据转换
- Syntax error, parameterized types are only available if source level is 1.5 or greater
- java用NLPIR对本地txt进行分词,并将分词结果写入本地
- POJ 3260 The Fewest Coins(完全背包+多重背包(转化为01背包))
- kali2.0 msf连接数据库及启动armitage
- getBackground().setAlpha(0)设置透明度连下层控件背景色都被改变
- java动态数组
- [fluentd学习]安装使用
- CSP最大的矩形
- 第四次程序设计作业 C++计算器计算及命令行的使用 前缀表达式方法实现
- HDFS详解