lucene 分词处理中文
来源:互联网 发布:服装批发软件 编辑:程序博客网 时间:2024/09/21 06:35
package lia.analysis.i18n;

import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.cn.ChineseAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.util.Version;

import java.awt.Font;
import java.awt.FontMetrics;
import java.awt.Frame;
import java.awt.Label;
import java.awt.event.WindowAdapter;
import java.awt.event.WindowEvent;
import java.io.IOException;
import java.io.StringReader;

// From chapter 4

/**
 * Runs a sample Chinese string through several Lucene analyzers and shows the
 * tokens each analyzer produces in its own AWT window, one window per
 * (string, analyzer) pair.
 */
public class ChineseDemo {

  /** Sample texts that are fed to every analyzer. */
  private static final String[] strings = {"道德經"};

  /** Analyzers whose tokenization of the sample texts is displayed. */
  private static final Analyzer[] analyzers = {
    new SimpleAnalyzer(),
    new StandardAnalyzer(Version.LUCENE_30),
    new ChineseAnalyzer(),
    new CJKAnalyzer(Version.LUCENE_30)
  };

  /**
   * Analyzes every sample string with every configured analyzer.
   *
   * @param args command-line arguments (unused)
   * @throws Exception if reading from a token stream fails
   */
  public static void main(String args[]) throws Exception {
    for (String string : strings) {
      for (Analyzer analyzer : analyzers) {
        analyze(string, analyzer);
      }
    }
  }

  /**
   * Tokenizes {@code string} with {@code analyzer} and displays the tokens,
   * each wrapped in square brackets, in a new frame titled with the
   * analyzer's simple class name and the input text.
   *
   * @param string   the text to tokenize
   * @param analyzer the analyzer used to produce the token stream
   * @throws IOException if the token stream cannot be read
   */
  private static void analyze(String string, Analyzer analyzer)
      throws IOException {
    String output = collectTokens(string, analyzer);

    final Frame f = new Frame();
    f.setTitle(analyzer.getClass().getSimpleName() + " : " + string);
    f.setResizable(true);
    // Without a listener the frame ignores its close button; disposing it
    // lets the user dismiss each demo window.
    f.addWindowListener(new WindowAdapter() {
      @Override
      public void windowClosing(WindowEvent e) {
        f.dispose();
      }
    });

    Font font = new Font(null, Font.PLAIN, 36);
    int width = getWidth(f.getFontMetrics(font), output);

    f.setSize((width < 250) ? 250 : width + 50, 75);

    // NOTE: if Label doesn't render the Chinese characters
    // properly, try using javax.swing.JLabel instead
    Label label = new Label(output);
    label.setSize(width, 75);
    label.setAlignment(Label.CENTER);
    label.setFont(font);
    f.add(label);

    f.setVisible(true);
  }

  /**
   * Runs the analyzer over {@code string} and renders every term as
   * {@code "[term] "}, concatenated in stream order.
   *
   * @param string   the text to tokenize
   * @param analyzer the analyzer used to produce the token stream
   * @return the bracketed terms, or an empty string if no tokens are produced
   * @throws IOException if the token stream cannot be read
   */
  private static String collectTokens(String string, Analyzer analyzer)
      throws IOException {
    StringBuilder buffer = new StringBuilder();
    TokenStream stream =
        analyzer.tokenStream("contents", new StringReader(string));
    try {
      TermAttribute term = stream.addAttribute(TermAttribute.class);
      // Follow the TokenStream consumer workflow:
      // reset -> incrementToken* -> end -> close.
      stream.reset();
      while (stream.incrementToken()) {
        buffer.append("[");
        buffer.append(term.term());
        buffer.append("] ");
      }
      stream.end();
    } finally {
      // Always release the stream, even when tokenization fails midway.
      stream.close();
    }
    return buffer.toString();
  }

  /**
   * Returns the advance width of {@code s} when drawn with the font described
   * by {@code metrics}.
   *
   * <p>The whole string is measured at once: a char-by-char sum would measure
   * each half of a surrogate pair separately, which is wrong for supplementary
   * (e.g. rare CJK) characters.
   *
   * @param metrics font metrics of the font used for rendering
   * @param s       the text to measure
   * @return the total advance width as reported by {@code metrics}
   */
  private static int getWidth(FontMetrics metrics, String s) {
    return metrics.stringWidth(s);
  }
}
0 0
- lucene 分词处理中文
- Lucene中文分词
- Lucene中文分词“庖丁解牛”
- Lucene中文分词
- Lucene 2.3 中文分词
- Lucene中文分词
- Lucene中文分词
- lucene-NGram中文分词
- lucene-JE中文分词
- GTAnalyzer-lucene中文分词
- Lucene整理--中文分词
- lucene中文分词
- Lucene与中文分词
- Lucene中文分词介绍
- Lucene中文分词Jcseg
- Lucene中文分词Paoding
- Lucene中文分词mmseg4j
- Lucene中文分词Demo
- MyEclipse启动Tomcat出现Exception in thread "main" java.lang.OutOfMemoryError: PermGen space
- 并发用户数和QPS
- 关于微信检测SDK应用的原理浅析(iOS)
- objective-c中Category类别(扩展类)专题总结
- ios中小心NULL的情况
- lucene 分词处理中文
- 数据结构之简单栈
- 浅析 Java Thread.join()
- 本地通知和推送通知 - iOS开发
- mongoDB的两个特性:主从复制、副本集
- 基于linux-2.6.35的网络视频服务器移植和ZC301摄像头的驱动支持
- Android xml资源文件中@、@android:type、@*、?、@+含义和区别
- 流水线技术
- NSMutableURLRequest 请求头的设置