读取Lucene 索引数据
来源:互联网 发布:如何看待网络语言 编辑:程序博客网 时间:2024/06/01 17:58
前一阵帮朋友写了一个读取 Lucene 数据文件的小应用,在这里分享给大家。这份代码基于 Lucene 3.6 版本。
package com.pushine;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.List;

// NOTE(review): JDK-internal package, not referenced by this class; kept only
// because other code may rely on it. Charsets such as GBK are resolved by name.
import sun.nio.cs.ext.*;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.ReaderUtil;

/**
 * Loads the contents of a Lucene (3.6) index directory into memory as a table:
 * one header list ({@code "rowNum"} followed by the index field names) and one
 * row of string values per live document. {@link #main(String[])} hands the
 * table to {@code ExportExcel}.
 */
public class AnalyzeCFS {

    /** Column names: {@code "rowNum"} followed by the field names of the index. */
    public ArrayList<String> headers = new ArrayList<String>();

    /** One entry per exported document; each row starts with the document number. */
    public ArrayList<ArrayList<String>> datas = new ArrayList<ArrayList<String>>();

    /** Project helper that supplies the list of candidate charset names. */
    public CheckCode cc = new CheckCode();

    /**
     * Opens the index stored in the given directory and fills {@link #headers}
     * and {@link #datas} from it.
     *
     * @param filedir1 path of the Lucene index directory
     * @throws IOException if the index cannot be opened or read
     */
    public void analyze(String filedir1) throws IOException {
        // Open the Lucene index directory.
        Directory dir = FSDirectory.open(new File(filedir1));
        try {
            IndexReader indexReader = IndexReader.open(dir);
            try {
                // Upper bound of document numbers (includes deleted documents).
                int max = indexReader.maxDoc();
                // Column names first, then the data rows.
                headers = this.getColName(indexReader);
                datas = this.getData(indexReader, max);
            } finally {
                // Release the reader even when reading fails part-way.
                indexReader.close();
            }
        } finally {
            dir.close();
        }
    }

    /**
     * Reads the documents numbered {@code 0 .. max-1} and converts each one to
     * a row. A row holds the document number followed by one value per index
     * field, in field order; a field the document does not store yields
     * {@code null}. Deleted documents are skipped because the reader refuses to
     * load them.
     *
     * @param indexReader open reader on the index
     * @param max         exclusive upper bound of document numbers to read
     * @return the rows, in document-number order
     * @throws IOException if a document cannot be read or a charset name is not
     *                     supported
     */
    public ArrayList<ArrayList<String>> getData(IndexReader indexReader, int max) throws IOException {
        ArrayList<ArrayList<String>> rows = new ArrayList<ArrayList<String>>();
        // The field list does not change between documents, so resolve it once.
        List<String> names = fieldNames(indexReader);

        for (int n = 0; n < max; n++) {
            if (indexReader.isDeleted(n)) {
                continue;
            }
            Document document = indexReader.document(n);

            ArrayList<String> row = new ArrayList<String>();
            row.add(Integer.toString(n));
            for (String name : names) {
                String value = decodeValue(document.get(name));
                row.add(value);
                System.out.print(name + ":" + value + ",");
            }
            rows.add(row);
        }
        return rows;
    }

    /**
     * Builds the header list: {@code "rowNum"} followed by every field name of
     * the index. The result is also stored in {@link #headers}.
     *
     * @param indexReader open reader on the index
     * @return the column names
     * @throws IOException declared for compatibility with existing callers
     */
    public ArrayList<String> getColName(IndexReader indexReader) throws IOException {
        ArrayList<String> colNames = new ArrayList<String>();
        colNames.add("rowNum");
        colNames.addAll(fieldNames(indexReader));
        this.headers = colNames;
        return colNames;
    }

    /** Returns the header list produced by the last call to {@link #getColName}. */
    public ArrayList<String> getColNames() {
        return this.headers;
    }

    /** Returns the rows produced by the last call to {@link #analyze}. */
    public ArrayList<ArrayList<String>> getDatas() {
        return datas;
    }

    /**
     * Collects the field names of the index. The merged field infos are used so
     * this works for multi-segment readers as well as single-segment ones.
     */
    private static List<String> fieldNames(IndexReader indexReader) {
        List<String> names = new ArrayList<String>();
        FieldInfos infos = ReaderUtil.getMergedFieldInfos(indexReader);
        for (FieldInfo info : infos) {
            names.add(info.name);
        }
        return names;
    }

    /**
     * Re-encodes a stored value: for each charset name supplied by {@link #cc}
     * the raw text is turned into bytes with that charset and decoded as GBK,
     * so the value produced for the last charset in the list wins. A single
     * leading {@code ^} is then removed. With no charsets configured the raw
     * value is used unchanged.
     */
    private String decodeValue(String raw) throws IOException {
        if (raw == null) {
            return null;
        }
        String value = raw;
        ArrayList<String> cnlist = cc.getcnlist();
        if (cnlist != null) {
            for (String cn : cnlist) {
                // NOTE(review): the result of check() is ignored here, as in the
                // original; confirm whether it was meant to select the charset.
                cc.check(raw);
                value = new String(raw.getBytes(cn), "GBK");
                System.out.println("char set :" + cn);
                System.out.println("change value:" + value);
            }
        }
        if (value.startsWith("^")) {
            value = value.substring(1);
        }
        return value;
    }

    /**
     * Command-line entry point: reads the index directory given as the first
     * argument and exports its contents through {@code ExportExcel}.
     *
     * @param args {@code args[0]} is the index directory
     */
    public static void main(String[] args) {
        // Validate arguments before touching any output file.
        if (args.length < 1) {
            System.out.println("please give file directory ");
            System.out.println(" eg java -jar AnalyzeCFS g:\\index");
            System.exit(1);
        }
        String filedir = args[0];

        ExportExcel expExcel = new ExportExcel();
        AnalyzeCFS analyzeCFS = new AnalyzeCFS();
        expExcel.createFile();

        // NOTE(review): the original used an undeclared 'sheetindex'; 0 is
        // assumed to be the first sheet - confirm against ExportExcel.
        int sheetindex = 0;
        try {
            analyzeCFS.analyze(filedir);
            ArrayList<ArrayList<String>> rows = analyzeCFS.getDatas();

            expExcel.createSheet(sheetindex);
            expExcel.insertHeaders(analyzeCFS.getColNames());
            for (int i = 0; i < rows.size(); i++) {
                System.out.println("row number : " + i);
                // NOTE(review): a sheet is created and headers are re-inserted for
                // every row, exactly as in the original; this looks like a lost
                // "sheet is full" condition - verify against ExportExcel.
                expExcel.createSheet(sheetindex + 2);
                expExcel.insertHeaders(analyzeCFS.getColNames());
                expExcel.insertRow(rows.get(i));
            }
            System.out.println("summary Lines : " + rows.size());
        } catch (IOException e) {
            e.printStackTrace();
        }
        expExcel.saveFile();
    }
}
0 0
- 读取Lucene 索引数据
- 如何读取Lucene索引数据
- Lucene数据索引搜索示例
- 数据检索---基于Lucene索引
- 读取lucene索引域中的term
- lucene.net 基本的读取索引
- lucene读取索引文件到内存
- lucene创建索引读取索引简单测试--笔记
- lucene笔记-001-新建索引和读取索引
- lucene中海量数据索引问题
- 读取PNG颜色索引数据
- lucene读取索引文件到内存-源码解析
- 使用Lucene的IndexReader读取索引文件的信息
- 如何读取搜索创建的Lucene索引内容
- lucene 索引
- Lucene 索引
- Lucene索引
- Lucene 索引
- c#读写txt文件
- 群延迟解释(FIR线性相位的解释)
- 网络爬虫系列之一:通过URL下载网页
- iOS学习之自定义弹出UIPickerView或UIDatePicker(动画效果)
- iOS method swizzling详解3
- 读取Lucene 索引数据
- 基于Qt的二维码生成器
- 选择ORACLE数据库字符集
- android开发对Webview的应用
- 交叉编译openssl
- PB获取DW.DS的参数名、类型、参数的值
- 关于CISCO NX-OS licensing
- 死亡之屋show girl惊艳登场 华丽首曝
- 表格驱动的单元测试技术(1)