读取Lucene 索引数据

来源:互联网 发布:如何看待网络语言 编辑:程序博客网 时间:2024/06/01 17:58

前一阵帮朋友写了一个读取 Lucene 数据文件的小应用,在这里分享给大家。这段代码基于 Lucene 3.6 版本。

package com.pushine;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.List;

// NOTE(review): JDK-internal wildcard import kept from the original; nothing in this class appears to use it.
import sun.nio.cs.ext.*;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
//import org.apache.lucene.document.Fieldable;
//import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Reads the stored fields of every live document in a Lucene index directory
 * into an in-memory table (a header row plus one row per document) and, via
 * {@link #main(String[])}, hands that table to {@code ExportExcel}.
 *
 * <p>Written against the Lucene 3.6 API.
 */
public class AnalyzeCFS {

    /** Column names: {@code "rowNum"} followed by the index's field names. */
    public ArrayList<String> headers = new ArrayList<String>();

    /** One row per exported document; element 0 of each row is the Lucene document id. */
    public ArrayList<ArrayList<String>> datas = new ArrayList<ArrayList<String>>();

    /** Supplies the list of charset names tried when re-decoding stored values. */
    public CheckCode cc = new CheckCode();

    /**
     * Opens the index in the given directory and fills {@link #headers} and {@link #datas}.
     *
     * @param filedir1 path of the Lucene index directory
     * @throws IOException if the index cannot be opened or read
     */
    public void analyze(String filedir1) throws IOException {
        // Open the Lucene index directory.
        Directory dir = FSDirectory.open(new File(filedir1));
        try {
            IndexReader indexReader = IndexReader.open(dir);
            try {
                // maxDoc() is one greater than the largest document id, deleted documents included.
                int max = indexReader.maxDoc();
                headers = this.getColName(indexReader);
                datas = this.getData(indexReader, max);
            } finally {
                // Always release the reader, even when extraction fails.
                indexReader.close();
            }
        } finally {
            dir.close();
        }
    }

    /**
     * Extracts the stored field values of documents {@code 0 .. max-1}.
     *
     * <p>Deleted documents are skipped. Each returned row starts with the document id,
     * followed by one value per index field (in field-info order); a field that is not
     * stored on a document yields {@code null}.
     *
     * @param indexReader open reader to pull documents from
     * @param max exclusive upper bound of the document ids to visit
     * @return the extracted rows
     * @throws IOException if a document cannot be read or a charset name is unsupported
     */
    public ArrayList<ArrayList<String>> getData(IndexReader indexReader, int max) throws IOException {
        ArrayList<ArrayList<String>> rows = new ArrayList<ArrayList<String>>();
        // The field list is the same for every document, so read it once.
        List<String> fieldNames = readFieldNames(indexReader);

        for (int n = 0; n < max; n++) {
            // document(n) does not validate deleted ids, so filter them explicitly.
            if (indexReader.isDeleted(n)) {
                continue;
            }
            Document document = indexReader.document(n);

            ArrayList<String> row = new ArrayList<String>();
            row.add(Integer.toString(n));
            for (String name : fieldNames) {
                String value = convert(document.get(name));
                row.add(value);
                System.out.print(name + ":" + value + ",");
            }
            rows.add(row);
        }
        return rows;
    }

    /**
     * Builds the header row: {@code "rowNum"} followed by every field name of the index.
     * Also stores the result in {@link #headers}.
     *
     * @param indexReader open reader whose field infos are listed
     * @return the column names
     * @throws IOException if the field infos cannot be read
     */
    public ArrayList<String> getColName(IndexReader indexReader) throws IOException {
        ArrayList<String> colNames = new ArrayList<String>();
        colNames.add("rowNum");
        colNames.addAll(readFieldNames(indexReader));
        this.headers = colNames;
        return colNames;
    }

    /** @return the header row produced by the last {@link #analyze} / {@link #getColName} call */
    public ArrayList<String> getColNames() {
        return this.headers;
    }

    /** @return the data rows produced by the last {@link #analyze} call */
    public ArrayList<ArrayList<String>> getDatas() {
        return datas;
    }

    /** Lists the names of all fields known to the reader, in field-info order. */
    private static List<String> readFieldNames(IndexReader indexReader) throws IOException {
        List<String> names = new ArrayList<String>();
        FieldInfos fieldInfos = indexReader.getFieldInfos();
        for (Iterator<?> it = fieldInfos.iterator(); it.hasNext();) {
            Object entry = it.next();
            // Lucene 3.6 iterates FieldInfo objects; a plain String is still accepted as a fallback.
            names.add(entry instanceof FieldInfo ? ((FieldInfo) entry).name : (String) entry);
        }
        return names;
    }

    /**
     * Re-decodes a stored value through each charset reported by {@link #cc} and strips a
     * leading {@code '^'}. Returns {@code null} for {@code null} input, and the raw value
     * unchanged when no charset is configured.
     */
    private String convert(String raw) throws IOException {
        if (raw == null) {
            return null;
        }
        String value = raw;
        // NOTE(review): every charset is applied to the raw value and the last one wins;
        // the return value of cc.check is ignored, exactly as in the original.
        for (String cn : cc.getcnlist()) {
            cc.check(raw);
            value = new String(raw.getBytes(cn), "GBK");
            System.out.println("char set :" + cn);
            System.out.println("change value:" + value);
        }
        if (value.startsWith("^")) {
            value = value.substring(1);
        }
        return value;
    }

    /**
     * Command-line entry point: {@code java -jar AnalyzeCFS <index directory>}.
     *
     * @param args {@code args[0]} is the Lucene index directory
     */
    public static void main(String[] args) {
        // Validate the arguments first, without touching args[0]/args[1] when they are absent.
        if (args.length < 1) {
            System.out.println(args.length);
            System.out.println("please give file directory ");
            System.out.println(" eg  java -jar AnalyzeCFS  g:\\index");
            System.exit(1);
        }
        String filedir = args[0];

        ExportExcel expExcel = new ExportExcel();
        AnalyzeCFS analyzeCFS = new AnalyzeCFS();
        expExcel.createFile();

        // NOTE(review): the original used an undeclared `sheetindex`; 0 is assumed to be the
        // first sheet and createSheet is assumed to take an int - confirm against ExportExcel.
        int sheetindex = 0;
        try {
            analyzeCFS.analyze(filedir);
            expExcel.createSheet(sheetindex);
            expExcel.insertHeaders(analyzeCFS.getColNames());

            ArrayList<ArrayList<String>> rows = analyzeCFS.getDatas();
            for (int i = 0; i < rows.size(); i++) {
                System.out.println("row number : " + i);
                // NOTE(review): a sheet and header row are requested for every data row, as in
                // the original; this looks like a lost "sheet is full" condition - confirm.
                expExcel.createSheet(sheetindex + 2);
                expExcel.insertHeaders(analyzeCFS.getColNames());
                expExcel.insertRow(rows.get(i));
            }
            System.out.println("summary Lines : " + rows.size());
        } catch (IOException e) {
            // Report the failure; whatever was written so far is still saved below.
            e.printStackTrace();
        }
        expExcel.saveFile();
    }
}


0 0
原创粉丝点击