Practice Every Day_15
来源:互联网 发布:英雄无敌3hd for mac 编辑:程序博客网 时间:2024/06/15 19:27
今天完成了对特定格式文件的搜索,以及按文件大小进行的搜索,感觉只是把前几天学过的内容复习了一遍,没有多少新东西。今天晚上看了看HTML的讲解,感觉选修课上的作业可以交上了。
今天的代码:
//建立索引并实现几种搜索功能:
package MySearcher;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
public class IndexAndSearcher
{
private final static Analyzer analyzer=new StandardAnalyzer(Version.LUCENE_35);
private static Directory directory=null;
public static void index_01()
{
IndexWriter writer=null;
try
{
directory=FSDirectory.open(new File("f:Index_01"));
writer=new IndexWriter(directory,new IndexWriterConfig(Version.LUCENE_35,new StandardAnalyzer(Version.LUCENE_35)));
File file=new File("e:/Lucene实例/example/");
Document doc=null;
for(File f:file.listFiles())
{
doc=new Document();
doc.add(new Field("content",new FileReader(f)));
doc.add(new Field("filename",f.getName(),Field.Store.YES,Field.Index.NOT_ANALYZED));
doc.add(new Field("path",f.getAbsolutePath(),Field.Store.YES,Field.Index.NOT_ANALYZED));
doc.add(new NumericField("date",Field.Store.YES,true).setLongValue(f.lastModified()));
doc.add(new NumericField("size",Field.Store.YES,true).setIntValue((int)(f.length())));
writer.addDocument(doc);
}
writer.close();
}
catch(CorruptIndexException e)
{
e.printStackTrace();
}
catch(LockObtainFailedException e)
{
e.printStackTrace();
}
catch(IOException e)
{
e.printStackTrace();
}
}
public static void check() throws IOException
{ //检查索引是否被正确建立(打印索引)
FSDirectory directory = FSDirectory.open(new File("f:Index_04"));
IndexReader reader = IndexReader.open(directory);
for(int i = 0;i<reader.numDocs();i++)
{
System.out.println(reader.document(i));
}
}
//实现功能:f1域中一定含有s1,f2中一定不含有s2,f3中可以含有s3
public static void searcher1(String f1,String s1,String f2,String s2,String f3,String s3,int n) throws IOException
{
directory=FSDirectory.open(new File("f:Index_01"));
IndexReader reader=IndexReader.open(directory);
IndexSearcher searcher=new IndexSearcher(reader);
Term t1=new Term(f1,s1);
FuzzyQuery q1=new FuzzyQuery(t1,0.1f);
Term t2=new Term(f2,s2);
FuzzyQuery q2=new FuzzyQuery(t2,0.1f);
Term t3=new Term(f3,s3);
FuzzyQuery q3=new FuzzyQuery(t3,0.1f);
BooleanQuery q=new BooleanQuery();
q.add(q1,BooleanClause.Occur.MUST);
q.add(q2,BooleanClause.Occur.MUST_NOT);
q.add(q3,BooleanClause.Occur.SHOULD);
TopDocs tds=searcher.search(q, n);
ScoreDoc[] sds=tds.scoreDocs;
System.out.println("共搜到"+tds.totalHits+"条结果,"+"以下显示"+n+"条");
for(ScoreDoc sd:sds)
{
Document d=searcher.doc(sd.doc);
System.out.println("路径为:"+d.get("path")+" 文件名为:"+d.get("filename")+" 文件大小为"
+d.get("size")+" 文件修改日期为:"+new SimpleDateFormat("yyyy-MM-dd hh:mm:ss").format
(new Date(Long.valueOf(d.get("date")))));
}
}
//实现功能:搜索某个格式的文件,并且对其大小进行限定后显示搜索结果
public static void searcher2(String type,int size,int n) throws IOException, ParseException
{
directory=FSDirectory.open(new File("f:Index_01"));
IndexReader reader=IndexReader.open(directory);
IndexSearcher searcher=new IndexSearcher(reader);
Term t1=new Term("filename","*."+type);
WildcardQuery q1=new WildcardQuery(t1);
Query q2=NumericRangeQuery.newIntRange("size",size-1000,size+1000,true,true);
BooleanQuery q=new BooleanQuery();
q.add(q1,BooleanClause.Occur.MUST);
q.add(q2,BooleanClause.Occur.MUST);
TopDocs tds=searcher.search(q, n);
ScoreDoc[] sds=tds.scoreDocs;
System.out.println("共搜到"+tds.totalHits+"条结果,"+"以下显示"+n+"条");
for(ScoreDoc sd:sds)
{
Document d=searcher.doc(sd.doc);
System.out.println("路径为:"+d.get("path")+" 文件名为:"+d.get("filename")+" 文件大小为"
+d.get("size")+" 文件修改日期为:"+new SimpleDateFormat("yyyy-MM-dd hh:mm:ss").format
(new Date(Long.valueOf(d.get("date")))));
}
}
//实现功能:搜索特定格式的文件
public static void searcher3(String type,int n) throws IOException, ParseException
{
directory=FSDirectory.open(new File("f:Index_01"));
IndexReader reader=IndexReader.open(directory);
IndexSearcher searcher=new IndexSearcher(reader);
Term t1=new Term("filename","*."+type);
WildcardQuery q=new WildcardQuery(t1);
TopDocs tds=searcher.search(q, n);
ScoreDoc[] sds=tds.scoreDocs;
System.out.println("共搜到"+tds.totalHits+"条结果,"+"以下显示"+n+"条");
for(ScoreDoc sd:sds)
{
Document d=searcher.doc(sd.doc);
System.out.println("路径为:"+d.get("path")+" 文件名为:"+d.get("filename")+" 文件大小为"
+d.get("size")+" 文件修改日期为:"+new SimpleDateFormat("yyyy-MM-dd hh:mm:ss").format
(new Date(Long.valueOf(d.get("date")))));
}
}
//实现功能:搜索在指定大小范围内的文件
public static void searcher4(int start,int end,int n) throws IOException, ParseException
{
directory=FSDirectory.open(new File("f:Index_01"));
IndexReader reader=IndexReader.open(directory);
IndexSearcher searcher=new IndexSearcher(reader);
Query q=NumericRangeQuery.newIntRange("size",start,end,true,true);
TopDocs tds=searcher.search(q, n);
ScoreDoc[] sds=tds.scoreDocs;
System.out.println("共搜到"+tds.totalHits+"条结果,"+"以下显示"+n+"条");
for(ScoreDoc sd:sds)
{
Document d=searcher.doc(sd.doc);
System.out.println("路径为:"+d.get("path")+" 文件名为:"+d.get("filename")+" 文件大小为"
+d.get("size")+" 文件修改日期为:"+new SimpleDateFormat("yyyy-MM-dd hh:mm:ss").format
(new Date(Long.valueOf(d.get("date")))));
}
}
}
//测试类:
package MySearcher;
import java.io.IOException;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.queryParser.ParseException;
/**
 * Demo driver: rebuilds the index, then runs each of the four
 * IndexAndSearcher searches and prints a heading before every result list.
 */
public class Test {
    /**
     * Runs the demo.
     *
     * @param args ignored
     * @throws IOException    if the index cannot be opened or read
     * @throws ParseException declared by the searcher methods being called
     */
    public static void main(String[] args) throws IOException ,CorruptIndexException, ParseException
    {
        String f1="content",s1="java",
               f2="filename",s2="java",
               f3="path",s3="",
               type="java";
        int targetSize=1895,start=500,end=3000;
        int maxHits=10; // upper bound on hits printed by every search

        System.out.println("一定包含: 域"+f1+"里的"+s1);
        System.out.println("一定不包含: 域"+f2+"里的"+s2);
        System.out.println("可以包含: 域"+f3+"里的"+s3);
        System.out.println("搜索结果为");
        IndexAndSearcher.index_01();
        IndexAndSearcher.searcher1(f1, s1, f2, s2, f3, s3, maxHits);
        System.out.println();

        System.out.println("搜索"+type+"文件并且文件大小在"+String.valueOf(targetSize)+"左右 搜索结果为:");
        // Pass the same value the heading announces; the original passed s2,
        // which only worked because s2 and type happen to be equal.
        IndexAndSearcher.searcher2(type,targetSize,maxHits);
        System.out.println();

        System.out.println("搜索文件类型为"+type+"的 结果为:");
        IndexAndSearcher.searcher3(type,maxHits);
        System.out.println();

        System.out.println("搜索大小在"+String.valueOf(start)+"~"+String.valueOf(end)+"之间的文件"+" 结果为:");
        IndexAndSearcher.searcher4(start,end,maxHits);
    }
}
不知不觉写博客有两个星期了,假期也快结束了,明天起要学习新内容了,加油啊!
- Pactice Every Day_15
- PHP5 in Pactice中文版
- every time
- Every Step
- entries、every
- Ubiquitous Computing: Every where, Every day, Everybody
- Every Woman is Beautiful!
- Every woman is beautiful
- Welcome!Every Friend^_^
- Every Object responds to
- interaction is every thing
- Hello every high hander
- [英语学习]Every other
- Beauty every where
- just like every time
- do it every day
- every thing start now
- happy every day !
- UIScrollView 滚动视图 (实例)
- 网页中播放FLV文件的代码
- Select数据库查询语句整理
- Python:编程“八荣八耻”之我见
- has a / is a 的区别
- Pactice Every Day_15
- Format格式字符串
- Exercises 4-13
- linux boardcom 无线网卡安装
- 并发与竞态(笔记)
- 数学之美番外篇:平凡而又神奇的贝叶斯方法
- lamp编译之常用参数
- 【大学生活】幸运的是还有时间
- Linux新手生存笔记[2]——vim训练稿