Lucene 4.0 [FS/SmartChinese...]

来源:互联网 发布:vb如何删除非打印字符 编辑:程序博客网 时间:2024/06/05 06:57

 

/**
     * 描述:测试检索关键字
     *
     * @param iPath
     * @param keyword
     */
    static void testSearch(String iPath, String keyword) {
        if (iPath == null || "".intern() == iPath.trim().intern()) {
            return;
        }
        final String fieldName = "contents";

        Query query = null;
        IndexSearcher searcher = null;
        try {
            query = new QueryParser(Version.LUCENE_40, fieldName, new SmartChineseAnalyzer(Version.LUCENE_40)).parse(keyword);

            searcher = new IndexSearcher(DirectoryReader.open(FSDirectory.open(new File(iPath))));

            TopDocs hits = searcher.search(query, 100);// 匹配前100
            System.out.println((hits != null && hits.totalHits > 0) ? hits.totalHits : "没有找到匹配结果。");
        }
        catch (IOException e) {
            e.printStackTrace();
        }
        catch (ParseException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) {
        testSearch("f:\\test\\idx", "中国");
    }

 private static class IndexerTotal {

        static ArrayList<String> affects = new ArrayList<String>();

        static long total = 0;
    }

    /**
     * 描述:测试创建索引
     * 说明:InputStream流中编码格式必须与Source文件编码格式一致,否则导致中文无法正确索引。
     * @param iDir
     *            保存索引目录
     * @param sDir
     *            源文件目录
     * @return
     */
    static long testCreate(String iDir, String sDir) {
        if (iDir == null || sDir == null || "".intern() == (iDir.trim()).intern() || "".intern() == (sDir.trim()).intern()) {
            return 0;
        }
        return write(iDir, sDir);
    }

    private static long write(String iDir, String sDir) {
        IndexerTotal.total = 0;
        SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer(Version.LUCENE_40);
        try {
            long s = System.currentTimeMillis();
            File idx = new File(iDir);
            File source = new File(sDir);

            IndexWriter writer = null;
            Directory directory = null;
            IndexWriterConfig cfg = null;
            {
                directory = FSDirectory.open(idx);

                cfg = new IndexWriterConfig(Version.LUCENE_40, analyzer);
                cfg.setOpenMode(OpenMode.CREATE_OR_APPEND);

                writer = new IndexWriter(directory, cfg);
                addDocs(writer, source);
                // writer.commit();
                writer.close();
            }
            System.out.println("Affects files:");
            for (String fp : IndexerTotal.affects) {
                System.out.println("\t\t\t" + fp);
            }
            System.err.println("------------- Affects:" + IndexerTotal.total + "");
            System.err.println("------------- Timeout: " + (System.currentTimeMillis() - s) + " ms");
        }
        catch (Exception e) {
            System.err.println("------------- Error!");
            e.printStackTrace();
        }
        return IndexerTotal.total;
    }

    private static void addDocs(IndexWriter writer, File sourceFile) throws IOException {
        if (sourceFile.isDirectory()) {
            File[] files = sourceFile.listFiles();
            int next = 0;
            for (;;) {
                addDocs(writer, files[next]);
                next++;
                if (next == files.length) {
                    break;
                }
            }
        }
        else {
            addFileDocs(writer, sourceFile);
        }
    }

    private static void addFileDocs(IndexWriter writer, File sFile) {
        try {
            if (!sFile.canRead()) {
                System.out.println(sFile.getCanonicalPath() + " can not Read.");
            }
            Document document = new Document();
            FileInputStream fis = new FileInputStream(sFile);
            {
                Field field = new StringField("path", sFile.getPath(), Store.YES);
                document.add(field);
                field = new StringField("filename", sFile.getName(), Store.YES);
                document.add(field);
                field = new LongField("lastModified", sFile.lastModified(), Store.YES);
                document.add(field);
                field = new TextField("contents", new BufferedReader(new InputStreamReader(fis, "utf-8")));
                document.add(field);
                OpenMode mode = writer.getConfig().getOpenMode();
                if (mode == OpenMode.CREATE) {
                    // Add
                    writer.addDocument(document);
                }
                else {
                    // Update or Append
                    writer.updateDocument(new Term("path", sFile.getPath()), document);
                }
                IndexerTotal.affects.add(sFile.getPath());
            }
            fis.close();
            IndexerTotal.total++;
        }
        catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) {
        testCreate("f:/test/idx", "f:/test/source");
    }

 

 

原创粉丝点击