文档检索与在线预览

来源:互联网 发布:485通讯端口电压多少 编辑:程序博客网 时间:2024/05/07 22:23

一、技术配置和实现思路

技术配置:
Lucene用于创建索引和搜寻
poi技术用于office文件的内容读取
openoffice用于文档转换成PDF
pdf.js用于对文档的展示

实现思路:
这里写图片描述

效果图:
这里写图片描述
这里写图片描述

二、技术简介

本文使用的jar包:
http://download.csdn.net/detail/liu_c_y/9830971
Lucene
Lucene是apache软件基金会jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包,但它不是一个完整的全文检索引擎,而是一个全文检索引擎的架构,提供了完整的查询引擎和索引引擎,部分文本分析引擎(英文与德文两种西方语言)。Lucene的目的是为软件开发人员提供一个简单易用的工具包,以方便地在目标系统中实现全文检索的功能,或者是以此为基础建立起完整的全文检索引擎。Lucene是一套用于全文检索和搜寻的开源程式库,由Apache软件基金会支持和提供。Lucene提供了一个简单却强大的应用程式接口,能够做全文索引和搜寻。在Java开发环境里Lucene是一个成熟的免费开源工具。就其本身而言,Lucene是当前以及最近几年最受欢迎的免费Java信息检索程序库。
相关学习博客
http://www.cnblogs.com/forfuture1978/category/300665.html

创建索引和相关注意点:
1.Lucene不同的版本实现的方法略有区别,且版本较多,本文使用的版本是5.2.1
2.Lucene具有多种分词器,不同的分词器分词的效果不同,其中对中文分词时使用第三方中文分词技术IKAnalyzer()效果较佳。

package lucene;import java.io.FileReader;import java.io.IOException;import java.io.StringReader;import java.nio.file.Paths;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.TokenStream;import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;import org.apache.lucene.document.Document;import org.apache.lucene.document.DoubleField;import org.apache.lucene.document.Field;import org.apache.lucene.document.IntField;import org.apache.lucene.document.LongField;import org.apache.lucene.document.StringField;import org.apache.lucene.document.TextField;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.index.IndexWriterConfig.OpenMode;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.wltea.analyzer.lucene.IKAnalyzer;public class Lucene {    private static String str="原济南军区参谋长张鸣的辞职原因首次由官方披露。 analyzer 中国人大网8月22日发布的《第十二届全国人民代表大会常务委员会代,lucene ";    public static void main(String[] args) throws Exception {        /*         * 1.创建分词器         */        Analyzer luceneAnalyzer4 = new IKAnalyzer();//第三方中文分词技术IKAnalyzer()默认最细粒度切分等同于IKAnalyzer(false),IKAnalyzer(true)智能切分         //Exception in thread "main" java.lang.VerifyError: class org.wltea.analyzer.lucene.IKAnalyzer overrides final method tokenStream.(Ljava/lang/String;Ljava/io/Reader;)        //Lorg/apache/lucene/analysis/TokenStream; 版本不一致造成的错误        //Exception in thread "main" java.lang.AbstractMethodError at org.apache.lucene.analysis.Analyzer.tokenStream版本不一致造成的错误        prin(luceneAnalyzer4);//打印出分词的效果        /*         * 2.创建索引的配置         */        IndexWriterConfig iwc = new IndexWriterConfig(luceneAnalyzer4);//创建索引的配置,配置索引的相关配置        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);        //OpenMode.CREATE 打开索引方式创建或覆盖,存在就覆盖,不存在就创建        //OpenMode.APPEND 追加,索引还会重复,导致返回多次结果,如果不存在索引还会抛出异常:org.apache.lucene.index.IndexNotFoundException         //OpenMode.CREATE_OR_APPEND 
如果不存在则创建,否则追加        /*         * 3.索引文档的存储位置,FSDirectory将索引写入文件系统,RAMDirectory索引文档写入内存,一重启就会丢失         */         Directory directory = FSDirectory.open(Paths.get("f://luceneIndex"));//f://luceneIndex索引存放的文件夹         //Directory directory1 = new RAMDirectory();         /*          * 4.创建索引,一个索引包含多个文档          */        IndexWriter indexWriter = new IndexWriter(directory, iwc);//一个索引包含多个文档        /*         * 5.创建一个文档,一个文档包含多个域         * 例如:一篇文章可以包含标题域,作者域,文章内容域         */        Document doc = new Document();//创建一个文档,一个文档包含多个域        /*         * 6.创建域         */        Field field = new StringField("域名", "内容", Field.Store.NO);//创建一个域,一个文档包含多个域,此处的内容可以来自文本,数据库等等        //Field.Store.YES 可以索引,保存内容,当索引的时候可以取出内容        //Field.Store.NO 可以索引,不保存内容,当索引的时候不能取出内容,因为没有保存,当文件内容很大时建议不保存        //Field.Store.COMPRESS 可以索引,用于长文本或二进制压缩保存内容节省空间,当索引的时候可以取出内容,但是5版本已经没有了        //StringFiled就是NOT_ANALYZED,不对域的内容进行分割分析        Field field1 = new TextField("域名二","内容,原济南军区参谋长张鸣的辞职原因首次由官方披露"                + "。中国人大网8月22日发布的《第十二届全国人民代表大会常务委员会代",Field.Store.YES);        //TextField就是ANALYZED,对域的内容进行分割分析,也可以不指定Field.Store.YES,例如下方代码        Field field2 = new TextField("域名三",new FileReader("f://source/123.txt"));        Field field4 = new DoubleField("域名五",45.9,Field.Store.YES);        Field field5 = new IntField("域名六",50,Field.Store.YES);        Field field6 = new LongField("域名七",(long) 1234567890,Field.Store.YES);        doc.add(field);//一个文档可以包含多个域        doc.add(field1);        doc.add(field2);        doc.add(field4);        doc.add(field5);        doc.add(field6);        indexWriter.addDocument(doc);//添加一个文档        System.out.println(indexWriter.numDocs());//打印索引中包含的文档数        indexWriter.close();    }    public static void prin(Analyzer analyzer){        StringReader stringReader = new StringReader(str);        try {            TokenStream tokenStream = analyzer.tokenStream("", stringReader);            tokenStream.reset();            CharTermAttribute term = 
tokenStream.getAttribute(CharTermAttribute.class);            System.out.println("分词技术:"+analyzer.getClass());            while(tokenStream.incrementToken()){                System.out.print(term.toString()+" | ");            }            System.out.println();            System.out.println();            tokenStream.close();        } catch (IOException e) {            e.printStackTrace();        }    }}

搜索时:

package lucene;import java.nio.file.Paths;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.document.Document;import org.apache.lucene.index.DirectoryReader;import org.apache.lucene.queryparser.classic.QueryParser;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.TopDocs;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.wltea.analyzer.lucene.IKAnalyzer;public class test {    public static void main(String[] args) throws Exception {    /*     * 1.打开索引存储位置,     * 索引存储位置:文件系统或者内存     */    Directory directory = FSDirectory.open(Paths.get("f://luceneIndex"));//索引存储在文件系统    DirectoryReader directoryReader = DirectoryReader.open(directory);//打开索引    /*     *2.创建搜索器     */    IndexSearcher searcher = new IndexSearcher(directoryReader);    /*     * 3.创建搜索条件     */    Analyzer analyzer = new IKAnalyzer();//分词器,第三方中文分词器    QueryParser queryParser = new QueryParser("域名二",analyzer);//QueryParser("搜索哪个域field",指定分词器);    Query query = queryParser.parse("济南军区好样的");//需要搜索的关键词或内容    /*     * 搜索     */    if(searcher != null){        //搜索结果        TopDocs results = searcher.search(query, 5);//排在前五的结果        //获取搜索结果        ScoreDoc[] scoreDocs = results.scoreDocs;        for (ScoreDoc scoreDoc : scoreDocs) {            Document document = searcher.doc(scoreDoc.doc);            System.out.println(document.get("域名二"));        }        if(scoreDocs.length>0){            System.out.println("搜索到有几条数据:" + scoreDocs.length);        }else{            System.out.println("搜索到有几条数据:0");        }    }    }}

openoffice软件
OpenOffice.org 是一套跨平台的办公室软件套件,能在Windows、Linux、MacOS X (X11)和 Solaris 等操作系统上执行。它与各个主要的办公室软件套件兼容。OpenOffice.org 是自由软件,任何人都可以免费下载、使用及推广它。
使用:
安装openoffice后,用Java调用它的服务将文档转换为PDF

 public void Word2Pdf(String srcPath, String desPath) throws IOException {        // 源文件目录        File inputFile = new File(srcPath);        if (!inputFile.exists()) {            System.out.println("源文件不存在!");            return;        }        // 输出文件目录        File outputFile = new File(desPath);        if (!outputFile.getParentFile().exists()) {            outputFile.getParentFile().exists();        }        // 调用openoffice服务线程        String command = "  C:/Program Files (x86)/OpenOffice 4/program/soffice.exe -headless -accept=\"socket,host=127.0.0.1,port=8100;urp;\"";        Process p = Runtime.getRuntime().exec(command);        // 连接openoffice服务        OpenOfficeConnection connection = new SocketOpenOfficeConnection(                "127.0.0.1", 8100);        connection.connect();        // 转换word到pdf        DocumentConverter converter = new OpenOfficeDocumentConverter(                connection);        converter.convert(inputFile, outputFile);        // 关闭连接        connection.disconnect();        // 关闭进程        p.destroy();        System.out.println("转换完成!");    }

pdf.js
pdf.js 是一个技术原型主要用于在 HTML5 平台上展示 PDF 文档,无需任何本地技术支持。
https://github.com/mozilla/pdf.js/

三、文档索引与搜寻

package com.services;import java.io.BufferedInputStream;import java.io.BufferedReader;import java.io.File;import java.io.FileInputStream;import java.io.FileNotFoundException;import java.io.FileOutputStream;import java.io.IOException;import java.io.InputStream;import java.io.InputStreamReader;import java.io.StringWriter;import java.nio.file.Paths;import java.util.ArrayList;import java.util.List;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.document.TextField;import org.apache.lucene.index.DirectoryReader;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.index.Term;import org.apache.lucene.queryparser.classic.ParseException;import org.apache.lucene.queryparser.classic.QueryParser;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.TopDocs;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.apache.pdfbox.cos.COSDocument;import org.apache.pdfbox.pdmodel.PDDocument;import org.apache.pdfbox.text.PDFTextStripper;import org.apache.poi.POIXMLDocument;import org.apache.poi.POIXMLTextExtractor;import org.apache.poi.hslf.extractor.PowerPointExtractor;import org.apache.poi.hssf.extractor.ExcelExtractor;import org.apache.poi.hssf.usermodel.HSSFWorkbook;import org.apache.poi.hwpf.extractor.WordExtractor;import org.apache.poi.openxml4j.exceptions.OpenXML4JException;import org.apache.poi.openxml4j.opc.OPCPackage;import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;import org.apache.poi.xslf.usermodel.XMLSlideShow;import org.apache.poi.xssf.usermodel.XSSFCell;import org.apache.poi.xssf.usermodel.XSSFRow;import org.apache.poi.xssf.usermodel.XSSFSheet;import 
org.apache.poi.xssf.usermodel.XSSFWorkbook;import org.apache.poi.xwpf.extractor.XWPFWordExtractor;import org.apache.xmlbeans.XmlException;import org.springframework.stereotype.Service;import com.artofsolving.jodconverter.DocumentConverter;import com.artofsolving.jodconverter.openoffice.connection.OpenOfficeConnection;import com.artofsolving.jodconverter.openoffice.connection.SocketOpenOfficeConnection;import com.artofsolving.jodconverter.openoffice.converter.OpenOfficeDocumentConverter;import com.lowagie.text.Font;import com.lowagie.text.PageSize;import com.lowagie.text.Paragraph;import com.lowagie.text.pdf.BaseFont;import com.lowagie.text.pdf.PdfWriter;import com.po.QuestionQuery;@Servicepublic class LuceneServiceImpl implements LuceneService {    private static POIXMLTextExtractor ex;    private static WordExtractor extractor;    private BufferedReader in;    private BufferedInputStream bin;    private ExcelExtractor extractor2;    private XSSFWorkbook xwb;    /**     * 创建文件索引     * fileName 文件名称     * id 存储在数据库文件信息ID     */    @Override    public void createFileIndex(String fileName, int id) throws IOException {        String sourceDir = "G:\\exam\\source\\";//存储文件地方        String indexDir = "G:\\exam\\index\\";// 这里放索引文件的位置        String srcPath = sourceDir + fileName;//文件的路径        //生成源文件对应的PDF文件        String desPath = sourceDir + fileName.substring(0,fileName.lastIndexOf('.'))+".pdf";        IndexWriter indexWriter = null;        try {            // 创建一个分析器            Analyzer analyzer = new StandardAnalyzer();            // Directory是用于索引文件的存储的抽象类,其子类有将索引文件写到文件的            Directory directory = FSDirectory.open(Paths.get(indexDir));            // 配置并新建索引            IndexWriterConfig config = new IndexWriterConfig(analyzer);            indexWriter = new IndexWriter(directory, config);            File file = new File(sourceDir + fileName);// 往索引中写入文档            if (file.isFile()) {                //获取文件格式,然后获取文件内文件内容,图片除外,然后转换为PDF                String 
postfix = fileName                        .substring(fileName.lastIndexOf('.') + 1);                String temp = null;                //word文档                if ("doc".equalsIgnoreCase(postfix)                        || "docx".equalsIgnoreCase(postfix)) {                    try {                        //读取文档的信息,用于分词建立索引                        temp = readWord2007(sourceDir + fileName);                    } catch (Exception e) {                        try {                            temp = readWord(sourceDir + fileName);                        } catch (Exception e1) {                            e1.printStackTrace();                        }                    }                    //转换成PDF文件,用于在线预览                    Word2Pdf(srcPath, desPath);                } else if ("txt".equalsIgnoreCase(postfix)) {                    temp = FileReaderAll(sourceDir + fileName);                    txt2pdf(srcPath,desPath);                } else if ("pptx".equalsIgnoreCase(postfix)                        || "ppt".equalsIgnoreCase(postfix)) {                    try {                        temp = getTextFromPPT(sourceDir + fileName);                    } catch (Exception e) {                        temp = getTextFromPPT2007(sourceDir + fileName);                    }                    Word2Pdf(srcPath, desPath);                } else if ("pdf".equalsIgnoreCase(postfix)) {                    temp = readPdf(sourceDir + fileName);                } else if ("xlsx".equalsIgnoreCase(postfix)){                    temp = readExcel2007(sourceDir + fileName);                    Word2Pdf(srcPath, desPath);                } else if ("xls".equalsIgnoreCase(postfix)){                    temp = readExcel(sourceDir + fileName);                    Word2Pdf(srcPath, desPath);                }                System.out.println(temp + " ");                Document document = new Document();// 新建文档                // 创建域                // 增加标题索引使用的是TextField,增加isbn索引使用的是StringField,                
//这两个都是IndexableField的子类,TextField表示是会被拆分并且被索引的字段,而StringField只会一个整体被索引,而不会进行拆分索引。                Field fieldId = new TextField("id", String.valueOf(id), Field.Store.YES);                Field body = new TextField("body", temp, Field.Store.NO);                document.add(fieldId);                document.add(body);                indexWriter.addDocument(document);            }            System.out.println("被索引的文档个数:" + indexWriter.numDocs());        } catch (IOException e) {            e.printStackTrace();        } finally {            if (indexWriter != null) {                try {                    indexWriter.close();// 关闭writer                } catch (IOException e) {                    e.printStackTrace();                }            }        }       }    /**     * 搜索文档时     * key 搜索关键词     * 返回 文件ID list 在数据库中文档的路径和其他信息     */    @Override    public List<Integer> createSeacher(String key) throws IOException,            ParseException {        List<Integer> ids = new ArrayList<Integer>();        /*         * 1.打开索引存储位置, 索引存储位置:文件系统或者内存         */        Directory directory = FSDirectory.open(Paths.get("G:/exam/index/"));// 索引存储在文件系统        DirectoryReader directoryReader = DirectoryReader.open(directory);// 打开索引        /*         * 2.创建搜索器         */        IndexSearcher searcher = new IndexSearcher(directoryReader);        /*         * 3.创建搜索条件 IKAnalyzer();         */        Analyzer analyzer = new StandardAnalyzer();// 分词器,第三方中文分词器        QueryParser queryParser = new QueryParser("body", analyzer);// QueryParser("搜索哪个域field",指定分词器);        Query query = queryParser.parse(key);// 需要搜索的关键词或内容        /*         * 搜索         */        if (searcher != null) {            // 搜索结果            TopDocs results = searcher.search(query, 5);// 排在前五的结果            // 获取搜索结果            ScoreDoc[] scoreDocs = results.scoreDocs;            for (ScoreDoc scoreDoc : scoreDocs) {                Document document = searcher.doc(scoreDoc.doc);                
System.out.println(document.get("id"));                ids.add(Integer.valueOf(document.get("id")));//获取相关文档ID            }        }        directory.close();        directoryReader.close();        return ids;    }    /**     * 删除文档索引     * id 文档ID     */    @Override    public void delete(String id) throws IOException {        Directory directory = FSDirectory.open(Paths.get("G:/exam/index"));//索引存储在文件系统        Analyzer analyzer = new StandardAnalyzer();        IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer));        writer.deleteDocuments(new Term("id",id));//只能删除text域的        writer.commit();        writer.close();        directory.close();    }    /**     * 读取txt信息,注意文档的编码问题     * @param FileName 文件路径     * @return     * @throws IOException     */    private String FileReaderAll(String FileName)            throws IOException {        bin = new BufferedInputStream(new FileInputStream(FileName));            int value = (bin.read() << 8) + bin.read();            String code = null;              switch (value) {                case 0xefbb:                    code = "UTF-8";                    break;                case 0xfffe:                    code = "Unicode";                    break;                case 0xfeff:                    code = "UTF-16BE";                    break;                default:                    code = "GBK";            }          BufferedReader reader = new BufferedReader(new InputStreamReader(                new FileInputStream(FileName), code));        String line = new String();        String temp = new String();        while ((line = reader.readLine()) != null) {            temp += line;        }        reader.close();        return temp;    }    /**     * 处理word2003     * @param path     * @return     * @throws Exception     */    private String readWord(String path) throws Exception {        String bodyText = null;        InputStream inputStream = new FileInputStream(path);        extractor = new 
WordExtractor(inputStream);        bodyText = extractor.getText();        return bodyText;    }    /**     * 处理word2007     * @param path     * @return     * @throws Exception     */    private String readWord2007(String path) throws IOException,            OpenXML4JException, XmlException {        OPCPackage opcPackage = POIXMLDocument.openPackage(path);        ex = new XWPFWordExtractor(opcPackage);        return ex.getText();    }    /**     * 处理ppt2003     * @param filePath     * @return     */    private String getTextFromPPT(String filePath) {        InputStream is = null;        PowerPointExtractor extractor = null;        String text = "";        try {            is = new FileInputStream(filePath);            extractor = new PowerPointExtractor(is);            text = extractor.getText();            extractor.close();        } catch (FileNotFoundException e) {            e.printStackTrace();        } catch (IOException e) {            e.printStackTrace();        }        return text;    }    /**     * 读取PPT2007     *      * @param filePath     * @return     */    private String getTextFromPPT2007(String filePath) {        InputStream is = null;        XMLSlideShow slide = null;        String text = "";        try {            is = new FileInputStream(filePath);            slide = new XMLSlideShow(is);            XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(                    slide);            text = extractor.getText();            extractor.close();        } catch (FileNotFoundException e) {            e.printStackTrace();        } catch (IOException e) {            e.printStackTrace();        }        return text;    }    /**     * 处理pdf     * @param path     * @return     * @throws IOException     */    private String readPdf(String path) throws IOException {        StringBuffer content = new StringBuffer("");// 文档内容        PDDocument pdfDocument = null;        try {            FileInputStream fis = new FileInputStream(path);           
 PDFTextStripper stripper = new PDFTextStripper();            pdfDocument = PDDocument.load(fis);            StringWriter writer = new StringWriter();            stripper.writeText(pdfDocument, writer);            content.append(writer.getBuffer().toString());            fis.close();        } catch (java.io.IOException e) {            System.err.println("IOException=" + e);            System.exit(1);        } finally {            if (pdfDocument != null) {                COSDocument cos = pdfDocument.getDocument();                cos.close();                pdfDocument.close();            }        }        return content.toString();    }     /**      * 处理excel2003      * @param path      * @return      * @throws IOException      */      private  String readExcel(String path) throws IOException {          InputStream inputStream = null;          String content = null;          try {              inputStream = new FileInputStream(path);              HSSFWorkbook wb = new HSSFWorkbook(inputStream);              extractor2 = new ExcelExtractor(wb);              extractor2.setFormulasNotResults(true);              extractor2.setIncludeSheetNames(false);              content = extractor2.getText();          } catch (FileNotFoundException e) {              e.printStackTrace();          }          return content;      }       /**      * 处理excel2007      * @param path      * @return      * @throws IOException      */      private  String readExcel2007(String path) throws IOException {          StringBuffer content = new StringBuffer();          xwb = new XSSFWorkbook(path);          // 循环工作表Sheet          for (int numSheet = 0; numSheet < xwb.getNumberOfSheets(); numSheet++) {              XSSFSheet xSheet = xwb.getSheetAt(numSheet);              if (xSheet == null) {                  continue;              }              // 循环行Row              for (int rowNum = 0; rowNum <= xSheet.getLastRowNum(); rowNum++) {                  XSSFRow xRow = xSheet.getRow(rowNum);           
       if (xRow == null) {                      continue;                  }                  // 循环列Cell                  for (int cellNum = 0; cellNum <= xRow.getLastCellNum(); cellNum++) {                      XSSFCell xCell = xRow.getCell(cellNum);                      if (xCell == null) {                          continue;                      }                      if (xCell.getCellType() == XSSFCell.CELL_TYPE_BOOLEAN) {                          content.append(xCell.getBooleanCellValue());                      } else if (xCell.getCellType() == XSSFCell.CELL_TYPE_NUMERIC) {                          content.append(xCell.getNumericCellValue());                      } else {                          content.append(xCell.getStringCellValue());                      }                  }              }          }          return content.toString();      }      /**     * 将office文件转换成pdf     * @param srcPath     * @param desPath     * @throws IOException     */    public void Word2Pdf(String srcPath, String desPath) throws IOException {        // 源文件目录        File inputFile = new File(srcPath);        if (!inputFile.exists()) {            System.out.println("源文件不存在!");            return;        }        // 输出文件目录        File outputFile = new File(desPath);        if (!outputFile.getParentFile().exists()) {            outputFile.getParentFile().exists();        }        // 调用openoffice服务线程        String command = "  C:/Program Files (x86)/OpenOffice 4/program/soffice.exe -headless -accept=\"socket,host=127.0.0.1,port=8100;urp;\"";        Process p = Runtime.getRuntime().exec(command);        // 连接openoffice服务        OpenOfficeConnection connection = new SocketOpenOfficeConnection(                "127.0.0.1", 8100);        connection.connect();        // 转换word到pdf        DocumentConverter converter = new OpenOfficeDocumentConverter(                connection);        converter.convert(inputFile, outputFile);        // 关闭连接        connection.disconnect();        // 关闭进程   
     p.destroy();        System.out.println("转换完成!");    }    /**     * txt转pdf     * @param srcPath     * @param desPath     */    private  void txt2pdf(String srcPath, String desPath) {        try {            //首先创建一个字体            BaseFont bfChinese = BaseFont.createFont("STSong-Light", "UniGB-UCS2-H", BaseFont.NOT_EMBEDDED);            Font FontChinese = new Font(bfChinese, 12, Font.NORMAL);            String line = null;            com.lowagie.text.Document document;            document = new  com.lowagie.text.Document(PageSize.A4, 50, 50, 50, 50);            in = new BufferedReader(new InputStreamReader(new FileInputStream(srcPath),"UTF-8"));            PdfWriter.getInstance(document, new FileOutputStream(desPath));            document.open();            while ((line = in.readLine()) != null)                document.add(new Paragraph( line, FontChinese));            document.close();        }catch(Exception e) {            System.err.println(e.getMessage());        }    } }
2 0
原创粉丝点击