jsp页面上传Word,用POI读取word里的内容

来源:互联网 发布:知乎股份构成 编辑:程序博客网 时间:2024/06/05 14:44
工作中需要实现Word文档的上传,读取其中的数据并进行比对,得到需要的信息,再把比对后的数据存入数据库。这个需求真是难倒我了,查找了很长时间才实现了Word的读取。目前实现了两种读取方式:一、直接读取全文;二、按段落逐段读取。数据比对部分等我做出来之后再接着更新。代码参考了别人的实现,出处已经记不清了。



此程序只能读取固定位置(项目根目录下)的Word文件;从其他位置上传Word的方式研究了一阵,还没有找到获取文件路径的办法。


import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.io.PushbackInputStream;


import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;


import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;






/*
 *读取项目目录下的 test.docx 内容,以流的形式返回给页面 
 * */
public class ReadWord extends HttpServlet {


/**
* The doGet method of the servlet. <br>
*
* This method is called when a form has its tag value method equals to get.

* @param request the request send by the client to the server
* @param response the response send by the server to the client
* @throws ServletException if an error occurred
* @throws IOException if an error occurred
*/
public void doGet(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {


doPost(request,response);
}


/**
* The doPost method of the servlet. <br>
*
* This method is called when a form has its tag value method equals to post.

* @param request the request send by the client to the server
* @param response the response send by the server to the client
* @throws ServletException if an error occurred
* @throws IOException if an error occurred
*/
public void doPost(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {


 request.setCharacterEncoding("gbk");
 response.setContentType("text/plain;charset=gbk");
 String file = request.getParameter("boy");
//  获得项目根目录地址
 String rootPath = this.getServletConfig().getServletContext().getRealPath("/");
//  获得文件完整的地址
 String path=rootPath+file; 
 String root = getServletContext().getRealPath("/");   
 InputStream in = new FileInputStream(path);
 
 System.out.println(path);
 String bodyText=""; 
 try {
//  转换成  PushbackinputStream
  if (!in.markSupported()) {
           in = new PushbackInputStream(in, 8);
       } 
//  其他word版本
 if(POIFSFileSystem.hasPOIFSHeader(in))
 {
 HWPFDocument document = new HWPFDocument(in);
 WordExtractor extractor = new WordExtractor(document);
 bodyText = extractor.getText(); 
 response.getWriter().write(bodyText);
 return ;
 }
//   07 版本
  XWPFDocument document = new XWPFDocument(in);
  XWPFWordExtractor extractor =new XWPFWordExtractor(document);
  bodyText = extractor.getText(); 
  response.getWriter().write(bodyText);
 
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
 
}


}





下面是按照段落读取Word里信息的代码,参考文章:http://blog.csdn.net/robinliu2010/article/details/7584173




import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.io.PushbackInputStream;


import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;


import org.apache.poi.hslf.model.textproperties.ParagraphFlagsTextProp;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Section;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;






/*
 *读取项目目录下的 test.docx 内容,以流的形式返回给页面 
 * */
public class Read2 extends HttpServlet {


/**
* The doGet method of the servlet. <br>
*
* This method is called when a form has its tag value method equals to get.

* @param request the request send by the client to the server
* @param response the response send by the server to the client
* @throws ServletException if an error occurred
* @throws IOException if an error occurred
*/
public void doGet(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {


doPost(request,response);
}


/**
* The doPost method of the servlet. <br>
*
* This method is called when a form has its tag value method equals to post.

* @param request the request send by the client to the server
* @param response the response send by the server to the client
* @throws ServletException if an error occurred
* @throws IOException if an error occurred
*/
public void doPost(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {


 request.setCharacterEncoding("gbk");
 response.setContentType("text/plain;charset=gbk");
 String file = request.getParameter("boy");
//  获得项目根目录地址
 String rootPath = this.getServletConfig().getServletContext().getRealPath("/");
//  获得文件完整的地址
 String path=rootPath+file; 
 String root = getServletContext().getRealPath("/");   
 InputStream in = new FileInputStream(path);
 
 
 String bodyText=""; 
 try {
//  转换成  PushbackinputStream
  if (!in.markSupported()) {
           in = new PushbackInputStream(in, 8);
       } 
//  其他word版本
 if(POIFSFileSystem.hasPOIFSHeader(in))
 {
 HWPFDocument doc = new HWPFDocument(in);
 Range r = doc.getRange();
 for (int x = 0; x < r.numSections(); x++) {
      Section s = r.getSection(x);
      String[] textString=new String[s.numParagraphs()];
      for (int y = 0; y < s.numParagraphs(); y++) {
             Paragraph p = s.getParagraph(y);
             for (int z = 0; z < p.numCharacterRuns(); z++) {
                    CharacterRun run = p.getCharacterRun(z);
                    //字符串文本
                    textString[y] = run.text();
                    System.out.println(textString[y]);
             }
      }
}
 return ;
 }
//   07 版本
  XWPFDocument document = new XWPFDocument(in);
  XWPFWordExtractor extractor =new XWPFWordExtractor(document);
  bodyText = extractor.getText(); 
  response.getWriter().write(bodyText);
 
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
 
}


}

原创粉丝点击