word转html

来源:互联网 发布:mac 关闭打开的软件 编辑:程序博客网 时间:2024/06/05 11:44
package com.gohouse.oss.servlet;import java.io.File;import java.io.FileInputStream;import java.io.FileOutputStream;import java.io.IOException;import java.io.InputStream;import java.io.OutputStream;import java.io.PrintWriter;import java.text.SimpleDateFormat;import java.util.Date;import java.util.Iterator;import java.util.List;import javax.naming.InitialContext;import javax.naming.NamingException;import javax.servlet.ServletException;import javax.servlet.http.HttpServlet;import javax.servlet.http.HttpServletRequest;import javax.servlet.http.HttpServletResponse;import org.apache.commons.fileupload.FileItem;import org.apache.commons.fileupload.FileItemFactory;import org.apache.commons.fileupload.disk.DiskFileItemFactory;import org.apache.commons.fileupload.servlet.ServletFileUpload;import org.apache.http.HttpEntity;import org.apache.http.HttpResponse;import org.apache.http.client.ClientProtocolException;import org.apache.http.client.HttpClient;import org.apache.http.client.methods.HttpPost;import org.apache.http.entity.mime.MultipartEntity;import org.apache.http.entity.mime.content.FileBody;import org.apache.http.impl.client.DefaultHttpClient;import org.apache.poi.hwpf.HWPFDocument;import org.apache.poi.hwpf.model.PicturesTable;import org.apache.poi.hwpf.usermodel.CharacterRun;import org.apache.poi.hwpf.usermodel.Picture;import org.apache.poi.hwpf.usermodel.Range;import com.gohouse.oss.util.ContextUtil;import com.gohouse.oss.util.HttpClientHelper;import com.gohouse.util.log.Log;import com.gohouse.util.log.Logger;import com.google.gson.Gson;public class WordToHtml extends HttpServlet {private static final long serialVersionUID = 1L;private static Log log = Logger.getLogger(WordToHtml.class);/**  * 回车符ASCII码  */private static final short ENTER_ASCII = 13;/**  * 空格符ASCII码  */private static final short SPACE_ASCII = 32;/**  * 水平制表符ASCII码  */private static final short TABULATION_ASCII = 9;private String htmlText = "";public String htmlTextTbl = "";public int counter = 0;public int beginPosi = 0;public int endPosi = 0;public int beginArray[];public int endArray[];public String htmlTextArray[];public boolean tblExist = false;public final String inputFile = "C:/Users/miju/Desktop/aa.doc";//public static void main(String argv[]) {//try {//getWordAndStyle(inputFile);//} catch (Exception e) {//// TODO Auto-generated catch block//e.printStackTrace();//}//}@SuppressWarnings("rawtypes")public void doPost(HttpServletRequest request, HttpServletResponse response)throws ServletException, IOException {request.setCharacterEncoding("UTF-8");response.setContentType("text/html");PrintWriter out = response.getWriter();String firePath = getServletContext().getRealPath("/files");FileItemFactory factory = new DiskFileItemFactory();ServletFileUpload upload = new ServletFileUpload(factory);upload.setHeaderEncoding("UTF-8");File file = null;FileInputStream in = null;try {List items = upload.parseRequest(request);if (null != items) {Iterator itr = items.iterator();while (itr.hasNext()) {FileItem item = (FileItem) itr.next();String uploadName = item.getName();if((uploadName.toLowerCase()).endsWith("doc")){if (item.isFormField()) {continue;} else {// 以当前精确到秒的日期为上传的文件的文件名SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddkkmmss");String fileName = sdf.format(new Date());// 创建上传文件夹,已时间命名file = new File(firePath + File.separator + fileName);if(!file.exists()){file.mkdirs();}// 保存word文件File savedFile = new File(file, item.getName());item.write(savedFile);in = new FileInputStream(savedFile);String content =  getWordAndStyle(in,firePath + File.separator + fileName);Gson gosn = new Gson();out.print("{'content':" + gosn.toJson(content) + "}");// 删除上传文件deleteFile(file);}}else{out.print("{'error': '请上传正确的word-2003格式'}");}}}} catch (Exception e) {e.printStackTrace();}out.flush();out.close();}/** * 读取每个文字样式 *  * @param fileName * @throws Exception */public String getWordAndStyle(InputStream in,String path) throws Exception {HWPFDocument doc = new HWPFDocument(in);// 取得文档中字符的总数  int length = doc.characterLength();// 创建图片容器  PicturesTable pTable = doc.getPicturesTable();htmlText = "";// 创建临时字符串,好加以判断一串字符是否存在相同格式  String tempString = "";for (int i = 0; i < length - 1; i++) {// 整篇文章的字符通过一个个字符的来判断,range为得到文档的范围  Range range = new Range(i, i + 1, doc);CharacterRun cr = range.getCharacterRun(0);if (pTable.hasPicture(cr)) {// 读写图片  tempString += this.readPicture(pTable, cr, path);} else {Range range2 = new Range(i + 1, i + 2, doc);// 第二个字符  CharacterRun cr2 = range2.getCharacterRun(0);// 当前字符  char currentChar = cr.text().charAt(0);// 判断是否为回车符  if (currentChar == ENTER_ASCII)tempString += "<br/>";// 判断是否为空格符  else if (currentChar == SPACE_ASCII)tempString += " ";// 判断是否为水平制表符  else if (currentChar == TABULATION_ASCII)tempString += "    ";// 比较前后2个字符是否具有相同的格式  boolean flag = compareCharStyle(cr, cr2);String fontStyle = "<span style='font-family:" + cr.getFontName() + ";font-size:" + cr.getFontSize()/ 2 + "pt;";if (cr.isBold())fontStyle += "font-weight:bold;";if (cr.isItalic())fontStyle += "font-style:italic;";if (flag && i != length - 2)tempString += currentChar;else if (!flag) {htmlText += fontStyle + "'>" + tempString + currentChar + "</span>";tempString = "";} elsehtmlText += fontStyle + "'>" + tempString + currentChar + "</span>";}}return htmlText;}/**  * 读写文档中的图片  *   * @param pTable  * @param cr  * @throws Exception  */private String readPicture(PicturesTable pTable, CharacterRun cr, String path)throws Exception {// 提取图片  Picture pic = pTable.extractPicture(cr, false);// 返回POI建议的图片文件名  String afileName = pic.suggestFullFileName();OutputStream out = new FileOutputStream(new File(path + File.separator + afileName));pic.writeImageContent(out);out.flush();out.close();// 上传图片HttpClientHelper.loginApi("", "");String token = ContextUtil.getToken(ContextUtil.API_KEY);String url = domain() + "/rest/images/attachments/json/0/0/0/0/-1/" + token;String guid = postUploadImg(url, new File(path + File.separator + afileName));return "<img src='" + domain() + "/rest/images/" + guid +"'/>";}/** * 上传图片 * @param url 上传图片地址 * @param file 图片文件 * @return guid */private String postUploadImg(String url,File file){try {// 上传图片HttpClient httpclient = new DefaultHttpClient();HttpPost httppost = new HttpPost(url);MultipartEntity reqEntity = new MultipartEntity();reqEntity.addPart("files", new FileBody(file));httppost.setEntity(reqEntity);log.info("执行: " + httppost.getRequestLine());HttpResponse response = httpclient.execute(httppost);log.info("StatusCode = " + response.getStatusLine().getStatusCode());HttpEntity resEntity = response.getEntity();String responseText = null;if (resEntity != null) {log.info("----------------------------------------");log.info(response.getStatusLine().toString());log.info("返回长度: " + resEntity.getContentLength());log.info("返回类型: " + resEntity.getContentType());InputStream in = resEntity.getContent();log.info("responseText = " + (responseText = HttpClientHelper.getStringByInputStream(in)));}if (resEntity != null) {InputStream is = resEntity.getContent();if (is != null) {is.close();}}return responseText.substring(responseText.indexOf("photo")+8, responseText.indexOf(",",responseText.indexOf("photo"))-1);} catch (ClientProtocolException e) {e.printStackTrace();} catch (IllegalStateException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}return "";}private boolean compareCharStyle(CharacterRun cr1, CharacterRun cr2) {if (cr1.isBold() == cr2.isBold() && cr1.isItalic() == cr2.isItalic()&& cr1.getFontName().equals(cr2.getFontName())&& cr1.getFontSize() == cr2.getFontSize()) {return true;}return false;}/** * 读取配置文件中的rest服务器地址 *  * @return */private String domain() {try {InitialContext ic = new InitialContext();return "http://" + (String) ic.lookup("java:comp/env/API_SITE_DOMAIN");} catch (NamingException e) {log.error("获取 domain 失败!" + e.getMessage());}return "";}/** * 删除文件夹 * @param file */private void deleteFile(File file) {File[] files = file.listFiles();for (File deleteFile : files) {if (deleteFile.isDirectory()) {// 如果是文件夹,则递归删除下面的文件后再删除该文件夹deleteFile(deleteFile);} else {deleteFile.delete();}}file.delete();}    }

0 0
原创粉丝点击