如何将office文档(.doc、.xls、.ppt)转换成mht文档(代码篇)

来源:互联网 发布:资格证书制作软件 编辑:程序博客网 时间:2024/05/22 12:46
using System;
using Microsoft.HtmlTrans;
using System.Text;
using System.IO;
using System.Collections;

namespace OfficeDocConvertMHTML
{
   /// <summary>
   /// Converts an Office document (.doc, .xls, .ppt) into a single MHTML (.mht) file.
   /// The document is sent to the HTML transformation service (Microsoft.HtmlTrans) through
   /// .NET remoting; the returned HTML main file and its supporting ("thicket") files are
   /// then packed into one multipart/related MIME document.
   /// </summary>
   public class Conversion
   {
     // Encoding used to turn the MIME header text and the Base64 text into bytes.
     protected static Encoding encoding = Encoding.Default;
     // URL used to obtain the IHtmlTrLoadBalancer remoting object.
     protected static string strServiceUrl = "http://localhost:8093/HtmlTrLoadBalancer";

     /// <summary>
     /// Converts <paramref name="inputfile"/> to MHTML and writes it to
     /// <paramref name="outputfile"/>. When the service reports a creation error or returns
     /// no main file, nothing is written and the method returns normally.
     /// </summary>
     /// <param name="inputfile">Path of the Office document; also used as the task name.</param>
     /// <param name="outputfile">Path of the .mht file to create (overwritten if present).</param>
     /// <exception cref="ArgumentNullException">Either path is null.</exception>
     public static void ConvertMHT(string inputfile, string outputfile)
     {
       if(inputfile == null)
          throw new ArgumentNullException("inputfile");
       if(outputfile == null)
          throw new ArgumentNullException("outputfile");

       // Read the document before reserving a launcher, so that an unreadable input file
       // never leaves a task registered on the load balancer.
       byte[] bFile = File.ReadAllBytes(inputfile);

       // Obtain the IHtmlTrLoadBalancer remoting object from strServiceUrl.
       IHtmlTrLoadBalancer htmlTrLoadBalancer =
          (IHtmlTrLoadBalancer)System.Activator.GetObject(
          typeof(IHtmlTrLoadBalancer), strServiceUrl);

       // The input file name doubles as the task identifier.
       string strTask = inputfile;

       // Register the task and get the URL of the launcher that will execute it.
       string strLauncherUri = htmlTrLoadBalancer.StrGetLauncher(strTask);

       CreateHtmlInfo chi;
       try
       {
          IHtmlTrLauncher htmlTrLauncher =
             (IHtmlTrLauncher)System.Activator.GetObject(typeof(IHtmlTrLauncher), strLauncherUri);

          // CHICreateHtml arguments, in order:
          //   strLauncherUri   URL of the task's launcher
          //   rgbFile          binary content of the Office document
          //   bt               browser type (enum)
          //   strReqFile       path/URL of the Office document
          //   strTaskName      task name the conversion server uses to track the request
          //   timeout          conversion timeout; use a larger value on slow networks
          //   fReturnFileBits  whether the file bytes are returned in
          //                    CreateHtmlInfo.rgbMainFile and rgrgbThicketFiles
          chi = htmlTrLauncher.CHICreateHtml(strLauncherUri, bFile,
             BrowserType.BT_IE4, inputfile, strTask, 120, true);
       }
       finally
       {
          // Always report the task as finished, even when the conversion call throws.
          htmlTrLoadBalancer.LauncherTaskCompleted(strLauncherUri, strTask);
       }

       // Only produce output when the conversion succeeded and a main file exists.
       if(chi.ce != CreationErrorType.CE_NONE || !chi.fHasMainFile)
          return;

       // Build the whole body first so that a failure here does not leave a
       // truncated or empty output file behind.
       byte[] bMHTMLBody = CreateMHTMLBody(chi);
       File.WriteAllBytes(outputfile, bMHTMLBody);
     }

     // MHTML file header; {0} is the boundary string.
     protected static string MIME   =
       "MIME-Version: 1.0" + Environment.NewLine +
       "Content-Type: multipart/related; boundary=\"{0}\"" + Environment.NewLine +
       Environment.NewLine;
     // Header of each part: {0} boundary, {1} location, {2} transfer encoding, {3} content type.
     protected static string HEADER  =
       Environment.NewLine + "--{0}" + Environment.NewLine +
       "Content-Location: {1}" + Environment.NewLine +
       "Content-Transfer-Encoding: {2}" + Environment.NewLine +
       "Content-Type: {3}" + Environment.NewLine +
       Environment.NewLine;
     // Separator placed between the parts of the MHTML file.
     protected static string BOUNDARY =  "Define_It_Youself";
     // Base URL used for the Content-Location of every part.
     protected static string LOCATION =  "http://MySiteUrl/";

     /// <summary>
     /// Packs the main HTML file and any thicket files of <paramref name="creatHtmlInfo"/>
     /// into the bytes of one MHTML document.
     /// </summary>
     /// <param name="creatHtmlInfo">Result returned by CHICreateHtml with file bits included.</param>
     /// <returns>The complete MHTML file content.</returns>
     private static byte[] CreateMHTMLBody(CreateHtmlInfo creatHtmlInfo)
     {
       byte[] bNewLine = Encoding.UTF8.GetBytes(Environment.NewLine);
       MemoryStream msBody = new MemoryStream();

       // File header.
       WriteBytes(msBody, encoding.GetBytes(string.Format(MIME, BOUNDARY)));

       // Main file: header followed by the quoted-printable body.
       string strMainHeader = string.Format(HEADER,
          BOUNDARY,
          LOCATION + creatHtmlInfo.strMainFileName,
          TransferEncoding.QUOTED_PRINTABLE,
          ContentType.TEXT_HTML);
       WriteBytes(msBody, encoding.GetBytes(strMainHeader));
       WriteQuotedPrintable(msBody, creatHtmlInfo.rgbMainFile);

       // Thicket files: each part is header + encoded body + one line break.
       if(creatHtmlInfo.fHasThicket)
       {
          for(int i = 0; i < creatHtmlInfo.rgrgbThicketFiles.Length; i++)
          {
             string strFileName = creatHtmlInfo.rgstrThicketFileNames[i];
             string strLocation = LOCATION +
                creatHtmlInfo.strThicketFolderName + "/" + strFileName;
             string strTransferEncoding =
                TransferEncoding.GetTransferEncodingByFileName(strFileName);
             string strContentType = ContentType.GetContentTypeByFileName(strFileName);
             string strThicketHeader = string.Format(HEADER,
                BOUNDARY,
                strLocation,
                strTransferEncoding,
                strContentType);
             WriteBytes(msBody, encoding.GetBytes(strThicketHeader));

             if(strTransferEncoding == TransferEncoding.BASE64)
                WriteBase64(msBody, creatHtmlInfo.rgrgbThicketFiles[i], bNewLine);
             else
                WriteQuotedPrintable(msBody, creatHtmlInfo.rgrgbThicketFiles[i]);

             WriteBytes(msBody, bNewLine);
          }
       }

       // Closing boundary.
       WriteBytes(msBody, encoding.GetBytes(
          Environment.NewLine + "--" + BOUNDARY + "--" + Environment.NewLine));

       return msBody.ToArray();
     }

     // Appends every byte of bData to the stream.
     private static void WriteBytes(Stream output, byte[] bData)
     {
       output.Write(bData, 0, bData.Length);
     }

     // Writes bData quoted-printable encoded: '=' and every byte that is not printable
     // ASCII, TAB, CR or LF becomes "=XX" (upper-case hex). Lines are not re-wrapped.
     private static void WriteQuotedPrintable(Stream output, byte[] bData)
     {
       const string strHexDigits = "0123456789ABCDEF";
       const byte bEquals = 61;
       for(int i = 0; i < bData.Length; i++)
       {
          byte b = bData[i];
          bool fLiteral = (b >= 32 && b <= 126 && b != bEquals)
             || b == 9 || b == 10 || b == 13;
          if(fLiteral)
          {
             output.WriteByte(b);
          }
          else
          {
             output.WriteByte(bEquals);
             output.WriteByte((byte)strHexDigits[b >> 4]);
             output.WriteByte((byte)strHexDigits[b & 0x0F]);
          }
       }
     }

     // Writes bData Base64 encoded, with a line break after every complete 76-byte line.
     private static void WriteBase64(Stream output, byte[] bData, byte[] bNewLine)
     {
       const int LINE_LENGTH = 76;
       byte[] bEncoded = encoding.GetBytes(Convert.ToBase64String(bData));
       for(int offset = 0; offset < bEncoded.Length; offset += LINE_LENGTH)
       {
          int count = Math.Min(LINE_LENGTH, bEncoded.Length - offset);
          output.Write(bEncoded, offset, count);
          if(count == LINE_LENGTH)
             output.Write(bNewLine, 0, bNewLine.Length);
       }
     }
   }
   // Chooses the Content-Transfer-Encoding of an MHTML part from its file extension.
   class TransferEncoding
   {
     public const string QUOTED_PRINTABLE =  "quoted-printable";
     public const string BASE64        =  "base64";

     /// <summary>
     /// Returns the transfer encoding for <paramref name="fileName"/>: Base64 for the known
     /// binary extensions, quoted-printable for everything else (including names that have
     /// no extension).
     /// </summary>
     /// <param name="fileName">File name whose extension decides the encoding.</param>
     /// <exception cref="ArgumentNullException"><paramref name="fileName"/> is null.</exception>
     public static string GetTransferEncodingByFileName(string fileName)
     {
       switch(GetUpperExtension(fileName))
       {
          // Binary files are stored Base64 encoded.
          case ".JPG":
          case ".JPEG":
          case ".JEPG": // historical misspelling, kept so existing behavior is unchanged
          case ".PNG":
          case ".MSO":
          case ".EMZ":
          case ".GIF":
          case ".WMF":
             return TransferEncoding.BASE64;
          // .HTM, .HTML, .CSS, .XML and anything unknown are stored as text.
          default:
             return TransferEncoding.QUOTED_PRINTABLE;
       }
     }

     // Returns the text from the last '.' onward, upper-cased culture-independently,
     // or an empty string when the name contains no '.'.
     private static string GetUpperExtension(string fileName)
     {
       if(fileName == null)
          throw new ArgumentNullException("fileName");
       int dot = fileName.LastIndexOf('.');
       return dot < 0 ? string.Empty : fileName.Substring(dot).ToUpperInvariant();
     }
   }
   // Chooses the Content-Type of an MHTML part from its file extension.
   class ContentType
   {
     public const string TEXT_HTML       =  "text/html; charset=\"us-ascii\"";
     public const string APPLICATION_XMSO =  "application/x-mso";
     public const string IMAGE_XEMZ      =  "image/x-emz";
     public const string IMAGE_GIF       =  "image/gif";
     public const string TEXT_CSS       =  "text/css";
     public const string TEXT_XML       =  "text/xml; charset=\"utf-8\"";
     public const string IMAGE_XWMF      =  "image/x-wmf";
     public const string IMAGE_PNG       =  "image/png";
     public const string IMAGE_JPEG      =  "image/jpeg";

     /// <summary>
     /// Returns the content type for <paramref name="fileName"/> based on its extension,
     /// or an empty string when the extension is missing or not recognized.
     /// </summary>
     /// <param name="fileName">File name whose extension decides the content type.</param>
     /// <exception cref="ArgumentNullException"><paramref name="fileName"/> is null.</exception>
     public static string GetContentTypeByFileName(string fileName)
     {
       switch(GetUpperExtension(fileName))
       {
          case ".HTM":
          case ".HTML":
             return ContentType.TEXT_HTML;
          case ".MSO":
             return ContentType.APPLICATION_XMSO;
          case ".EMZ":
             return ContentType.IMAGE_XEMZ;
          case ".GIF":
             return ContentType.IMAGE_GIF;
          case ".CSS":
             return ContentType.TEXT_CSS;
          case ".XML":
             return ContentType.TEXT_XML;
          case ".WMF":
             return ContentType.IMAGE_XWMF;
          case ".PNG":
             return ContentType.IMAGE_PNG;
          case ".JPG":
          case ".JPEG":
          case ".JEPG": // historical misspelling, kept so existing behavior is unchanged
             return ContentType.IMAGE_JPEG;
          default:
             return string.Empty;
       }
     }

     // Returns the text from the last '.' onward, upper-cased culture-independently,
     // or an empty string when the name contains no '.'.
     private static string GetUpperExtension(string fileName)
     {
       if(fileName == null)
          throw new ArgumentNullException("fileName");
       int dot = fileName.LastIndexOf('.');
       return dot < 0 ? string.Empty : fileName.Substring(dot).ToUpperInvariant();
     }
   }
}
原创粉丝点击