使用NPOI读取Word、Excel文档内容

来源:互联网 发布:免费企业名录软件 编辑:程序博客网 时间:2024/05/22 10:45


使用NPOI读取Excel的例子很多,读取Word的例子不多。

Excel的解析方式有多种,可以使用ODBC查询,把Excel作为一个数据集对待。也可以使用文档结构模型的方式进行解析,即解析Workbook(工作簿)、Sheet、Row、Column。

Word的解析比较复杂,因为Word的文档结构模型定义较为复杂。解析Word或者Excel,关键是理解Word、Excel的文档对象模型。

Word、Excel文档对象模型的解析,可以通过COM接口调用,此类方式使用较广。(可以录制宏代码,然后替换为对应的语言)

也可以使用XML模型解析,尤其是对于2007、2010版本的文档的解析。

复制代码
  using NPOI.POIFS.FileSystem;
using NPOI.SS.UserModel;
using NPOI.XSSF.UserModel;
using NPOI.XWPF.UserModel;
using System;
using System.Collections.Generic;
using System.Configuration;
using System.IO;
using System.Text;

namespace eyuan
{
    /// <summary>
    /// Helpers that extract text from Excel workbooks (via <see cref="XSSFWorkbook"/>)
    /// and Word documents (via <see cref="XWPFDocument"/>) using NPOI.
    /// Separators and capture switches are read from the application's appSettings.
    /// </summary>
    public static class NOPIHandler
    {
        /// <summary>
        /// Reads every sheet of a workbook into nested lists: workbook -> sheet -> row -> cell text.
        /// </summary>
        /// <param name="fileName">Path of the workbook file to open.</param>
        /// <returns>
        /// One list per sheet, each holding one list per existing row, each holding the
        /// <c>ToString()</c> text of the cells stored in that row. An empty list is returned
        /// when the file cannot be opened; the failure is written to the log.
        /// </returns>
        public static List<List<List<string>>> ReadExcel(string fileName)
        {
            List<List<List<string>>> workBookContent = new List<List<List<string>>>();

            XSSFWorkbook workbook = OpenWorkbook(fileName);
            if (workbook == null)
            {
                // The failure has already been logged; there is nothing to read.
                return workBookContent;
            }

            // Sheet indexes start at 0.
            int sheetsCount = workbook.NumberOfSheets;
            for (int i = 0; i < sheetsCount; i++)
            {
                ISheet sheet = workbook.GetSheetAt(i);
                List<List<string>> sheetContent = new List<List<string>>();

                // GetRow expects a logical row index, so walk the logical range and skip
                // rows that were never created. Bounding the loop by PhysicalNumberOfRows
                // would dereference null on the first gap and miss the trailing rows.
                for (int j = sheet.FirstRowNum; j <= sheet.LastRowNum; j++)
                {
                    IRow row = sheet.GetRow(j);
                    if (row == null)
                    {
                        continue;
                    }

                    // Row.Cells holds only the cells stored in this row, so the number of
                    // entries can differ from row to row.
                    List<string> rowContent = new List<string>();
                    foreach (ICell cell in row.Cells)
                    {
                        rowContent.Add(cell == null ? "NIL" : cell.ToString());
                    }

                    sheetContent.Add(rowContent);
                }

                workBookContent.Add(sheetContent);
            }

            return workBookContent;
        }

        /// <summary>
        /// Reads a workbook and flattens it into a single string.
        /// </summary>
        /// <param name="fileName">Path of the workbook file to open.</param>
        /// <returns>
        /// The cell texts of each row joined with the <c>ExcelCellSeparator</c> setting, each row
        /// followed by <c>ExcelRowSeparator</c> and each sheet followed by
        /// <c>ExcelSheetSeparator</c>. Empty when the workbook cannot be opened.
        /// </returns>
        public static string ReadExcelText(string fileName)
        {
            string cellSeparator = ConfigurationManager.AppSettings["ExcelCellSeparator"];
            string rowSeparator = ConfigurationManager.AppSettings["ExcelRowSeparator"];
            string sheetSeparator = ConfigurationManager.AppSettings["ExcelSheetSeparator"];

            StringBuilder sbFileText = new StringBuilder();
            foreach (List<List<string>> sheetContent in ReadExcel(fileName))
            {
                foreach (List<string> rowContent in sheetContent)
                {
                    sbFileText.Append(string.Join(cellSeparator, rowContent.ToArray()));
                    sbFileText.Append(rowSeparator);
                }

                sbFileText.Append(sheetSeparator);
            }

            return sbFileText.ToString();
        }

        /// <summary>
        /// Reads the text of a Word document. Headers, footers, tables and embedded pictures are
        /// captured only when the matching <c>CaptureWord*</c> setting equals "true"; body
        /// paragraphs are always captured.
        /// </summary>
        /// <param name="fileName">Path of the Word document to open.</param>
        /// <returns>
        /// The captured text, each section wrapped in "Capture ... Begin" / "Capture ... End"
        /// marker lines. Empty when the document cannot be opened; the failure is written to the log.
        /// </returns>
        /// <remarks>
        /// When picture capture is enabled, every embedded picture is written to disk under a name
        /// built from the <c>CaptureWordImageFileName</c> format string, and that name is appended
        /// to the returned text.
        /// </remarks>
        public static string ReadWordText(string fileName)
        {
            string tableCellSeparator = ConfigurationManager.AppSettings["WordTableCellSeparator"];
            string tableRowSeparator = ConfigurationManager.AppSettings["WordTableRowSeparator"];
            string tableSeparator = ConfigurationManager.AppSettings["WordTableSeparator"];

            string captureHeader = ConfigurationManager.AppSettings["CaptureWordHeader"];
            string captureFooter = ConfigurationManager.AppSettings["CaptureWordFooter"];
            string captureTable = ConfigurationManager.AppSettings["CaptureWordTable"];
            string captureImage = ConfigurationManager.AppSettings["CaptureWordImage"];

            string imageFileNameFormat = ConfigurationManager.AppSettings["CaptureWordImageFileName"];

            XWPFDocument document = OpenDocument(fileName);
            if (document == null)
            {
                // The failure has already been logged; there is nothing to read.
                return string.Empty;
            }

            StringBuilder sbFileText = new StringBuilder();

            #region Headers and footers
            if (captureHeader == "true")
            {
                sbFileText.AppendLine("Capture Header Begin");
                foreach (XWPFHeader xwpfHeader in document.HeaderList)
                {
                    sbFileText.AppendLine(xwpfHeader.Text);
                }
                sbFileText.AppendLine("Capture Header End");
            }

            if (captureFooter == "true")
            {
                sbFileText.AppendLine("Capture Footer Begin");
                foreach (XWPFFooter xwpfFooter in document.FooterList)
                {
                    sbFileText.AppendLine(xwpfFooter.Text);
                }
                sbFileText.AppendLine("Capture Footer End");
            }
            #endregion

            #region Tables
            if (captureTable == "true")
            {
                sbFileText.AppendLine("Capture Table Begin");
                foreach (XWPFTable table in document.Tables)
                {
                    foreach (XWPFTableRow row in table.Rows)
                    {
                        foreach (XWPFTableCell cell in row.GetTableCells())
                        {
                            // The separator is written after every cell, including the last one.
                            sbFileText.Append(cell.GetText());
                            sbFileText.Append(tableCellSeparator);
                        }

                        sbFileText.Append(tableRowSeparator);
                    }

                    sbFileText.Append(tableSeparator);
                }
                sbFileText.AppendLine("Capture Table End");
            }
            #endregion

            #region Pictures
            if (captureImage == "true")
            {
                sbFileText.AppendLine("Capture Image Begin");
                foreach (XWPFPictureData pictureData in document.AllPictures)
                {
                    string picExtName = pictureData.suggestFileExtension();
                    string picFileName = pictureData.GetFileName();
                    byte[] picFileContent = pictureData.GetData();

                    // A fresh GUID prefix keeps pictures from different runs or documents
                    // from overwriting each other.
                    string picTempName = string.Format(
                        imageFileNameFormat,
                        Guid.NewGuid().ToString() + "_" + picFileName + "." + picExtName);

                    File.WriteAllBytes(picTempName, picFileContent);

                    sbFileText.AppendLine(picTempName);
                }
                sbFileText.AppendLine("Capture Image End");
            }
            #endregion

            // Body paragraphs are always captured.
            sbFileText.AppendLine("Capture Paragraph Begin");
            foreach (XWPFParagraph paragraph in document.Paragraphs)
            {
                sbFileText.AppendLine(paragraph.ParagraphText);
            }
            sbFileText.AppendLine("Capture Paragraph End");

            return sbFileText.ToString();
        }

        /// <summary>
        /// Opens a workbook from disk; logs the error and returns null when that fails.
        /// </summary>
        private static XSSFWorkbook OpenWorkbook(string fileName)
        {
            try
            {
                using (FileStream file = new FileStream(fileName, FileMode.Open, FileAccess.Read))
                {
                    return new XSSFWorkbook(file);
                }
            }
            catch (Exception e)
            {
                LogHandler.LogWrite(string.Format("文件{0}打开失败,错误:{1}", fileName, e.ToString()));
                return null;
            }
        }

        /// <summary>
        /// Opens a Word document from disk; logs the error and returns null when that fails.
        /// </summary>
        private static XWPFDocument OpenDocument(string fileName)
        {
            try
            {
                using (FileStream file = new FileStream(fileName, FileMode.Open, FileAccess.Read))
                {
                    return new XWPFDocument(file);
                }
            }
            catch (Exception e)
            {
                LogHandler.LogWrite(string.Format("文件{0}打开失败,错误:{1}", fileName, e.ToString()));
                return null;
            }
        }
    }
}
原创粉丝点击