使用NPOI读取Word、Excel文档内容
来源:互联网 发布:免费企业名录软件 编辑:程序博客网 时间:2024/05/22 10:45
使用NPOI读取Excel的例子很多,读取Word的例子不多。
Excel的解析方式有多种,可以使用ODBC查询,把Excel作为一个数据集对待。也可以使用文档结构模型的方式进行解析,即解析Workbook(工作簿)、Sheet、Row、Column。
Word的解析比较复杂,因为Word的文档结构模型定义较为复杂。解析Word或者Excel,关键是理解Word、Excel的文档对象模型。
Word、Excel文档对象模型的解析,可以通过COM接口调用,此类方式使用较广。(可以录制宏代码,然后替换为对应的语言)
也可以使用XML模型解析,尤其是对于2007、2010版本的文档的解析。
using NPOI.POIFS.FileSystem;
using NPOI.SS.UserModel;
using NPOI.XSSF.UserModel;
using NPOI.XWPF.UserModel;
using System;
using System.Collections.Generic;
using System.Configuration;
using System.IO;
using System.Text;

namespace eyuan
{
    /// <summary>
    /// Reads the content of Excel (XSSF) and Word (XWPF) documents through NPOI
    /// and flattens it into nested string lists or plain text.
    /// </summary>
    public static class NOPIHandler
    {
        /// <summary>
        /// Reads every sheet of an Excel workbook into nested lists:
        /// workbook -> sheets -> rows -> cell texts.
        /// </summary>
        /// <param name="fileName">Path of the workbook file to open.</param>
        /// <returns>
        /// One list per sheet, each holding one list per existing row, each holding
        /// the text of the cells stored in that row. If the file cannot be opened the
        /// failure is logged and an empty list is returned.
        /// </returns>
        public static List<List<List<string>>> ReadExcel(string fileName)
        {
            List<List<List<string>>> workBookContent = new List<List<List<string>>>();

            // Open the workbook; the stream is only needed while the workbook is constructed.
            XSSFWorkbook workbook = null;
            try
            {
                using (FileStream file = new FileStream(fileName, FileMode.Open, FileAccess.Read))
                {
                    workbook = new XSSFWorkbook(file);
                }
            }
            catch (Exception e)
            {
                LogHandler.LogWrite(string.Format("文件{0}打开失败,错误:{1}", fileName, e.ToString()));
                // Without a workbook there is nothing to read; falling through would
                // dereference null, so return the (empty) result instead.
                return workBookContent;
            }

            // Sheet indexes start at 0.
            int sheetsCount = workbook.NumberOfSheets;
            for (int i = 0; i < sheetsCount; i++)
            {
                ISheet sheet = workbook.GetSheetAt(i);
                List<List<string>> sheetContent = new List<List<string>>();

                // Walk up to the last row index rather than the physical row count:
                // when a sheet has gaps, GetRow returns null for the missing indexes
                // and the physical count is smaller than the last index.
                int lastRowIndex = sheet.LastRowNum;
                for (int j = 0; j <= lastRowIndex; j++)
                {
                    IRow row = sheet.GetRow(j);
                    if (row == null)
                    {
                        continue;
                    }

                    // Rows may have different numbers of cells; only stored cells are visited.
                    List<string> rowContent = new List<string>();
                    foreach (ICell cell in row.Cells)
                    {
                        rowContent.Add(cell == null ? "NIL" : cell.ToString());
                    }
                    sheetContent.Add(rowContent);
                }

                workBookContent.Add(sheetContent);
            }

            return workBookContent;
        }

        /// <summary>
        /// Reads an Excel workbook and joins its content into a single string.
        /// </summary>
        /// <param name="fileName">Path of the workbook file to open.</param>
        /// <returns>
        /// The cell texts of each row joined with the "ExcelCellSeparator" app setting,
        /// each row followed by "ExcelRowSeparator" and each sheet followed by
        /// "ExcelSheetSeparator".
        /// </returns>
        public static string ReadExcelText(string fileName)
        {
            string ExcelCellSeparator = ConfigurationManager.AppSettings["ExcelCellSeparator"];
            string ExcelRowSeparator = ConfigurationManager.AppSettings["ExcelRowSeparator"];
            string ExcelSheetSeparator = ConfigurationManager.AppSettings["ExcelSheetSeparator"];

            List<List<List<string>>> excelContent = ReadExcel(fileName);
            StringBuilder sbFileText = new StringBuilder();

            foreach (List<List<string>> sheetContent in excelContent)
            {
                foreach (List<string> rowContent in sheetContent)
                {
                    sbFileText.Append(string.Join(ExcelCellSeparator, rowContent.ToArray()));
                    sbFileText.Append(ExcelRowSeparator);
                }
                sbFileText.Append(ExcelSheetSeparator);
            }

            return sbFileText.ToString();
        }

        /// <summary>
        /// Reads the content of a Word document as text.
        /// </summary>
        /// <param name="fileName">Path of the Word document to open.</param>
        /// <returns>
        /// The captured sections in this order: headers, footers, tables, image file
        /// names, body paragraphs. Each optional section is emitted only when its
        /// "CaptureWord*" app setting equals "true"; paragraphs are always emitted.
        /// If the file cannot be opened the failure is logged and an empty string
        /// is returned.
        /// </returns>
        public static string ReadWordText(string fileName)
        {
            string WordTableCellSeparator = ConfigurationManager.AppSettings["WordTableCellSeparator"];
            string WordTableRowSeparator = ConfigurationManager.AppSettings["WordTableRowSeparator"];
            string WordTableSeparator = ConfigurationManager.AppSettings["WordTableSeparator"];

            string CaptureWordHeader = ConfigurationManager.AppSettings["CaptureWordHeader"];
            string CaptureWordFooter = ConfigurationManager.AppSettings["CaptureWordFooter"];
            string CaptureWordTable = ConfigurationManager.AppSettings["CaptureWordTable"];
            string CaptureWordImage = ConfigurationManager.AppSettings["CaptureWordImage"];

            // Format string for extracted image paths; {0} receives the generated file name.
            string CaptureWordImageFileName = ConfigurationManager.AppSettings["CaptureWordImageFileName"];

            StringBuilder sbFileText = new StringBuilder();

            #region Open document
            XWPFDocument document = null;
            try
            {
                using (FileStream file = new FileStream(fileName, FileMode.Open, FileAccess.Read))
                {
                    document = new XWPFDocument(file);
                }
            }
            catch (Exception e)
            {
                LogHandler.LogWrite(string.Format("文件{0}打开失败,错误:{1}", fileName, e.ToString()));
                // Nothing can be extracted without a document; avoid dereferencing null below.
                return string.Empty;
            }
            #endregion

            #region Headers and footers
            if (CaptureWordHeader == "true")
            {
                sbFileText.AppendLine("Capture Header Begin");
                foreach (XWPFHeader xwpfHeader in document.HeaderList)
                {
                    sbFileText.AppendLine(xwpfHeader.Text);
                }
                sbFileText.AppendLine("Capture Header End");
            }

            if (CaptureWordFooter == "true")
            {
                sbFileText.AppendLine("Capture Footer Begin");
                foreach (XWPFFooter xwpfFooter in document.FooterList)
                {
                    sbFileText.AppendLine(xwpfFooter.Text);
                }
                sbFileText.AppendLine("Capture Footer End");
            }
            #endregion

            #region Tables
            if (CaptureWordTable == "true")
            {
                sbFileText.AppendLine("Capture Table Begin");
                foreach (XWPFTable table in document.Tables)
                {
                    foreach (XWPFTableRow row in table.Rows)
                    {
                        // Every cell, including the last one in a row, is followed by the cell separator.
                        foreach (XWPFTableCell cell in row.GetTableCells())
                        {
                            sbFileText.Append(cell.GetText());
                            sbFileText.Append(WordTableCellSeparator);
                        }
                        sbFileText.Append(WordTableRowSeparator);
                    }
                    sbFileText.Append(WordTableSeparator);
                }
                sbFileText.AppendLine("Capture Table End");
            }
            #endregion

            #region Images
            if (CaptureWordImage == "true")
            {
                sbFileText.AppendLine("Capture Image Begin");
                foreach (XWPFPictureData pictureData in document.AllPictures)
                {
                    string picExtName = pictureData.suggestFileExtension();
                    string picFileName = pictureData.GetFileName();
                    byte[] picFileContent = pictureData.GetData();

                    // A GUID prefix keeps names unique across pictures and across calls.
                    // The "CaptureWordImageFileName" setting must be present: string.Format
                    // throws ArgumentNullException on a null format string.
                    string picTempName = string.Format(
                        CaptureWordImageFileName,
                        Guid.NewGuid().ToString() + "_" + picFileName + "." + picExtName);

                    // Creates or overwrites the file and writes the image bytes.
                    File.WriteAllBytes(picTempName, picFileContent);

                    sbFileText.AppendLine(picTempName);
                }
                sbFileText.AppendLine("Capture Image End");
            }
            #endregion

            // Body paragraphs are always captured.
            sbFileText.AppendLine("Capture Paragraph Begin");
            foreach (XWPFParagraph paragraph in document.Paragraphs)
            {
                sbFileText.AppendLine(paragraph.ParagraphText);
            }
            sbFileText.AppendLine("Capture Paragraph End");

            return sbFileText.ToString();
        }
    }
}
阅读全文
0 0
- 使用NOPI读取Word、Excel文档内容
- C# NOPI读取Excel
- 使用POI读取word文档内容
- 使用POI读取word文档内容
- java读取word,excel和pdf文档内容
- java操作office和pdf文件:读取word,excel和pdf文档内容
- poi读取word文档中内容
- C#读取word文档内容并显示
- java poi读取word、excel文档
- POI 创建,读取excel。处理word文档
- 使用c#读取word文档
- 使用NOPI实现Excel导入导出类
- 读取Excel文档的内容工具类
- java poi组件 读取word文档 替换文档 内容 图片
- EXCEL:读取多个EXCEL文件数据到WORD文档
- 如何使用C#读取Word文档
- 使用tm-extractors读取word文档
- Java使用poi读取word文档
- Cocos2d-x里面如何实现MVC(二)
- 查看Oracle当前用户下的主键、索引、sequence创建语句
- scala:Object与Class的区别
- JAVA编程思想学习 --- 第四章 (初始化和清除)
- [初学笔记] tic toc 计算程序运行时间
- 使用NOPI读取Word、Excel文档内容
- sphinx调用API参考(官方手册)
- JAVA编程思想学习 --- 第五章 (隐藏实施过程)
- MySQL 索引相关
- jvm的基本结构(转)
- UVA.11181 Probability|Given (概率)
- linux(十四)之linux NFS服务管理
- 用Python处理Excel——xlrd的简单使用
- 【学习摘记】马士兵JDBC技术_课时12_JDBC进行批处理