下载并读取PDF文本内容
来源:互联网 发布:ecmall多用户商城源码 编辑:程序博客网 时间:2024/05/29 16:17
// Downloads a PDF whose real address is only known after following redirects,
// saves it to a local file, and extracts its text content with PDFBox.
//
// Outputs left in scope for the code that follows:
//   strUrlFilePath - the address the initial request was redirected to
//   pdf_path       - the local path the PDF was saved to
//   text           - the text extracted from the downloaded PDF
string strUrlFilePath = string.Empty;
string url = @"http://www.jsgsj.gov.cn:58888//province/NoticeServlet.json?showCrcontentPdf=true&org=2156&id=70789446&seqId=1";

// Step 1: issue a browser-like request and let it follow redirects so that
// the final address of the PDF can be read from the response.
HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
request.UserAgent = "Mozilla/5.0 (Windows; U; Windows NT 5.2; zh-CN; rv:1.8.1.8) Gecko/20071008 Firefox/2.0.0.8";
request.Accept = "text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5";
request.AllowAutoRedirect = true;
request.Headers.Add(HttpRequestHeader.AcceptCharset, "gb2312,utf-8;q=0.7,*;q=0.7");
request.Headers.Add(HttpRequestHeader.AcceptEncoding, "gzip,deflate");
// NOTE(review): the cookie value is hard-coded; confirm the server actually requires it.
request.Headers.Add(HttpRequestHeader.Cookie, "id58=05dz8VMG5yk8RCViPEsFAg==; city=wz; 58home=wz; ipcity=wz|%u6E29%u5DDE; myfeet_tooltip=end; __utma=253535702.1542940914.1392961328.1392961328.1392961328.1; __utmc=253535702; __utmz=253535702.1392961328.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)");
request.Timeout = 30000; // milliseconds

// The response body is not needed here, only the post-redirect address.
// The using block releases the connection even if an exception is thrown.
using (HttpWebResponse httpresponse = (HttpWebResponse)request.GetResponse())
{
    strUrlFilePath = httpresponse.ResponseUri.ToString(); // address after redirects
}

// Step 2: make sure the target directory exists. CreateDirectory does nothing
// when the directory is already there, so no separate existence check is needed.
String pdf_path = @"D:\其它\PDFDownloadFile\aaaa.pdf";
System.IO.Directory.CreateDirectory(System.IO.Path.GetDirectoryName(pdf_path));

// Step 3: download the PDF from the resolved address.
using (WebClient wc = new WebClient())
{
    wc.DownloadFile(strUrlFilePath, pdf_path);
}

// Step 4: extract the text. The document is closed in a finally block so the
// file handle is released even when text extraction fails.
string text;
PDDocument doc = PDDocument.load(pdf_path);
try
{
    PDFTextStripper pdfStripper = new PDFTextStripper();
    text = pdfStripper.getText(doc);
}
finally
{
    doc.close();
}
阅读全文
0 0
- 下载并读取PDF文本内容
- xpdf读取pdf文件并根据pdf内容修改文件名称
- xpdf读取pdf文件并根据pdf内容修改文件名称
- Android使用iText生成pdf文件并读取pdf内容
- 用iTextSharp读取PDF格式文档中的文本内容
- java下载网页并读取内容
- 如何编辑pdf文件并添加文本内容
- C# 读取pdf文本
- asp.net创建并写入文本和读取文件内容
- Java读取txt文本内容并写入Excel
- matlab读取txt文本内容修改并保存到txt
- js拖拽文本文档并读取内容到textarea
- 读取pdf中的内容
- 读取pdf文件内容
- Python读取PDF内容
- Delphi读取文本内容
- hadoop 读取 文本内容
- C++ 读取文本内容
- 2017年上半年软件工程师考试--程序员(上午)考点总结三
- //TODO
- 使用自定义注解和token防止表单重复提交
- Java调度,scheduleAtFixedRate方法的使用
- 关于spring mvc 时间转换
- 下载并读取PDF文本内容
- Python-sklearn机器学习的第一个样例(2)
- BZOJ-3229 合并石子变态版
- TFLearn MNIST
- js实现手机验证码发送
- SylixOS线程私有数据浅析
- jQuery的document ready与 onload事件——你真的思考过吗?
- android 使用贝塞尔曲线实现“波浪”效果思路解析
- 微信小程序 image组件的mode属性 以及 图片出现横向滚动轴