asp.net新闻采集(wyz)
来源:互联网 发布:大连董萍 知乎 编辑:程序博客网 时间:2024/05/29 11:19
/// <summary>
/// Click handler that scrapes five listing pages from news.chinawutong.com,
/// stores the headlines found on each page through <c>matchA</c>, and reports
/// the per-category and total insert counts through <c>ShowInfo</c>.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
protected void btnAdd_Click(object sender, EventArgs e)
{
    // Prefix handed to matchA so that captured hrefs become full links.
    string siteRoot = "http://news.chinawutong.com/";

    // Each page is fetched and processed before the next one is requested.
    // The category labels below are taken from the status message at the end.

    // Road transport -> category 4
    int roadCount = matchA(
        getHTTPPage("http://news.chinawutong.com/xwkx/lydt/", "lanmuname", "pagelist", 46, 86),
        siteRoot,
        4);

    // Latest news -> category 2
    int latestCount = matchA(
        getHTTPPage("http://news.chinawutong.com/xwkx/zxzx/", "lanmuname", "pagelist", 46, 86),
        siteRoot,
        2);

    // Logistics hot topics -> category 3
    int hotCount = matchA(
        getHTTPPage("http://news.chinawutong.com/ztrw/zdsj/", "lanmuname", "pagelist", 46, 86),
        siteRoot,
        3);

    // Logistics informatization -> category 5
    int infoTechCount = matchA(
        getHTTPPage("http://news.chinawutong.com/xwkx/xxhzx/", "lanmuname", "pagelist", 46, 86),
        siteRoot,
        5);

    // Current affairs -> category 6
    int currentAffairsCount = matchA(
        getHTTPPage("http://news.chinawutong.com/wtzl/ssrd/", "lanmuname", "pagelist", 46, 86),
        siteRoot,
        6);

    // Total number of records generated by this click.
    int total = roadCount + latestCount + hotCount + infoTechCount + currentAffairsCount;

    ShowInfo("本次成功生成共 " + total
        + " 条.公路运输 " + roadCount
        + " 条.最新资讯 " + latestCount
        + " 条.物流热点 " + hotCount
        + " 条.物流信息化 " + infoTechCount
        + " 条.时事热点 " + currentAffairsCount
        + " 条.");
}
/// <summary>
/// Scans an HTML fragment for anchor tags and adds one link-type news record
/// for each headline that is not already stored and has not already been
/// added earlier in the same call.
/// </summary>
/// <param name="teststr">HTML fragment to scan for anchor tags; null is treated as an empty fragment.</param>
/// <param name="preurl">Prefix concatenated in front of each captured href value to build the stored link.</param>
/// <param name="categoryID">Category id assigned to every record added by this call.</param>
/// <returns>The number of records added by this call.</returns>
public int matchA(string teststr, string preurl, int categoryID)
{
    // Only the first 20 anchors of a fragment are considered.
    const int maxLinks = 20;

    // Group 1 captures the href value, group 2 the anchor's inner text.
    // NOTE(review): the pattern is greedy and '.' does not cross line breaks, so it
    // presumably relies on one anchor per line in the scraped markup - confirm.
    MatchCollection mc = Regex.Matches(teststr ?? string.Empty, "<a.+?href=\"(.+?)\".*>(.+)</a>");
    int limit = Math.Min(mc.Count, maxLinks);

    // Titles handled during this call. Records are only saved after the loop, so the
    // stored-title check below cannot be relied on to catch a headline repeated
    // within the same fragment.
    var seenTitles = new System.Collections.Generic.HashSet<string>();

    int incount = 0; // number of records added

    for (int i = 0; i < limit; i++)
    {
        Match match = mc[i];

        // Read the groups into locals so the query predicate only captures a plain string.
        string href = match.Groups[1].Value;
        string title = match.Groups[2].Value;

        if (!seenTitles.Add(title))
        {
            continue; // same headline already handled in this fragment
        }

        if (newsService.GetAll().Any(n => n.Title == title))
        {
            continue; // headline is already stored
        }

        DateTime now = DateTime.Now;

        var _news = new model.News();
        _news.Title = title;
        _news.NewsContent = preurl + href;
        _news.CreateDate = now;
        _news.UpdateDate = now;
        _news.IsLink = 1;
        _news.CategoryID = categoryID;
        _news.OriginateID = 2; // source identifier
        newsService.Add(_news);

        incount++;
    }

    newsService.Save(); // persist everything added above in one go
    return incount;
}
/// <summary>
/// Downloads a page, decodes it as gb2312, and returns the part of the text that
/// lies between two marker strings, adjusted by fixed character offsets.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="starts">Marker text whose first occurrence anchors the start of the slice.</param>
/// <param name="ends">Marker text whose first occurrence anchors the end of the slice.</param>
/// <param name="startnum">Number of characters added to the start marker's index to get the slice start.</param>
/// <param name="endnum">Number of characters subtracted from the end marker's index to get the slice end.</param>
/// <returns>
/// The selected slice of the page, or an empty string when either marker is
/// missing or the offsets do not describe a valid range inside the page.
/// </returns>
public string getHTTPPage(string url, string starts, string ends, int startnum, int endnum)
{
    string content;

    HttpWebRequest webRequest = (HttpWebRequest)WebRequest.Create(url);

    // using blocks release the response, stream and reader even if reading throws.
    using (HttpWebResponse webResponse = (HttpWebResponse)webRequest.GetResponse())
    using (Stream stream = webResponse.GetResponseStream())
    using (StreamReader streamReader = new StreamReader(stream, System.Text.Encoding.GetEncoding("gb2312")))
    {
        content = streamReader.ReadToEnd();
    }

    // Markers are markup fragments, so compare ordinally rather than by culture.
    int startMarker = content.IndexOf(starts, StringComparison.Ordinal);
    int endMarker = content.IndexOf(ends, StringComparison.Ordinal);
    if (startMarker < 0 || endMarker < 0)
    {
        // A missing marker would otherwise yield an arbitrary slice or a Substring failure.
        return string.Empty;
    }

    int start = startMarker + startnum;
    int end = endMarker - endnum;
    if (start < 0 || end < start || end > content.Length)
    {
        return string.Empty;
    }

    return content.Substring(start, end - start);
}
- asp.net新闻采集(wyz)
- .NET RSS新闻自动采集
- Asp.net 数据采集
- asp.net采集函数
- Asp.net 数据采集
- 新闻采集
- asp.net新闻页面分页
- asp.net新闻页面分页
- asp.net实现滚动新闻
- asp.net学生信息采集
- asp.net采集函数(采集、分析、替换、入库)
- asp.net采集函数(采集、分析、替换、入库一体)
- asp.net采集函数(采集、分析、替换、入库)
- asp.net采集函数(采集、分析、替换、入库一体)
- 用Asp.net实现新闻分页
- 设计ASP.NET新闻管理系统
- 设计ASP.NET新闻管理系统
- asp.net新闻后加“new”
- Latex中的列表环境[一]
- 【前端开发软件工具】webstorm常用快捷键大全与技巧
- 开源控件ViewPagerIndicator的使用
- 生存,只是为了生存
- 索引
- asp.net新闻采集(wyz)
- Android开发 无法导入ViewPagerIndicator或其他开源框架无法导入
- 轮播图片
- Android 基础之布局管理器
- 【设计模式】行为模式之备忘录Memento
- react-Native-Experimental-Navigation-with-redux(一)
- Maven仓库管理:Nexus
- MVC中session保存用户名
- CSS3的calc()使用