.NET 获取网页中所需的链接

来源：互联网发布：淘宝网络公司编辑：程序博客网时间：2024/06/06 14:23

---------------------- ASP.Net+Unity开发、.Net培训、期待与您交流！ ----------------------

需要用到 HttpHelper 类，没有的话可以在我的博客里找一下。

以下是获取乌云网最新帖子的例子：

using System;

using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Windows.Forms;

namespace 获取乌云新帖子
{
/// <summary>
/// Main form of the sample: downloads a page, extracts the link cells
/// (<c>&lt;td&gt;&lt;a href=...&lt;/a&gt;</c>) from it and lists them in the
/// <c>htmltxt</c> text control, where they can then be turned into absolute URLs.
/// </summary>
public partial class Form1 : Form
{
    /// <summary>Absolute prefix that replaces site-relative <c>/bugs/</c> paths.</summary>
    private const string BugsBaseUrl = "http://www.wooyun.org/bugs/";

    /// <summary>
    /// Matches one link cell. The named group <c>title</c> captures everything between
    /// <c>&lt;td&gt;&lt;a href=</c> and the first following <c>&lt;/a&gt;</c>. Because <c>.</c>
    /// does not match a newline here, a match never spans more than one line.
    /// </summary>
    private static readonly Regex LinkRegex =
        new Regex(@"<td><a href=(?<title>.*?)</a>", RegexOptions.Multiline);

    /// <summary>
    /// Matches a <c>/bugs/</c> path that is NOT already preceded by the site host, so that
    /// rewriting the text a second time leaves already-absolute links untouched.
    /// </summary>
    private static readonly Regex RelativeBugsPathRegex =
        new Regex(@"(?<!http://www\.wooyun\.org)/bugs/");

    /// <summary>Creates the form and its designer-generated controls.</summary>
    public Form1()
    {
        InitializeComponent();
    }

    /// <summary>
    /// Load handler. Intentionally empty; kept because the designer file may still
    /// subscribe it to the form's Load event.
    /// </summary>
    private void Form1_Load(object sender, EventArgs e)
    {
    }

    /// <summary>
    /// Cleans up the extracted entries in <c>htmltxt</c>: the <c>"&gt;</c> that separates the
    /// href value from the link text becomes a space, and relative <c>/bugs/</c> paths are
    /// expanded to absolute URLs. Safe to click repeatedly.
    /// </summary>
    private void button1_Click(object sender, EventArgs e)
    {
        string cleaned = htmltxt.Text.Replace("\">", " ");

        // A plain string Replace would also hit the "/bugs/" inside links that were already
        // expanded by a previous click, producing "http://www.wooyun.orghttp://...".
        htmltxt.Text = RelativeBugsPathRegex.Replace(cleaned, BugsBaseUrl);
    }

    /// <summary>
    /// Fetches the page source, extracts every link cell and appends each one to
    /// <c>htmltxt</c> on its own line.
    /// </summary>
    private void button2_Click(object sender, EventArgs e)
    {
        // NOTE(review): HttpHellper is defined outside this file; the spelling is kept as-is.
        string html = HttpHellper.Htmlsource();

        List<string> links = ExtractLinks(html);
        if (links.Count == 0)
        {
            return; // nothing found: leave the control untouched
        }

        // Build the new text once instead of re-assigning the control's Text per match.
        StringBuilder text = new StringBuilder(htmltxt.Text);
        foreach (string link in links)
        {
            text.Append("\r\n").Append(link);
        }

        htmltxt.Text = text.ToString();
    }

    /// <summary>
    /// Returns the captured <c>title</c> group of every link cell found in
    /// <paramref name="html"/>, with leading and trailing double quotes trimmed.
    /// </summary>
    /// <param name="html">Page source to scan; may be null or empty.</param>
    /// <returns>The extracted entries in document order; empty when nothing matches.</returns>
    private static List<string> ExtractLinks(string html)
    {
        List<string> links = new List<string>();
        if (string.IsNullOrEmpty(html))
        {
            return links;
        }

        foreach (Match match in LinkRegex.Matches(html))
        {
            // The named group already excludes the "<td><a href=" prefix and the "</a>"
            // suffix, so no second regex pass over match.Value is needed.
            links.Add(match.Groups["title"].Value.Trim('"'));
        }

        return links;
    }
}

}

---------------------- ASP.Net+Unity开发、.Net培训、期待与您交流！ ----------------------

0 0