C#获取网页信息示例

来源：互联网　发布：令狐冲知乎　编辑：程序博客网　时间：2024/06/06 03:00

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.IO;
using System.Web;
using System.Net;
using System.Text.RegularExpressions;
namespace ConsoleApplication2
{
/// <summary>
/// Console sample that downloads a Sina Finance company-information page and
/// prints the text of every table cell found in its "comInfo1" table.
/// </summary>
class Program
{
    // Minimum number of lines written to the console. The output is padded with
    // EmptySlot up to this count so callers that relied on the fixed 100-line
    // output keep seeing the same format.
    private const int MinOutputLines = 100;

    // Placeholder printed for slots that have no corresponding table cell.
    private const string EmptySlot = "-1";

    // Matches the company-info table. The pattern has no quotes or spaces because
    // the page text is stripped of both before matching.
    private static readonly Regex TableRegex = new Regex(
        @"<tableid=comInfo1width=100%>(.*?)</table>",
        RegexOptions.Singleline | RegexOptions.IgnoreCase);

    // Captures the inner content of each <td> element.
    private static readonly Regex CellRegex = new Regex(
        @"<td.*?>(.*?)</td>",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Matches opening/closing anchor tags and any whitespace, all of which are
    // removed from the cell content.
    private static readonly Regex AnchorOrWhitespaceRegex = new Regex(
        @"<a.*?href=.*?>|</a>|\s",
        RegexOptions.Singleline | RegexOptions.IgnoreCase);

    /// <summary>
    /// Downloads the page, extracts the table cells and writes one cell per line.
    /// At least <see cref="MinOutputLines"/> lines are written; missing cells are
    /// printed as "-1".
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        string url = "http://vip.stock.finance.sina.com.cn/corp/go.php/vCI_CorpInfo/stockid/600866.phtml";
        String sourcecode = Htmlsource.getHtmlsource(url);

        List<string> cells = ExtractCells(sourcecode);

        // Pad to the historical fixed length so the output format is unchanged
        // for pages with fewer cells.
        while (cells.Count < MinOutputLines)
        {
            cells.Add(EmptySlot);
        }

        foreach (string cell in cells)
        {
            Console.WriteLine(cell);
        }
    }

    /// <summary>
    /// Extracts the cleaned text of every table cell from the given HTML.
    /// </summary>
    /// <param name="html">Raw HTML of the page.</param>
    /// <returns>
    /// The content of each &lt;td&gt; with anchor tags and whitespace removed, in
    /// document order. The list grows as needed, so any number of cells is supported.
    /// </returns>
    private static List<string> ExtractCells(string html)
    {
        // Strip double quotes and spaces so the attribute-less patterns above match.
        string text = html.Replace("\"", "").Replace(" ", "");

        // Narrow the search to the company-info table when it is present;
        // otherwise the whole page is scanned for cells.
        Match table = TableRegex.Match(text);
        if (table.Success)
        {
            text = table.Value;
        }

        List<string> cells = new List<string>();
        foreach (Match cell in CellRegex.Matches(text))
        {
            cells.Add(AnchorOrWhitespaceRegex.Replace(cell.Groups[1].Value, ""));
        }
        return cells;
    }
}

/// <summary>
/// Helper for downloading the HTML text of a web page.
/// </summary>
class Htmlsource
{
    /// <summary>
    /// Fetches the web page at <paramref name="url"/> and returns its content,
    /// reading the response bytes as GB2312.
    /// </summary>
    /// <param name="url">Address of the page to download.</param>
    /// <returns>The page content as a string.</returns>
    /// <exception cref="WebException">
    /// Thrown by <see cref="WebClient.OpenRead(string)"/> when the download fails.
    /// </exception>
    public static String getHtmlsource(String url)
    {
        Encoding gb2312 = System.Text.Encoding.GetEncoding("gb2312");
        string strHtml;

        // The using statements dispose the reader, the stream and the client in
        // that order, including when the download or the read throws.
        using (WebClient webclient = new WebClient())
        using (Stream stream = webclient.OpenRead(url))
        using (StreamReader reader = new StreamReader(stream, gb2312))
        {
            strHtml = reader.ReadToEnd();
        }

        // NOTE(review): the text is already a decoded .NET string here; this step
        // round-trips it through GB2312 and UTF-8 bytes and is kept only to
        // preserve the existing result. Confirm whether it is still needed.
        return TransferEncoding(gb2312, System.Text.Encoding.GetEncoding("utf-8"), strHtml);
    }

    /// <summary>
    /// Transcodes a string: encodes it to bytes with <paramref name="srcEncoding"/>,
    /// converts those bytes to <paramref name="dstEncoding"/>, and decodes the
    /// result back into a string using <paramref name="dstEncoding"/>.
    /// </summary>
    /// <param name="srcEncoding">Encoding used to turn the string into bytes.</param>
    /// <param name="dstEncoding">Encoding the bytes are converted to and decoded with.</param>
    /// <param name="srcStr">Text to transcode.</param>
    /// <returns>The string decoded from the converted bytes.</returns>
    private static string TransferEncoding(Encoding srcEncoding, Encoding dstEncoding, string srcStr)
    {
        byte[] srcBytes = srcEncoding.GetBytes(srcStr);
        byte[] bytes = Encoding.Convert(srcEncoding, dstEncoding, srcBytes);
        return dstEncoding.GetString(bytes);
    }
}
}

0 0