C#获取网页源文件

来源:互联网 发布:好书 知乎 编辑:程序博客网 时间:2024/04/30 01:18

使用 System.Net 命名空间下的 WebClient 类即可处理。代码如下:

/// <summary>
/// Downloads the page at <paramref name="url"/> and returns its content decoded as text.
/// The encoding is chosen in this order: the charset declared in the page's
/// <c>&lt;meta&gt;</c> tag, then <paramref name="encode"/>, then <c>Encoding.Default</c>.
/// </summary>
/// <param name="url">Absolute http:// or https:// address of the page to fetch.</param>
/// <param name="encode">Fallback encoding name used when the page declares no usable charset.</param>
/// <returns>
/// The decoded page text, or an empty string when <paramref name="url"/> is null/empty,
/// does not pass the URL pattern check, or the download/decoding step throws.
/// </returns>
public static string GetHtml(string url, string encode)
{
    // Validate the parameter itself (the original referenced an undefined "StrUrl").
    // https is accepted as well as http, since WebClient handles both schemes.
    if (string.IsNullOrEmpty(url) ||
        !Regex.IsMatch(url, @"^https?://([\w-]+\.)+[\w-]+(/[\w\-./?%&=]*)?$", RegexOptions.Compiled | RegexOptions.IgnoreCase))
    {
        return "";
    }

    try
    {
        byte[] pageBytes;
        // WebClient is IDisposable; release it as soon as the bytes are in memory.
        using (WebClient client = new WebClient())
        {
            pageBytes = client.DownloadData(url);
        }

        // First pass: decode with the default encoding only to sniff the declared charset.
        string html = Encoding.Default.GetString(pageBytes);

        // Matches both <meta http-equiv=... content="text/html; charset=xxx"> and
        // <meta charset="xxx">. The original pattern ended in `\"|'[\s|\S]+>`, where the
        // top-level alternation split the whole expression in two and could match from
        // any single quote to the end of the document with an empty "content" group.
        // NOTE(review): GetRegexStr is defined elsewhere; presumably it returns the value
        // of the named group (or an empty/null string when nothing matches) - confirm.
        string declaredCharset = GetRegexStr(
            html,
            "(?i)<meta[^>]*?charset\\s*=\\s*['\"]?(?<content>[\\w-]+)",
            "content");

        Encoding encoding = FindEncoding(declaredCharset)
            ?? FindEncoding(encode)
            ?? Encoding.Default;

        // Re-decode only when the resolved encoding differs from the first-pass one.
        if (!encoding.Equals(Encoding.Default))
        {
            html = encoding.GetString(pageBytes);
        }

        return html;
    }
    catch
    {
        // Deliberate best-effort contract: callers receive "" instead of an exception
        // for any download or decoding failure.
        return "";
    }
}

/// <summary>
/// Resolves an encoding by name without letting a bad name escape as an exception.
/// </summary>
/// <param name="name">Encoding name such as "utf-8" or "gb2312"; may be null or empty.</param>
/// <returns>The matching encoding, or null when the name is blank or not recognised.</returns>
private static Encoding FindEncoding(string name)
{
    if (string.IsNullOrEmpty(name))
    {
        return null;
    }

    try
    {
        return Encoding.GetEncoding(name.Trim());
    }
    catch (System.ArgumentException)
    {
        // Encoding.GetEncoding(string) signals an unknown/unsupported name this way.
        return null;
    }
}



原创粉丝点击