asp.net抓取页面内容+下载文件

来源:互联网 发布:syntaxhighlighter.js 编辑:程序博客网 时间:2024/05/16 05:04
 #region 抓取页面的内容
    public string GetHTML(string url)
    {
        ASCIIEncoding encoding = new ASCIIEncoding();
        byte[] postdata = encoding.GetBytes(posts);
        HttpWebRequest myRequest = (HttpWebRequest)WebRequest.Create(url);
        myRequest.Method = "GET";
        myRequest.ContentType = "text/html;";
        myRequest.UserAgent = "Mozilla/5.0 (Windows NT 5.2; rv:14.0) Gecko/20100101 Firefox/14.0.1";
        myRequest.Host = "www.aizhan.com";
        //myRequest.Headers.Add("Host", "www.aizhan.com");
        myRequest.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
        myRequest.KeepAlive = true;

        HttpWebResponse myResponse = (HttpWebResponse)myRequest.GetResponse();
        StreamReader reader = new StreamReader(myResponse.GetResponseStream(), Encoding.UTF8);
        string content = reader.ReadToEnd();
        return content;
    }
    public string GetHTML2(string url) {
        WebClient wc = new WebClient();
        wc.Credentials = CredentialCache.DefaultCredentials;
        byte[] btPageData = wc.DownloadData(url);
        string strTargetHtml = Encoding.UTF8.GetString(btPageData);
        wc.Dispose();
        return strTargetHtml;
    }
   #endregion
原创粉丝点击