利用 WebClient 和正则表达式下载资源

来源：互联网发布：龙虎榜数据怎么来的编辑：程序博客网时间：2024/05/21 19:22

//我们拿下载图片举个例子
/// <summary>
/// Sample console program: downloads a web page, extracts the <c>src</c> value of
/// every <c>&lt;img&gt;</c> tag with a regular expression, and saves the images whose
/// URL ends in ".jpg" to a local directory.
/// </summary>
class Program
{
    // One shared client, reused for the page download and for every image download.
    static readonly WebClient client = new WebClient();

    // Matches an <img ...> tag and captures the value of its src attribute in the
    // named group "imgUrl". Built once instead of on every call.
    static readonly Regex ImgTagRegex = new Regex(
        @"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>",
        RegexOptions.IgnoreCase);

    /// <summary>
    /// Scans the configured page and downloads each ".jpg" image it references.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        string pageUrl = @"http://www.imooc.com/"; // page to scan for images
        string targetDirectory = @"D:\path练习\";  // where downloaded files are written

        // DownloadFile does not create missing directories, so make sure it exists.
        System.IO.Directory.CreateDirectory(targetDirectory);

        Uri pageUri = new Uri(pageUrl);
        string[] sources = GetHvtImgUrls(pageUrl);

        for (int i = 0; i < sources.Length; i++)
        {
            string source = sources[i];

            // Only ".jpg" images are downloaded; ordinal comparison avoids culture surprises.
            if (!source.EndsWith(".jpg", StringComparison.Ordinal))
            {
                continue;
            }

            // Resolve against the page address so that absolute (http/https),
            // protocol-relative ("//host/a.jpg") and relative ("/a.jpg") values all
            // become valid absolute URLs. Blindly prefixing "http:" broke https
            // and relative sources.
            Uri imageUri;
            if (!Uri.TryCreate(pageUri, source, out imageUri))
            {
                continue;
            }

            // The src values come from untrusted HTML: only fetch over http/https.
            if (imageUri.Scheme != Uri.UriSchemeHttp && imageUri.Scheme != Uri.UriSchemeHttps)
            {
                continue;
            }

            // A fresh GUID per file avoids name collisions between images.
            string filePath = System.IO.Path.Combine(targetDirectory, Guid.NewGuid() + ".jpg");
            client.DownloadFile(imageUri, filePath);
        }
    }

    /// <summary>
    /// Downloads the page at <paramref name="url"/> and returns the <c>src</c> value of
    /// every <c>&lt;img&gt;</c> tag found in it.
    /// </summary>
    /// <param name="url">Address of the HTML page to download and scan.</param>
    /// <returns>
    /// The captured <c>src</c> values in match order, exactly as written in the HTML
    /// (they may be relative, protocol-relative, or empty).
    /// </returns>
    public static string[] GetHvtImgUrls(string url)
    {
        string content = client.DownloadString(url); // raw HTML of the page

        MatchCollection matches = ImgTagRegex.Matches(content);
        string[] sources = new string[matches.Count];

        for (int i = 0; i < matches.Count; i++)
        {
            sources[i] = matches[i].Groups["imgUrl"].Value;
        }

        return sources;
    }
}

阅读全文

0 0