通过WebClient类来发起请求并下载html 抓取邮箱 图片

来源:互联网 发布:积分兑换商城源码 php 编辑:程序博客网 时间:2024/05/04 10:34
 using System;using System.Collections.Generic;using System.Linq;using System.Text;using System.Net;using System.Text.RegularExpressions;using System.IO;namespace 通过WebClient类来发起请求并下载html 抓取邮箱 图片{    class Program    {        static void Main(string[] args)        {            #region 抓取网页email            //string url = "http://192.168.1.100:8080/提取Email.htm";            ////1.根据网址下载对应html字符串            //WebClient wc = new WebClient();            //wc.Encoding = Encoding.UTF8;            //string html = wc.DownloadString("http://192.168.1.100:8080/提取Email.htm");            ////2.从下载到字符串中提取Email,并把提取到的Email写入到文本文件中            //MatchCollection matches = Regex.Matches(html, @"[-a-zA-Z0-9_.]+@[-a-zA-Z0-9]+(\.[a-zA-Z0-9]+){1,}");            //using (StreamWriter writer = new StreamWriter("email.txt"))            //{            //    //遍历提取到的email            //    foreach (Match item in matches)            //    {            //        //Console.WriteLine(item.Value);            //        writer.WriteLine(item.Value);            //    }            //}            //Console.ReadKey();            #endregion            #region 抓取网页图片            //WebClient wc = new WebClient();            ////1.下载网页源代码            //string html = wc.DownloadString("http://192.168.1.100:8080/美女图片/美女们.htm");            ////2.提取网页中的图片,其实就是<img>标签            ////<img alt="" src="hotgirls/00_00.jpg" />            //MatchCollection matches = Regex.Matches(html, @"<img\s+alt="""" src=""(.+)""\s*/>");            //foreach (Match item in matches)            //{            //    string imgPath = "http://192.168.1.100:8080/美女图片/" + item.Groups[1].Value;            //    //下载图片            //    wc.DownloadFile(imgPath, @"c:\mv\" + Path.GetFileName(imgPath));            //}            //Console.WriteLine("ok");            //Console.ReadKey();            #endregion            #region 抓取职位信息            WebClient webClient = new WebClient();            string html = webClient.DownloadString("http://192.168.1.100:8080/【上海,IT-管理,计算机软件招聘,求职】-前程无忧.htm");            //<a href="http://search.51job.com/job/46621778,c.html" onclick="zzSearch.acStatRecJob( 1 );" class="jobname" target="_blank">ERP项目经理</a>            MatchCollection matches = Regex.Matches(html, @"<a\s+href=""http://search.51job.com/job/[0-9]{8},c.html"".+>(.+)</a>");            foreach (Match item in matches)            {                Console.WriteLine(item.Groups[1].Value);            }            Console.WriteLine("共{0}个职位信息。", matches.Count);            Console.ReadKey();            #endregion        }    }}
0 0
原创粉丝点击