数据采集之页面读取、图片采集类

来源:互联网 发布:企搜通软件下载 编辑:程序博客网 时间:2024/06/06 08:39
  namespace DBUtility
  {
      using System;
      using System.IO;
      using System.Net;
      using System.Text;

      /// <summary>
      /// Small web-scraping helpers: download a page as text and save a remote image to disk.
      /// </summary>
      public class common
      {
          /// <summary>Directory used by <see cref="SaveImageFromWeb"/> when the caller supplies no path.</summary>
          private const string DefaultSaveDirectory = "e://test";

          /// <summary>Extension used when the URL does not end in a recognized image extension.</summary>
          private const string DefaultImageExtension = ".jpg";

          /// <summary>Image file extensions that are kept as-is when found at the end of the URL path.</summary>
          private static readonly string[] KnownImageExtensions =
              new string[] { ".jpg", ".jpeg", ".png", ".gif", ".bmp" };

          /// <summary>
          /// Downloads the body of <paramref name="url"/> with an HTTP GET and returns it as text.
          /// </summary>
          /// <param name="url">Absolute HTTP/HTTPS address of the page.</param>
          /// <returns>
          /// The response body decoded with <see cref="Encoding.Default"/>, or an empty string when the
          /// URL is not an HTTP(S) address or the request/read fails for any reason.
          /// </returns>
          /// <remarks>
          /// Redirects are not followed and the request times out after 30 seconds.
          /// Creating the request happens outside the try block, so a null or malformed
          /// <paramref name="url"/> still throws from <see cref="WebRequest.Create(string)"/>.
          /// NOTE(review): the body is always decoded with the system default encoding; pages that
          /// declare a different charset will be mis-decoded - confirm whether that is acceptable.
          /// </remarks>
          public static string GetPageHTML(string url)
          {
              HttpWebRequest wr = WebRequest.Create(url) as HttpWebRequest;
              if (wr == null)
              {
                  // Non-HTTP schemes (file:, ftp:, ...) cannot be served by this helper.
                  return "";
              }

              try
              {
                  // HTTP method tokens are case-sensitive, so send the canonical upper-case form.
                  wr.Method = "GET";
                  wr.Accept = "*/*";
                  wr.Headers.Add("Accept-Language:   zh-cn");
                  wr.Headers.Add("UA-CPU:   x86");
                  // Advertises gzip/deflate AND transparently decompresses the body. Setting only the
                  // Accept-Encoding header would hand compressed bytes to the text reader.
                  wr.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;
                  wr.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1;  Embedded Web Browser from: http://bsalsa.com/; InfoPath.2; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)";
                  wr.KeepAlive = true;
                  wr.ServicePoint.Expect100Continue = false;
                  wr.AllowAutoRedirect = false;
                  wr.Timeout = 30000;

                  // using-blocks release the connection even when reading throws.
                  using (HttpWebResponse wre = (HttpWebResponse)wr.GetResponse())
                  using (Stream body = wre.GetResponseStream())
                  using (StreamReader sreader = new StreamReader(body, Encoding.Default))
                  {
                      return sreader.ReadToEnd();
                  }
              }
              catch (Exception)
              {
                  // Deliberate best-effort contract: any failure is reported as an empty page.
                  return "";
              }
          }

          /// <summary>
          /// Downloads <paramref name="imgUrl"/> and, when the server reports an image content type,
          /// writes it to <paramref name="path"/> as <paramref name="imgname"/> plus a file extension.
          /// </summary>
          /// <param name="imgUrl">Absolute HTTP/HTTPS address of the image.</param>
          /// <param name="path">
          /// Target directory; created if missing. When null or empty, "e://test" is used.
          /// </param>
          /// <param name="imgname">File name without extension.</param>
          /// <returns>1 when the image was saved; 0 when the response is not an image.</returns>
          /// <exception cref="ArgumentNullException"><paramref name="imgUrl"/> is null.</exception>
          /// <remarks>
          /// Network and file-system exceptions are not caught here and propagate to the caller.
          /// The request times out after 10 seconds.
          /// </remarks>
          public static int SaveImageFromWeb(string imgUrl, string path, string imgname)
          {
              if (imgUrl == null)
              {
                  throw new ArgumentNullException("imgUrl");
              }

              string targetDirectory = string.IsNullOrEmpty(path) ? DefaultSaveDirectory : path;
              string targetFile = Path.Combine(targetDirectory, imgname + ResolveImageExtension(imgUrl));

              HttpWebRequest request = (HttpWebRequest)WebRequest.Create(imgUrl);
              request.UserAgent = "Mozilla/6.0 (MSIE 6.0; Windows NT 5.1; Natas.Robot)";
              request.Timeout = 10000;

              using (WebResponse response = request.GetResponse())
              using (Stream stream = response.GetResponseStream())
              {
                  string contentType = response.ContentType ?? "";
                  if (!contentType.StartsWith("image/", StringComparison.OrdinalIgnoreCase))
                  {
                      return 0;
                  }

                  Directory.CreateDirectory(targetDirectory);
                  using (FileStream fso = new FileStream(targetFile, FileMode.Create))
                  {
                      byte[] buffer = new byte[1024];
                      int read;
                      // Copy until end-of-stream instead of trusting ContentLength, which is -1 when
                      // the server does not announce a length.
                      while ((read = stream.Read(buffer, 0, buffer.Length)) > 0)
                      {
                          fso.Write(buffer, 0, read);
                      }
                  }

                  return 1;
              }
          }

          /// <summary>
          /// Picks the file extension for an image URL: the URL's own extension (original casing,
          /// query string and fragment ignored) when it is a known image type, otherwise ".jpg".
          /// </summary>
          private static string ResolveImageExtension(string imgUrl)
          {
              string urlPath = imgUrl;
              int cut = urlPath.IndexOfAny(new char[] { '?', '#' });
              if (cut >= 0)
              {
                  urlPath = urlPath.Substring(0, cut);
              }

              int dot = urlPath.LastIndexOf('.');
              if (dot >= 0)
              {
                  string extension = urlPath.Substring(dot);
                  foreach (string known in KnownImageExtensions)
                  {
                      if (string.Equals(extension, known, StringComparison.OrdinalIgnoreCase))
                      {
                          return extension;
                      }
                  }
              }

              return DefaultImageExtension;
          }
      }
  }
原创粉丝点击