C#:从图片网站中查找符合要求的图片,并按指定规则保存到本地硬盘

来源:互联网 发布:猜数字游戏c语言 编辑:程序博客网 时间:2024/05/10 13:24
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Net;
using System.Text.RegularExpressions;
using System.IO;
using System.Web;

namespace RegexPractice
{
    /// <summary>
    /// Console entry point. Reads the diggfoto.com archive page, visits every
    /// photo page linked from it, and saves each photo whose declared size
    /// meets the minimum width/height to a per-date directory on disk.
    /// </summary>
    class program
    {
        /// <summary>Archive page that lists every photo page.</summary>
        private const string EntryPageUrl = "http://diggfoto.com/archive/?showall=1";

        /// <summary>Encoding name used to decode every downloaded HTML page.</summary>
        private const string PageEncodingName = "utf-8";

        /// <summary>Minimum declared image width (from the img tag) for a photo to be saved.</summary>
        private const int MinWidth = 1039;

        /// <summary>Minimum declared image height (from the img tag) for a photo to be saved.</summary>
        private const int MinHeight = 737;

        /// <summary>
        /// Captures the image source URL and the declared width/height from a
        /// photo page. Built once instead of once per downloaded page.
        /// </summary>
        private static readonly Regex ImageTagRegex = new Regex(
            "title=.*?\" src=\"(?<sourcePath>.*?)\".*? width=\"(?<width>\\d{1,})\" height=\"(?<height>\\d{1,})\"");

        /// <summary>
        /// Downloads every sufficiently large photo listed on the archive page.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            List<PhotoInfo> photoList = PhotoInfo.ExtractPhotoList(EntryPageUrl, PageEncodingName);

            foreach (PhotoInfo photoInfo in photoList)
            {
                try
                {
                    SavePhotoIfLargeEnough(photoInfo);
                }
                catch (WebException ex)
                {
                    // A single unreachable page or image should not abort the whole batch.
                    Console.Error.WriteLine("Skipping '{0}': {1}", photoInfo.Path, ex.Message);
                }
            }
        }

        /// <summary>
        /// Fetches one photo page and, when the page declares an image at least
        /// <see cref="MinWidth"/> x <see cref="MinHeight"/>, downloads that image
        /// to <see cref="PhotoInfo.FilePath"/>.
        /// </summary>
        /// <param name="photoInfo">The archive entry to process.</param>
        private static void SavePhotoIfLargeEnough(PhotoInfo photoInfo)
        {
            string photoPageUrl = string.Format(
                "http://diggfoto.com/{0}/{1}/{2}/{3}",
                photoInfo.Year, photoInfo.Month, photoInfo.Day, photoInfo.Path);
            string pageSource = Util.GetPageSource(photoPageUrl, PageEncodingName);

            Match match = ImageTagRegex.Match(pageSource);
            if (!match.Success)
            {
                // Page does not contain the expected img markup; nothing to download.
                return;
            }

            int width;
            int height;
            if (!int.TryParse(match.Groups["width"].Value, out width) ||
                !int.TryParse(match.Groups["height"].Value, out height))
            {
                // Digits were matched but do not fit in an int; treat as unusable.
                return;
            }

            if (width < MinWidth || height < MinHeight)
            {
                return;
            }

            byte[] imageBytes = Util.GetPageSourceBytes(match.Groups["sourcePath"].Value);

            // CreateDirectory does nothing when the directory already exists.
            Directory.CreateDirectory(photoInfo.TargetSubDirPath);
            File.WriteAllBytes(photoInfo.FilePath, imageBytes);
        }
    }

    /// <summary>
    /// Download and file-name helpers shared by the scraper.
    /// </summary>
    class Util
    {
        /// <summary>
        /// Downloads the resource at <paramref name="uri"/> and returns its raw bytes.
        /// </summary>
        /// <param name="uri">Absolute URI of the resource.</param>
        /// <returns>The response body as a byte array.</returns>
        public static byte[] GetPageSourceBytes(string uri)
        {
            // WebClient is IDisposable; release it as soon as the download finishes.
            using (WebClient wc = new WebClient())
            {
                return wc.DownloadData(new Uri(uri));
            }
        }

        /// <summary>
        /// Downloads the resource at <paramref name="uri"/> and decodes it as text.
        /// </summary>
        /// <param name="uri">Absolute URI of the page.</param>
        /// <param name="encodingName">Encoding name passed to <see cref="Encoding.GetEncoding(string)"/>.</param>
        /// <returns>The decoded page text.</returns>
        public static string GetPageSource(string uri, string encodingName)
        {
            byte[] pageSourceBytes = GetPageSourceBytes(uri);
            return Encoding.GetEncoding(encodingName).GetString(pageSourceBytes);
        }

        /// <summary>
        /// Replaces every character that is not allowed in a file name with '_'
        /// and trims surrounding whitespace.
        /// </summary>
        /// <param name="name">Candidate file name (without directory); may be null.</param>
        /// <returns>The cleaned name, or an empty string when <paramref name="name"/> is null or empty.</returns>
        public static string SanitizeFileName(string name)
        {
            if (string.IsNullOrEmpty(name))
            {
                return string.Empty;
            }

            char[] invalidChars = System.IO.Path.GetInvalidFileNameChars();
            StringBuilder cleaned = new StringBuilder(name.Length);
            foreach (char c in name)
            {
                cleaned.Append(Array.IndexOf(invalidChars, c) >= 0 ? '_' : c);
            }

            return cleaned.ToString().Trim();
        }
    }

    /// <summary>
    /// One entry scraped from the archive page: the date and slug parts of the
    /// photo page URL plus the link text.
    /// </summary>
    class PhotoInfo
    {
        /// <summary>
        /// Matches a single-quoted photo page link and the text that follows the
        /// tag, capturing year, month, day, path (slug) and title.
        /// </summary>
        public static readonly Regex PhotoRegex = new Regex("'http://diggfoto.com/(?<year>\\d{4})/(?<month>\\d{2})/(?<day>\\d{2})/(?<path>.*?)/'.*?>(?<title>.*?)<");

        /// <summary>
        /// Downloads <paramref name="url"/> and returns one <see cref="PhotoInfo"/>
        /// per match of <see cref="PhotoRegex"/> in the page text.
        /// </summary>
        /// <param name="url">Absolute URL of the archive page.</param>
        /// <param name="encodingName">Encoding name used to decode the page.</param>
        /// <returns>The extracted entries in page order; empty when nothing matches.</returns>
        public static List<PhotoInfo> ExtractPhotoList(string url, string encodingName)
        {
            string pageSource = Util.GetPageSource(url, encodingName);
            List<PhotoInfo> photoList = new List<PhotoInfo>();

            foreach (Match match in PhotoRegex.Matches(pageSource))
            {
                photoList.Add(new PhotoInfo
                {
                    Year = match.Groups["year"].Value,
                    Month = match.Groups["month"].Value,
                    Day = match.Groups["day"].Value,
                    Path = match.Groups["path"].Value,
                    Title = match.Groups["title"].Value
                });
            }

            return photoList;
        }

        /// <summary>Four-digit year taken from the photo page URL.</summary>
        public string Year { get; set; }

        /// <summary>Two-digit month taken from the photo page URL.</summary>
        public string Month { get; set; }

        /// <summary>Two-digit day taken from the photo page URL.</summary>
        public string Day { get; set; }

        /// <summary>Last segment (slug) of the photo page URL.</summary>
        public string Path { get; set; }

        /// <summary>Link text captured after the photo page link.</summary>
        public string Title { get; set; }

        /// <summary>Directory the photo is saved into: c:\ followed by year, month and day.</summary>
        public string TargetSubDirPath
        {
            get
            {
                return string.Format("c:\\{0}{1}{2}", Year, Month, Day);
            }
        }

        /// <summary>
        /// Full path of the saved image. The file name is the title with
        /// characters that are invalid in file names replaced by '_'; when that
        /// leaves nothing, the URL slug is used instead.
        /// </summary>
        public string FilePath
        {
            get
            {
                string fileName = Util.SanitizeFileName(Title);
                if (fileName.Length == 0)
                {
                    // Inside this class "Path" is the slug property, not System.IO.Path.
                    fileName = Util.SanitizeFileName(Path);
                }

                return string.Format("{0}\\{1}.jpg", TargetSubDirPath, fileName);
            }
        }

        /// <summary>
        /// Builds the photo page URL (with trailing slash) from the date parts and slug.
        /// </summary>
        /// <returns>The absolute photo page URL.</returns>
        public string GetTargetUrl()
        {
            return string.Format("http://diggfoto.com/{0}/{1}/{2}/{3}/", Year, Month, Day, Path);
        }
    }
}

原创粉丝点击