C#:从图片网站中查找符合要求的图片,并按指定规则保存到本地硬盘
来源:互联网 发布:猜数字游戏c语言 编辑:程序博客网 时间:2024/05/10 13:24
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;

namespace RegexPractice
{
    /// <summary>
    /// Console entry point: reads the diggfoto.com archive page, visits every
    /// photo page listed there, and saves each image whose declared size meets
    /// the minimum width/height to a per-day directory on the local disk.
    /// </summary>
    class program
    {
        // Minimum declared dimensions (in the width/height attributes) a photo must have to be saved.
        private const int MinWidth = 1039;
        private const int MinHeight = 737;

        // Captures the image source and its declared width/height from a photo page.
        // Built once instead of once per photo.
        private static readonly Regex ImageTagRegex = new Regex(
            "title=.*?\" src=\"(?<sourcePath>.*?)\".*? width=\"(?<width>\\d{1,})\" height=\"(?<height>\\d{1,})\"");

        /// <summary>
        /// Downloads the archive index and processes every photo found in it.
        /// A network or file error on one photo is reported on standard error
        /// and does not stop the remaining photos from being processed.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string entryPageUrl = "http://diggfoto.com/archive/?showall=1";
            string encodingName = "utf-8";

            List<PhotoInfo> photoList = PhotoInfo.ExtractPhotoList(entryPageUrl, encodingName);

            foreach (PhotoInfo photoInfo in photoList)
            {
                try
                {
                    SavePhotoIfLargeEnough(photoInfo, encodingName);
                }
                catch (WebException ex)
                {
                    Console.Error.WriteLine("Skipping {0}: {1}", photoInfo.GetTargetUrl(), ex.Message);
                }
                catch (IOException ex)
                {
                    Console.Error.WriteLine("Skipping {0}: {1}", photoInfo.GetTargetUrl(), ex.Message);
                }
            }
        }

        /// <summary>
        /// Fetches the photo's page, locates the image tag, and writes the image
        /// to <see cref="PhotoInfo.FilePath"/> when its declared size is at least
        /// <see cref="MinWidth"/> x <see cref="MinHeight"/>. Pages without a
        /// matching image tag, or with unparsable dimensions, are skipped.
        /// </summary>
        /// <param name="photoInfo">The photo entry taken from the archive page.</param>
        /// <param name="encodingName">Name of the text encoding used to decode the page.</param>
        private static void SavePhotoIfLargeEnough(PhotoInfo photoInfo, string encodingName)
        {
            string pageUrl = photoInfo.GetTargetUrl();
            string pageSource = Util.GetPageSource(pageUrl, encodingName);

            Match match = ImageTagRegex.Match(pageSource);
            if (!match.Success)
            {
                return;
            }

            // \d also matches non-ASCII digits and the value may overflow, so parse defensively.
            int width;
            int height;
            if (!int.TryParse(match.Groups["width"].Value, out width)
                || !int.TryParse(match.Groups["height"].Value, out height))
            {
                return;
            }

            if (width < MinWidth || height < MinHeight)
            {
                return;
            }

            // The src attribute may be relative; resolve it against the page URL.
            Uri imageUri;
            if (!Uri.TryCreate(new Uri(pageUrl), match.Groups["sourcePath"].Value, out imageUri))
            {
                return;
            }

            byte[] imageBytes = Util.GetPageSourceBytes(imageUri.AbsoluteUri);

            // CreateDirectory does nothing when the directory already exists.
            Directory.CreateDirectory(photoInfo.TargetSubDirPath);
            File.WriteAllBytes(photoInfo.FilePath, imageBytes);
        }
    }

    /// <summary>
    /// Download helpers built on <see cref="WebClient"/>.
    /// </summary>
    class Util
    {
        /// <summary>
        /// Downloads the resource at <paramref name="uri"/> and returns its raw bytes.
        /// </summary>
        /// <param name="uri">Absolute URI of the resource.</param>
        /// <returns>The downloaded bytes.</returns>
        public static byte[] GetPageSourceBytes(string uri)
        {
            // WebClient is disposable; release it as soon as the download finishes.
            using (WebClient wc = new WebClient())
            {
                return wc.DownloadData(new Uri(uri));
            }
        }

        /// <summary>
        /// Downloads the resource at <paramref name="uri"/> and decodes it as text.
        /// </summary>
        /// <param name="uri">Absolute URI of the page.</param>
        /// <param name="encodingName">Encoding name passed to <see cref="Encoding.GetEncoding(string)"/>.</param>
        /// <returns>The decoded page text.</returns>
        public static string GetPageSource(string uri, string encodingName)
        {
            byte[] pageSourceBytes = GetPageSourceBytes(uri);
            return Encoding.GetEncoding(encodingName).GetString(pageSourceBytes);
        }
    }

    /// <summary>
    /// One photo entry parsed from the archive page: the date parts and path
    /// slug taken from its link, plus the link text used as its title.
    /// </summary>
    class PhotoInfo
    {
        /// <summary>
        /// Matches a single-quoted photo link of the form
        /// 'http://diggfoto.com/yyyy/mm/dd/path/' followed by the link text.
        /// </summary>
        public static readonly Regex PhotoRegex = new Regex(
            "'http://diggfoto\\.com/(?<year>\\d{4})/(?<month>\\d{2})/(?<day>\\d{2})/(?<path>.*?)/'.*?>(?<title>.*?)<");

        /// <summary>
        /// Downloads the page at <paramref name="url"/> and returns one
        /// <see cref="PhotoInfo"/> per match of <see cref="PhotoRegex"/>.
        /// </summary>
        /// <param name="url">URL of the archive page.</param>
        /// <param name="encodingName">Name of the text encoding used to decode the page.</param>
        /// <returns>The photos found, in page order; empty when nothing matches.</returns>
        public static List<PhotoInfo> ExtractPhotoList(string url, string encodingName)
        {
            string pageSource = Util.GetPageSource(url, encodingName);
            List<PhotoInfo> photoList = new List<PhotoInfo>();

            foreach (Match match in PhotoRegex.Matches(pageSource))
            {
                photoList.Add(new PhotoInfo
                {
                    Year = match.Groups["year"].Value,
                    Month = match.Groups["month"].Value,
                    Day = match.Groups["day"].Value,
                    Path = match.Groups["path"].Value,
                    Title = match.Groups["title"].Value
                });
            }

            return photoList;
        }

        public string Year { get; set; }

        public string Month { get; set; }

        public string Day { get; set; }

        public string Path { get; set; }

        public string Title { get; set; }

        /// <summary>
        /// Directory the photo is stored in: c:\ followed by year, month and day concatenated.
        /// </summary>
        public string TargetSubDirPath
        {
            get { return string.Format("c:\\{0}{1}{2}", Year, Month, Day); }
        }

        /// <summary>
        /// Full path of the saved .jpg file inside <see cref="TargetSubDirPath"/>.
        /// The file name is the title with characters that are invalid in file
        /// names replaced by underscores; when the title is empty the path slug
        /// is used instead.
        /// </summary>
        public string FilePath
        {
            get
            {
                string fileName = ToSafeFileName(Title);
                if (fileName.Length == 0)
                {
                    fileName = ToSafeFileName(Path);
                }
                if (fileName.Length == 0)
                {
                    fileName = "untitled";
                }
                return string.Format("{0}\\{1}.jpg", TargetSubDirPath, fileName);
            }
        }

        /// <summary>
        /// Returns the URL of this photo's page, including the trailing slash.
        /// </summary>
        public string GetTargetUrl()
        {
            return string.Format("http://diggfoto.com/{0}/{1}/{2}/{3}/", Year, Month, Day, Path);
        }

        /// <summary>
        /// Replaces every character that is invalid in a file name with '_' and
        /// trims surrounding whitespace. Returns an empty string for null or empty input.
        /// </summary>
        private static string ToSafeFileName(string name)
        {
            if (string.IsNullOrEmpty(name))
            {
                return string.Empty;
            }

            // Fully qualified: inside this class "Path" refers to the Path property.
            char[] invalidChars = System.IO.Path.GetInvalidFileNameChars();
            StringBuilder safeName = new StringBuilder(name.Length);
            foreach (char c in name)
            {
                safeName.Append(Array.IndexOf(invalidChars, c) >= 0 ? '_' : c);
            }

            return safeName.ToString().Trim();
        }
    }
}