using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using lintTools;

namespace ty
{
    /// <summary>
    /// Console tool that downloads forum articles from tianya.cn and saves them as text files.
    /// It either downloads a single article (<c>-s:</c>) or walks a list page and downloads
    /// the articles linked from it, following the "next page" link up to a page limit.
    /// </summary>
    /// <remarks>
    /// Relies on the project helpers <c>LintSys</c>, <c>Net</c>, <c>Trans</c> and <c>Reg</c>,
    /// which are declared outside this file.
    /// </remarks>
    class Program
    {
        /// <summary>List page used when no <c>-u:</c> argument is supplied.</summary>
        private const string DefaultListUrl = "http://cache.tianya.cn/pub/list/0/culture.shtml";

        /// <summary>Parses one command-line argument of the form <c>-x:value</c> (x in s,u,p,d,f).</summary>
        private static readonly Regex ArgRegex =
            new Regex(@"-(?<paramName>[supdf]):(?<paramValue>\S*?)$", RegexOptions.Singleline);

        /// <summary>Extracts the page title from the HTML of an article page.</summary>
        private static readonly Regex PageTitleRegex =
            new Regex("<TITLE>(?<title>.*?)</TITLE>", RegexOptions.Singleline);

        /// <summary>Extracts the author name from the first author link on an article page.</summary>
        private static readonly Regex PageWriterRegex =
            new Regex(@"作者:<a .*?>(?<writer>.*?)</a>", RegexOptions.Singleline);

        /// <summary>Finds the "next page" link inside an article page.</summary>
        private static readonly Regex ArticleNextPageRegex =
            new Regex(@"<a \S*? href=(?<url>\S*?)>下一页</a>", RegexOptions.Singleline);

        /// <summary>Finds article links (url, title, writer) on a list page.</summary>
        private static readonly Regex ListEntryRegex =
            new Regex(
                @"<a href='(?<url>http://cache.tianya.cn/publicforum/content\S*)'.*?>(?<title>.*?)<.*?vwriter=(?<writer>.*?)'",
                RegexOptions.Singleline);

        /// <summary>Finds the "next page" link on a list page.</summary>
        private static readonly Regex ListNextPageRegex =
            new Regex(@"<a href=(?<url>\S*)?>下一页</a>", RegexOptions.Singleline);

        /// <summary>
        /// Entry point: prints the usage banner, parses the arguments and runs the selected mode.
        /// </summary>
        /// <param name="args">
        /// Arguments of the form <c>-s:url</c>, <c>-u:url</c>, <c>-p:pages</c>, <c>-d:dir</c>,
        /// <c>-f:bytes</c>. Arguments that do not match this shape are ignored.
        /// </param>
        static void Main(string[] args)
        {
            LintSys.WriteLine("********************天涯文章下载大师v0.1 Copyright By lintg.200801***********", ConsoleColor.Yellow);
            LintSys.WriteLine("-s:单篇文章地址,默认取文章名称为下载文件名称,默认下载多页", ConsoleColor.Yellow);
            LintSys.WriteLine("-u:下载页面的首页,默认取天涯舞文弄墨首页", ConsoleColor.Yellow);
            LintSys.WriteLine("-p:下载页数,默认取100页", ConsoleColor.Yellow);
            LintSys.WriteLine("-d:下载目录,默认取当前目录", ConsoleColor.Yellow);
            LintSys.WriteLine("-f:过滤文件大小,默认过滤4000字节以下文件", ConsoleColor.Yellow);

            string downMode = "m";
            string url = DefaultListUrl;
            string downDir = "./";
            string sUrl = "";
            int downPage = 100;
            int filterBytes = 4000;

            foreach (string arg in args)
            {
                Match m = ArgRegex.Match(arg);
                if (!m.Success)
                {
                    continue;
                }

                string value = m.Groups["paramValue"].Value;
                int number;
                switch (m.Groups["paramName"].Value)
                {
                    case "s":
                        downMode = "s";
                        sUrl = value;
                        Console.WriteLine(sUrl);
                        break;
                    case "u":
                        url = value;
                        break;
                    case "p":
                        // Keep the default when the value is not a valid integer.
                        if (int.TryParse(value, out number))
                        {
                            downPage = number;
                        }
                        else
                        {
                            Console.WriteLine("参数错误,-p:页数");
                        }
                        break;
                    case "d":
                        downDir = value;
                        break;
                    case "f":
                        if (int.TryParse(value, out number))
                        {
                            filterBytes = number;
                        }
                        else
                        {
                            Console.WriteLine("参数错误,-f:过滤文件大小");
                        }
                        break;
                }
            }

            if (downMode == "s")
            {
                DownloadSingle(sUrl, downDir);
            }
            else
            {
                DownloadList(url, downPage, downDir, filterBytes);
            }
        }

        /// <summary>
        /// Downloads one article and writes it to <c>&lt;downDir&gt;/&lt;title&gt;.txt</c>.
        /// </summary>
        /// <param name="sUrl">Address of the article's first page.</param>
        /// <param name="downDir">Directory the file is written to.</param>
        private static void DownloadSingle(string sUrl, string downDir)
        {
            string writer = "";
            string title = "";
            if (!GetAuthor(sUrl, ref writer, ref title))
            {
                // Without the writer the post pattern would match every post, and without
                // the title there is no usable file name, so stop here.
                Console.WriteLine("无法获取文章标题或作者:" + sUrl);
                return;
            }

            string content = GetArticle(sUrl, writer);

            // Strip the same characters list mode strips from titles before using the
            // title as a file name. NOTE(review): Reg.dirStr is declared elsewhere;
            // presumably it matches characters that are illegal in file names - confirm.
            string fileName = new Regex(Reg.dirStr).Replace(title, "").Trim() + ".txt";
            LintSys.WriteFile(Path.Combine(downDir, fileName), content);
        }

        /// <summary>
        /// Walks list pages starting at <paramref name="url"/> and downloads the linked articles.
        /// </summary>
        /// <param name="url">Address of the first list page.</param>
        /// <param name="downPage">Maximum number of list pages to process.</param>
        /// <param name="downDir">Directory the files are written to.</param>
        /// <param name="filterBytes">Articles whose text is not longer than this are skipped.</param>
        private static void DownloadList(string url, int downPage, string downDir, int filterBytes)
        {
            for (int page = 0; page < downPage; page++)
            {
                LintSys.WriteLine("第" + (page + 1).ToString() + "页:" + url, ConsoleColor.Red);

                string nextUrl = null;
                SortedList[] arrTitle = GetTitle(url, ref nextUrl);

                // NOTE(review): the first and last matches of each list page are skipped,
                // as in the original loop bounds; GetTitle leaves slot 0 empty and the last
                // slot unset. Whether this is deliberate cannot be told from this file.
                for (int i = 1; i < arrTitle.Length - 1; i++)
                {
                    SortedList entry = arrTitle[i];
                    string title = entry["title"].ToString();
                    string writer = entry["writer"].ToString();

                    LintSys.WriteLine(
                        (page + 1).ToString() + "-" + i.ToString() + ",下载文章:" + title + ",作者:" + writer,
                        ConsoleColor.Green);

                    string content = GetArticle(entry["url"].ToString(), writer);
                    if (content.Length > filterBytes)
                    {
                        // Honour the -d directory in list mode too.
                        LintSys.WriteFile(Path.Combine(downDir, title + ".txt"), content, FileMode.Create);
                    }
                    else
                    {
                        Console.WriteLine("文件:" + content.Length.ToString() + "<" + filterBytes.ToString() + ",被过滤");
                    }
                }

                // Stop when the page has no "next page" link instead of re-requesting a stale URL.
                if (string.IsNullOrEmpty(nextUrl))
                {
                    break;
                }
                url = nextUrl;
            }
        }

        /// <summary>
        /// Fetches an article page and extracts its title and author.
        /// </summary>
        /// <param name="url">Address of the article page.</param>
        /// <param name="writer">Set to the author name when one is found; otherwise left unchanged.</param>
        /// <param name="title">Set to the page title when one is found; otherwise left unchanged.</param>
        /// <returns><c>true</c> when both the title and the author were found.</returns>
        static bool GetAuthor(string url, ref string writer, ref string title)
        {
            CookieContainer cc = new CookieContainer();
            // Net.GetContent is declared elsewhere; guard in case it yields null.
            string content = Net.GetContent(url, ref cc) ?? string.Empty;

            bool found = true;

            Match titleMatch = PageTitleRegex.Match(content);
            if (titleMatch.Success)
            {
                title = titleMatch.Groups["title"].Value;
            }
            else
            {
                found = false;
            }

            Match writerMatch = PageWriterRegex.Match(content);
            if (writerMatch.Success)
            {
                writer = writerMatch.Groups["writer"].Value;
            }
            else
            {
                found = false;
            }

            return found;
        }

        /// <summary>
        /// Downloads every page of an article and concatenates the posts written by
        /// <paramref name="writer"/>, following the article's "next page" links.
        /// </summary>
        /// <param name="url">Address of the article's first page.</param>
        /// <param name="writer">Author name used to pick the author's posts out of the page HTML.</param>
        /// <returns>
        /// The collected posts, each prefixed with <c>(n)</c> and a newline, passed through
        /// <c>Trans.ReplaceHtml</c>.
        /// </returns>
        static string GetArticle(string url, string writer)
        {
            StringBuilder collected = new StringBuilder();
            int postIndex = 0;
            CookieContainer cc = new CookieContainer();

            // Escape the whole author name so that any regex metacharacter in it is literal.
            Regex postRegex = new Regex(
                Regex.Escape(writer) + "</a>.*?</table>(?<content>.*?)(<TABLE)",
                RegexOptions.Singleline);

            while (true)
            {
                Console.WriteLine("连接" + url + ".....");
                string content = Net.GetContent(url, ref cc) ?? string.Empty;

                MatchCollection posts = postRegex.Matches(content);
                LintSys.WriteLine("匹配回帖:" + posts.Count.ToString(), ConsoleColor.Yellow);

                foreach (Match post in posts)
                {
                    string replyContent = post.Groups["content"].Value.Trim();

                    // A post counts only when it is long enough: more than 50 characters,
                    // or more than 100 when it starts with "作者".
                    int minLength = replyContent.StartsWith("作者", StringComparison.Ordinal) ? 100 : 50;
                    if (replyContent.Length > minLength)
                    {
                        collected.Append("(").Append(postIndex++).Append(")\n").Append(replyContent);
                    }
                }

                Match next = ArticleNextPageRegex.Match(content);
                if (!next.Success)
                {
                    break;
                }
                url = next.Groups["url"].Value;
            }

            LintSys.WriteLine("下载完成....", ConsoleColor.DarkGreen);
            return Trans.ReplaceHtml(collected.ToString());
        }

        /// <summary>
        /// Fetches a list page and extracts the article links it contains.
        /// </summary>
        /// <param name="url">Address of the list page.</param>
        /// <param name="nextUrl">
        /// Set to the address of the next list page, or to <c>null</c> when the page has no
        /// "next page" link.
        /// </param>
        /// <returns>
        /// An array with one slot per matched link. Slot 0 is an empty <see cref="SortedList"/>,
        /// the last slot is left unset, and the slots in between hold the keys
        /// <c>"url"</c>, <c>"title"</c> and <c>"writer"</c>. The array is empty when nothing matched.
        /// </returns>
        static SortedList[] GetTitle(string url, ref string nextUrl)
        {
            CookieContainer cc = new CookieContainer();
            string content = Net.GetContent(url, ref cc) ?? string.Empty;

            // Dump of the fetched page, kept from the original.
            LintSys.WriteFile("log.txt", content);

            MatchCollection mc = ListEntryRegex.Matches(content);
            SortedList[] title = new SortedList[mc.Count];
            Regex illegalChars = new Regex(Reg.dirStr);

            // Slot 0 is replaced by an empty placeholder below and the last slot is never
            // filled, so only the slots in between need populating.
            for (int i = 1; i < mc.Count - 1; i++)
            {
                SortedList entry = new SortedList();
                entry["url"] = mc[i].Groups["url"].Value;
                entry["title"] = illegalChars.Replace(mc[i].Groups["title"].Value, "");
                entry["writer"] = mc[i].Groups["writer"].Value;
                title[i] = entry;
            }

            // Guard against a page with no matches, which would make index 0 invalid.
            if (mc.Count > 0)
            {
                title[0] = new SortedList();
            }

            Match next = ListNextPageRegex.Match(content);
            nextUrl = next.Success ? next.Groups["url"].Value : null;

            return title;
        }
    }
}
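// NOTE(review): stray URL that followed the code in the original paste (presumably a download link for this tool - confirm): http://info95.vicp.net/info95/non-cgi/usr/5/5_6.rar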