遍历CSDN博客

来源:互联网 发布:数据库insert 编辑:程序博客网 时间:2024/05/06 04:14
--http://blog.csdn.net/leixiaohua1020/article/list/14?viewmode=contents

-- LuaSocket HTTP client, used by every request in this script.
local http = require("socket.http")

--- Write a string to "temp.html" (debugging aid), replacing any existing file.
-- @param data string to write
-- @return true on success, or nil plus an error message if the file cannot be opened
function saveData(data)
  local file, err = io.open("temp.html", "w")
  if not file then
    return nil, err
  end
  file:write(data)
  file:close()
  return true
end

--- Read the number of list pages from a blog's article-list page.
-- Looks for the pager <div> and extracts the digits between "共" and "页".
-- @param url address of the blog's list page
-- @return the page count as a string of digits, or nil plus an error message
function getPageCount(url)
  local resp, status = http.request(url)
  if not resp then
    return nil, "request failed: " .. tostring(status)
  end

  -- Plain (non-pattern) searches: these markers are literal text.
  local s = string.find(resp, "<div id=\"papelist\" class=\"pagelist\">", 1, true)
  if not s then
    return nil, "pager <div> not found"
  end
  local e = string.find(resp, "</div>", s, true)
  if not e then
    return nil, "pager <div> is not closed"
  end

  local divData = string.sub(resp, s, e + 5)
  --print(divData)

  -- A capture avoids byte-offset arithmetic that depends on how many bytes
  -- the source encoding uses for the two Chinese characters.
  local pageCount = string.match(divData, "共(%d+)页")
  if not pageCount then
    return nil, "page count not found in pager"
  end
  return pageCount
end

--- Print the title of every article listed on a user's blog.
-- Walks each list page and prints the text found after every "link_title"
-- marker. Prints a message and returns early when the username is missing or
-- the page count cannot be determined.
-- @param username blog account name
function getTitles(username)
  if username == nil or username == "" then
    print("username is nil")
    return
  end

  local preUrl = "http://blog.csdn.net/"
  local endUrl = "?viewmode=contents"

  local countText, err = getPageCount(preUrl .. username .. endUrl)
  -- The numeric for-loop needs a real number, not a string of digits.
  local pageCount = countText and tonumber(countText)
  if not pageCount then
    print("failed to get page count: " .. tostring(err))
    return
  end

  for page = 1, pageCount do
    local blogUrl = preUrl .. username .. "/article/list/" .. page .. endUrl
    local resp, status = http.request(blogUrl)
    if not resp then
      print("request failed for " .. blogUrl .. ": " .. tostring(status))
    else
      local _, pos = string.find(resp, "link_title", 1, true)
      while pos ~= nil do
        -- The article link ends with details/<id>"> ; the title follows it.
        local _, j = string.find(resp, "details/%d+\">", pos)
        if not j then
          break
        end
        local k = string.find(resp, "</a></span>", j, true)
        if not k then
          break
        end
        -- The +11 / -23 offsets skip the whitespace the page places around
        -- the title text; they are tied to the page's exact markup.
        print(string.sub(resp, j + 11, k - 23))
        _, pos = string.find(resp, "link_title", pos, true)
      end
    end
  end
end

getTitles("leixiaohua1020")



如果想保存一篇指定的博文(只要正文),该怎么做呢?

其实只要我们预先保存下博文的js、css和一些控制正文的html标记,然后把正文内容填充进去就可以了。

以下 Lua 脚本可以从博文页面中提取正文所在的元素,即:

<div id="article_content" class="article_content">正文</div>


start.html

<!-- start.html: page head plus the wrapper elements that precede the article body.
     The <div> elements opened below are not closed in this file. -->
<html>
<head>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
    <script src="http://static.blog.csdn.net/scripts/jquery.js" type="text/javascript"></script>
    <link rel="Stylesheet" type="text/css" href="http://static.blog.csdn.net/skin/skin-blue/css/style.css?v=1.1" />
    <link rel="shortcut icon" href="http://c.csdnimg.cn/public/favicon.ico" />
    <link type="text/css" rel="stylesheet" href="http://static.blog.csdn.net/scripts/SyntaxHighlighter/styles/default.css" />
</head>
<body>
<div id="container">
<script type="text/javascript">
    var username = "x_iya";
    var _blogger = username;
    var blog_address = "http://blog.csdn.net/x_iya";
    var static_host = "http://static.blog.csdn.net";
    var currentUserName = "";
</script>
<div id="body">
<div id="main">
<div class="main">
<div id="article_details" class="details">
end.html

<!-- end.html: closes the wrapper elements opened in start.html.
     start.html opens five <div> elements (#container, #body, #main, .main,
     #article_details); the final </div> before </body> closes #container,
     which was previously left open. -->
</div>
</div>
</div>
<script type="text/javascript" src="http://static.blog.csdn.net/scripts/newblog.min.js"></script>
</div>
</div>
</body>
</html>


lua代码:

--- Download a blog post and return its article-content fragment.
-- The fragment starts at the opening article_content <div> and stops just
-- before the first HTML comment ("<!--") that follows it.
-- @param url address of the blog post
-- @return the HTML fragment, or nil plus an error message
function GetHtml(url)
  -- Required lazily so the usage message still works without LuaSocket.
  local http = require("socket.http")
  local resp, status = http.request(url)
  if not resp then
    return nil, "request failed: " .. tostring(status)
  end

  local s = string.find(resp, "<div id=\"article_content\" class=\"article_content\">", 1, true)
  if not s then
    return nil, "article_content <div> not found"
  end

  -- Plain search is required here: as a Lua pattern, "<!--" treats "-" as a
  -- quantifier and would also match "<-" or "<!-".
  local e = string.find(resp, "<!--", s, true)
  if not e then
    return nil, "end-of-article marker (<!--) not found"
  end

  return string.sub(resp, s, e - 1)
end

--- Write a string to "csdn.html", replacing any existing file.
-- @param data string to write
-- @return true on success, or nil plus an error message if the file cannot be opened
function SaveData(data)
  local file, err = io.open("csdn.html", "w")
  if not file then
    return nil, err
  end
  file:write(data)
  file:close()
  return true
end

--- Read an entire file into a string.
-- @param filepath path of the file to read
-- @return the file contents, or nil plus an error message if the file cannot be opened
function ReadData(filepath)
  local file, err = io.open(filepath, "r")
  if not file then
    return nil, err
  end
  local data = file:read("*a")
  file:close()
  return data
end

--local url = "http://blog.csdn.net/x_iya/article/details/52327827"
-- `arg` is only set by the standalone interpreter, so guard against nil.
if arg and #arg == 1 then
  local url = arg[1]
  -- assert() turns a nil, err result into a readable error message.
  local startData = assert(ReadData("start.html"))
  local endData = assert(ReadData("end.html"))
  local content = assert(GetHtml(url))
  local html = startData .. content .. endData
  assert(SaveData(html))
else
  print("Usage: lua csdn.lua url")
end




0 0