从百度空间到CSDN——博客搬家源码

来源：互联网发布：淘宝0销量下架删除编辑：程序博客网时间：2024/05/06 06:34

注意：下面的方法在CSDN博客改版以后已无法使用，因为现在CSDN博客不再支持MetaWeblog API（即代码中调用的metaWeblog.newPost接口），不知道什么时候可以恢复支持。

1.原文链接

http://hi.baidu.com/cnjsp/blog/item/e175cf1b27bc6af6ae513335.html

2.心得

本方法我测试过，是可以用的。一来感觉思路挺新颖的——程序员自己写代码解决自己的问题；二来可以通过这个实例学习一下Java，所以我贴出我修改后的Java代码。

具体思路可以参见原文。

3.代码

CSDNPost.java

package cn.mingyuan.baidu2csdn.core;import java.io.FileOutputStream;import java.io.IOException;import java.net.MalformedURLException;import java.net.URL;import java.util.Date;import java.util.HashMap;import java.util.Map;import org.apache.xmlrpc.XmlRpcException;import org.apache.xmlrpc.client.XmlRpcClient;import org.apache.xmlrpc.client.XmlRpcClientConfigImpl;/** * csdn博文 *  * @author mingyuanonline@gmail.com *  */public class CSDNPost {/** * 博文创建日期 */private Date dateCreated;/** * 博文内容 */private String description;/** * 标题 */private String title;/** * 博文分类 */private String[] categories;public CSDNPost() {}public CSDNPost(String title, String description, String[] categories,Date dateCreated) {this.dateCreated = dateCreated;this.description = description;this.title = title;this.categories = categories;}public Date getDateCreated() {return dateCreated;}public void setDateCreated(Date dateCreated) {this.dateCreated = dateCreated;}public String getDescription() {return description;}public void setDescription(String description) {this.description = description;}public String getTitle() {return title;}public void setTitle(String title) {this.title = title;}public String[] getCategories() {return categories;}public void setCategories(String[] categories) {this.categories = categories;}/** * xml-rpc配置 */private static XmlRpcClientConfigImpl config;/** * xml-rpcClient */private static XmlRpcClient client;static {config = new XmlRpcClientConfigImpl();try {// 此处请将telnetor替换为您的用户名config.setServerURL(new URL("http://blog.csdn.net/xw13106209/services/metablogapi.aspx"));} catch (MalformedURLException e) {System.out.println("请检查url");}client = new XmlRpcClient();client.setConfig(config);}/** * 日志记录 *  * @param log *            log */private void writelog(String log) {FileOutputStream fos = null;try {fos = new FileOutputStream("post.log", true);fos.write((log + "\r\n").getBytes());fos.flush();fos.close();} catch (IOException e) {System.out.println("写入日志错误：" + log);}}/** * 发布 
*/public void publish() {Map<String, Object> struct = new HashMap<String, Object>();struct.put("dateCreated", dateCreated);struct.put("description", description);struct.put("title", title);struct.put("categories", categories);//Object[] params = new Object[] { "your usrname",//"replace it with your username",//"replace it with your password", struct, true };Object[] params = new Object[] { "xw13106209","xw13106209","password", struct, true };String blogid = null;try {blogid = (String) client.execute("metaWeblog.newPost", params);} catch (XmlRpcException e) {writelog("导入出现错误：title=" + title);System.out.println("导入出现错误：title=" + title);}writelog(title + ">> 导入完毕,生成博文id为>>" + blogid);System.out.println(title + ">> 导入完毕,生成博文id为>>" + blogid);struct.clear();}public static void main(String[] args) {CSDNPost post = new CSDNPost();post.publish();}}

BaiduHi

package cn.mingyuan.baidu2csdn.core;import java.util.Date;/** * 百度博客 *  * @author mingyuanonline@gmail.com *  */public class BaiduHi {/** * 标题 */private String title;/** * 内容 */private String description;/** * 分类 */private String categories;/** * 发布日期 */private Date dateCreated;public String getTitle() {return title;}public String getDescription() {return description;}public String getCategories() {return categories;}public Date getDateCreated() {return dateCreated;}public void setTitle(String title) {this.title = title;}public void setDescription(String description) {this.description = description;}public void setCategories(String categories) {this.categories = categories;}public void setDateCreated(Date dateCreated) {this.dateCreated = dateCreated;}public BaiduHi(String title, String description, String categories,Date dateCreated) {this.title = title;this.description = description;this.categories = categories;this.dateCreated = dateCreated;}public BaiduHi() {// TODO Auto-generated constructor stub}/** * @param args */public static void main(String[] args) {// TODO Auto-generated method stub}}

BaiduHiFetcher

package cn.mingyuan.baidu2csdn.core;  import java.io.BufferedReader;import java.io.IOException;import java.io.InputStream;import java.io.InputStreamReader;import java.net.MalformedURLException;import java.net.URL;import java.net.URLConnection;import java.util.ArrayList;import java.util.Date;import java.util.List;import java.util.Stack;import java.util.regex.Matcher;import java.util.regex.Pattern;/** * 百度博客数据抓取及解析 *  * @author mingyuanonline@gmail.com *  */public class BaiduHiFetcher {/** * 下载页面 *  * @param url *            url * @return 网页源码 */private String downloadPage(String url) {URLConnection conn;InputStream in;BufferedReader reader = null;StringBuilder sb = new StringBuilder();String line = null;try {conn = new URL(url).openConnection();in = conn.getInputStream();reader = new BufferedReader(new InputStreamReader(in, "gb2312"));while ((line = reader.readLine()) != null) {sb.append(line);}in.close();reader.close();} catch (MalformedURLException e) {System.out.println("请检查url是否规范");} catch (IOException e) {System.out.println("读取源码错误:" + url);}return sb.toString();}/** * 获取页面博文链接 *  * @param html *            网页源码 * @return 页面中的博文链接 */private List<String> getPostLinks(String html) {// 分析页面内容，取得页面中的文章链接String titleDivRegex = "<div[\\s]class=\"tit\"><a[\\s]href=[^<>]+?target=\"_blank\">.+?</div>";Pattern titleDivPattern = Pattern.compile(titleDivRegex);Matcher titleDivMatcher = titleDivPattern.matcher(html);List<String> posts = new ArrayList<String>();while (titleDivMatcher.find()) {String div = titleDivMatcher.group();String titleUrl = div.substring(div.indexOf("/"), div.indexOf("\" target"));posts.add("http://hi.baidu.com" + titleUrl);}return posts;}/** * <p> * 获取博客总页数 <br> * 我的博客内容有16页，有上一页，下一页，尾页等这样的标志，如果博文少的话可能这些标志不会出现，请修改此方法 *  * @param html *            源码（最好是第一页） * @return 博客总页数 */private int getTotalPages(String html) {// 页码// <a href="/cnjsp/blog/index/16"// mce_href="cnjsp/blog/index/16">[尾页]</a>String pageRegex = 
"<a[\\s]href=\"/cnjsp/blog/index/[\\d][\\d]\">\\[尾页\\]</a>";Pattern pagePattern = Pattern.compile(pageRegex);Matcher pageMatcher = pagePattern.matcher(html);String totalPagesStr = null;int pages = 0;if (pageMatcher.find()) {String pagelink = pageMatcher.group();totalPagesStr = pagelink.replaceAll("<a[\\s]href=\"/cnjsp/blog/index/", "").replaceAll("\">\\[尾页\\]</a>", "");pages = Integer.parseInt(totalPagesStr);}return pages;}/** * <p> * 获取博客的所有博文的地址 <br> * 没有对url进行编码处理，如果博客地址含中文，请对url进行处理 *  * @param blogUrl *            博客地址 * @return 所有博文地址，存放于栈中，使用的时候请使用pop方法取出元素，这样可以保证按照最先发表的博文最先处理 */public Stack<String> getAllPostLink(String blogUrl) {Stack<String> posts = new Stack<String>();// 1.下载第一页String firstPageHtml = downloadPage(blogUrl + "/blog/index/0");// 2.获取博文总页数//int totalPages = getTotalPages(firstPageHtml);int totalPages = 2;// 3.下载各摘要页posts.addAll(getPostLinks(firstPageHtml));if (totalPages < 1) {return posts;}for (int i = 1; i <= totalPages; i++) {String page = downloadPage(blogUrl + "/blog/index/" + i);posts.addAll(getPostLinks(page));}return posts;}/** * 解析博文，获取标题，发布时间，内容，分类等信息 *  * @param postUrl *            博文地址 * @return 封装了博文信息的BaiduHi */public BaiduHi getBaiduHi(String postUrl) {String html = downloadPage(postUrl);// /<div class="tit">String titleDivRegex = "<div[\\s]id=\"m_blog\"[\\s]class=\"modbox\"[\\s]style=\"overflow-x:hidden;\"><div[\\s]class=\"tit\">.+?</div><div[\\s]class=\"date\">";Pattern titleDivPattern = Pattern.compile(titleDivRegex);Matcher titleDivMatcher = titleDivPattern.matcher(html);String title = null;if (titleDivMatcher.find()) {title = titleDivMatcher.group().replaceAll("<div[\\s]id=\"m_blog\"[\\s]class=\"modbox\"[\\s]style=\"overflow-x:hidden;\"><div[\\s]class=\"tit\">","").replaceAll("</div><div[\\s]class=\"date\">", "").trim();}String dateDivRegex = "<div[\\s]class=\"date\">.+?</div>";Pattern dateDivPattern = Pattern.compile(dateDivRegex);Matcher dateMatcher = dateDivPattern.matcher(html);String dateStr = null;Date postDate = 
null;if (dateMatcher.find()) {dateStr = dateMatcher.group().replaceAll("<div[\\s]class=\"date\">", "").replaceAll("</div>", "").trim();postDate = getDate(dateStr);}String textDivRegex = "<div[\\s]id=\"blog_text\"[\\s]class=\"cnt\"[\\s]+>.+?</div>";Pattern textDivPattern = Pattern.compile(textDivRegex);Matcher textMatcher = textDivPattern.matcher(html);String text = null;if (textMatcher.find()) {text = textMatcher.group().replaceAll("<div[\\s]id=\"blog_text\"[\\s]class=\"cnt\"[\\s]+>", "").replaceAll("</div>", "").trim();}String categoriesRegex = "title=\"查看该分类中所有文章\">类别：.+?</a>";Pattern categoriesDivPattern = Pattern.compile(categoriesRegex);Matcher categoriesMatcher = categoriesDivPattern.matcher(html);String categories = null;if (categoriesMatcher.find()) {categories = categoriesMatcher.group().replaceAll("title=\"查看该分类中所有文章\">类别：", "").replaceAll("</a>", "").trim();}BaiduHi hi = new BaiduHi();hi.setTitle(title);hi.setDescription(text);hi.setCategories(categories);hi.setDateCreated(postDate);return hi;}/** * 解析博文中的日期格式返回Date类型 * 日期格式为：2011年07月01日 星期五 下午 01:05 * @param str *            博文中的日期 * @return Date类型日期 */@SuppressWarnings("deprecation")private Date getDate(String str) {String yearStr = str.substring(0, str.indexOf("年")).trim();String monthStr = str.substring(str.indexOf("年"), str.indexOf("月")).replace("年", "").trim();String dayStr = str.substring(str.indexOf("月"), str.indexOf("日")).replace("月", "").trim();String timeStr = str.substring(str.indexOf("午")).replace("午", "").trim();String hourStr = timeStr.split(":")[0];String minutesStr = timeStr.split(":")[1];Date date = new Date();date.setYear(Integer.parseInt(yearStr) - 1900);date.setMonth(Integer.parseInt(monthStr) - 1);date.setDate(Integer.parseInt(dayStr));if (str.contains("下午")) {date.setHours(Integer.parseInt(hourStr) + 12);} else {date.setHours(Integer.parseInt(hourStr));}date.setMinutes(Integer.parseInt(minutesStr));return date;}}

Transfer

package cn.mingyuan.baidu2csdn.core;import java.util.Stack;/** * 搬家 *  * @author mingyuanonline@gmail.com *  */public class Transfer {/** * @param args */public static void main(String[] args) {// TODO Auto-generated method stub//String postUrl = "http://hi.baidu.com/cnjsp";String postUrl = "http://hi.baidu.com/xwdreamer";BaiduHiFetcher fetcher = new BaiduHiFetcher();Stack<String> urls = null;urls = fetcher.getAllPostLink(postUrl);while (!urls.isEmpty()) {String url = urls.pop();BaiduHi hi = null;hi = fetcher.getBaiduHi(url);CSDNPost post = new CSDNPost();post.setTitle(hi.getTitle());post.setDescription(hi.getDescription());post.setCategories(new String[] { hi.getCategories() });post.setDateCreated(hi.getDateCreated());post.publish();try {Thread.sleep(5 * 1000);} catch (InterruptedException e) {System.out.println("休眠出错");}}}}

DeletePostById

package cn.mingyuan.baidu2csdn.core;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;

import org.apache.xmlrpc.XmlRpcException;
import org.apache.xmlrpc.client.XmlRpcClient;
import org.apache.xmlrpc.client.XmlRpcClientConfigImpl;

/**
 * Deletes previously imported posts by id through the {@code blogger.deletePost} XML-RPC
 * call. The ids are read from the import log lines stored in the file {@code content}.
 */
public class DeletePostById {

    /**
     * XML-RPC endpoint of the blog. Replace the {@code telnetor} path segment with your own
     * user name.
     */
    private static final String SERVER_URL =
            "http://blog.csdn.net/telnetor/services/metablogapi.aspx";

    /** File holding the import log lines whose post ids should be deleted. */
    private static final String ID_FILE = "content";

    /** Value sent as application key when the caller supplies none. */
    private static final String DEFAULT_APPKEY = "ignored value";

    /**
     * Texts that precede the post id in a log line: the full-width-colon form this tool
     * originally looked for, and the {@code >>} form that CSDNPost writes.
     */
    private static final String[] ID_MARKERS = { "生成博文id为：", "生成博文id为>>" };

    /** Pause between two delete calls, in milliseconds. */
    private static final long PAUSE_MILLIS = 1000 * 10;

    private static XmlRpcClientConfigImpl config;

    private static XmlRpcClient client;

    static {
        config = new XmlRpcClientConfigImpl();
        try {
            config.setServerURL(new URL(SERVER_URL));
        } catch (MalformedURLException e) {
            System.out.println("请检查url");
            // Without a server URL every later call would fail in a confusing way.
            throw new IllegalStateException("Invalid XML-RPC endpoint: " + SERVER_URL, e);
        }
        client = new XmlRpcClient();
        client.setConfig(config);
    }

    /**
     * Deletes one post.
     *
     * @param appkey   application key; {@code null} is replaced by a placeholder
     * @param postid   id of the post to delete
     * @param username account name
     * @param password account password
     * @param publish  whether the blog is republished after the post has been deleted
     */
    public static void delete(String appkey, String postid, String username,
            String password, boolean publish) {
        String key = appkey == null ? DEFAULT_APPKEY : appkey;
        Object[] params = new Object[] { key, postid, username, password, publish };
        try {
            client.execute("blogger.deletePost", params);
        } catch (XmlRpcException e) {
            System.out.println("删除出错，postid=" + postid);
            return; // do not report success after a failure
        }
        System.out.println(postid + "删除完毕");
    }

    /**
     * Extracts the post id from one log line.
     *
     * @param line log line
     * @return the text after the last id marker, trimmed; {@code null} when the line has no
     *         marker or the id is empty or the literal "null" (a failed import)
     */
    private static String extractPostId(String line) {
        for (String marker : ID_MARKERS) {
            int at = line.lastIndexOf(marker);
            if (at < 0) {
                continue;
            }
            String id = line.substring(at + marker.length()).trim();
            if (id.length() == 0 || "null".equals(id)) {
                return null;
            }
            return id;
        }
        return null;
    }

    /**
     * Reads the log lines from the id file and deletes every post found, pausing between
     * calls. Lines that carry no post id (for example error lines) are skipped.
     *
     * @param args unused
     * @throws InterruptedException if the pause between two deletions is interrupted
     */
    public static void main(String[] args) throws InterruptedException {
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new InputStreamReader(new FileInputStream(ID_FILE)));
            String line;
            while ((line = reader.readLine()) != null) {
                String postid = extractPostId(line);
                if (postid == null) {
                    continue;
                }
                delete("ignored", postid, "your username", "your password", true);
                Thread.sleep(PAUSE_MILLIS);
            }
        } catch (FileNotFoundException e1) {
            System.out.println("文件没找到");
        } catch (IOException e) {
            System.out.println("读取文件失败");
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done when closing fails.
                }
            }
        }
    }
}