
来源:互联网 发布:杀人漫画剧情知乎 编辑:程序博客网 时间:2024/05/28 15:35
#!/usr/bin/python
#encoding=utf-8
# Xiaonei auto-poster in Python -- please do not abuse.
#
# Logs in to xiaonei.com, downloads "best of" pages from qiushibaike.com and
# re-posts every story found there as a separate blog entry.
#
# Before use, find and change the user name and password (EMAIL / PASSWORD).
import cookielib
import sys
import time
import urllib
import urllib2
from xml.sax.saxutils import unescape

from BeautifulSoup import BeautifulSoup          # For processing HTML

USER_AGENT = "Mozilla/4.0 (compatible; MSIE 7.1; Windows NT 5.1; SV1)"
URL_LOGIN = 'http://xiaonei.com/Login.do'
URL_POST = 'http://blog.xiaonei.com/NewEntry.do'
URL_PAGE = "http://qiushibaike.com/qiushi/best/all/page/%d"

# Placeholder credentials as shipped with the script.
PLACEHOLDER_EMAIL = 'xxxxx@gmail.com'
PLACEHOLDER_PASSWORD = '*********'

# TODO: change the login name and password.
EMAIL = PLACEHOLDER_EMAIL
PASSWORD = PLACEHOLDER_PASSWORD

# Page numbers of the source site to download (currently only page 11).
PAGES = range(11, 12)


def formalize(text):
    """Return *text* with every non-blank line stripped and blank-line separated.

    The text is split on newlines, each line is stripped of surrounding
    whitespace, empty lines are dropped, and every remaining line is followed
    by two newline characters.

    This helper is not called anywhere in the script as shown here.
    """
    stripped = [line.strip() for line in text.split(u'\n')]
    return u''.join(line + u'\n\n' for line in stripped if line)


def make_opener():
    """Build a urllib2 opener that keeps cookies and sends a browser User-Agent."""
    jar = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))
    opener.addheaders = [("User-Agent", USER_AGENT)]
    return opener


def login(opener):
    """POST the credentials to the login URL through *opener*.

    The response body is not needed, so the response is closed right away;
    the session cookie is expected to be in the opener's cookie jar once
    ``open`` has returned.
    """
    credentials = (('email', EMAIL), ('password', PASSWORD))
    opener.open(URL_LOGIN, urllib.urlencode(credentials)).close()


def fetch_stories(page):
    """Download result page number *page* and return its stories as HTML strings.

    A story is every ``<div>`` with class ``content`` on the page.
    """
    response = urllib2.urlopen(URL_PAGE % page)
    try:
        html = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()
    soup = BeautifulSoup(html)
    return [str(div) for div in soup.findAll('div', "content")]


def post_story(opener, number, story):
    """Publish *story* (an HTML string) as blog entry number *number*.

    Any urllib2 error raised by the request propagates to the caller.
    """
    # NOTE(review): the story is re-parsed and serialized again exactly as the
    # original script did; this looks redundant -- confirm before removing it.
    text = str(BeautifulSoup(story))
    text += '<br/><a href="http://www.qiushibaike.com">来自糗事百科</a><br/>'
    form = {
        'relative_optype': 'publisher',
        'blogControl': '1',
        'title': '糗事-%d' % number,
        'body': text,
    }
    # Barring surprises, the entry is published once this request returns.
    opener.open(URL_POST, urllib.urlencode(form)).close()


def main():
    """Log in, then re-post every story from the configured pages."""
    # Only complain while the shipped placeholder credentials are still in use.
    if EMAIL == PLACEHOLDER_EMAIL or PASSWORD == PLACEHOLDER_PASSWORD:
        print("ERROR! you need to update the password to be successful!")

    opener = make_opener()
    login(opener)

    count = 0  # running number of stories handled, across all pages
    for page in PAGES:
        for story in fetch_stories(page):
            count += 1
            print("processing page %d, %d items added" % (page, count))
            post_story(opener, count, story)


if __name__ == "__main__":
    main()

0 0