存档
来源:互联网 发布:java 加断点快捷键 编辑:程序博客网 时间:2024/05/06 08:37
# -*- coding: utf-8 -*-import urllib2,cookielibimport urllibimport cStringIOimport datetimefrom PIL import Imagefrom lxml import etreeimport sysreload(sys)sys.setdefaultencoding('utf8')def setOpener(): cookie = cookielib.CookieJar() opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie)) opener.addheaders.append(('User-Agent','Mozilla/5.0 (Windows NT 5.1; rv:25.0) Gecko/20100101 Firefox/25.0')) return openerdef md5(str): import hashlib import types if type(str) is types.StringType: m = hashlib.md5() m.update(str) return m.hexdigest() else: return ''class spider: def __init__(self): self.opener=setOpener()#保存cookie信息 self.imgUrl='http://210.42.121.241/servlet/GenImg' self.loginUrl='http://210.42.121.241/servlet/Login' self.queryScoreUrl='http://210.42.121.241/servlet/Svlt_QueryStuScore' self.studentID='' self.password='' self.captcha=''#验证码 self.mainPageContent='' def getCaptcha(self): res =self.opener.open(urllib2.Request(self.imgUrl)) tempIm = cStringIO.StringIO(res.read()) im = Image.open(tempIm) return im #im.save('test.jpg') #im.show() #self.captcha = raw_input("验证码:") def loginMainPage(self): #需要post的数据 pwdMD5=md5(self.password) postdata = urllib.urlencode({ 'id':self.studentID, 'pwd':pwdMD5, 'xdvfb':self.captcha }) req = urllib2.Request( url = self.loginUrl, data = postdata ) response = self.opener.open(req) self.mainPageContent = response.read().decode('gb2312') def getAndSaveScore(self): page=etree.HTML(self.mainPageContent) text=page.xpath('//div[@id="school"]/@onclick') try: token=text[0][65:101] except IndexError: print "Error:未能正确打开主页面" return 0 else: GMT_FORMAT = '%a, %d %b %Y %H:%M:%S GMT' GMT_time=datetime.datetime.utcnow().strftime(GMT_FORMAT) getParams=urllib.urlencode({ 'csrftoken':token, 'learnType':'', 'scoreFlag':'0', 't':GMT_time, 'term':'', 'year':'0' }) url = self.queryScoreUrl fullUrl=url+'?'+getParams #print fullUrl req = urllib2.Request(fullUrl) response = self.opener.open(req) result = response.read().decode('gb2312') # 由于该网页是gb2312的编码,所以需要解码 #print result out=open('inputScore.html','wb') out.write(result) out.close() return 1#mySpider=spider()#mySpider.getCaptcha()#mySpider.loginMainPage()#mySpider.getAndSaveScore()
0 0
- 存档
- 存档
- 存档
- 存档
- 存档
- 存档
- 存档存档.....
- 个人存档
- soa存档
- COM+(存档)
- JXC存档
- 问题存档
- Android 存档
- c# 存档
- 错误存档
- [存档]NtGlobalFlags
- uinty3d 存档
- 好书存档
- mysql show status详解
- java项目之——坦克大战04.1
- 大数据系列修炼-Scala课程13+14
- 第四章:Linear Models for Classification exercise 25-26
- AAF技术及其在后期制作系统中的应用
- 存档
- 奥运赛事,精彩纷呈
- hdu 5811 Colosseo (拓扑排序 + 最长上升子序列)
- 机器人运动学_不同D-H矩阵的对比
- 浅析ButterKnife的实现 (一) —— 搭建开发框架
- Codeforces Round #367 (Div. 2) A(暴力) B(二分查找) C(DP) D(01字典树)
- AFNetworking 文件上传Data,File图片,文件等上传
- Mysql数据库优化
- 存档2