问卷星python自动提交
来源:互联网 发布:ubuntu默认开启小键盘 编辑:程序博客网 时间:2024/05/16 18:01
这是针对需要验证码的问卷编写的爬虫;对于不需要验证码的问卷,只需在此爬虫的基础上稍作修改即可。
1.下载fiddler4(用于抓包)
方式自行百度
2.查看并分析cookie
请求头里的可变字段(例如 ip、cookie 末尾的时间戳)记得保证每次请求都随机或实时生成。
'Host': 'www.wjx.cn',#host地址 'Connection': 'keep-alive', 'X-Forwarded-For': ip,#自行设置ip,随机ip99%为外国ip,如果要中国大陆的,自行搜索 'Origin': 'https://www.wjx.cn', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.89 Safari/537.36 EXT/6d8a2f10c62d11e7gqpxa53987ed19aa47e3/2.4',#伪装浏览器 'Content-Type': 'application/x-www-form-urlencoded', 'Accept': '*/*', 'Referer': 'https://www.wjx.cn/jq/'+qid+'.aspx',#问卷地址 'Accept-Encoding': 'gzip, deflate, br', 'Accept-Language': 'zh-CN,zh;q=0.9', 'Cookie': '.ASPXANONYMOUS=Se6Dlf-S0wEkAAAAMzEyZGYyZmUtYzBmYi00YWM3LWIyMTEtMTEzZWI0YzkzMmZhi6xL6iHoMTghIlPoznFqbYuLd1s1; spiderregkey=www.wjx.cn%c2%a7%c2%a71; baidutgkey=%u95EE%u5377%u661FBH%7C2%7Cbaidu; _uab_collina=151065406900158178719624; SojumpSurvey=01022D8896C0612BD508FE2D28A847832BD508000670002D00740065007300740000012F00FF29B0D12A4780F0718D63D71441EC14F08F69B611; lllogcook=1; LastCheckUpdateDate=1; ASP.NET_SessionId=4mbujabo1zx2a1imb0pw40k0; LastActivityJoin=16276361,101135464182; _umdata=C234BF9D3AFA6FE7FD70ECA73142BFB1DAA8AC4CAD8E980472CE17B2B4815B078B6B64C8E7D1428ACD43AD3E795C914CB6CD457CEA3135697A8EEEB6A2679E66; Hm_lvt_21be24c80829bd7a683b2c536fcf520b=1510624314,1510653859,1510658882,1510665316; Hm_lpvt_21be24c80829bd7a683b2c536fcf520b='+timep,#cookie是最重要的,如果本cookie不能用,抓包换cookie,记得Hm_lpvt_21be24c80829bd7a683b2c536fcf520b='+timep(这句)变量改成这样 'RA-Ver': '2.4', 'RA-Sid': '6d8a2f10c62d11e7gqpxa53987ed19aa47e3',
3.识别验证码
参考http://blog.csdn.net/gcs1024/article/details/77807537
4.添加data
抓包分析传输数据(选项),每个问卷都不一样
5. 其他
其余杂项请根据抓包结果自行分析。
示例代码
"""Example: submit one wjx.cn questionnaire that is protected by a captcha.

One submission (see ``post``) downloads the captcha image, cleans it up,
OCRs it with pytesseract and then POSTs randomly chosen answers. The header
values, cookies and the ``submitdata`` layout below were captured for one
specific questionnaire and have to be captured again for any other one.
"""
import os
import random
import time as t
import urllib.parse
import urllib.request
from time import time, strftime, localtime

import pytesseract
import requests
from PIL import Image

qid = str(16454455)  # questionnaire id, used in the /jq/<qid>.aspx URL
rnqian = str(2063096382)  # sent as the ``rn`` query parameter on submit


def download(qid, header, i):
    """Fetch the captcha image for questionnaire *qid* and save it as ``<i>.gif``.

    Args:
        qid: Questionnaire id as a string.
        header: Dict of HTTP request headers to send with the GET.
        i: Integer used to build the output file name (``'%d.gif' % i``).
    """
    url = 'https://www.wjx.cn/AntiSpamImageGen.aspx?q=' + qid + '&t=' + str(int(time() * 1000))
    req = urllib.request.Request(url, headers=header)
    # Context managers close the HTTP response and the file even on error.
    with urllib.request.urlopen(req) as resp:
        data = resp.read()
    with open('%d.gif' % (i), 'wb') as pic:
        pic.write(data)


def binarizing(img):
    """Threshold a gray image in place: pixels above 30 become 255, others 0.

    Args:
        img: Gray-scale image (mode ``'L'``).

    Returns:
        The same image object, modified in place.
    """
    threshold = 30
    pixdata = img.load()
    w, h = img.size
    for y in range(h):
        for x in range(w):
            if pixdata[x, y] > threshold:
                pixdata[x, y] = 255
            else:
                pixdata[x, y] = 0
    return img


def depoint(img):
    """Whiten interior pixels that have more than two bright 4-neighbours.

    A neighbour counts as bright when its value is above 245. The image is
    edited in place while it is scanned, so pixels changed earlier in the
    scan influence the decision for later ones. Border pixels are untouched.

    Args:
        img: Gray-scale image (mode ``'L'``).

    Returns:
        The same image object, modified in place.
    """
    pixdata = img.load()
    w, h = img.size
    for y in range(1, h - 1):
        for x in range(1, w - 1):
            count = 0
            if pixdata[x, y - 1] > 245:
                count = count + 1
            if pixdata[x, y + 1] > 245:
                count = count + 1
            if pixdata[x - 1, y] > 245:
                count = count + 1
            if pixdata[x + 1, y] > 245:
                count = count + 1
            if count > 2:
                pixdata[x, y] = 255
    return img


def shibie(img):
    """OCR *img* with pytesseract, print the text and return it stripped.

    The image is converted to gray scale and thresholded at 140 into a
    1-bit image before recognition.

    Args:
        img: PIL image holding the captcha.

    Returns:
        The recognised text with surrounding whitespace removed.
    """
    imgry = img.convert('L')
    threshold = 140
    table = [0 if level < threshold else 1 for level in range(256)]
    out = imgry.point(table, '1')
    # Recognise once and reuse the result for both the log line and the return.
    text = str(pytesseract.image_to_string(out)).strip()
    print(text)
    return text
# The original note says this only suits simple codes.


def post(qid, rnqian, i):
    """Download and OCR the captcha, then submit one randomly filled answer sheet.

    Prints the body of the server response.

    Args:
        qid: Questionnaire id as a string.
        rnqian: Value sent as the ``rn`` query parameter.
        i: Integer used for the captcha file name ``<i>.gif``.
    """
    timeg = str(int(time() * 1000))
    t.sleep(10)  # NOTE(review): presumably imitates a human delay - confirm
    timep = str(int(time() * 1000))
    # Dotted quad with every octet drawn from 1..4.
    ip = str(random.randint(1, 4)) + '.' + str(random.randint(1, 4)) + '.' + str(random.randint(1, 4)) + '.' + str(random.randint(1, 4))
    rnhou = str(random.randint(10000000, 99999999))  # NOTE(review): never used below

    user_agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.89 Safari/537.36 EXT/6d8a2f10c62d11e7gqpxa53987ed19aa47e3/2.4'
    referer = 'https://www.wjx.cn/jq/' + qid + '.aspx'
    # Cookie fields that are identical in the GET and the POST request.
    cookie_head = (
        '.ASPXANONYMOUS=Se6Dlf-S0wEkAAAAMzEyZGYyZmUtYzBmYi00YWM3LWIyMTEtMTEzZWI0YzkzMmZhi6xL6iHoMTghIlPoznFqbYuLd1s1; '
        'spiderregkey=www.wjx.cn%c2%a7%c2%a71; '
        'baidutgkey=%u95EE%u5377%u661FBH%7C2%7Cbaidu; '
        '_uab_collina=151065406900158178719624; '
        'SojumpSurvey=01022D8896C0612BD508FE2D28A847832BD508000670002D00740065007300740000012F00FF29B0D12A4780F0718D63D71441EC14F08F69B611; '
        'lllogcook=1; '
        'LastCheckUpdateDate=1; '
        'ASP.NET_SessionId=4mbujabo1zx2a1imb0pw40k0; '
    )
    umdata = '_umdata=C234BF9D3AFA6FE7FD70ECA73142BFB1DAA8AC4CAD8E980472CE17B2B4815B078B6B64C8E7D1428ACD43AD3E795C914CB6CD457CEA3135697A8EEEB6A2679E66; '
    hm_lvt = 'Hm_lvt_21be24c80829bd7a683b2c536fcf520b=1510624314,1510653859,1510658882,1510665316; '
    hm_lpvt = 'Hm_lpvt_21be24c80829bd7a683b2c536fcf520b='

    # Headers for fetching the captcha image.
    headerget = {
        'Host': 'www.wjx.cn',
        'Connection': 'keep-alive',
        'X-Forwarded-For': ip,
        'User-Agent': user_agent,
        'Accept': 'image/webp,image/apng,image/*,*/*;q=0.8',
        'Referer': referer,
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cookie': cookie_head + umdata + 'LastActivityJoin=16276361,101135441472; ' + hm_lvt + hm_lpvt + timeg,
        'RA-Ver': '2.4',
        'RA-Sid': '6d8a2f10c62d11e7gqpxa53987ed19aa47e3',
    }
    # Headers for submitting the answers.
    headerpost = {
        'Host': 'www.wjx.cn',
        'Connection': 'keep-alive',
        'X-Forwarded-For': ip,
        'Origin': 'https://www.wjx.cn',
        'User-Agent': user_agent,
        'Content-Type': 'application/x-www-form-urlencoded',
        'Accept': '*/*',
        'Referer': referer,
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cookie': cookie_head + 'LastActivityJoin=16276361,101135464182; ' + umdata + hm_lvt + hm_lpvt + timep,
        'RA-Ver': '2.4',
        'RA-Sid': '6d8a2f10c62d11e7gqpxa53987ed19aa47e3',
    }

    download(qid, headerget, i)
    t.sleep(5)
    # convert() returns a loaded copy, so the GIF file can be closed right away.
    with Image.open('%d.gif' % (i)) as raw:
        img = raw.convert("L")
    img = binarizing(img)
    img = depoint(img)
    yanzhengma = shibie(img)
    timec = str(int(time() * 1000))

    def pick(low, high):
        """Return a random integer in [low, high] as a string."""
        return str(random.randint(low, high))

    # One entry per question, in question order; entries are joined with '}'.
    # The list is evaluated top to bottom, so random draws happen in the same
    # order as the questions.
    answers = [
        '1$' + pick(1, 5),
        '2$' + pick(1, 10),
        '3$' + pick(1, 3),
        '4$' + pick(1, 4),
        # Question 5: rows 1..9 as "<row><<value>"; row 1 draws from 1..9,
        # the other rows from 1..5.
        '5$' + ','.join('%d<%s' % (row, pick(1, 9 if row == 1 else 5)) for row in range(1, 10)),
        '6$' + pick(1, 3),
        '7$' + pick(1, 7),
        '8$' + pick(1, 3) + '|' + pick(3, 6) + '|' + pick(7, 9),
        '9$' + pick(1, 4) + '|' + pick(5, 7),
        '10$' + pick(1, 3),
        '11$' + pick(1, 4),
        '12$1<1,2<4,3<6,4<3,5<8,6<3,7<6,8<5',
        '13$' + pick(1, 4) + '|' + pick(5, 7),
        '14$2|5',
        '15$' + pick(1, 2),
        '16$' + pick(1, 2),
        '17$' + pick(1, 2),
        '18$' + pick(1, 2),
        '19$' + pick(1, 2),
        '20$' + pick(1, 4),
        '21$' + pick(1, 3),
    ]
    thedata = {'submitdata': '}'.join(answers)}

    # '/' and ':' are percent-encoded by hand; change ``rn`` for your questionnaire.
    url1 = ('https://www.wjx.cn/handler/processjq.ashx?submittype=1&curID=' + qid
            + '&t=' + timec
            + '&starttime=' + (str(strftime("%Y/%m/%d%H:%M:%S", localtime())).replace('/', '%2F')).replace(':', '%3A')
            + '&validate_text=' + str(yanzhengma)
            + '&rn=' + rnqian
            + '&sd=' + ('https://www.wjx.cn/'.replace('/', '%2F')).replace(':', '%3A'))
    t.sleep(10)
    r = requests.post(url1, headers=headerpost, data=thedata, allow_redirects=False)
    print(r.text)


# The ``main`` driver is not part of this snippet: write your own (an example
# is at http://download.csdn.net/download/gcs1024/10122645). Until ``main`` is
# defined above this point, the call below raises NameError.
if __name__ == '__main__':
    main(qid, rnqian)
阅读全文
0 0
- 问卷星python自动提交
- 问卷星表单自动提交
- JS实现问卷星自动填问卷脚本,两秒自动提交
- 对无验证码的问卷星问卷自动填充
- python问卷
- 自动填写问卷
- 问卷
- 问卷
- Python实现博客日志自动提交程序
- 【Python-3.3】字典存储调查问卷
- 自动提交
- 浏览器自动填写问卷或者单选/表单等
- 问卷星出题第二题图片
- 20161215-弹出层·参考问卷星
- 回车自动提交 禁止回车自动提交
- 回车自动提交,禁止回车自动提交
- 自动提交表单
- 定时自动提交表单
- tuxedo协议FML32远程调用
- push key for github
- 【html5】 canvas小结
- 13.2如何使用通用Mapper
- PHP获取PHP执行的时间
- 问卷星python自动提交
- 如何用好 github 中的 watch、star、fork
- linux中的strip命令简介
- Ubuntu下Qt5.6.2开发环境搭建
- 设置 当前样式dimstyle
- 存储过程更新数据
- spark取相同条件下最早记录
- Ubuntu 16.04 安装CUDA8.0+Cudnn6.0+TensorFlow+Caffe安装
- java安装1.8的经验和Error: Registry key 'Software\JavaSoft\Java Runtime Environment'\CurrentVers问题处理