问卷星python自动提交

来源:互联网 发布:ubuntu默认开启小键盘 编辑:程序博客网 时间:2024/05/16 18:01

这是针对需要验证码的问卷编写的爬虫;对于不需要验证码的问卷,只需在此爬虫的基础上稍作修改即可。
1. 下载 Fiddler 4(用于抓包)
下载和安装方式请自行搜索。
2. 查看并分析请求头和 Cookie
其中的变量(例如下面的 ip、timep)记得保证每次请求都是随机或动态生成的。

        'Host': 'www.wjx.cn',#host地址        'Connection': 'keep-alive',        'X-Forwarded-For': ip,#自行设置ip,随机ip99%为外国ip,如果要中国大陆的,自行搜索        'Origin': 'https://www.wjx.cn',        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.89 Safari/537.36 EXT/6d8a2f10c62d11e7gqpxa53987ed19aa47e3/2.4',#伪装浏览器        'Content-Type': 'application/x-www-form-urlencoded',        'Accept': '*/*',        'Referer': 'https://www.wjx.cn/jq/'+qid+'.aspx',#问卷地址        'Accept-Encoding': 'gzip, deflate, br',        'Accept-Language': 'zh-CN,zh;q=0.9',        'Cookie': '.ASPXANONYMOUS=Se6Dlf-S0wEkAAAAMzEyZGYyZmUtYzBmYi00YWM3LWIyMTEtMTEzZWI0YzkzMmZhi6xL6iHoMTghIlPoznFqbYuLd1s1; spiderregkey=www.wjx.cn%c2%a7%c2%a71; baidutgkey=%u95EE%u5377%u661FBH%7C2%7Cbaidu; _uab_collina=151065406900158178719624; SojumpSurvey=01022D8896C0612BD508FE2D28A847832BD508000670002D00740065007300740000012F00FF29B0D12A4780F0718D63D71441EC14F08F69B611; lllogcook=1; LastCheckUpdateDate=1; ASP.NET_SessionId=4mbujabo1zx2a1imb0pw40k0; LastActivityJoin=16276361,101135464182; _umdata=C234BF9D3AFA6FE7FD70ECA73142BFB1DAA8AC4CAD8E980472CE17B2B4815B078B6B64C8E7D1428ACD43AD3E795C914CB6CD457CEA3135697A8EEEB6A2679E66; Hm_lvt_21be24c80829bd7a683b2c536fcf520b=1510624314,1510653859,1510658882,1510665316; Hm_lpvt_21be24c80829bd7a683b2c536fcf520b='+timep,#cookie是最重要的,如果本cookie不能用,抓包换cookie,记得Hm_lpvt_21be24c80829bd7a683b2c536fcf520b='+timep(这句)变量改成这样        'RA-Ver': '2.4',        'RA-Sid': '6d8a2f10c62d11e7gqpxa53987ed19aa47e3',

3.识别验证码
参考http://blog.csdn.net/gcs1024/article/details/77807537
4. 添加 data
通过抓包分析提交时传输的数据(即各题所选的选项);每份问卷的数据格式都不一样,需要分别分析。
5. 其他
其他的杂项请自行分析。
示例代码:

import os
import random
import urllib.parse
import urllib.request
from time import time, strftime, localtime
import time as t

import pytesseract
import requests
from PIL import Image

# Questionnaire id, as it appears in https://www.wjx.cn/jq/<qid>.aspx.
qid = str(16454455)
# Value sent as the "rn" query parameter on submission. The original note
# says to change it ("改rn"); presumably it is taken from a packet capture
# of the target questionnaire -- confirm against your own capture.
rnqian = str(2063096382)

# Seconds to wait on each network call before giving up instead of hanging.
REQUEST_TIMEOUT = 30

_USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/62.0.3202.89 Safari/537.36 '
    'EXT/6d8a2f10c62d11e7gqpxa53987ed19aa47e3/2.4'
)

# Cookie pairs shared by the captcha request and the submit request.
# They come from a packet capture; replace them if they stop working.
_COOKIE_PREFIX = (
    '.ASPXANONYMOUS=Se6Dlf-S0wEkAAAAMzEyZGYyZmUtYzBmYi00YWM3LWIyMTEtMTEzZWI0YzkzMmZhi6xL6iHoMTghIlPoznFqbYuLd1s1; '
    'spiderregkey=www.wjx.cn%c2%a7%c2%a71; '
    'baidutgkey=%u95EE%u5377%u661FBH%7C2%7Cbaidu; '
    '_uab_collina=151065406900158178719624; '
    'SojumpSurvey=01022D8896C0612BD508FE2D28A847832BD508000670002D00740065007300740000012F00FF29B0D12A4780F0718D63D71441EC14F08F69B611; '
    'lllogcook=1; '
    'LastCheckUpdateDate=1; '
    'ASP.NET_SessionId=4mbujabo1zx2a1imb0pw40k0; '
)
_COOKIE_UMDATA = (
    '_umdata=C234BF9D3AFA6FE7FD70ECA73142BFB1DAA8AC4CAD8E980472CE17B2B4815B078B6B64C8E7D1428ACD43AD3E795C914CB6CD457CEA3135697A8EEEB6A2679E66; '
)
# The Hm_lpvt value is a millisecond timestamp appended by the caller.
_COOKIE_HM = (
    'Hm_lvt_21be24c80829bd7a683b2c536fcf520b=1510624314,1510653859,1510658882,1510665316; '
    'Hm_lpvt_21be24c80829bd7a683b2c536fcf520b='
)


def download(qid, header, i):
    """Fetch the captcha image for questionnaire *qid* and save it as ``<i>.gif``.

    Args:
        qid: Questionnaire id (string).
        header: Dict of HTTP headers to send with the image request.
        i: Integer used as the output file name (``'%d.gif' % i``).
    """
    url = ('https://www.wjx.cn/AntiSpamImageGen.aspx?q=' + qid
           + '&t=' + str(int(time() * 1000)))
    req = urllib.request.Request(url, headers=header)
    # Context managers close the HTTP response and the file even on errors.
    with urllib.request.urlopen(req, timeout=REQUEST_TIMEOUT) as resp:
        data = resp.read()
    with open('%d.gif' % (i), 'wb') as pic:
        pic.write(data)


def binarizing(img, threshold=30):
    """Binarize a grayscale image in place and return it.

    Pixels brighter than *threshold* become 255, all others become 0.

    Args:
        img: Grayscale ("L" mode) PIL image.
        threshold: Brightness cut-off; defaults to the original value 30.
    """
    pixdata = img.load()
    w, h = img.size
    for y in range(h):
        for x in range(w):
            pixdata[x, y] = 255 if pixdata[x, y] > threshold else 0
    return img


def depoint(img):
    """Remove isolated dark pixels from a grayscale image in place.

    For every non-border pixel, count how many of its four direct neighbours
    are brighter than 245; if more than two are, the pixel is set to 255.
    The scan modifies the image as it goes, so earlier changes influence
    later neighbour counts.

    Args:
        img: Grayscale ("L" mode) PIL image.

    Returns:
        The same image object.
    """
    pixdata = img.load()
    w, h = img.size
    for y in range(1, h - 1):
        for x in range(1, w - 1):
            neighbours = (
                pixdata[x, y - 1],
                pixdata[x, y + 1],
                pixdata[x - 1, y],
                pixdata[x + 1, y],
            )
            count = sum(1 for value in neighbours if value > 245)
            if count > 2:
                pixdata[x, y] = 255
    return img


def shibie(img, threshold=140):
    """Run OCR on *img* and return the recognised text, stripped.

    The image is converted to grayscale, reduced to 1-bit using *threshold*,
    and passed to pytesseract. The result is also printed. The original
    author notes this is only suitable for simple images.

    Args:
        img: PIL image.
        threshold: Gray level below which a pixel maps to 0, otherwise 1.

    Returns:
        The recognised text with surrounding whitespace removed.
    """
    imgry = img.convert('L')
    table = [0 if level < threshold else 1 for level in range(256)]
    out = imgry.point(table, '1')
    # Run OCR once and reuse the result for both the log line and the return.
    text = str(pytesseract.image_to_string(out)).strip()
    print(text)
    return text


def _rand(low, high):
    """Return a random integer in [low, high] as a string."""
    return str(random.randint(low, high))


def build_submitdata():
    """Build the ``submitdata`` answer string for the sample questionnaire.

    Format: ``<question>$<answer>`` entries joined by ``}``. Multi-select
    answers join options with ``|``; matrix answers are ``row<value`` pairs
    joined by ``,``. The ranges below are specific to this one questionnaire
    and must be re-derived from a packet capture for any other.
    """
    answers = [
        _rand(1, 5),    # 1
        _rand(1, 10),   # 2
        _rand(1, 3),    # 3
        _rand(1, 4),    # 4
        # 5: matrix with 9 rows; row 1 uses 1-9, the rest 1-5 (as in the original).
        ','.join('%d<%s' % (row, _rand(1, 9 if row == 1 else 5))
                 for row in range(1, 10)),
        _rand(1, 3),    # 6
        _rand(1, 7),    # 7
        # 8: NOTE(review): ranges 1-3 and 3-6 overlap, so option 3 can be
        # picked twice -- confirm whether 4-6 was intended.
        '|'.join([_rand(1, 3), _rand(3, 6), _rand(7, 9)]),
        '|'.join([_rand(1, 4), _rand(5, 7)]),   # 9
        _rand(1, 3),    # 10
        _rand(1, 4),    # 11
        '1<1,2<4,3<6,4<3,5<8,6<3,7<6,8<5',      # 12 (fixed answer)
        '|'.join([_rand(1, 4), _rand(5, 7)]),   # 13
        '2|5',          # 14 (fixed answer)
        _rand(1, 2),    # 15
        _rand(1, 2),    # 16
        _rand(1, 2),    # 17
        _rand(1, 2),    # 18
        _rand(1, 2),    # 19
        _rand(1, 4),    # 20
        _rand(1, 3),    # 21
    ]
    return '}'.join('%d$%s' % (number, answer)
                    for number, answer in enumerate(answers, 1))


def _captcha_headers(qid, ip, timeg):
    """Return the headers used to request the captcha image."""
    return {
        'Host': 'www.wjx.cn',
        'Connection': 'keep-alive',
        'X-Forwarded-For': ip,
        'User-Agent': _USER_AGENT,
        'Accept': 'image/webp,image/apng,image/*,*/*;q=0.8',
        'Referer': 'https://www.wjx.cn/jq/' + qid + '.aspx',
        # NOTE(review): urllib does not decompress responses itself; confirm
        # the image endpoint never answers with a compressed body.
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cookie': (_COOKIE_PREFIX + _COOKIE_UMDATA
                   + 'LastActivityJoin=16276361,101135441472; '
                   + _COOKIE_HM + timeg),
        'RA-Ver': '2.4',
        'RA-Sid': '6d8a2f10c62d11e7gqpxa53987ed19aa47e3',
    }


def _submit_headers(qid, ip, timep):
    """Return the headers used for the answer submission POST."""
    return {
        'Host': 'www.wjx.cn',
        'Connection': 'keep-alive',
        'X-Forwarded-For': ip,
        'Origin': 'https://www.wjx.cn',
        'User-Agent': _USER_AGENT,
        'Content-Type': 'application/x-www-form-urlencoded',
        'Accept': '*/*',
        'Referer': 'https://www.wjx.cn/jq/' + qid + '.aspx',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cookie': (_COOKIE_PREFIX
                   + 'LastActivityJoin=16276361,101135464182; '
                   + _COOKIE_UMDATA + _COOKIE_HM + timep),
        'RA-Ver': '2.4',
        'RA-Sid': '6d8a2f10c62d11e7gqpxa53987ed19aa47e3',
    }


def _submit_url(qid, rnqian, timec, yanzhengma):
    """Return the submission URL with every dynamic value percent-encoded."""
    # NOTE(review): the format has no separator between date and time
    # ("%d%H"); kept exactly as in the original -- confirm against a capture.
    starttime = strftime("%Y/%m/%d%H:%M:%S", localtime())
    return ('https://www.wjx.cn/handler/processjq.ashx?submittype=1'
            '&curID=' + qid
            + '&t=' + timec
            + '&starttime=' + urllib.parse.quote(starttime, safe='')
            # OCR output is arbitrary text; escape it so characters such as
            # '&', '#' or spaces cannot corrupt the query string.
            + '&validate_text=' + urllib.parse.quote(str(yanzhengma), safe='')
            + '&rn=' + rnqian
            + '&sd=' + urllib.parse.quote('https://www.wjx.cn/', safe=''))


def post(qid, rnqian, i):
    """Download and OCR the captcha, then submit one set of answers.

    Prints the server's response body.

    Args:
        qid: Questionnaire id (string).
        rnqian: Value for the ``rn`` query parameter (string).
        i: Integer used to name the temporary captcha file ``<i>.gif``.
    """
    timeg = str(int(time() * 1000))
    t.sleep(10)
    timep = str(int(time() * 1000))
    ip = '.'.join(str(random.randint(1, 4)) for _ in range(4))

    download(qid, _captcha_headers(qid, ip, timeg), i)
    t.sleep(5)

    # Close the image file once the grayscale copy has been made.
    with Image.open('%d.gif' % (i)) as raw:
        img = raw.convert("L")
    img = binarizing(img)
    img = depoint(img)
    yanzhengma = shibie(img)

    timec = str(int(time() * 1000))
    thedata = {'submitdata': build_submitdata()}
    url1 = _submit_url(qid, rnqian, timec, yanzhengma)
    t.sleep(10)
    r = requests.post(url1, headers=_submit_headers(qid, ip, timep),
                      data=thedata, allow_redirects=False,
                      timeout=REQUEST_TIMEOUT)
    print(r.text)


def main(qid, rnqian):
    """Minimal placeholder entry point: perform a single submission.

    The original article leaves ``main`` for the reader to write (reference:
    http://download.csdn.net/download/gcs1024/10122645); replace this with
    your own driver as needed.
    """
    post(qid, rnqian, 1)


if __name__ == '__main__':
    main(qid, rnqian)
原创粉丝点击