Geetest 极验验证破解
来源:互联网 发布:java设计网上购物商城 编辑:程序博客网 时间:2024/06/05 01:39
seriously。。。为了要搞点工商信息~~me也是拼了~~~~
没办法,只能破解了,还好网上很早就有人把这个破了~~开心~~~
所以本人也就本着一边抄袭一边学习的方法,把这个破玩意给破了~~~哈哈哈~~~
不过由于本人想看电视,不想train一个behavior model出来~~所以就偷懒了~~~~
直接用random int 的方法来搞定这个模拟人类的拖拽过程,
网上有人用了linear regression来模拟也有用deep learning 来做的,还有透过一个tanh函数搞定的~~~
因为是randint的关系,所以需要尝试多次才会破解~~这个是不完美的地方,但本着搞数据的方向,这样我也觉得可以接受,重点是数据可以搞下来就好了~~~~
废话少说,直接上代码
# coding=utf-8
"""Drive a browser through the Geetest slider check on www.gsxt.gov.cn.

The script is written for Python 2 (it relies on ``reload(sys)`` and the
``StringIO`` module).  ``crack_picture`` downloads the two captcha
pictures, un-shuffles them and locates the first column where they
differ; ``gsxt`` drives the browser through Selenium.
"""
import sys

# Python 2 only: make utf-8 the implicit str/unicode conversion codec.
reload(sys)
sys.setdefaultencoding('utf8')

import requests
import re
import StringIO
from PIL import Image
import random
import math
import time
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium import webdriver
from bs4 import BeautifulSoup


class crack_picture(object):
    """Download the two captcha pictures and find where they differ."""

    # Number of download attempts made by ``repeat`` before giving up.
    _MAX_TRIES = 10

    # Summed per-channel difference at which two pixels count as different.
    _DIFF_THRESHOLD = 200

    # _TILE_ORDER[i] is the index of the tile in the downloaded sheet that
    # belongs at position i of the restored picture (2 rows x 26 columns).
    _TILE_ORDER = [
        39, 38, 48, 49, 41, 40, 46, 47, 35, 34, 50, 51, 33,
        32, 28, 29, 27, 26, 36, 37, 31, 30, 44, 45, 43, 42,
        12, 13, 23, 22, 14, 15, 21, 20, 8, 9, 25, 24, 6,
        7, 3, 2, 0, 1, 11, 10, 4, 5, 19, 18, 16, 17,
    ]

    def __init__(self, img_url1, img_url2):
        """Fetch both pictures and keep them as in-memory file objects."""
        self.img1, self.img2 = self.picture_get(img_url1, img_url2)

    def picture_get(self, img_url1, img_url2):
        """Download both URLs and return their bodies as ``StringIO`` objects."""
        hd = {"Host": "static.geetest.com",
              "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"}
        img1 = StringIO.StringIO(self.repeat(img_url1, hd).content)
        img2 = StringIO.StringIO(self.repeat(img_url2, hd).content)
        return img1, img2

    def repeat(self, url, hd):
        """GET ``url`` with headers ``hd``, retrying on request errors.

        Returns the ``requests`` response of the first successful attempt.
        Raises the last ``requests.RequestException`` once every attempt
        has failed, instead of returning ``None`` and letting the caller
        fail later on ``.content``.
        """
        last_error = None
        for _ in range(self._MAX_TRIES):
            try:
                return requests.get(url, headers=hd)
            except requests.RequestException as err:
                last_error = err
        raise last_error

    def pictures_recover(self):
        """Restore both pictures and return the drag track for the slider.

        The x position is the first differing column minus 6.  ``judge``
        returns -1 when the pictures never differ, which yields -7 here.
        """
        restored1 = self.picture_recover(self.img1, 'img1.jpg')
        restored2 = self.picture_recover(self.img2, 'img2.jpg')
        xpos = self.judge(restored1, restored2) - 6
        return self.darbra_track(xpos)

    def picture_recover(self, img, name):
        """Un-shuffle a downloaded tile sheet into a 260x116 RGB picture.

        The sheet holds 52 tiles of 10x58 pixels in two rows of 26; tiles
        start every 12 pixels horizontally, offset by 1, and the second
        row starts at y=58.  The restored picture is also saved to the
        file ``name`` before being returned.
        """
        im = Image.open(img)
        im_new = Image.new("RGB", (260, 116))
        for row in range(2):
            for column in range(26):
                src = self._TILE_ORDER[row * 26 + column]
                right = src % 26 * 12 + 1
                down = 58 if src > 25 else 0
                # Copy the whole 10x58 tile at once rather than per pixel.
                tile = im.crop((right, down, right + 10, down + 58))
                im_new.paste(tile, (10 * column, 58 * row))
        im_new.save(name)
        return im_new

    def darbra_track(self, distance):
        """Return the drag track as a list of ``[x, y, t]`` entries."""
        return [[distance, 0.5, 1]]  # crucial trace code was deleted

    def diff(self, img1, img2, wd, ht):
        """Return True when the pixels at ``(wd, ht)`` differ noticeably.

        The absolute per-channel differences are summed and compared with
        ``_DIFF_THRESHOLD``.
        """
        rgb1 = img1.getpixel((wd, ht))
        rgb2 = img2.getpixel((wd, ht))
        return sum(abs(a - b) for a, b in zip(rgb1, rgb2)) >= self._DIFF_THRESHOLD

    def col(self, img1, img2, cl):
        """Return True when any pixel in column ``cl`` differs."""
        return any(self.diff(img1, img2, cl, i) for i in range(img2.size[1]))

    def judge(self, img1, img2):
        """Return the first differing column, scanning left to right, or -1."""
        for i in range(img2.size[0]):
            if self.col(img1, img2, i):
                return i
        return -1


class gsxt(object):
    """Selenium session that searches www.gsxt.gov.cn for company names."""

    def __init__(self, br_name="phantomjs"):
        """Start the browser named ``br_name`` and configure its timeouts."""
        self.br = self.get_webdriver(br_name)
        self.wait = WebDriverWait(self.br, 10, 1.0)
        self.br.set_page_load_timeout(8)
        self.br.set_script_timeout(8)

    def input_params(self, name):
        """Open the index page, type ``name`` and click the query button."""
        self.br.get("http://www.gsxt.gov.cn/index")
        element = self.wait_for(By.ID, "keyword")
        element.send_keys(name)
        time.sleep(1.1)
        element = self.wait_for(By.ID, "btn_query")
        element.click()
        time.sleep(1.1)

    def drag_pic(self):
        """Return the picture URLs of the full and the cut background slices."""
        return (self.find_img_url(self.wait_for(By.CLASS_NAME, "gt_cut_fullbg_slice")),
                self.find_img_url(self.wait_for(By.CLASS_NAME, "gt_cut_bg_slice")))

    def wait_for(self, by1, by2):
        """Wait until the element located by ``(by1, by2)`` is present."""
        return self.wait.until(EC.presence_of_element_located((by1, by2)))

    def find_img_url(self, element):
        """Extract the ``url(...)`` target from the element's inline style.

        The quoted form ``url("...")`` is tried first, then the unquoted
        form.  ``webp`` is replaced by ``jpg`` in the result.  Raises
        ``ValueError`` when the style holds no ``url(...)`` at all.
        """
        style = element.get_attribute('style') or ''
        for pattern in (r'url\("(.*?)"\)', r'url\((.*?)\)'):
            match = re.search(pattern, style)
            if match:
                return match.group(1).replace("webp", "jpg")
        raise ValueError("no url(...) found in style: %r" % style)

    def emulate_track(self, tracks):
        """Drag the slider knob along ``tracks`` and return the result text.

        ``tracks`` is an iterable of ``(x, y, t)`` entries; ``t`` is
        printed but not otherwise used.  Returns the utf-8 encoded text
        of the ``gt_info_text`` element shown after the knob is released.
        """
        knob = self.br.find_element_by_class_name("gt_slider_knob")
        ActionChains(self.br).click_and_hold(on_element=knob).perform()
        for x, y, t in tracks:
            print("%s %s %s" % (x, y, t))
            track_list = self.get_track(x)
            ActionChains(self.br).click_and_hold(on_element=knob).perform()
            time.sleep(0.15)
            for track in track_list:
                track_string = "{%d,%d}," % (track, y + 22)
                # Offsets are relative to the knob's top-left corner while
                # the track values are relative to its centre, so half the
                # knob size (22) is added on both axes.  Rendering differs
                # between browsers; the resulting y offset should end up
                # at 22, half the knob height.
                ActionChains(self.br).move_to_element_with_offset(
                    to_element=knob, xoffset=track + 22, yoffset=y + 22).perform()
                # Random pause of 0.10-0.50 s between steps.  Dividing by
                # 100.0 matters: the integer division used before always
                # produced 0 under Python 2.
                time.sleep(random.randint(10, 50) / 100.0)
                print(track_string)
            # Finish with three moves to x offset 21, 0.1 s apart.
            for _ in range(3):
                ActionChains(self.br).move_to_element_with_offset(
                    to_element=knob, xoffset=21, yoffset=y + 22).perform()
                time.sleep(0.1)
        time.sleep(0.24)
        ActionChains(self.br).release(on_element=knob).perform()
        time.sleep(0.8)
        info = self.wait_for(By.CLASS_NAME, "gt_info_text")
        ans = info.text.encode("utf-8")
        print(ans)
        return ans

    def run(self):
        """Query each sample company, then always close the browser."""
        try:
            for i in [u'招商银行', u'交通银行', u'中国银行']:
                self.hack_geetest(i)
                time.sleep(1)
        finally:
            # Quit even when a query raises so no browser process is left.
            self.quit_webdriver()

    def hack_geetest(self, company=u"招商银行"):
        """Search for ``company`` and retry the slider until it is accepted.

        On success the text of every ``a.search_list_item`` link is
        printed with all whitespace removed.
        """
        self.input_params(company)
        while True:
            img_url1, img_url2 = self.drag_pic()
            tracks = crack_picture(img_url1, img_url2).pictures_recover()
            tsb = self.emulate_track(tracks)
            if '通过' in tsb:
                # Result text contains the "passed" marker.
                time.sleep(1)
                soup = BeautifulSoup(self.br.page_source, 'html.parser')
                for sp in soup.find_all("a", attrs={"class": "search_list_item"}):
                    print(re.sub(r"\s+", "", sp.get_text().encode("utf-8")))
                break
            elif '吃' in tsb:
                # Wait, then retry on the same page.
                time.sleep(5)
            else:
                # Any other result: start the search again.
                self.input_params(company)

    def quit_webdriver(self):
        """Close the browser."""
        self.br.quit()

    def get_webdriver(self, name):
        """Create the webdriver for ``name`` (``phantomjs`` or ``chrome``).

        The name is matched case-insensitively.  Raises ``ValueError``
        for any other name instead of silently returning ``None``.
        """
        browser = name.lower()
        if browser == "phantomjs":
            dcap = dict(DesiredCapabilities.PHANTOMJS)
            dcap["phantomjs.page.settings.userAgent"] = (
                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36")
            return webdriver.PhantomJS(desired_capabilities=dcap)
        if browser == "chrome":
            return webdriver.Chrome()
        raise ValueError("unsupported browser: %r" % name)

    def get_track(self, len):
        """Split a drag distance into small x-axis steps.

        Random steps of 1-3 pixels are taken while at least 5 pixels
        would remain afterwards; the rest is covered one pixel at a time.
        The parameter keeps its original name ``len`` for compatibility
        and is copied to a local straight away.
        """
        remaining = len
        steps = []
        # Step sizes come from a random range.
        step = random.randint(1, 3)
        while remaining - step >= 5:
            steps.append(step)
            remaining -= step
            step = random.randint(1, 3)
        steps.extend([1] * remaining)
        return steps


if __name__ == "__main__":
    # print crack_picture("http://static.geetest.com/pictures/gt/fc064fc73/fc064fc73.jpg", "http://static.geetest.com/pictures/gt/fc064fc73/bg/7ca363b09.jpg").pictures_recover()
    gsxt("chrome").run()
有几篇文章一定要谢谢的:
http://blog.csdn.net/mingzznet/article/details/54288288
https://www.zhihu.com/question/28833985
https://github.com/darbra/geetest
阅读全文
0 0
- Geettest 极验证破解
- 极验验证码破解
- 极验验证码的破解4-执行破解
- 破解极验(geetest)验证码
- 极验验证码的破解-开篇
- 破解极验(geetest)验证码
- 极验验证码破解(二)
- 极验验证码破解(三)
- 极验验证的滑动验证码破解
- silktest 破解 转帖未验证
- office2003验证破解
- 第一次破解验证码
- 验证码破解
- 验证码破解方法
- 破解图片验证码
- 验证码的破解
- 验证码破解技术
- loadrunuer破解验证码
- 1022. D进制的A+B (20)-----Python
- 如何提高c/c++大型项目的软件质量?
- 递推E 母牛的故事
- easyUI整理
- 常用排序法之一 ——冒泡排序法和选择排序法
- Geettest 极验证破解
- BZOJ1620: [Usaco2008 Nov]Time Management 时间管理
- Nginx反向代理 踩坑
- 递推F 加强斐波那契
- 1001. A+B Format (20)
- 递推G 爬楼梯
- java274(jdbc_mysql clob文本文件操作)
- [剑指offer]面试题18:树的子结构
- mac安装wget