Geetest 极验验证码破解

来源:互联网 发布:java设计网上购物商城 编辑:程序博客网 时间:2024/06/05 01:39

seriously。。。为了要高点工商信息~~me也是拼了~~~~

没办法,只能破解了,还好网上很早就有人把这个破了~~开心~~~

所以本人也就本着一边抄袭一边学习的方法,把这个破玩意给破了~~~哈哈哈~~~


不过由于本人想看电视,不想train一个behaviour model出来~~所以就偷懒了~~~~

直接用random int 的方法来搞定这个模拟人类的拖拽过程,

网上有人用了linear regression来模拟也有用deep learning 来做的,还有透过一个tanh函数搞定的~~~


因为是randint的关系,所以需要尝试多次才能破解~~这个是不完美的地方,但本着搞数据的方向,这样我也觉得可以接受,重点是数据可以搞下来就好了~~~~


废话少说,直接上代码

# coding=utf-8import sys;reload(sys);sys.setdefaultencoding('utf8');import requestsimport reimport StringIOfrom PIL import Imageimport randomimport mathimport timefrom selenium.webdriver.common.desired_capabilities import DesiredCapabilitiesfrom selenium.webdriver.support.ui import WebDriverWaitfrom selenium.webdriver.support import expected_conditions as ECfrom selenium.webdriver.common.by import Byfrom selenium.webdriver.common.action_chains import ActionChainsfrom selenium import webdriverfrom bs4 import BeautifulSoupclass crack_picture(object):    def __init__(self, img_url1, img_url2):        self.img1, self.img2 = self.picture_get(img_url1, img_url2)    def picture_get(self, img_url1, img_url2):        hd = {"Host": "static.geetest.com",              "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"}        img1 = StringIO.StringIO(self.repeat(img_url1, hd).content)        img2 = StringIO.StringIO(self.repeat(img_url2, hd).content)        return img1, img2    def repeat(self, url, hd):        times = 10        while times > 0:            try:                ans = requests.get(url, headers=hd)                return ans            except:                times -= 1    def pictures_recover(self):        xpos = self.judge(self.picture_recover(self.img1, 'img1.jpg'), self.picture_recover(self.img2, 'img2.jpg')) - 6        return self.darbra_track(xpos)    def picture_recover(self, img, name):        a = [39, 38, 48, 49, 41, 40, 46, 47, 35, 34, 50, 51, 33, 32, 28, 29, 27, 26, 36, 37, 31, 30, 44, 45, 43, 42, 12,             13, 23, 22, 14, 15, 21, 20, 8, 9, 25, 24, 6, 7, 3, 2, 0, 1, 11, 10, 4, 5, 19, 18, 16, 17]        im = Image.open(img)        im_new = Image.new("RGB", (260, 116))        for row in range(2):            for column in range(26):                right = a[row * 26 + column] % 26 * 12 + 1                down = 58 if a[row * 26 + column] > 25 else 0                for w in 
range(10):                    for h in range(58):                        ht = 58 * row + h                        wd = 10 * column + w                        im_new.putpixel((wd, ht), im.getpixel((w + right, h + down)))        im_new.save(name)        return im_new    def darbra_track(self, distance):        return [[distance, 0.5, 1]]        # crucial trace code was deleted    def diff(self, img1, img2, wd, ht):        rgb1 = img1.getpixel((wd, ht))        rgb2 = img2.getpixel((wd, ht))        tmp = reduce(lambda x, y: x + y, map(lambda x: abs(x[0] - x[1]), zip(rgb1, rgb2)))        return True if tmp >= 200 else False    def col(self, img1, img2, cl):        for i in range(img2.size[1]):            if self.diff(img1, img2, cl, i):                return True        return False    def judge(self, img1, img2):        for i in range(img2.size[0]):            if self.col(img1, img2, i):                return i        return -1class gsxt(object):    def __init__(self, br_name="phantomjs"):        self.br = self.get_webdriver(br_name)        self.wait = WebDriverWait(self.br, 10, 1.0)        self.br.set_page_load_timeout(8)        self.br.set_script_timeout(8)    def input_params(self, name):        self.br.get("http://www.gsxt.gov.cn/index")        element = self.wait_for(By.ID, "keyword")        element.send_keys(name)        time.sleep(1.1)        element = self.wait_for(By.ID, "btn_query")        element.click()        time.sleep(1.1)    def drag_pic(self):        return (self.find_img_url(self.wait_for(By.CLASS_NAME, "gt_cut_fullbg_slice")),                self.find_img_url(self.wait_for(By.CLASS_NAME, "gt_cut_bg_slice")))    def wait_for(self, by1, by2):        return self.wait.until(EC.presence_of_element_located((by1, by2)))    def find_img_url(self, element):        try:            return re.findall('url\("(.*?)"\)', element.get_attribute('style'))[0].replace("webp", "jpg")        except:            return re.findall('url\((.*?)\)', 
element.get_attribute('style'))[0].replace("webp", "jpg")    def emulate_track(self, tracks):        element = self.br.find_element_by_class_name("gt_slider_knob")        ActionChains(self.br).click_and_hold(on_element=element).perform()        for x, y, t in tracks:            print x, y, t            track_list = self.get_track(x);            ActionChains(self.br).click_and_hold(on_element=element).perform()            time.sleep(0.15)            for track in track_list:                track_string = "{%d,%d}," % (track, y +22 )                #         xoffset=track+22:这里的移动位置的值是相对于滑动圆球左上角的相对值,而轨迹变量里的是圆球的中心点,所以要加上圆球长度的一半。                #         yoffset=y-445:这里也是一样的。不过要注意的是不同的浏览器渲染出来的结果是不一样的,要保证最终的计算后的值是22,也就是圆球高度的一半                ActionChains(self.br).move_to_element_with_offset(to_element=element, xoffset=track + 22,                                                                 yoffset=y + 22).perform()                #         间隔时间也通过随机函数来获得                time.sleep(random.randint(10, 50) / 100)                print track_string            # ActionChains(self.br).move_to_element_with_offset(            #     to_element=element,            #     xoffset=x + 22,            #     yoffset=y + 22).perform()            # ActionChains(self.br).click_and_hold().perform()            # time.sleep(t)        ActionChains(self.br).move_to_element_with_offset(to_element=element, xoffset=21, yoffset=y + 22).perform()        time.sleep(0.1)        ActionChains(self.br).move_to_element_with_offset(to_element=element, xoffset=21, yoffset=y + 22).perform()        time.sleep(0.1)        ActionChains(self.br).move_to_element_with_offset(to_element=element, xoffset=21, yoffset=y + 22).perform()        time.sleep(0.1)        time.sleep(0.24)        ActionChains(self.br).release(on_element=element).perform()        time.sleep(0.8)        element = self.wait_for(By.CLASS_NAME, "gt_info_text")        ans = element.text.encode("utf-8")        print ans        return ans    def 
run(self):        for i in [u'招商银行', u'交通银行', u'中国银行']:            self.hack_geetest(i)            time.sleep(1)        self.quit_webdriver()    def hack_geetest(self, company=u"招商银行"):        flag = True        self.input_params(company)        while flag:            img_url1, img_url2 = self.drag_pic()            tracks = crack_picture(img_url1, img_url2).pictures_recover()            tsb = self.emulate_track(tracks)            if '通过' in tsb:                time.sleep(1)                soup = BeautifulSoup(self.br.page_source, 'html.parser')                for sp in soup.find_all("a", attrs={"class": "search_list_item"}):                    print re.sub("\s+", "", sp.get_text().encode("utf-8"))                    # print sp.get_text()                break            elif '吃' in tsb:                time.sleep(5)            else:                self.input_params(company)    def quit_webdriver(self):        self.br.quit()    def get_webdriver(self, name):        if name.lower() == "phantomjs":            dcap = dict(DesiredCapabilities.PHANTOMJS)            dcap["phantomjs.page.settings.userAgent"] = (                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36")            return webdriver.PhantomJS(desired_capabilities=dcap)        elif name.lower() == "chrome":            return webdriver.Chrome()    def get_track(self, len):        '''        根据缺口的位置模拟x轴移动的轨迹        '''        pass        list = []        #     间隔通过随机范围函数来获得        x = random.randint(1, 3)        while len - x >= 5:            list.append(x)            len = len - x            x = random.randint(1, 3)        for i in xrange(len):            list.append(1)        return list;if __name__ == "__main__":    # print crack_picture("http://static.geetest.com/pictures/gt/fc064fc73/fc064fc73.jpg", "http://static.geetest.com/pictures/gt/fc064fc73/bg/7ca363b09.jpg").pictures_recover()    gsxt("chrome").run()


有几篇文章一定要谢谢的:

http://blog.csdn.net/mingzznet/article/details/54288288

https://www.zhihu.com/question/28833985

https://github.com/darbra/geetest

原创粉丝点击