Python入门：验证码破解（二）

来源：互联网 ｜ 发布：计算机技术与软件初级 ｜ 编辑：程序博客网 ｜ 时间：2024/05/04 14:43

一、定位验证码元素
二、下载验证图片
三、破解验证码

# test.py
"""Locate a captcha image with Selenium, download it, and OCR it.

Flow: open the target page in PhantomJS, read the ``src`` of the captcha
``<img>`` element, download that image, binarize it with a fixed grey-level
threshold, and hand the binarized image to pytesseract.
"""
import os
import time
# ``urllib.request`` must be imported explicitly: a bare ``import urllib``
# does not reliably expose the ``request`` submodule.
import urllib.request

from PIL import Image
import pytesseract
from selenium import webdriver

driver = webdriver.PhantomJS()
try:
    driver.get("http://10.70.18.33:8083/shopxx-mobile/password/find.jhtml")
    pic_url = driver.find_element_by_id("captchaImage").get_attribute('src')
finally:
    # Shut the browser process down even when the page or element lookup fails.
    driver.quit()

# The image name is whatever follows the first "=" in the captcha URL.
pic_name = pic_url.split("=")[1] + '.jpeg'
# Images are stored in an "img" directory that is a sibling of the current
# working directory. os.path.dirname is used instead of splitting the path on
# the directory's own name, which truncates at the wrong place when that name
# also occurs earlier in the path.
pic_path = os.path.dirname(os.getcwd()) + '/img/'


def loadPic(pic_url, pic_path, pic_name):
    """Download the captcha image.

    :param pic_url: URL of the captcha image.
    :param pic_path: target directory, including the trailing separator.
    :param pic_name: file name to store the image under.
    """
    if pic_path:
        # urlretrieve does not create missing directories.
        os.makedirs(pic_path, exist_ok=True)
    urllib.request.urlretrieve(pic_url, pic_path + pic_name)


def convert(pic_path, pic_open, threshold=150):
    """Binarize an opened captcha image and return the OCR result.

    The binarized image is saved next to the original as
    ``<name>--<unix seconds>.jpeg``; the base name is taken from the
    module-level ``pic_name``.

    :param pic_path: directory (with trailing separator) for the output image.
    :param pic_open: an opened PIL image of the captcha.
    :param threshold: grey levels below this value map to 0, all others to 1.
    :return: the text recognised by pytesseract.
    """
    stamp = str(time.time()).split('.')[0]
    grey = pic_open.convert('L')

    # Lookup table with one entry per grey level (0-255).
    table = [0 if level < threshold else 1 for level in range(256)]
    binary = grey.point(table, '1')

    out_path = pic_path + pic_name.split('.')[0] + '--' + stamp + '.jpeg'
    binary.save(out_path, 'jpeg')

    # Re-open the saved file with PIL and run OCR on it; the context manager
    # releases the file handle afterwards.
    with Image.open(out_path) as img:
        vcode = pytesseract.image_to_string(img)
    print(vcode)
    return vcode


if __name__ == '__main__':
    loadPic(pic_url, pic_path, pic_name)
    with Image.open(pic_path + pic_name) as pic_open:
        convert(pic_path, pic_open)

封装及可移植性修改

"""Class-based captcha download and OCR helper."""
import os
import time
# ``urllib.request`` must be imported explicitly: a bare ``import urllib``
# does not reliably expose the ``request`` submodule.
import urllib.request

from PIL import Image
import pytesseract
from selenium import webdriver


class Ver(object):
    """Download and OCR the captcha image found on a given page.

    The page is opened in PhantomJS and the captcha ``<img>`` element is
    looked up by its HTML ``id``.
    """

    def __init__(self, url, ele):
        """Open the page and work out where the captcha image lives.

        :param url: URL of the page that shows the captcha.
        :param ele: HTML ``id`` of the captcha image element on that page.
        """
        self.driver = webdriver.PhantomJS()
        try:
            self.driver.get(url)
            self.pic_url = self.driver.find_element_by_id(ele).get_attribute('src')
        except Exception:
            # Do not leave a browser process behind when setup fails.
            self.driver.quit()
            raise
        # The image name is whatever follows the first "=" in the captcha URL.
        self.pic_name = self.pic_url.split("=")[1] + '.jpeg'
        # Images go to an "img" directory that is a sibling of the cwd.
        self.pic_path = os.path.dirname(os.getcwd()) + '/img/'
        os.makedirs(self.pic_path, exist_ok=True)
        self.t = str(time.time()).split('.')[0]
        # Set by loadPic(); convert() downloads on demand while this is None.
        self.pic_open = None

    def close(self):
        """Shut down the browser started in ``__init__``."""
        self.driver.quit()

    def loadPic(self):
        """Download the captcha image and open it as ``self.pic_open``."""
        target = self.pic_path + self.pic_name
        urllib.request.urlretrieve(self.pic_url, target)
        self.pic_open = Image.open(target)

    def convert(self, threshold=150):
        """Binarize the captcha image and return the OCR result.

        The binarized image is saved as ``<name>--<unix seconds>.jpeg`` in
        ``self.pic_path``.

        :param threshold: grey levels below this value map to 0, all others to 1.
        :return: the text recognised by pytesseract.
        """
        if self.pic_open is None:
            # Allow convert() to be called without an explicit loadPic().
            self.loadPic()

        grey = self.pic_open.convert('L')

        # Lookup table with one entry per grey level (0-255).
        table = [0 if level < threshold else 1 for level in range(256)]
        binary = grey.point(table, '1')

        out_path = self.pic_path + self.pic_name.split('.')[0] + '--' + self.t + '.jpeg'
        binary.save(out_path, 'jpeg')

        # Re-open the saved file with PIL and run OCR on it; the context
        # manager releases the file handle afterwards.
        with Image.open(out_path) as img:
            vcode = pytesseract.image_to_string(img)
        print(vcode)
        return vcode


if __name__ == '__main__':
    aa = Ver("http://10.70.18.33:8083/shopxx-mobile/password/find.jhtml", "captchaImage")
    try:
        aa.loadPic()
        aa.convert()
    finally:
        aa.close()

0 0