验证码识别

来源:互联网 发布:网络爬虫是干嘛的 编辑:程序博客网 时间:2024/06/16 14:22
# encoding=utf-8
"""Drive a Chrome session through a captcha-protected page.

The script screenshots the page, crops the captcha image out of the
screenshot, passes it to ``distinguish_captcha`` and submits the returned
code, repeating until the browser leaves the captcha page.
"""
import csv
import os
import re
import sys
import time

# Extend the module search path so the captcha_recognition import below can
# be resolved on this machine.
sys.path.append('/home/henson/Documents/coding/bill/captcha_recognition')

import pytesser
import selenium
from PIL import Image, ImageChops, ImageDraw
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.support.ui import WebDriverWait

from captcha_recognition.recognition_img import distinguish_captcha

headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}

# Start the browser that shows the captcha page.
chromedriver = "/home/henson/Documents/pycharm/webdriver/chromedriver"
os.environ["webdriver.chrome.driver"] = chromedriver
driver = webdriver.Chrome(chromedriver)
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

# The string below is a disabled snippet (never executed): it saved 999
# cropped captcha images to disk, clicking the captcha to get a new one each
# time. Its text is kept as in the original.
"""
driver.get('http://www******')    #driver.get(url)
for i in range(1,1000):
    driver.get_screenshot_as_file('screenshot.png')
    # 获取指定元素位置
    element = driver.find_element_by_id('imgCode')
    left = int(element.location['x'])
    top = int(element.location['y'])
    right = int(element.location['x'] + element.size['width'])
    bottom = int(element.location['y'] + element.size['height'])
    # 通过Image处理图像
    im = Image.open('screenshot.png')
    im = im.crop((left, top, right, bottom))
    im.save('/home/henson/Documents/coding/bill/code/code+('+str(i)+').png')
    driver.find_element_by_xpath('//*[@id="imgCode"]').click()  # 切换验证码
"""


def get_img(url):
    """Load *url* and submit recognised captcha codes until the URL changes.

    Each attempt screenshots the page to ``screenshot.png``, crops the
    ``imgCode`` element out of it into ``code.png``, runs
    ``distinguish_captcha`` on that image, types the result into the
    ``txtCode`` field and clicks ``btnOk``. The loop ends once
    ``driver.current_url`` differs from the URL seen right after loading.

    Args:
        url: Address of the page that shows the captcha.

    Returns:
        None. Any exception raised along the way is printed, not re-raised,
        and the shared module-level ``driver`` is left open.
    """
    try:
        driver.get(url)
        url_core = driver.current_url
        current_url = url_core
        while current_url == url_core:
            # Take a full-page screenshot.
            driver.get_screenshot_as_file('screenshot.png')

            # Locate the captcha element and derive its bounding box.
            # NOTE(review): assumes screenshot pixels line up with
            # element.location/size (no device-pixel scaling) - confirm.
            element = driver.find_element_by_id('imgCode')
            left = int(element.location['x'])
            top = int(element.location['y'])
            right = int(element.location['x'] + element.size['width'])
            bottom = int(element.location['y'] + element.size['height'])

            # Crop the captcha out of the screenshot with PIL.
            im = Image.open('screenshot.png')
            im = im.crop((left, top, right, bottom))
            im.save('code.png')

            inputElement = driver.find_element_by_name("txtCode")
            image = Image.open('code.png')
            code = distinguish_captcha(image)  # recognition result
            print(code)
            inputElement.send_keys(code)
            driver.find_element_by_xpath('//*[@id="btnOk"]').click()

            # Best effort: if an alert is showing after the submit, print its
            # text, dismiss it and clear the input for the next attempt. Any
            # failure here (e.g. no alert present) is deliberately ignored.
            # NOTE(review): switch_to_alert() is deprecated in newer Selenium
            # releases in favour of driver.switch_to.alert - confirm the
            # installed version before changing it.
            try:
                alert = driver.switch_to_alert()
                time.sleep(2)
                print(alert.text)  # text of the alert dialog
                alert.accept()  # close the pop-up
                driver.find_element_by_name("txtCode").clear()
            except Exception:
                pass
            current_url = driver.current_url
    except Exception as exc:
        # Print the caught error itself; the original printed the
        # ``Exception`` class object, which hid what went wrong.
        print(exc)

1.先获取验证码的图片,此处用的是selenium+driver 截屏功能,再对获得的图片进行切割
2.导入了人家的captcha_recognition,用来做验证码的识别
captcha_recognition包里,会对验证码进行二值化、切割……不过最好根据自己的验证码收集样本,切割后手动分类,以提高识别度。

=======================>>>>>>
实践感觉效果算是不错的了,可以一用

原创粉丝点击