用Python实现china-pub登录验证码的识别--代码版

来源:互联网 发布:android 内存优化 编辑:程序博客网 时间:2024/05/11 23:11

今天看了下python验证码识别,网上搜索到一个比较简单入门级的。但是代码连缩进也没有,自己动手实践了下。

用Python实现china-pub登录验证码的识别

例子是china-pub的一个验证码,比较简单,很适合初学者拿来练习。

URL:http://www.china-pub.com/edition06/imgchk/validatecode.asp

Python:Python 2.7.3

PIL:Python Imaging Library 1.1.7 for Python 2.7 http://www.pythonware.com/products/pil/

图像处理

验证码图片的大小为40×10,包含4个数字,每个数字占10×10像素。先把图片切成4个10×10的小图,处理几张图片就能得到0~9的全部数字,然后分析各个数字的特征和它们之间的差别。
import Image  # legacy PIL 1.1.7 import; with Pillow use "from PIL import Image"


def img2four(image):
    """Split a 40x10 captcha image into four 10x10 digit tiles.

    The tiles are cut left to right, converted to mode 'L' (8-bit
    grayscale) and saved in the current directory as '0.bmp' .. '3.bmp'.

    image -- whatever Image.open accepts (the article passes a file path).
    """
    tile_size = 10
    # Open the source once; the original re-opened it for every tile.
    img = Image.open(image)
    for index in range(4):
        left = index * tile_size
        box = (left, 0, left + tile_size, tile_size)
        img.crop(box).convert('L').save(str(index) + '.bmp')

验证码识别

看某个数字的全部pixel值
def printPixel(image):
    """Print the pixel values of the top-left 10x10 area of an image.

    Emits one text row per image row, with the ten values of that row
    separated by single spaces.

    image -- whatever Image.open accepts (the article passes a file path).
    """
    img = Image.open(image)
    for y in range(10):
        row = [str(img.getpixel((x, y))) for x in range(10)]
        # A single parenthesised argument prints identically on Python 2
        # and Python 3, unlike the original "print value," statements.
        print(" ".join(row))
用if语句逐个检查特征像素点,把数字一个一个地识别出来。
import os  # needed by getCode; the original snippet used os without importing it


def cross(color):
    """Return True when a pixel value differs from the background (238)."""
    bgcolor = 238
    return color != bgcolor


# Probe points checked in order: the first pixel that is not background
# decides the digit. The order matters because later probes rely on the
# earlier digits having already been ruled out.
_DIGIT_PROBES = (
    ((1, 8), 7),
    ((0, 0), 5),
    ((2, 1), 1),
    ((3, 1), 4),
    ((1, 1), 6),  # not 1, must be 6
    ((1, 7), 2),
    ((2, 5), 9),
    ((5, 4), 0),  # not 9, must be 0
    ((1, 4), 8),
)


def recognize(image):
    """Recognize the single digit shown in a 10x10 tile image.

    image -- path of a tile; assumed to be one of the grayscale files
             written by img2four.

    Returns the digit as an int. If no probe point hits a non-background
    pixel the digit is taken to be 3.
    """
    img = Image.open(image)
    for point, digit in _DIGIT_PROBES:
        if cross(img.getpixel(point)):
            return digit
    return 3


def getCode(image):
    """Return the four-digit captcha text of a 40x10 image as a string.

    The image is split into '0.bmp' .. '3.bmp' via img2four, each tile is
    recognized, and the temporary tiles are removed again.
    """
    crop_files = ["0.bmp", "1.bmp", "2.bmp", "3.bmp"]
    try:
        img2four(image)
        return "".join(str(recognize(name)) for name in crop_files)
    finally:
        # Remove the crop files even when splitting or recognition fails.
        # To inspect the temporary images, sleep a few seconds before this.
        for name in crop_files:
            if os.path.exists(name):
                os.remove(name)

compare.py

统计从(0,0)到(9,9)这100个点中,每个点上有哪些数字的像素值不是背景色(238),也就是有哪些数字"通过"该点;然后打印出只有少数(1~2个)数字通过的那些坐标,用来做判断。
import Image  # legacy PIL 1.1.7 import; with Pillow use "from PIL import Image"


def printPixel(image):
    """Return the 100 pixel values of a 10x10 bmp file as a flat list.

    Values are collected row by row, so the pixel at (x, y) is stored at
    index y * 10 + x.
    """
    img = Image.open(image)
    return [img.getpixel((x, y)) for y in range(10) for x in range(10)]


def getxy(number):
    """Convert a flat list index (0..99) into the pixel tuple (x, y)."""
    # divmod keeps both coordinates integers regardless of how "/" divides.
    y, x = divmod(number, 10)
    return (x, y)


def findDiff():
    """Print the pixel positions that distinguish the digits 0..9.

    Reads the sample tiles 'n_0.bmp' .. 'n_9.bmp' and reports every
    position where only one or two digits have a non-background (!= 238)
    pixel.
    """
    pixels_by_digit = [printPixel('n_' + str(digit) + '.bmp')
                       for digit in range(10)]
    for index in range(100):
        num = [digit for digit in range(10)
               if pixels_by_digit[digit][index] != 238]
        count = len(num)
        if 0 < count < 3:
            print("pixel is %s, count = %d, numbers are %s" % (getxy(index), count, num))


if __name__ == "__main__":
    findDiff()

get_code.py完整代码

import os
import urllib

import Image  # legacy PIL 1.1.7 import; with Pillow use "from PIL import Image"


def img2four(image):
    """Split a 40x10 captcha image into four 10x10 digit tiles.

    The tiles are cut left to right, converted to mode 'L' (8-bit
    grayscale) and saved in the current directory as '0.bmp' .. '3.bmp'.
    """
    tile_size = 10
    # Open the source once; the original re-opened it for every tile.
    img = Image.open(image)
    for index in range(4):
        left = index * tile_size
        box = (left, 0, left + tile_size, tile_size)
        img.crop(box).convert('L').save(str(index) + '.bmp')


def printPixel(image):
    """Print the top-left 10x10 pixel values of an image, one row per line."""
    img = Image.open(image)
    for y in range(10):
        row = [str(img.getpixel((x, y))) for x in range(10)]
        # A single parenthesised argument prints identically on Python 2
        # and Python 3, unlike the original "print value," statements.
        print(" ".join(row))


# Debugging helpers used while collecting the digit samples:
# img2four("test1.png")
# printPixel("n_0.bmp")


def cross(color):
    """Return True when a pixel value differs from the background (238)."""
    bgcolor = 238
    return color != bgcolor


# Probe points checked in order: the first pixel that is not background
# decides the digit. The order matters because later probes rely on the
# earlier digits having already been ruled out.
_DIGIT_PROBES = (
    ((1, 8), 7),
    ((0, 0), 5),
    ((2, 1), 1),
    ((3, 1), 4),
    ((1, 1), 6),  # not 1, must be 6
    ((1, 7), 2),
    ((2, 5), 9),
    ((5, 4), 0),  # not 9, must be 0
    ((1, 4), 8),
)


def recognize(image):
    """Recognize the single digit shown in a 10x10 tile image.

    Returns the digit as an int. If no probe point hits a non-background
    pixel the digit is taken to be 3.
    """
    img = Image.open(image)
    for point, digit in _DIGIT_PROBES:
        if cross(img.getpixel(point)):
            return digit
    return 3


def getCode(image):
    """Return the four-digit captcha text of a 40x10 image as a string.

    The image is split into '0.bmp' .. '3.bmp', each tile is recognized,
    and the temporary tiles are removed again.
    """
    crop_files = ["0.bmp", "1.bmp", "2.bmp", "3.bmp"]
    try:
        img2four(image)
        return "".join(str(recognize(name)) for name in crop_files)
    finally:
        # Remove the crop files even when splitting or recognition fails.
        # To inspect the temporary images, sleep a few seconds before this.
        for name in crop_files:
            if os.path.exists(name):
                os.remove(name)


def validateCode():
    """Download one captcha from china-pub, save it and print its digits."""
    url = "http://www.china-pub.com/edition06/imgchk/validatecode.asp"
    path = "code67.png"
    response = urllib.urlopen(url)
    try:
        data = response.read()
    finally:
        # Python 2's urlopen result is not a context manager; close it
        # explicitly so the connection is released.
        response.close()
    # open() in a with-block replaces the Python-2-only file() builtin and
    # guarantees the file is closed before it is read back.
    with open(path, "wb") as f:
        f.write(data)
    print(getCode(path))


if __name__ == "__main__":
    validateCode()


参考

http://effbot.org/imagingbook/image.htm
原创粉丝点击