利用python+baidu-aip高精度识别验证码

基本原理说明

  • python-selenium获取到验证码图
  • python-PIL处理验证码图片
  • python-baidu-aip提交图片获取识别文字
  • python-selenium获取到验证码图

    目标:获取验证码图片
    模块:selenium
    核心步骤如下:

  • browser = webdriver.Firefox();browser.get('url');#打开浏览器访问地址
  • browser.save_screenshot('All.png');all_page = Image.open('All.png')#截图保存整个页面,再用PIL打开该截图
  • location = browser.find_element_by_xpath("//*[@id='imgCodeId']").location#定位验证码
  • rangle = (x_begin,y_begin,x_end,y_end)#验证码的四个坐标
  • code_image = all_page.crop(rangle)#根据坐标截图
  • python-PIL处理验证码图片

    目标:提高图片识别的准确率
    模块:PIL
    核心步骤如下:

  • imgry = Image.open(r"result.jpg").convert('L')#灰度处理,去除颜色
  • sharpness =ImageEnhance.Contrast(imgry)#对比度增强
  • i3 = sharpness.enhance(3.0) #3.0为对比度增强系数(1.0表示保持原图)
  • python-baidu-aip提交图片获取识别文字(接口说明)

    目标:传入验证码图片获取文字识别结果
    模块:baidu-aip
    核心步骤如下:

  • 对接口返回结果的二次验证和处理,本项目对返回的平均置信度、返回字符串类型进行二次判断,如未达到相关阈值,则刷新验证码,重新提交图片
  • 项目整体代码示例

    #coding=utf8
    from selenium import webdriver
    import time
    from PIL import Image
    from PIL import ImageEnhance
    from aip import AipOcr
    import os
    # Whitelist of characters a recognized captcha may contain:
    # lowercase letters, uppercase letters, then the digits 1-9 followed by 0.
    all_num = list(
        'abcdefghijklmnopqrstuvwxyz'
        'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
        '1234567890'
    )
    def baidu_image_to_word(image_path):
        """Recognize the text in a local image with Baidu's high-accuracy OCR.

        Args:
            image_path: Path of the image file whose bytes are uploaded.

        Returns:
            A ``(guess, probability, res)`` tuple: the text of the first
            recognized line, the average confidence reported for that line,
            and the raw response returned by ``client.basicAccurate``.
            When the response lacks those fields (for example an error
            response or an empty ``words_result``), ``guess`` is ``''`` and
            ``probability`` is ``0``.
        """
        # Your APP ID / API key / secret key.
        # NOTE(review): credentials are hard-coded in source; consider loading
        # them from configuration and rotating these values.
        APP_ID = '14364432'
        API_KEY = 'jgopMYaecGeGgaBr2EYWKNDZ'
        SECRET_KEY = 'TnpKrHyyc3IgrGw2L5ZzKRiY9F2seCSk'
        client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

        with open(image_path, 'rb') as fp:
            image = fp.read()

        # Optional request parameters.
        options = {
            "language_type": "ENG",
            # Whether to detect the image orientation (not detected by default).
            "detect_direction": "true",
            # Whether to return the confidence of every recognized line.
            "probability": "true",
        }

        # High-accuracy general OCR on the local image; the standard-accuracy
        # variant would be client.basicGeneral(image, options). The original
        # also issued an option-less basicGeneral call and discarded its
        # result, which only cost an extra request, so it is not repeated here.
        res = client.basicAccurate(image, options)

        try:
            first_line = res['words_result'][0]
            guess = first_line['words']
            probability = first_line['probability']['average']
        except (KeyError, IndexError, TypeError):
            # The response carried no usable first line: report an empty
            # guess with zero confidence so the caller retries.
            print("识别失败,将置信度归为0,文字为空")
            guess = ''
            probability = 0
        return guess, probability, res
    def get_code(browser):
        """Screenshot the captcha on the current page and OCR it until it validates.

        The element with id ``imgCodeId`` is located once; its bounding box is
        then cropped out of a full-page screenshot on every attempt. An attempt
        is accepted when the OCR result, with spaces removed, is exactly four
        characters from ``all_num`` and the average confidence is at least
        0.92. Otherwise the captcha is clicked to refresh it and the process
        repeats, up to 30 attempts.

        Side effects: writes ``All.png``, ``result.jpg`` and ``result.png`` in
        the working directory and finally renames ``result.png`` to
        ``result-<guess>-<confidence digits>.png``.

        Args:
            browser: Selenium WebDriver whose current page contains the
                captcha image element.

        Returns:
            The last recognized string (spaces stripped when it validated).
            If every attempt fails, the last unvalidated guess is returned.
        """
        captcha_xpath = "//*[@id='imgCodeId']"
        image = browser.find_element_by_xpath(captcha_xpath)
        location = image.location  # x/y coordinates of the captcha element
        size = image.size  # width/height of the captcha element
        x_begin = int(location['x'])
        y_begin = int(location['y'])
        my_with = int(size['width'])
        my_height = int(size['height'])
        # Crop box (left, upper, right, lower) of the captcha in the screenshot.
        rangle = (x_begin,
                  y_begin,
                  x_begin + my_with,
                  y_begin + my_height)

        probability = 0
        count = 0
        count_max = 30
        words_num = 0
        guess = ''
        # Only accept a result with average confidence >= 0.92 and exactly
        # four characters; otherwise refresh the captcha and try again.
        while (probability < 0.92 or words_num != 4) and count < count_max:
            if count > 0:
                # Clicking the captcha image requests a new one.
                browser.find_element_by_xpath(captcha_xpath).click()
                time.sleep(2)
            print("识别认证码中...请稍等")
            count += 1
            # Capture the whole page, which contains the captcha.
            browser.save_screenshot('All.png')
            # Close the screenshot file after each attempt; convert() forces
            # the pixel data to load before the file is closed.
            with Image.open("All.png") as page:
                captcha = page.crop(rangle).convert('RGB')
            # Browser screenshots are usually RGBA, which JPEG cannot store;
            # the RGB conversion above keeps this save from failing.
            captcha.save('result.jpg')
            # Pre-processing to improve recognition: grayscale ('L' mode) and
            # contrast enhancement. Work from the in-memory crop rather than
            # re-reading the lossy JPEG.
            imgry = captcha.convert('L')
            sharpness = ImageEnhance.Contrast(imgry)
            i3 = sharpness.enhance(3.0)  # 3.0 is the contrast factor
            i3.save("result.png")
            # Submit the processed image to the Baidu OCR API.
            guess, probability, _ = baidu_image_to_word('result.png')
            words_num = len(guess)
            print('第%d次猜测验证码,猜测结果为%s,猜测验证码个数为%d,置信度平均值为%f'%(count,guess,words_num,probability))
            # Require exactly four letters/digits once spaces are removed.
            # The original only ran this check when the guess contained a
            # space, so a four-character guess with punctuation slipped through.
            compact = guess.replace(' ', '')
            if len(compact) == 4 and all(ch in all_num for ch in compact):
                guess = compact
                words_num = len(guess)
            else:
                print("pass!猜测个数不对或者猜测值不是数据和字母")
                probability = 0

        # Keep the processed image under a name carrying the recognized text
        # and the confidence digits. Only whitelisted characters go into the
        # file name so a failed guess cannot produce an invalid path, and a
        # stale file of the same name is removed so the rename cannot collide.
        safe_guess = ''.join(ch for ch in guess if ch in all_num)
        target = 'result-%s-%s.png' % (safe_guess, str(probability)[2:])
        if os.path.exists(target):
            os.remove(target)
        os.rename('result.png', target)
        time.sleep(2)
        return guess
    # Script entry point: start a headless Firefox session, open the target
    # page and maximize the window.
    # NOTE(review): get_code() is not called in the visible part of this block;
    # presumably the page loaded here is the one holding the captcha - confirm.
    if __name__=="__main__":
    ## Disabled alternative: drive Chrome instead of Firefox.
    ##    chrome_options=webdriver.ChromeOptions()
    ####    chrome_options.add_argument('--headless')## run without a visible window
    ##    browser = webdriver.Chrome(chrome_options=chrome_options)
        fireFoxOptions = webdriver.FirefoxOptions()
        fireFoxOptions.set_headless()## run without a visible window
        browser = webdriver.Firefox(firefox_options=fireFoxOptions)
        browser.get('http://icrm.baidu.com/crm-portal/index.action')
        browser.maximize_window()