上个月发现百度文库最新出了一个验证码,是AI生成的。内容每次可能都不一样,所以给识别造成 了很大困难。传统的比对放松完全失效。
一、介绍
这个是最近才出的最新验证码,内容主要以工厂、建筑、山峰、机器人、汽车、盆栽植物等为主。如下图所示



优点:
解决了图片种类有限的问题,AI验证码可以随机生成,生成种类无限多,每天都不一样,给识别造成很大困难。
缺点:
AI生成的图片可能不符合逻辑,甚至连真人都识别不了,会给用户体验带来不好的影响。如下图所示


我们经过几周的研究, 终于解决了百度AI旋转验证码的识别问题。下面是我们提供的识别代码,感兴趣的小伙伴可以把它转换成js逆向的方式进行验证。运行下面代码,会直接触发验证码。然后可以看到识别过程。
想要识别更多验证码请查看:得塔云
二、识别代码
下面是我用Python + selenium 实现的自动识别代码。正确率在70%左右。python版本是3.7,selenium==3.141.0。直接运行下面代码,可以直接触发验证码,验证码有很多种,这里我们只会挑选挑选旋转验证码进行识别。其他类型的识别可以访问 :得塔云
- import os
- import sys
- import time
- import random
- import base64
- import requests
- import io
- from io import BytesIO
- from PIL import Image, ImageDraw
- from selenium import webdriver
- from selenium.webdriver.support.wait import WebDriverWait
- from selenium.webdriver.common.action_chains import ActionChains
- from selenium.webdriver.common.keys import Keys
- from selenium.webdriver.support.select import Select
- from selenium.webdriver import FirefoxOptions
-
- # 用户的key
- key = '9vMU6EzIgmtfEuZe8iIw'
-
- #PIL图片保存为base64编码
- def PIL_base64(img, coding='utf-8'):
- img_format = img.format
- if img_format == None:
- img_format = 'JPEG'
-
- format_str = 'JPEG'
- if 'png' == img_format.lower():
- format_str = 'PNG'
- if 'gif' == img_format.lower():
- format_str = 'gif'
-
- if img.mode == "P":
- img = img.convert('RGB')
- if img.mode == "RGBA":
- format_str = 'PNG'
- img_format = 'PNG'
-
- output_buffer = BytesIO()
- # img.save(output_buffer, format=format_str)
- img.save(output_buffer, quality=100, format=format_str)
- byte_data = output_buffer.getvalue()
- base64_str = 'data:image/' + img_format.lower() + ';base64,' + base64.b64encode(byte_data).decode(coding)
-
- return base64_str
-
- # 接口识别
- def shibie(img):
- # 图片转base64
- img_base64 = PIL_base64(img)
- # 验证码识别接口
- url = "http://www.detayun.cn/openapi/verify_code_identify/"
- data = {
- # 用户的key
- "key": key,
- # 验证码类型
- "verify_idf_id": "44",
- # 样例图片
- "img_base64": img_base64,
- }
- header = {"Content-Type": "application/json"}
-
- # 发送请求调用接口
- response = requests.post(url=url, json=data, headers=header)
- # 判断是否正确请求
- if response.json()['code'] == 200:
- print(response.json())
- return response.json()['data']['angle']
- else:
- print('参数错误,请前往得塔云了解详情:https://www.detayun.cn/tool/verifyCodeHomePage2/?_=1714093687434')
- print('错误参数:', response.json())
- return None
-
-
-
- # 浏览器配置
- option = FirefoxOptions()
- # option.add_argument('--headless')
- driver = webdriver.Firefox(executable_path=r'webdriver\geckodriver.exe', options=option)
-
- # 记录成功次数
- t = 0
- # 记录失败次数
- f = 0
-
- for i in range(200):
- # 打开验证码页面
- driver.get('https://seccaptcha.baidu.com/v1/webapi/verint/svcp.html?ak=M7bcdh2k6uqtYV5miaRiI8m8x6LIaONq&backurl=https%3A%2F%2Fwenku.baidu.com%2F%3F_wkts_%3D1705066238641&ext=ih2lW9VV3PmxmO%2B%2Bx8wZgk9i1xGx9WH05J9hI74kTEVkpokzRQ8QxLB082MG2VoQUUT15llYBwsC%2BAaysNoPxpuKg0Hkpo4qMzBjXDEGhuQ%3D&subid=pc_home&ts=1705066239&sign=1cebe634245cd92fc9eca10d0850a36b')
- time.sleep(3)
-
- html_str = driver.page_source
- if 'canvas' in html_str:
- if '曲线' in html_str:
- print('曲线验证码')
-
- elif '数值' in html_str or '数字' in html_str:
- print('数值验证码')
- else:
- print('旋转验证码')
- # 等待图片出现
- WebDriverWait(driver, 20).until(lambda x: x.find_element_by_xpath('//img[@class="passMod_spin-background"]'))
- img = driver.find_element_by_xpath('//img[@class="passMod_spin-background"]')
- img_url = img.get_attribute('src')
-
- # 下载图片
- header = {
- "Host": "passport.baidu.com",
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:72.0) Gecko/20100101 Firefox/72.0",
- "Accept": "image/webp,*/*",
- "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
- "Accept-Encoding": "gzip, deflate, br",
- "Connection": "keep-alive",
- "Referer": "https://seccaptcha.baidu.com/v1/webapi/verint/svcp.html?ak=M7bcdh2k6uqtYV5miaRiI8m8x6LIaONq&backurl=https%3A%2F%2Fwenku.baidu.com%2F%3F_wkts_%3D1705066238641&ext=ih2lW9VV3PmxmO%2B%2Bx8wZgk9i1xGx9WH05J9hI74kTEVkpokzRQ8QxLB082MG2VoQUUT15llYBwsC%2BAaysNoPxpuKg0Hkpo4qMzBjXDEGhuQ%3D&subid=pc_home&ts=1705066239&sign=1cebe634245cd92fc9eca10d0850a36b",
- "Cookie": "BAIDUID=A0621DC238F4D936B38F699B70A7E41F:SL=0:NR=10:FG=1; BIDUPSID=A0621DC238F4D9360CD42C9C31352635; PSTM=1667351865; HOSUPPORT=1; UBI=fi_PncwhpxZ%7ETaKAanh2ue0vFk6vHMY02DgvigILJIFul8Z1nzMr9do3SYLtjAUqHSpUz7LvOKV27cIr18-YJryP0Q8j92oo93%7E6hGa0CLdraAlaHUZG-0PW9QrpZkW7MTyUn-yrAq7OmSRBIJ7%7E8gM9pv-; HISTORY=0ece87e30ec8ecccd52ff3d5c42f98002a893bfb73ff358893; BDUSS_BFESS=kwTVdpeFNORXlWVEozbW1kcFhBeHo0ZWQwbVlJNlBvcFhEWWpRZVJQWGhzbnBsSUFBQUFBJCQAAAAAAAAAAAEAAAC13Mct0KHQwl9keHkAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAOElU2XhJVNld1; H_WISE_SIDS=219946_216846_213346_219942_213039_230178_204909_230288_110085_236307_243888_244730_245412_243706_232281_249910_247148_250889_249892_252577_234296_253427_253705_240590_254471_179345_254689_254884_254864_253213_255713_254765_255939_255959_255982_107317_256062_256093_256083_255803_253993_256257_255661_256025_256223_256439_256446_254831_253151_256252_256196_256726_256739_251973_256230_256611_256996_257068_257079_257047_254075_257110_257208_251196_254144_257290_251068_256095_257287_254317_251059_251133_254299_257454_257302_255317_255907_255324_257481_244258_257582_257542_257503_255177_257745_257786_257937_257167_257904_197096_257586_257402_255231_257790_258193_258248_258165_8000084_8000115_8000114_8000126_8000140_8000149_8000166_8000172_8000178_8000181_8000185_8000204; Hm_lvt_90056b3f84f90da57dc0f40150f005d5=1700546200; MAWEBCUID=web_VYfxPuQDaKjEzVgXMFgoHouACkpXyjcDpcWwhATKqELuuwEtNy; BAIDUID_BFESS=A0621DC238F4D936B38F699B70A7E41F:SL=0:NR=10:FG=1; H_PS_PSSID=40206_40215_40080_40352_40379_40416_40300_40466_40471_40317; ZFY=j0lpzcgUac2hW5oc8GUPbnW9ug8zMx:B7VJa:AnxqPUaQ:C; BDRCVFR[gltLrB7qNCt]=mk3SLVN4HKm; delPer=0; PSINO=6",
-
- }
- response = requests.get(url=img_url, headers=header)
- img = Image.open(BytesIO(response.content))
- # 识别角度 360度对应238像素
- angle = shibie(img)
-
- # 计算滑动距离
- move_x = int(angle * (238 / 360))
- if move_x >= 238:
- move_x = 237
- elif move_x < 10:
- move_x = 10
-
- print(angle, move_x)
- # 获取滑块
- WebDriverWait(driver, 20).until(lambda x: x.find_element_by_xpath('//div[@class="passMod_slide-btn "]'))
- tag = driver.find_element_by_xpath('//div[@class="passMod_slide-btn "]')
-
- # 滑动滑块
- action = ActionChains(driver)
- action.click_and_hold(tag).perform()
- # 计算实际滑动距离 = 像素距离 + 前面空白距离
- if move_x+11 < 238:
- action.move_by_offset(move_x+11, 5)
- action.move_by_offset(-15, -2)
- action.move_by_offset(4, 3)
- else:
- action.move_by_offset(move_x - 11, 5)
- action.move_by_offset(7, -2)
- action.move_by_offset(4, 3)
- action.release().perform()
-
- # 判断是否成功 app
- try:
- WebDriverWait(driver, 5).until(lambda x: x.find_element_by_xpath('//div[@id="app"]'))
- t += 1
- print('成功')
- except:
- f += 1
- print('失败')
- time.sleep(2)
- print('总次数:{},成功:{},失败:{},正确率:{}'.format(t + f, t, f, t / (t + f)))