1.获取图片
# Step 1: open the GeeTest demo, trigger the captcha, and save screenshots of
# the prompt strip ("target.png") and the background picture ("bg.png").
import re
import time
import ddddocr
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver import ActionChains

service = Service("driver/chromedriver.exe")
driver = webdriver.Chrome(service=service)
try:
    # 1. Open the demo page.
    driver.get('https://www.geetest.com/adaptive-captcha-demo')

    # 2. Select the text-click captcha tab; poll every 0.5 s for up to 30 s.
    tag = WebDriverWait(driver, 30, 0.5).until(lambda dv: dv.find_element(
        By.XPATH,
        '//*[@id="gt-showZh-mobile"]/div/section/div/div[2]/div[1]/div[2]/div[3]/div[4]'
    ))
    tag.click()

    # 3. Click the button that starts the verification.
    tag = WebDriverWait(driver, 30, 0.5).until(lambda dv: dv.find_element(
        By.CLASS_NAME,
        'geetest_btn_click'
    ))
    tag.click()

    # Fixed pause so the captcha widget can finish rendering before capture.
    time.sleep(5)

    # Prompt strip: the characters the user is asked to click.
    target_tag = driver.find_element(By.CLASS_NAME, 'geetest_ques_back')
    target_tag.screenshot("target.png")

    # Background picture in which those characters must be located.
    bg_tag = driver.find_element(By.CLASS_NAME, 'geetest_bg')
    bg_tag.screenshot("bg.png")

    # Keep the browser open for manual inspection.
    time.sleep(2000)
finally:
    # quit() ends the whole WebDriver session; the original close() only
    # closed the current window, and nothing ran at all if a step above raised.
    driver.quit()
2.目标识别
对每个字符截图,并基于 ddddocr 识别。
# Step 2: OCR every character image in the captcha prompt with ddddocr.
import re
import time
import ddddocr
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver import ActionChains

service = Service("driver/chromedriver.exe")
driver = webdriver.Chrome(service=service)
try:
    # 1. Open the demo page.
    driver.get('https://www.geetest.com/adaptive-captcha-demo')

    # 2. Select the captcha variant tab.
    # NOTE(review): the original note named the slide-puzzle tab, but the rest
    # of this script reads prompt characters (a click-the-text challenge) -
    # confirm which tab this XPath actually selects.
    tag = WebDriverWait(driver, 30, 0.5).until(lambda dv: dv.find_element(
        By.XPATH,
        '//*[@id="gt-showZh-mobile"]/div/section/div/div[2]/div[1]/div[2]/div[3]/div[4]'
    ))
    tag.click()

    # 3. Click the button that starts the verification.
    tag = WebDriverWait(driver, 30, 0.5).until(lambda dv: dv.find_element(
        By.CLASS_NAME,
        'geetest_btn_click'
    ))
    tag.click()

    # 4. Fixed pause while the captcha renders.
    time.sleep(5)

    # 5. Recognise each prompt character. The recognizer is built once; the
    #    original constructed a new DdddOcr for every image inside the loop.
    ocr = ddddocr.DdddOcr(show_ad=False)
    parent = driver.find_element(By.CLASS_NAME, 'geetest_ques_back')
    target_word_list = [
        ocr.classification(img_tag.screenshot_as_png)
        for img_tag in parent.find_elements(By.TAG_NAME, "img")
    ]
    print("要识别的文字:", target_word_list)

    # Keep the browser open for manual inspection.
    time.sleep(2000)
finally:
    # quit() ends the whole WebDriver session; close() only closed the window.
    driver.quit()
3.背景坐标识别
3.1 ddddocr
能识别,但是发现默认识别率有点低;想要提升识别率,可以搭建 PyTorch 环境对模型进行训练,参考:https://github.com/sml2h3/dddd_trainer
# Step 3.1: use ddddocr both to detect character boxes in the background image
# and to recognise the character inside each box.
import re
import time
import ddddocr
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver import ActionChains
from PIL import Image, ImageDraw
from io import BytesIO

service = Service("driver/chromedriver.exe")
driver = webdriver.Chrome(service=service)
try:
    # 1. Open the demo page.
    driver.get('https://www.geetest.com/adaptive-captcha-demo')

    # 2. Select the captcha variant tab.
    # NOTE(review): the original note named the slide-puzzle tab, but this
    # script solves a click-the-text challenge - confirm which tab this is.
    tag = WebDriverWait(driver, 30, 0.5).until(lambda dv: dv.find_element(
        By.XPATH,
        '//*[@id="gt-showZh-mobile"]/div/section/div/div[2]/div[1]/div[2]/div[3]/div[4]'
    ))
    tag.click()

    # 3. Click the button that starts the verification.
    tag = WebDriverWait(driver, 30, 0.5).until(lambda dv: dv.find_element(
        By.CLASS_NAME,
        'geetest_btn_click'
    ))
    tag.click()

    # 4. Fixed pause while the captcha renders.
    time.sleep(5)

    # One classifier serves both the prompt images and the background crops;
    # the original rebuilt an identical DdddOcr on every loop iteration.
    classifier = ddddocr.DdddOcr(show_ad=False)
    detector = ddddocr.DdddOcr(show_ad=False, det=True)

    # 5. Recognise each prompt character.
    parent = driver.find_element(By.CLASS_NAME, 'geetest_ques_back')
    target_word_list = [
        classifier.classification(img_tag.screenshot_as_png)
        for img_tag in parent.find_elements(By.TAG_NAME, "img")
    ]
    print("要识别的文字:", target_word_list)

    # 6. Grab the background picture as PNG bytes.
    bg_tag = driver.find_element(By.CLASS_NAME, 'geetest_bg')
    content = bg_tag.screenshot_as_png

    # 7. Detect character boxes; each entry unpacks as (x1, y1, x2, y2).
    poses = detector.detection(content)

    # 8. Crop every box, recognise it, and store the box centre per character.
    bg_word_dict = {}
    img = Image.open(BytesIO(content))
    for box in poses:
        x1, y1, x2, y2 = box
        cropped = img.crop(box)
        img_byte = BytesIO()
        cropped.save(img_byte, 'png')
        # Recognition accuracy on these crops is low (noted by the author).
        word = classifier.classification(img_byte.getvalue())
        # A character recognised twice keeps only the last box.
        bg_word_dict[word] = [int((x1 + x2) / 2), int((y1 + y2) / 2)]
    print(bg_word_dict)

    # Keep the browser open for manual inspection.
    time.sleep(1000)
finally:
    # quit() ends the whole WebDriver session; close() only closed the window.
    driver.quit()
3.2 打码平台
https://www.chaojiying.com/
# Step 3.2: send a local captcha image to the Chaojiying recognition API and
# print the JSON reply.
import base64
import requests
from hashlib import md5

# Read the image inside a context manager so the handle is always closed
# (the original open(...).read() never closed it).
with open('5.jpg', 'rb') as image_file:
    file_bytes = image_file.read()

res = requests.post(
    url='http://upload.chaojiying.net/Upload/Processing.php',
    data={
        'user': "deng",
        # The API receives the MD5 hex digest of the account password.
        'pass2': md5("密码".encode('utf-8')).hexdigest(),
        'codetype': "9501",
        'file_base64': base64.b64encode(file_bytes)
    },
    headers={
        'Connection': 'Keep-Alive',
        'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
    }
)
res_dict = res.json()
print(res_dict)
# Example reply:
# {'err_no': 0, 'err_str': 'OK', 'pic_id': '1234612060701120002', 'pic_str': '的,86,73|粉,111,38|菜,40,49|香,198,101', 'md5': 'faac71fc832b2ead01ffb4e813f3be60'}
结合极验案例:截图 + 识别:
# Combined flow: capture the GeeTest captcha with Selenium, OCR the prompt
# locally, and ask the Chaojiying API for the coordinates of each character
# in the background picture.
import re
import time
import ddddocr
import requests
import base64
from hashlib import md5
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver import ActionChains
from PIL import Image, ImageDraw
from io import BytesIO

service = Service("driver/chromedriver.exe")
driver = webdriver.Chrome(service=service)
try:
    # 1. Open the demo page.
    driver.get('https://www.geetest.com/adaptive-captcha-demo')

    # 2. Select the captcha variant tab.
    # NOTE(review): the original note named the slide-puzzle tab, but this
    # script solves a click-the-text challenge - confirm which tab this is.
    tag = WebDriverWait(driver, 30, 0.5).until(lambda dv: dv.find_element(
        By.XPATH,
        '//*[@id="gt-showZh-mobile"]/div/section/div/div[2]/div[1]/div[2]/div[3]/div[4]'
    ))
    tag.click()

    # 3. Click the button that starts the verification.
    tag = WebDriverWait(driver, 30, 0.5).until(lambda dv: dv.find_element(
        By.CLASS_NAME,
        'geetest_btn_click'
    ))
    tag.click()

    # 4. Fixed pause while the captcha renders.
    time.sleep(5)

    # 5. Recognise each prompt character (recognizer built once, not per image).
    ocr = ddddocr.DdddOcr(show_ad=False)
    parent = driver.find_element(By.CLASS_NAME, 'geetest_ques_back')
    target_word_list = [
        ocr.classification(img_tag.screenshot_as_png)
        for img_tag in parent.find_elements(By.TAG_NAME, "img")
    ]
    print("要识别的文字:", target_word_list)

    # 6. Grab the background picture (bytes for upload, file for inspection).
    bg_tag = driver.find_element(By.CLASS_NAME, 'geetest_bg')
    content = bg_tag.screenshot_as_png
    bg_tag.screenshot("bg.png")

    # 7. Ask the recognition service for every character and its coordinates.
    res = requests.post(
        url='http://upload.chaojiying.net/Upload/Processing.php',
        data={
            'user': "deng",
            'pass2': md5("密码".encode('utf-8')).hexdigest(),
            'codetype': "9501",
            'file_base64': base64.b64encode(content)
        },
        headers={
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }
    )
    res_dict = res.json()
    print(res_dict)

    # An empty/missing pic_str means the service returned no result; fail with
    # the full reply instead of an opaque unpacking error further down.
    pic_str = res_dict.get("pic_str")
    if not pic_str:
        raise RuntimeError(f"captcha service returned no result: {res_dict}")

    # 8. Map each character to its integer coordinates, e.g. {"鸭": (196, 85)}.
    #    pic_str looks like "word,x,y|word,x,y|...".
    bg_word_dict = {}
    for item in pic_str.split("|"):
        word, x, y = item.split(",")
        bg_word_dict[word] = (int(x), int(y))

    print(bg_word_dict)

    # Keep the browser open for manual inspection.
    time.sleep(1000)
finally:
    # quit() ends the whole WebDriver session; close() only closed the window.
    driver.quit()
4.坐标点击
根据坐标,在验证码上进行点击。
# Template: move the pointer to the given x/y offset of an element, then click.
# 标签对象 is a placeholder for the target WebElement; driver, x and y must
# already be defined by the surrounding script.
click_action = ActionChains(driver)
click_action.move_to_element_with_offset(标签对象, xoffset=x, yoffset=y)
click_action.click()
click_action.perform()
# Full flow: capture the GeeTest captcha, OCR the prompt characters, get the
# coordinates of every character in the background from Chaojiying, then click
# the prompt characters in order.
import re
import time
import ddddocr
import requests
import base64
from hashlib import md5
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver import ActionChains
from PIL import Image, ImageDraw
from io import BytesIO

service = Service("driver/chromedriver.exe")
driver = webdriver.Chrome(service=service)
try:
    # 1. Open the demo page.
    driver.get('https://www.geetest.com/adaptive-captcha-demo')

    # 2. Select the captcha variant tab.
    # NOTE(review): the original note named the slide-puzzle tab, but this
    # script solves a click-the-text challenge - confirm which tab this is.
    tag = WebDriverWait(driver, 30, 0.5).until(lambda dv: dv.find_element(
        By.XPATH,
        '//*[@id="gt-showZh-mobile"]/div/section/div/div[2]/div[1]/div[2]/div[3]/div[4]'
    ))
    tag.click()

    # 3. Click the button that starts the verification.
    tag = WebDriverWait(driver, 30, 0.5).until(lambda dv: dv.find_element(
        By.CLASS_NAME,
        'geetest_btn_click'
    ))
    tag.click()

    # 4. Fixed pause while the captcha renders.
    time.sleep(5)

    # 5. Recognise each prompt character (recognizer built once, not per image).
    ocr = ddddocr.DdddOcr(show_ad=False)
    parent = driver.find_element(By.CLASS_NAME, 'geetest_ques_back')
    target_word_list = [
        ocr.classification(img_tag.screenshot_as_png)
        for img_tag in parent.find_elements(By.TAG_NAME, "img")
    ]
    print("要识别的文字:", target_word_list)

    # 6. Grab the background picture as PNG bytes.
    bg_tag = driver.find_element(By.CLASS_NAME, 'geetest_bg')
    content = bg_tag.screenshot_as_png
    # bg_tag.screenshot("bg.png")

    # 7. Ask the recognition service for every character and its coordinates.
    res = requests.post(
        url='http://upload.chaojiying.net/Upload/Processing.php',
        data={
            'user': "deng",
            'pass2': md5("自己密码".encode('utf-8')).hexdigest(),
            'codetype': "9501",
            'file_base64': base64.b64encode(content)
        },
        headers={
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }
    )
    res_dict = res.json()

    # An empty/missing pic_str means the service returned no result; fail with
    # the full reply instead of an opaque unpacking error further down.
    pic_str = res_dict.get("pic_str")
    if not pic_str:
        raise RuntimeError(f"captcha service returned no result: {res_dict}")

    # pic_str looks like "word,x,y|word,x,y|..."; coordinates stay as strings.
    bg_word_dict = {}
    for item in pic_str.split("|"):
        word, x, y = item.split(",")
        bg_word_dict[word] = (x, y)
    print(bg_word_dict)
    # Example data:
    # target_word_list = ['粉', '菜', '香']
    # bg_word_dict = {'粉': ('10', '10'), '菜': ('50', '50'), '香': ('100', '93')}

    # 8. Click the characters in prompt order.
    # The element size is loop-invariant, so it is read once up front.
    half_width = int(bg_tag.size['width'] / 2)
    half_height = int(bg_tag.size['height'] / 2)
    for word in target_word_list:
        time.sleep(2)
        group = bg_word_dict.get(word)
        if not group:
            # Prompt character not found in the service reply: skip it.
            continue
        x, y = group
        # Shift by half the element size before passing the offsets.
        # NOTE(review): this presumes the service reports coordinates from the
        # image's top-left corner and that this Selenium version measures
        # offsets from the element centre - confirm both.
        x = int(x) - half_width
        y = int(y) - half_height
        ActionChains(driver).move_to_element_with_offset(bg_tag, xoffset=x, yoffset=y).click().perform()

    # Keep the browser open for manual inspection.
    time.sleep(1000)
finally:
    # quit() ends the whole WebDriver session; close() only closed the window.
    driver.quit()
5.图片验证码
在许多登录、注册、频繁操作等行为时,一般都会加入验证码的功能。
如果想要基于代码实现某些功能,就必须实现:自动识别验证码,然后再做其他功能。
6.识别
基于Python的模块 ddddocr 可以实现对图片验证码的识别。
# Option A: install with the pip that belongs to Python 3.11
# (-i pulls ddddocr from the Aliyun PyPI mirror).
pip3.11 install ddddocr==1.4.9 -i https://mirrors.aliyun.com/pypi/simple/
pip3.11 install Pillow==9.5.0

# Option B: the same pinned packages via whichever pip is on PATH.
pip install ddddocr==1.4.9 -i https://mirrors.aliyun.com/pypi/simple/
pip install Pillow==9.5.0
6.1 本地识别
# Recognise a captcha image stored on disk and print the decoded text.
import ddddocr

with open("img/v1.jpg", mode='rb') as image_file:
    image_bytes = image_file.read()

recognizer = ddddocr.DdddOcr(show_ad=False)
print(recognizer.classification(image_bytes))
6.2 在线识别
也可以直接请求获取图片,然后直接识别:
# Download a captcha image over HTTP and recognise it without saving to disk.
import ddddocr
import requests

captcha_response = requests.get(
    url="https://console.zbox.filez.com/captcha/create/reg?_t=1701511836608"
)
recognizer = ddddocr.DdddOcr(show_ad=False)
# The response body is passed to the recognizer as raw bytes.
print(recognizer.classification(captcha_response.content))
# Download a captcha image addressed by a captcha token and recognise it.
import ddddocr
import requests

# Plain string: the original used an f-string with no placeholders.
res = requests.get(
    url="https://api.ruanwen.la/api/auth/captcha?captcha_token=n5A6VXIsMiI4MTKoco0VigkZbByJbDahhRHGNJmS"
)
# Fail on an HTTP error (e.g. an expired token) instead of handing an error
# body to the OCR engine as if it were an image.
res.raise_for_status()

ocr = ddddocr.DdddOcr(show_ad=False)
code = ocr.classification(res.content)
print(code)
6.3 base64
有些平台的图片是以base64编码形式存在,需要先处理一下再识别。
- import base64
- import ddddocr
- content = base64.b64decode("iVBORw0KGgoAAAANSUhEUgAAAGQAAAAoCAYAAAAIeF9DAAAHGElEQVR4Xu2a2VNTZxTAHZ/62of+BX3rdPrUmaq1da3WQWur1mqntrQWLe7UkUoQlEWFqFDZZN8hUBWKQUVpQDCyVUeltVWIIiAEZHWBAEk4zffZe+bmS+6SEEzE/GbOkHvPuXeY85t7vyWZBV48ilnsCS/uxSvEw3hthJydXWITnsiMFyLWfLGcu5jRQuQ2W27dy8ArBOTXvQxmrBBHm+xo/XQhKkTffRu0NSfgt8KvISttGaQlfQyFOWtBXboDbt7Ig6HBdvYSj6C7awDC3kqGg4oC8N0YC6uWhcPCOUHgszQMtvudgsK8Gnj2dNTqGmeErH47Z8rBYlfI6MgAXLqwH5Lj50iGJzL//UDJWP1pBDTfasNrnBEyHdgIef6sF1R5X9o0Xig8Ebb5QrF8QajlLTDoMTIIVkLMZiOcKfrWquGXLyqgo70BDKNDNG8wDFleCTehsS4JivK/4l/uMWxco4T4WDUo30yH+zo9PH0yavn/x+nnuBg1LPhgP0qJjjzjuUJuNGWiiJSEedByt4KffiWxt9bIz65GISveO2CVczcoxGQah+y05SikqT6ZX/fKw4khkfNGEQpZNDeILXUrKKT13mWUkZmyBIxGA7/O5XT2PoL0c9kQcPIX+D5yK2w/vgcis6Oh5qYWJicnac2msM0YrmRw4BkK+cwyA3OEuLh4CApS0GhoaGTTNtTXN2B9fHwCm7YBhdRWR6GQK5rD/BqXU9FQCb4RflYN50d0fgwYxsemTUjJ6ToUEh6iYtOi1NXVY4MTEhLZtA2khquvr69n0zagEP5gfu/uBXruga4aykr8ISv1E0g/tRBUueug+o8I6NH/hTdwFO3tOhsB9iK5NN2lQsbHjZbJSR9kplbC4nkKKsNnySHoejTAlooyOjoKisAXDSYRN2cvW4Lo9XqsCwkJpddKgUJyM3xQSH9fC9RUHbWabbFBnigy63KEEcMIbI3agU0mrynN9WoYfDoIJrOJ/iXH5DwryFnYqS4XP26Kg86OPrZcklPvBIBKVYSNVqvL6Tl7qNVqrCsqKmbTdkEhaUkfYbOv1cbaCLAXNVVR/HtJQl5VXIO3Ru+E3sHHbAmFnCf56RKyb3cmPLjfw5bKgjS/tVWHjY6IiISkd22FmEwmmuPqdDodW2IXFJKaOB8bnZr4IRQXbITWlst01U6ehJGRfnpM1h58KY68vo4VxGKD1doXr0UhSH66hHCx/+dsePJkhL1EkiSLlJDdQdjs5mbbHjQ3N2NeqTyGExUpUAgZJ7gmny32BeOE/ffdhOX8adUmrK2qlD9L2RWzFxvc3adn01Z09XW7RAiH0WiCx73DoKm8DVt8E1DK+tVRTknRaDTY8MzMLDZNz3F5jaaKTQuCQgpy1mCTH3U08Wts6LSs3Lnawty1bFoQMr3lGjxhnGDTVpC8K4XwMZvNELwvF6XEnTjHlkgyPDwMCkUwbTj5S47l5KRAIefL9mCThZ4ODpLnasnsSy6eIoTQ/vAxClm36iibloXQUyD19IiBQhquJTgnJHkRmxbEna8slgnLNJgT4uxq3d44QYJ8FhtfxEAhXZ3XscmdHeIrULLZyNWq8tazaUHcMagL0XKvC4WQ70ucwXYmdZ/OprhjkiM1joBCJifNkJe5kjaZDuoCWyfsoO7I1Ncd0157jI1NwC7/FBQSGJDFlsiGv9YoLi6m6w3uuLy8nC2XxGq395+/S7HRZNqra6m0rC4HLYOgif4lx8X5G7AmOX4uDPTLm18T7C0Mq65fsSwIh/5fGA7R46ksDDd8oYSTx89BnfZfut1O9q1MJjPdfm970EO3Tcj2PH/6e7XmDnsb2bCrcRLcsV7v+FrHSgh5SviDu1RUrAyB1tlX+beQRO7WSUppBn7+LtyPvY0g/EbLicOH5K2gxeDvV3GRmJjEls
nCSgiBDNgV5wNtms8P8l3Jn41pluoXix0ixRExcjYXh58/weOflLvYWwjCNlwoyECennzJ8vTLW7CJQXZ9WSGNjeLjsBA2Qjja27RQeTEYcsJW0G2VjJTF9McO2toYwR83OCKFbL+nlWVBwK+BdDq87Zj19jvJc0ICE4PZywUhP3BQ/94E4QdU8MM3J+HzFZH0Bw5L5wfDGp8jsHdnBuRlVdNFoqswGAwQGnoQZZDPY2NjbJksBIUQHGkwhzPX2KPkShkKSTqbwqZnLIJCptLYqVxLaNd3gN/RbSik9paWLZmxuEVIcMohKL92EVo6dNA/PABGk5F+IdXW/RBOV5XA5iP+KMNfuRvGJ8bZW8xY3CKEHcTFouGO+L7aTMNjhWw+7P9avao4BIUQpBprDznXtPd0wJmqUjiSo4R98Qq6WPSN2EIXhBFZUXRAJ9Pe1xFRIQQ5DeZwpNaLfSSFEEijxZotlfciH1lCOLjGs+HFdTgkxMv08x9BPe61Ol73uQAAAABJRU5ErkJggg==")
# Optional: write the decoded bytes to x.png to inspect the image.
# with open('x.png', mode='wb') as f:
#     f.write(content)

# Recognise the decoded image bytes held in `content` and print the text.
recognizer = ddddocr.DdddOcr(show_ad=False)
print(recognizer.classification(content))
7.案例:x文街
https://i.ruanwen.la/
# Case study: request a captcha from the site, recognise it with ddddocr, and
# submit it together with the login credentials.
import requests
import ddddocr

# 1. Ask the API for a new captcha; the reply carries a token and an image URL.
res = requests.post(url="https://api.ruanwen.la/api/auth/captcha/generate")
# Surface an HTTP failure here rather than as a KeyError on the lookups below.
res.raise_for_status()
res_dict = res.json()
captcha_token = res_dict['data']['captcha_token']
captcha_url = res_dict['data']['src']

# 2. Download the captcha image; an error body must not reach the OCR engine.
res = requests.get(captcha_url)
res.raise_for_status()

# 3. Recognise the captcha text.
ocr = ddddocr.DdddOcr(show_ad=False)
code = ocr.classification(res.content)
print(code)

# 4. Log in. No status check here on purpose: a rejected login should still
#    print the JSON body, as the original did.
res = requests.post(
    url="https://api.ruanwen.la/api/auth/authenticate",
    json={
        "mobile": "手机号",
        "device": "pc",
        "password": "密码",
        "captcha_token": captcha_token,
        "captcha": code,
        "identity": "advertiser"
    }
)
print(res.json())
# Example reply (token redacted):
# {'success': True, 'message': '验证成功', 'data': {'token': '[redacted JWT]'}, 'status': 200}
复制代码 免责声明:如果侵犯了您的权益,请联系站长,我们会及时删除侵权内容,谢谢合作!更多信息从访问主页:qidao123.com:ToB企服之家,中国第一个企服评测及商务社交产业平台。 |