Scraping Beauty Images
Gallery address: https://www.umei.cc/touxiangtupian/nvshengtouxiang/
Scraping a single page
Install the dependency libraries
```
pip install selenium requests beautifulsoup4
```
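If the install succeeded, a quick sanity check (a minimal sketch; the `__version__` attributes are standard in these three packages) confirms everything imports:

```python
# Verify that all three dependencies import and report their versions
import selenium, requests, bs4
print(selenium.__version__, requests.__version__, bs4.__version__)
```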
```python
import os
import random
import time

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service

# Configure Chrome options
chrome_options = Options()
chrome_options.add_argument("--headless")     # headless mode: no browser window
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--no-sandbox")

# Path to ChromeDriver (raw string so the backslashes are not treated as escapes)
service = Service(r'D:\env\python3\chromedriver.exe')

url = 'https://www.umei.cc/touxiangtupian/nvshengtouxiang/'
baseUrl = "https://www.umei.cc"

# Initialize the WebDriver
driver = webdriver.Chrome(service=service, options=chrome_options)
driver.get(url)
time.sleep(random.uniform(5, 10))  # wait for the page to load
html = driver.page_source          # listing page HTML
soup = BeautifulSoup(html, 'html.parser')

# Extract the listing entries with BeautifulSoup
divList = soup.find_all("div", class_="item masonry_brick")

os.makedirs("img", exist_ok=True)  # make sure the output directory exists

# One listing entry per image
for divItem in divList:
    linkImage = divItem.find("div", class_="item_t").find("div", class_="img").find("a")["href"]
    linkImage = baseUrl + linkImage
    # Fetch the full-size image from the detail page
    driver.get(linkImage)
    time.sleep(random.uniform(5, 10))
    html = driver.page_source
    sonSoup = BeautifulSoup(html, 'html.parser')
    imgUrl = sonSoup.find("div", class_="tsmaincont-main-cont-txt").find("img")["src"]
    print(f"Downloading {imgUrl}")
    # Save the image
    img_response = requests.get(imgUrl)
    img_name = imgUrl.split('/')[-1]
    with open(os.path.join("img", img_name), "wb") as f:
        f.write(img_response.content)
    print(f"Saved {img_name}")

print("All images from page 1 saved to the img directory.")
driver.quit()  # close the browser
```
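The fixed `time.sleep(random.uniform(5, 10))` calls above are a blunt way to wait for the page. A minimal sketch using Selenium's explicit waits (assuming the same `div.item.masonry_brick` listing markup) loads faster and fails more predictably when the page structure changes:

```python
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Wait up to 15 seconds for the listing entries to appear instead of sleeping blindly
driver.get(url)
WebDriverWait(driver, 15).until(
    EC.presence_of_all_elements_located((By.CSS_SELECTOR, "div.item.masonry_brick"))
)
html = driver.page_source
```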
Scraping multiple pages
```python
import os
import random
import time

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service

# Configure Chrome options
chrome_options = Options()
chrome_options.add_argument("--headless")     # headless mode: no browser window
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--no-sandbox")

# Path to ChromeDriver (raw string so the backslashes are not treated as escapes)
service = Service(r'D:\env\python3\chromedriver.exe')

url = 'https://www.umei.cc/touxiangtupian/nvshengtouxiang/'
baseUrl = "https://www.umei.cc"

# Initialize the WebDriver
driver = webdriver.Chrome(service=service, options=chrome_options)

def getImage(url, page):
    driver.get(url)
    print(f"Scraping page {page}...")
    print(url)
    time.sleep(random.uniform(5, 10))  # wait for the page to load
    html = driver.page_source          # listing page HTML
    soup = BeautifulSoup(html, 'html.parser')
    # Extract the listing entries with BeautifulSoup
    divList = soup.find_all("div", class_="item masonry_brick")
    for divItem in divList:
        linkImage = divItem.find("div", class_="item_t").find("div", class_="img").find("a")["href"]
        linkImage = baseUrl + linkImage
        # Fetch the full-size image from the detail page
        driver.get(linkImage)
        time.sleep(random.uniform(5, 10))
        html = driver.page_source
        sonSoup = BeautifulSoup(html, 'html.parser')
        imgUrl = sonSoup.find("div", class_="tsmaincont-main-cont-txt").find("img")["src"]
        print(f"Downloading {imgUrl}")
        # Save the image
        img_response = requests.get(imgUrl)
        img_name = imgUrl.split('/')[-1]
        with open(os.path.join("img", img_name), "wb") as f:
            f.write(img_response.content)
        print(f"Saved {img_name}")
    print(f"All images from page {page} saved to the img directory.")

os.makedirs("img", exist_ok=True)  # make sure the output directory exists

# Scrape pages 1-10: page 1 uses the base URL, later pages use index_{page}.htm
for page in range(1, 11):
    if page == 1:
        getImage(url, page)
    else:
        pageUrl = f"{url}index_{page}.htm"
        getImage(pageUrl, page)

driver.quit()  # close the browser
```
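The bare `requests.get` call works, but a small download helper makes the script more robust for large images. This is a sketch under my own assumptions: the `download_image` name and the `User-Agent` header are additions for illustration, not part of the original script.

```python
import os
import requests

HEADERS = {"User-Agent": "Mozilla/5.0"}  # hypothetical UA; some sites reject bare requests

def download_image(img_url, out_dir="img"):
    """Stream an image to disk and return the saved path (illustrative helper)."""
    os.makedirs(out_dir, exist_ok=True)
    path = os.path.join(out_dir, img_url.split("/")[-1])
    resp = requests.get(img_url, headers=HEADERS, timeout=30, stream=True)
    resp.raise_for_status()  # surface HTTP errors instead of writing an error page to disk
    with open(path, "wb") as f:
        for chunk in resp.iter_content(chunk_size=8192):
            f.write(chunk)
    return path
```

Streaming in chunks keeps memory flat for large files, and `raise_for_status()` stops a 404 or 403 response body from being silently saved as an image.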
