1、安装依赖库
- #以下是安装http请求的第三方库
- pip install requests urllib3
- #以下是安装处理图片的第三方库(代码中导入的 PIL 模块由 pillow 提供,无需另外安装 image 包)
- pip install pillow
- #以下是安装python解析html的第三方库
- pip install beautifulsoup4
2、爬取图片的代码
# Download every <img> referenced by a web page and store it in a local folder.
#
# The page is fetched with urllib3, parsed with BeautifulSoup, and each image is
# downloaded with requests and written out through Pillow.  Images are numbered
# by the position of their <img> tag on the page (starting at 1).
import requests
import urllib3
import urllib.parse
import os
from io import BytesIO
from PIL import Image
from bs4 import BeautifulSoup

# Address of the page to crawl; replace the placeholder with a real URL.
url = "URL"
# Directory that receives the downloaded images.
output_folder = r"C:\Users\zzx\Desktop\图片"+"/"
# Seconds to wait for each image download, so one stalled server cannot hang
# the whole crawl.
REQUEST_TIMEOUT = 10

http = urllib3.PoolManager()
header = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0"}
# NOTE(review): the page is requested with POST and no body, kept exactly as
# before; most static pages expect GET -- confirm against the target site.
request = http.request('POST', url, headers=header)
# Debugging aids: request.data.decode() is the response body, request.headers
# the response headers and request.status the HTTP status code.
data = request.data.decode()
soup = BeautifulSoup(data, "html.parser")

# The target directory does not depend on the image, so create it once up front.
os.makedirs(output_folder, exist_ok=True)

# Visit every <img> tag; the counter also advances for tags without a src so
# file numbers keep matching the tag's position on the page.
for i, img_tag in enumerate(soup.find_all("img"), start=1):
    image_url = img_tag.get("src")
    if image_url:
        # Turn a possibly relative URL into an absolute one.
        image_url_http = urllib.parse.urljoin(url, image_url)

        # Reset per iteration so a failed download can never fall through to
        # saving the previous iteration's image under the current index.
        image = None
        try:
            resp = requests.get(image_url_http, headers=header, timeout=REQUEST_TIMEOUT)
            # Reject HTTP error pages instead of handing them to Pillow.
            resp.raise_for_status()
            image = Image.open(BytesIO(resp.content))
        except Exception as exc:
            # Best effort: report the problem and continue with the next image.
            print(f"下载或打开图像时出错:{exc}")

        if image is not None:
            # GIFs keep their own extension; everything else is stored as PNG.
            suffix = ".gif" if ".gif" in image_url_http.split("/")[-1] else ".png"
            target_path = os.path.join(output_folder, f"image_{i}{suffix}")
            try:
                image.save(target_path)
                print(f"图像已成功保存到 {target_path}")
            except Exception as exc:
                print(f"保存图像时出错:{exc}")
                try:
                    # Fall back to the common JPEG format.  JPEG can only be
                    # written from L, RGB or CMYK data, so any other mode
                    # (RGBA, LA, P, ...) is converted to RGB first.
                    if image.mode not in ("RGB", "L", "CMYK"):
                        image = image.convert("RGB")
                    jpeg_path = os.path.join(output_folder, f"image_{i}.jpg")
                    image.save(jpeg_path)
                    print(f"图像已成功保存到 {jpeg_path}")
                except Exception as fallback_exc:
                    print(f"转换并保存图像时出错: {fallback_exc}")
    print(i, image_url)
3、结果图
免责声明:如果侵犯了您的权益,请联系站长,我们会及时删除侵权内容,谢谢合作!更多信息请访问主页:qidao123.com(ToB企服之家,中国第一个企服评测及商务社交产业平台)。