引言
准备工作
- requests: 用于发送HTTP请求。
- BeautifulSoup: 用于解析HTML文档。
- re: 用于正则表达式匹配。
其中requests和BeautifulSoup是第三方库,可以通过以下命令安装(re属于Python标准库,无需额外安装):
pip install requests beautifulsoup4
抓取步骤
步骤1:分析网页结构
在编写代码之前,先用浏览器的开发者工具查看目标网页的HTML,确认图片是通过<img>标签的src属性引用的。
步骤2:发送请求获取HTML内容
使用requests库发送HTTP请求,获取网页的HTML内容。
import requests

# Replace with the URL of the page whose images you want to grab.
url = 'http://example.com'
# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.get(url, timeout=10)
# Fail fast on 4xx/5xx instead of parsing an error page as if it were the target.
response.raise_for_status()
html_content = response.text
步骤3:解析HTML获取图片链接
from bs4 import BeautifulSoup

# Build a parse tree from the fetched markup using Python's built-in parser.
soup = BeautifulSoup(markup=html_content, features='html.parser')
# Collect every <img> element found in the document.
img_tags = soup.find_all(name='img')
步骤4:下载图片
import os
def download_images(img_tags, save_dir='images', base_url=None, timeout=10):
    """Download the image behind each ``<img>`` tag into ``save_dir``.

    Args:
        img_tags: Iterable of objects exposing ``.get('src')``, for example
            the result of ``soup.find_all('img')``.
        save_dir: Directory the files are written to; created if missing.
        base_url: Optional page URL used to resolve relative ``src`` values.
        timeout: Seconds to wait for each image request.

    Tags without a ``src``, sources that are not absolute http(s) URLs after
    resolution (relative paths without ``base_url``, ``data:`` URIs, ...),
    and downloads that fail are skipped so that one bad image does not abort
    the remaining ones.
    """
    # Imported locally so the function does not rely on file-level imports.
    from urllib.parse import urljoin, urlparse

    os.makedirs(save_dir, exist_ok=True)

    for index, img_tag in enumerate(img_tags):
        img_url = img_tag.get('src')
        if not img_url:
            # <img> without a usable src: nothing to download.
            continue
        if base_url:
            img_url = urljoin(base_url, img_url)

        parsed = urlparse(img_url)
        if parsed.scheme not in ('http', 'https'):
            # Truncate because data: URIs can be extremely long.
            print(f'Skipped {img_url[:80]}: not an absolute http(s) URL.')
            continue

        try:
            response = requests.get(img_url, timeout=timeout)
            response.raise_for_status()
        except requests.RequestException as exc:
            print(f'Failed to download {img_url}: {exc}')
            continue

        # Derive the name from the URL path only, so a query string such as
        # "pic.png?v=2" does not end up in the file name; fall back to a
        # generated name when the path yields nothing usable.
        img_name = os.path.basename(parsed.path)
        if img_name in ('', '.', '..'):
            img_name = f'image_{index}'

        with open(os.path.join(save_dir, img_name), 'wb') as f:
            f.write(response.content)
        print(f'Image {img_name} downloaded.')
# Save every collected image into the default "images" directory.
download_images(img_tags, save_dir='images')
步骤5:保存图片
图片的保存已在步骤4的download_images函数中完成:每张图片以二进制模式('wb')写入save_dir目录(默认为images)。
完整代码示例
import requests
from bs4 import BeautifulSoup
import os
def download_images(img_tags, save_dir='images', base_url=None, timeout=10):
    """Download the image behind each ``<img>`` tag into ``save_dir``.

    Args:
        img_tags: Iterable of objects exposing ``.get('src')``, for example
            the result of ``soup.find_all('img')``.
        save_dir: Directory the files are written to; created if missing.
        base_url: Optional page URL used to resolve relative ``src`` values.
        timeout: Seconds to wait for each image request.

    Tags without a ``src``, sources that are not absolute http(s) URLs after
    resolution (relative paths without ``base_url``, ``data:`` URIs, ...),
    and downloads that fail are skipped so that one bad image does not abort
    the remaining ones.
    """
    # Imported locally so the function does not rely on file-level imports.
    from urllib.parse import urljoin, urlparse

    os.makedirs(save_dir, exist_ok=True)

    for index, img_tag in enumerate(img_tags):
        img_url = img_tag.get('src')
        if not img_url:
            # <img> without a usable src: nothing to download.
            continue
        if base_url:
            img_url = urljoin(base_url, img_url)

        parsed = urlparse(img_url)
        if parsed.scheme not in ('http', 'https'):
            # Truncate because data: URIs can be extremely long.
            print(f'Skipped {img_url[:80]}: not an absolute http(s) URL.')
            continue

        try:
            response = requests.get(img_url, timeout=timeout)
            response.raise_for_status()
        except requests.RequestException as exc:
            print(f'Failed to download {img_url}: {exc}')
            continue

        # Derive the name from the URL path only, so a query string such as
        # "pic.png?v=2" does not end up in the file name; fall back to a
        # generated name when the path yields nothing usable.
        img_name = os.path.basename(parsed.path)
        if img_name in ('', '.', '..'):
            img_name = f'image_{index}'

        with open(os.path.join(save_dir, img_name), 'wb') as f:
            f.write(response.content)
        print(f'Image {img_name} downloaded.')
def grab_images_from_url(url, save_dir='images', timeout=10):
    """Fetch the page at ``url`` and download every image it references.

    Args:
        url: Address of the page to scan for ``<img>`` tags.
        save_dir: Directory handed on to ``download_images``.
        timeout: Seconds to wait for the page request.

    Raises:
        requests.RequestException: If the page cannot be fetched or answers
            with a 4xx/5xx status.
    """
    # Imported locally so the function does not rely on file-level imports.
    from urllib.parse import urljoin

    response = requests.get(url, timeout=timeout)
    # Do not parse an error page as if it were the requested document.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')

    img_tags = []
    for img_tag in soup.find_all('img'):
        src = img_tag.get('src')
        if not src:
            # An <img> without src has nothing to download.
            continue
        # Resolve relative sources against the final URL (after redirects)
        # so the downloader always receives an absolute address.
        img_tag['src'] = urljoin(response.url, src)
        img_tags.append(img_tag)

    download_images(img_tags, save_dir)
# Usage example. The guard keeps the network call from running when this
# file is imported as a module instead of executed as a script.
if __name__ == '__main__':
    # Replace with the URL of the page whose images you want to grab.
    url = 'http://example.com'
    grab_images_from_url(url)