引言

准备工作

  1. requests: 用于发送HTTP请求。
  2. BeautifulSoup: 用于解析HTML文档。
  3. re: 用于正则表达式匹配(属于 Python 标准库,无需单独安装;本文的示例代码并未直接用到它)。

其中 requests 和 BeautifulSoup 是第三方库,你可以通过以下命令安装:

pip install requests beautifulsoup4

抓取步骤

步骤1:分析网页结构

步骤2:发送请求获取HTML内容

使用requests库发送HTTP请求,获取网页的HTML内容。

import requests

url = 'http://example.com'  # Replace with the URL of the page whose images you want to fetch
# A timeout keeps the script from hanging forever on an unresponsive server.
response = requests.get(url, timeout=10)
# Fail loudly on 4xx/5xx instead of parsing an error page as if it were the target page.
response.raise_for_status()
html_content = response.text

步骤3:解析HTML获取图片链接

from bs4 import BeautifulSoup

# Parse the fetched HTML using the 'html.parser' backend.
soup = BeautifulSoup(html_content, 'html.parser')
# Collect every <img> element found in the document.
img_tags = soup.find_all('img')

步骤4:下载图片

import os

def download_images(img_tags, save_dir='images', base_url=None, timeout=10):
    """Download the image referenced by each ``<img>`` tag into ``save_dir``.

    Args:
        img_tags: Iterable of tag-like objects exposing ``.get('src')``,
            e.g. the result of ``soup.find_all('img')``.
        save_dir: Directory the files are written to; created if missing.
        base_url: URL of the page the tags came from. When given, relative
            ``src`` values are resolved against it before downloading.
        timeout: Seconds to wait for each HTTP request.

    Tags without a ``src`` are skipped. An image whose request fails (bad
    URL, network error, HTTP error status) is reported and skipped so that
    one broken link does not abort the remaining downloads.
    """
    from urllib.parse import urljoin, urlsplit

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(save_dir, exist_ok=True)

    for index, img_tag in enumerate(img_tags):
        src = img_tag.get('src')
        if not src:
            # <img> without a usable src (lazy-loaded or malformed markup).
            continue
        img_url = urljoin(base_url, src) if base_url else src

        try:
            response = requests.get(img_url, timeout=timeout)
            # Do not save an HTML error page under an image file name.
            response.raise_for_status()
        except requests.RequestException as exc:
            print(f'Failed to download {img_url}: {exc}')
            continue

        # Name the file after the URL path only, so query strings and
        # fragments do not end up in the file name; fall back to a
        # positional name when the path has no final component.
        img_name = os.path.basename(urlsplit(img_url).path) or f'image_{index}'
        with open(os.path.join(save_dir, img_name), 'wb') as f:
            f.write(response.content)
        print(f'Image {img_name} downloaded.')

# Download every collected image into the default 'images' directory.
download_images(img_tags)

步骤5:保存图片

完整代码示例

import requests
from bs4 import BeautifulSoup
import os

def download_images(img_tags, save_dir='images', base_url=None, timeout=10):
    """Download the image referenced by each ``<img>`` tag into ``save_dir``.

    Args:
        img_tags: Iterable of tag-like objects exposing ``.get('src')``,
            e.g. the result of ``soup.find_all('img')``.
        save_dir: Directory the files are written to; created if missing.
        base_url: URL of the page the tags came from. When given, relative
            ``src`` values are resolved against it before downloading.
        timeout: Seconds to wait for each HTTP request.

    Tags without a ``src`` are skipped. An image whose request fails (bad
    URL, network error, HTTP error status) is reported and skipped so that
    one broken link does not abort the remaining downloads.
    """
    from urllib.parse import urljoin, urlsplit

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(save_dir, exist_ok=True)

    for index, img_tag in enumerate(img_tags):
        src = img_tag.get('src')
        if not src:
            # <img> without a usable src (lazy-loaded or malformed markup).
            continue
        img_url = urljoin(base_url, src) if base_url else src

        try:
            response = requests.get(img_url, timeout=timeout)
            # Do not save an HTML error page under an image file name.
            response.raise_for_status()
        except requests.RequestException as exc:
            print(f'Failed to download {img_url}: {exc}')
            continue

        # Name the file after the URL path only, so query strings and
        # fragments do not end up in the file name; fall back to a
        # positional name when the path has no final component.
        img_name = os.path.basename(urlsplit(img_url).path) or f'image_{index}'
        with open(os.path.join(save_dir, img_name), 'wb') as f:
            f.write(response.content)
        print(f'Image {img_name} downloaded.')


def grab_images_from_url(url, save_dir='images', timeout=10):
    """Fetch the page at ``url`` and download every image it references.

    Args:
        url: Address of the web page to scan for ``<img>`` tags.
        save_dir: Directory the images are written to.
        timeout: Seconds to wait for each HTTP request.

    Raises:
        requests.RequestException: If the page itself cannot be fetched or
            the server answers with an HTTP error status.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    # response.url is the final address after redirects, which is the
    # correct base for resolving relative image links.
    download_images(
        soup.find_all('img'),
        save_dir=save_dir,
        base_url=response.url,
        timeout=timeout,
    )

# Usage example
url = 'http://example.com'  # Replace with the URL of the page whose images you want to fetch
grab_images_from_url(url)

总结