利用AI辅助写了一个程序:
import requests
from bs4 import BeautifulSoup
import time
# Request headers sent with every HTTP request: a desktop-browser User-Agent.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/58.0.3029.110 Safari/537.3'
    ),
}
# Crawl the Douban Top 250 list page by page and print each movie's title,
# rating and detail-page link.
for i in range(10):  # request 10 pages, advancing the offset by 25 per page
    # `start` in the query string is the offset of the first movie on the page.
    url = f"https://movie.douban.com/top250?start={i * 25}&filter="
    print(url)

    # Send the request. The timeout keeps a stalled connection from hanging
    # the script forever; raise_for_status() surfaces HTTP errors instead of
    # silently parsing an error page and printing nothing.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()

    # Parse the HTML and collect every movie entry on the page.
    soup = BeautifulSoup(response.text, 'html.parser')
    movie_items = soup.find_all('div', class_='item')

    # Print the title, rating and link of each movie.
    for movie_item in movie_items:
        title = movie_item.find('span', class_='title')
        rating = movie_item.find('span', class_='rating_num')
        link = movie_item.find('a', {'href': True})
        # find() returns None when a tag is missing; skip incomplete entries
        # rather than crashing on the attribute access.
        if title is None or rating is None or link is None:
            continue
        print(title.text.strip() + ' - ' + rating.text.strip())
        print(link['href'])

    # Pause 3 seconds between pages to reduce the chance of being flagged by
    # the site's anti-crawler checks.
    time.sleep(3)