爬取豆瓣评分前TOP250电影
内容简单:拼接url,用requests请求数据,然后存储为csv表格文件。
Python3 下运行,文件存储在程序所在目录。
import requests,bs4,csv
# Browser-like User-Agent header sent with every request.
# NOTE(review): presumably required because the site rejects the default
# client User-Agent -- not verifiable from this file.
headers = {'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}

# Output file (relative to the current working directory) and its header row.
OUTPUT_FILE = 'a.csv'
CSV_HEADER = ['序号', '电影名称', '豆瓣评分', '推荐语', '电影链接']

# The listing is requested as 10 pages with a start offset step of 25.
PAGE_COUNT = 10
PAGE_SIZE = 25

# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 10


def page_url(page_index: int, page_size: int = PAGE_SIZE) -> str:
    """Return the listing URL for the zero-based page *page_index*.

    The ``start`` query parameter is ``page_index * page_size``.
    """
    return ('https://movie.douban.com/top250?start='
            + str(page_index * page_size) + '&filter=')


def parse_movie(item) -> list:
    """Build one CSV row from a single ``<li>`` element of the listing.

    Returns ``[rank, title, rating, tagline, movie_url]``.  The tagline is
    an empty string when the entry has no ``<span class="inq">``.
    """
    num = item.find('em', class_="").text
    title = item.find('span', class_="title").text
    rating = item.find('span', class_="rating_num").text
    url_movie = item.find('a')['href']
    # Not every entry carries a tagline; look it up once and fall back to ''
    # instead of reusing a value left over from a previous entry.
    tagline_tag = item.find('span', class_="inq")
    tagline = tagline_tag.text if tagline_tag is not None else ''
    return [num, title, rating, tagline, url_movie]


def fetch_page(page_index: int) -> list:
    """Download one listing page and return its movie rows.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        RuntimeError: if the page has no ``<ol class="grid_view">`` element.
    """
    url = page_url(page_index)
    res = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    res.raise_for_status()
    soup = bs4.BeautifulSoup(res.text, 'html.parser')
    listing = soup.find('ol', class_="grid_view")
    if listing is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise RuntimeError('movie list not found in response from ' + url)
    return [parse_movie(li) for li in listing.find_all('li')]


def write_csv(rows, path: str = OUTPUT_FILE) -> None:
    """Write *rows* (an iterable of row sequences) to *path* as GBK CSV.

    ``newline=''`` lets the csv module control line endings, which avoids
    blank rows on Windows.  ``errors='replace'`` turns characters that GBK
    cannot encode into ``?`` rather than aborting the whole write.
    """
    with open(path, 'w', encoding='gbk', errors='replace', newline='') as out:
        csv.writer(out).writerows(rows)


def main() -> None:
    """Scrape every listing page, echo each row, then write the CSV file.

    The output file is opened only after all pages were fetched, so a failed
    download does not leave a truncated file behind.
    """
    data = [CSV_HEADER]
    for page_index in range(PAGE_COUNT):
        for row in fetch_page(page_index):
            data.append(row)
            print(row)
    print('正在写入')
    write_csv(data)
    print('写入完成')


if __name__ == '__main__':
    main()