
一.CSV写入与读取
1.写入

import csv

# Write a small demo table to demo.csv.
# newline='' lets the csv module manage line endings itself, and utf-8 keeps
# the Chinese text intact. The with-statement guarantees the file is closed
# even if one of the writes raises.
with open('demo.csv', 'w', newline='', encoding='utf-8') as csv_file:
    # csv.writer() wraps the file object and takes care of delimiters/quoting.
    writer = csv.writer(csv_file)
    # writerow() emits one list as one CSV line; the first line is the header.
    writer.writerow(['电影', '豆瓣评分'])
    writer.writerow(['银河护卫队', '8.0'])
    writer.writerow(['复仇者联盟', '8.1'])
# Leaving the with-block has already closed the file; nothing more to do.

2.读取

import csv

# Read demo.csv back and print every row.
# The file is opened with the same newline=''/utf-8 settings that were used
# when writing it; the with-statement closes it once the loop is done.
with open('demo.csv', 'r', newline='', encoding='utf-8') as csv_file:
    reader = csv.reader(csv_file)
    # Iterating the reader yields one list of strings per CSV line.
    for row in reader:
        print(row)
csv模块文档(中文翻译)链接: https://yiyibooks.cn/xx/python_352/library/csv.html#module-csv (官方英文文档: https://docs.python.org/3/library/csv.html )
案例:把豆瓣TOP250里面的 序号/电影名/链接/评分/推荐语 都爬取下来,并把结果存储到csv文件中
import requests
from bs4 import BeautifulSoup
import csv

# Scrape the Douban Top 250 list into movie.csv, then read the file back.
# The list is fetched as 10 pages; the ``start`` query parameter advances in
# steps of 25 (see the loop below).
Url = 'https://movie.douban.com/top250?start='
startIndex = 0
trail = '&filter='
# NOTE(review): Douban is widely reported to reject the default
# python-requests User-Agent, so a browser-like one is sent -- confirm this
# is still required / acceptable for your use.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}


def GetNeedURL():
    """Return the page URL for the current module-level ``startIndex``."""
    return Url + str(startIndex) + trail


movies = []
# The with-statement closes movie.csv even if a request or parse step raises.
with open('movie.csv', 'w', newline='', encoding='utf-8') as csv_file:
    writer = csv.writer(csv_file)
    for i in range(10):
        startIndex = i * 25
        res = requests.get(GetNeedURL(), headers=headers, timeout=10)
        # Fail loudly on an HTTP error instead of silently writing no rows.
        res.raise_for_status()
        soup = BeautifulSoup(res.text, 'html.parser')
        oneMovies = soup.find_all('div', class_='item')
        for item in oneMovies:
            index = item.find('em').text
            name = item.find('img')['alt']
            URL = item.find('div', class_='hd').find('a')['href']
            grade = item.find('span', class_='rating_num').text
            # Some entries have no quote paragraph; use an empty string for
            # them rather than reusing the previous movie's text (or hitting
            # a NameError on the very first movie).
            quote = item.find('p', class_='quote')
            Recommendation = quote.text.strip() if quote else ''
            row = [index, name, URL, grade, Recommendation]
            writer.writerow(row)
            movies.append(row)
print(movies)

# Read the file back to verify what was written. The reader is not bound to
# the name ``re`` so the standard-library regex module name is not shadowed.
with open('movie.csv', 'r', newline='', encoding='utf-8') as rb:
    for row in csv.reader(rb):
        print(row)
二.Excel写入与读取
一个Excel文档也称为一个工作簿(workbook),每个工作簿里可以有多个工作表(worksheet),当前打开的工作表又叫活动表。
每个工作表里有行和列,特定的行与列相交的方格称为单元格(cell)。比如下图第A列和第1行相交的方格我们可以直接表示为A1单元格。

1.写入

import openpyxl

# Create a new workbook, fill its active sheet and save it as Marvel.xlsx.
wb = openpyxl.Workbook()
# ``active`` is the worksheet that is currently selected in the workbook.
sheet = wb.active
sheet.title = 'new title'
# Write a single cell by its coordinate.
sheet['A1'] = '漫威宇宙'
rows = [['美国队长', '钢铁侠', '蜘蛛侠'], ['是', '漫威', '宇宙', '经典', '人物']]
# append() adds each list as one new row of the sheet.
for row in rows:
    sheet.append(row)
print(rows)
# Nothing is written to disk until save() is called.
wb.save('Marvel.xlsx')
2.读取(请看代码,图片中写法有误)

# Reading code: load the saved workbook and inspect it.
wb = openpyxl.load_workbook(filename='Marvel.xlsx')
# Look the worksheet up by the title it was given.
sheet = wb['new title']
# ``sheetnames`` returns the names of all worksheets in the workbook; print
# them when you are not sure how many sheets the workbook contains.
sheetname = wb.sheetnames
print(sheetname)
# Fetch the A1 cell object first, then read the value stored in it.
A1_cell = sheet['A1']
A1_value = A1_cell.value
print(A1_value)
openpyxl模块的官方文档:https://openpyxl.readthedocs.io/en/stable/
案例:把豆瓣TOP250里面的 序号/电影名/链接/评分/推荐语 都爬取下来,并把结果存储到Excel文件中
import requests
from bs4 import BeautifulSoup
import openpyxl

# Scrape the Douban Top 250 list into movie.xlsx, then read the file back.
# The list is fetched as 10 pages; the ``start`` query parameter advances in
# steps of 25 (see the loop below).
Url = 'https://movie.douban.com/top250?start='
startIndex = 0
trail = '&filter='
# NOTE(review): Douban is widely reported to reject the default
# python-requests User-Agent, so a browser-like one is sent -- confirm this
# is still required / acceptable for your use.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}
wb = openpyxl.Workbook()
sheet = wb.active
sheet.title = 'new'


def GetNeedURL():
    """Return the page URL for the current module-level ``startIndex``."""
    return Url + str(startIndex) + trail


movies = []
for i in range(10):
    startIndex = i * 25
    res = requests.get(GetNeedURL(), headers=headers, timeout=10)
    # Fail loudly on an HTTP error instead of silently saving an empty sheet.
    res.raise_for_status()
    soup = BeautifulSoup(res.text, 'html.parser')
    oneMovies = soup.find_all('div', class_='item')
    for item in oneMovies:
        index = item.find('em').text
        name = item.find('img')['alt']
        URL = item.find('div', class_='hd').find('a')['href']
        grade = item.find('span', class_='rating_num').text
        # Some entries have no quote paragraph; use an empty string for them
        # rather than reusing the previous movie's text (or hitting a
        # NameError on the very first movie).
        quote = item.find('p', class_='quote')
        Recommendation = quote.text.strip() if quote else ''
        row = [index, name, URL, grade, Recommendation]
        sheet.append(row)
        movies.append(row)
# Nothing is written to disk until save() is called.
wb.save('movie.xlsx')
print(movies)

# Load the saved workbook again and print every row to verify the result.
rb = openpyxl.load_workbook('movie.xlsx')
rs = rb['new']
for row in rs.values:
    print(row)
网友评论