0) 参考文章
- python超详细基础文件操作【建议收藏】_python程序设计基础+文件操作-CSDN博客
- [Python学习篇] Python文件操作_python 写文件-CSDN博客
- Python超详细基础文件操作(详解版)_python文件操作-CSDN博客
1) 前置知识(字符编码)
- 从第三方软件获取的文件在本地打开时出现乱码,通常是因为解码所用的字符编码与文件实际的编码不一致
- Python 中 str.encode() / bytes.decode() 默认使用utf8编码;open() 不指定 encoding 时使用系统默认编码(中文 Windows 上通常是gbk)
- 但是有的文件内容使用gbk编码,与默认的解码方式不同,因此出现乱码
- 方法: 编解码一致就行了
-
# Demo: converting between str and bytes with an explicit codec.

# (1) Encode a string; the result is a different data type: bytes.
s = "昊"
# s.encode() with no argument would use UTF-8; here GBK is chosen explicitly.
ret = s.encode("gbk")  # b'\xea\xbb'
print(ret)
print(type(ret))  # <class 'bytes'>

# Extra: a bytes value can also come from encoding an ASCII string.
# A b"..." literal may only contain ASCII characters, so b"yuan苑" is a
# SyntaxError; non-ASCII text has to go through encode().
s2 = "yuan".encode()
print(type(s2))  # <class 'bytes'>

# (2) Decode bytes back into a string.
data = b'\xea\xbb'
print(type(data))  # <class 'bytes'>
# bytes.decode() must use the same codec that produced the bytes. These bytes
# are GBK; decode() with no argument would try UTF-8 and fail on them.
ret = data.decode("gbk")
print(ret)  # 昊

# Typical uses of bytes: disk storage and network transfer.
2) 文件操作
- 文件句柄
-
# Demo: open() returns a file object (handle) used for every later operation.
# On Windows pass encoding="utf8" explicitly: without it open() falls back to
# the locale's default encoding, which may not match the file.
f = open("05 hello.txt", mode="r", encoding="utf8")

# Read all characters in the file as one string.
data = f.read()
print(data)

# Release the handle once it is no longer needed.
f.close()
- 文件读操作
-
# Demo: the different ways of reading a text file.
# open() returns a file object (handle); the mode defaults to "r".
f = open("06 relax小诗", encoding="utf8")

# (1) Read all characters.
data = f.read()
print(data)

# Report the current cursor position (now at end of file).
print(f.tell())

# Move the cursor back to the start so the content can be read again.
f.seek(0)
data = f.read()
print(data)

# (2) Read a fixed number of characters.
# After read() the cursor sits at end of file, so rewind first; otherwise
# read(6) would return an empty string.
f.seek(0)
data01 = f.read(6)
print(data01)  # prints six characters (not six lines)

# Skip the first 230 characters, then read the next six. In text mode seek()
# is only defined for 0 or a value returned by tell(); an arbitrary offset
# such as seek(230) may land inside a multi-byte UTF-8 character, so the
# characters are skipped by reading them instead.
f.seek(0)
f.read(230)
data01 = f.read(6)
print(data01)

# (3) Read one line at a time.
# Each line keeps its trailing newline, hence end="" when printing.
f.seek(0)
line01 = f.readline()
print(line01, end="")
line01 = f.readline()
print(line01, end="")
line01 = f.readline()
print(line01, end="")

# (4) Read all lines into a list.
f.seek(0)
lines = f.readlines()
for line in lines:
    print(line, end="")

# (5) Iterate over the file object directly, line by line.
# Rewind first: readlines() above left the cursor at end of file.
f.seek(0)
for line in f:
    if len(line) > 10:
        print(line, end="")

# Release the handle.
f.close()
- 文件写操作
-
# Demo: writing text files. mode="w" truncates the file (or creates it).
f = open("09 hello2.txt", mode="w", encoding="utf8")

# (1) f.write: write a single string; no newline is added automatically.
f.write("hello,苑昊!\n")
f.write("hello,yuan!")

# (2) f.writelines: write every string of an iterable, again without adding
# newlines.
f.writelines(["aaa\n", "bbb\n"])
# Read the source lines through a context manager so that handle is closed
# instead of being left to the garbage collector.
with open("06 relax小诗", encoding="utf8") as src:
    lines = src.readlines()
f.writelines(lines)

# Close (and flush) the "w" handle before the file is reopened below.
f.close()

# (3) mode="a" (append): new content is added at the end of the file.
# Use the same encoding as the "w" handle so the file stays consistent.
f = open("09 hello2.txt", mode="a", encoding="utf8")
f.write("hello,peach!\n")
f.close()
- 文件管理
-
# Demo: two ways of making sure a file handle is released.

# Version 1: close the handle by hand once the content has been read.
f = open("09 hello2.txt", encoding="utf8")
data = f.read()
f.close()
print(data)

# Version 2: a with-statement closes the handle automatically when the block
# is left.
with open("09 hello2.txt", encoding="utf8") as f:
    data = f.read()
print(data)
- 字节操作
-
# Demo: reading and writing raw bytes.

# (1) mode "rb": read() returns bytes instead of str.
with open("09 hello2.txt", "rb") as f:
    data = f.read()
    print(data)
    print(type(data))  # <class 'bytes'>
    # Turn the bytes back into text; decode() uses UTF-8 by default.
    print(data.decode())

# (2) mode "wb": write() accepts bytes only.
# Binary mode takes no encoding argument (open() raises ValueError if one is
# given), so the text is encoded explicitly before it is written.
with open("12 微博图片 hello3.txt", "wb") as f:
    f.write("hello 中国!".encode("GBK"))
3) 实际应用
- 图片下载
-
# Demo: copy an image by reading and writing its bytes.

# Version 1: read everything into a variable first, then write it out.
with open("卡通.jpg", "rb") as f_read:
    # Raw bytes of the source image; the variable stays available after the
    # with-block ends.
    data = f_read.read()

with open("卡通2.jpg", "wb") as f_write:
    # Write the bytes unchanged into the new file.
    f_write.write(data)

# Version 2: hold both files open at once and stream from one to the other.
with open("卡通3.jpg", "wb") as f_write, open("卡通.jpg", "rb") as f_read:
    f_write.write(f_read.read())
- 爬虫下载图片
-
import requests

# Demo: download an image over HTTP and save it to disk.

# Case 1
# Fetch the image. The timeout stops the script from hanging on a stalled
# connection.
res = requests.get(
    "https://pic.netbian.com/uploads/allimg/240112/001654-17049898140369.jpg",
    timeout=10,
)
# Stop on 4xx/5xx so an error page is never saved as a .jpg.
res.raise_for_status()
# print(res.content)

# res.content holds the response body as bytes, so write it in binary mode.
with open("美女.jpg", "wb") as f:
    f.write(res.content)

# Case 2: the same response bytes written to a second file.
with open("美女2.jpg", "wb") as f:
    f.write(res.content)
4) openpyxl模块(读写excel表格)
-
import openpyxl

# (1) Read an existing Excel file.
# Load the workbook from disk.
book = openpyxl.load_workbook('example.xlsx')
# Look up the worksheet named 'Sheet' in the workbook.
ws = book['Sheet']

# Read cell values.
value = ws['A1'].value
print(value)  # content of cell A1, e.g. 名字
value = ws['B2'].value
print(value)  # content of cell B2, e.g. 2023 / 爱尔兰 英国 美国 匈牙利 / 剧情 喜剧 爱情 科幻 / 欧格斯·兰斯莫斯 / 艾玛·斯通 马克·鲁弗洛

# Change one cell, then save the workbook back to the same file.
ws['A7'] = "沈腾 飞驰人生1"
book.save('example.xlsx')

# (2) Create a new Excel file and write data into it.
# Create a new workbook and take its active (default) worksheet.
book = openpyxl.Workbook()
ws = book.active

# Write single cells.
ws['A1'].value = 'Hello'
ws['B1'].value = 'World'

# Append whole rows; each append() adds one row below the existing data.
ws.append([1, 2, 3, 4, 5, 7])
for n in range(1, 101):
    powers = [n, n ** 2, n ** 3, n ** 4]
    ws.append(powers)

# Save the workbook to disk.
book.save('new_example.xlsx')
5) 爬虫案例
-
import requests
import openpyxl

# Demo: fetch movie recommendations from the Douban API and store them in an
# Excel sheet.

# NOTE(review): these cookies look like values copied from a browser session;
# presumably they expire and need refreshing - confirm before relying on them.
cookies = {
    'll': '"108288"',
    'bid': 'n1IbnM-UzkI',
    'Hm_lvt_6d4a8cfea88fa457c3127e14fb5fabc2': '1698153214',
    '_ga': 'GA1.2.1489184474.1698153214',
    '_ga_Y4GN1R87RG': 'GS1.1.1698153214.1.0.1698153217.0.0.0',
    'douban-fav-remind': '1',
    'ap_v': '0,6.0',
    '__utma': '30149280.947861648.1695648345.1708952551.1711004385.23',
    '__utmz': '30149280.1711004385.23.4.utmcsr=douban.com|utmccn=(referral)|utmcmd=referral|utmcct=/',
}

headers = {
    'Accept': 'application/json, text/plain, */*',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Origin': 'https://movie.douban.com',
    'Pragma': 'no-cache',
    'Referer': 'https://movie.douban.com/explore',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-site',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
    'sec-ch-ua': '"Chromium";v="122", "Not(A:Brand";v="24", "Google Chrome";v="122"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"macOS"',
}

response = requests.get(
    'https://m.douban.com/rexxar/api/v2/movie/recommend?refresh=0&start=0&count=120&selected_categories=%7B%22%E7%B1%BB%E5%9E%8B%22:%22%E5%96%9C%E5%89%A7%22%7D&uncollect=false&tags=%E5%96%9C%E5%89%A7',
    cookies=cookies,
    headers=headers,
    timeout=10,  # do not hang forever on a stalled connection
)
# Fail early on 4xx/5xx instead of trying to parse an error body as JSON.
response.raise_for_status()

# Fall back to an empty list so the loop below is safe when "items" is
# missing or null.
data = response.json().get("items") or []
# print(data)

movie_list = []
for item in data:
    # Only entries whose type is "movie" are kept.
    if item.get("type") != "movie":
        continue
    # "rating" and "pic" are nested objects; treat a missing or null one as
    # empty so the fields become None instead of raising AttributeError.
    rating = item.get("rating") or {}
    pic = item.get("pic") or {}
    title = item.get("title")
    card_subtitle = item.get("card_subtitle")
    count = rating.get("count")
    value = rating.get("value")
    large_pic = pic.get("large")
    movie_list.append([title, card_subtitle, value, count, large_pic])

print("movie_list:", movie_list)

# Store the result in an Excel sheet.
# Create a new workbook and take its active (default) worksheet.
workbook = openpyxl.Workbook()
sheet = workbook.active
# Header row; the column order matches the rows built above.
sheet.append(["名字", "副标题", "评分", "评分人数", "海报"])
# One row per movie.
for movie_item in movie_list:
    sheet.append(movie_item)

# Save the workbook to the given path.
workbook.save('douban.xlsx')