本案例用到列表,函数,字符串等知识点,知识点参考链接如下:
python基础知识(一)&输入输出函数
python基础知识(二)&基本命令
python基础知识(三)&常用的内置函数
python基础知识(四)&字符串常用的方法
python基础知识(六)&字典
python基础知识(七)&列表
python基础知识(八)&open函数
python基础知识(九)&函数
完整代码如下:
# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup
import pandas as pd
import csv
import os

# Output file shared by the header writer and save().
CSV_PATH = '长沙二手房.csv'
# Column titles, in the same order as the tuples produced by paser_html().
T_HEAD = ['楼盘', '单价', '总价', '户型', '面积', '方向', '装修', '楼层']
# Headers sent when the caller does not supply its own.
DEFAULT_HEADERS = {'User-Agent': 'Mozilla/5.0'}
# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 10


def qingqiu(url, headers=None, timeout=REQUEST_TIMEOUT):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        headers: Optional request headers; DEFAULT_HEADERS is used when omitted,
            so the function no longer depends on a global set by the script body.
        timeout: Seconds to wait before the request is aborted.

    Returns:
        The decoded response text.

    Raises:
        requests.RequestException: On connection errors, timeouts, or an
            HTTP error status (4xx/5xx).
    """
    if headers is None:
        headers = DEFAULT_HEADERS
    rq = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly instead of handing an error page to the parser.
    rq.raise_for_status()
    return rq.text


def _to_float(text):
    """Return *text* converted to float, or None when it is not a number."""
    try:
        return float(text)
    except ValueError:
        return None


def paser_html(html):
    """Extract the listing fields from one result page.

    Each field is collected into its own list with a separate CSS selector and
    the lists are then combined positionally.

    Args:
        html: Page source as returned by qingqiu().

    Returns:
        A zip iterator of 8-tuples ordered like T_HEAD: estate name, unit
        price, total price, layout, area, orientation, decoration, floor.
        Because zip() stops at the shortest list, the number of rows equals
        the smallest number of matches among the selectors. Prices and areas
        that cannot be parsed as numbers are returned as None, and missing
        houseInfo fields as empty strings.
    """
    bs = BeautifulSoup(html, "lxml")

    price_all = [_to_float(tag.get_text())
                 for tag in bs.select('span[class=""]')]
    danjia_all = [tag.get_text().replace("元/平", "")
                  for tag in bs.select('div[class="unitPrice"]')]
    loupan_all = [tag.get_text()
                  for tag in bs.select('a[data-el="region"]')]

    huxing = []
    size = []
    fangxiang = []
    zhuangxiu = []
    louceng = []
    for info in bs.select('div[class="houseInfo"]'):
        # The code reads the first five '|'-separated fields as
        # layout | area | orientation | decoration | floor.
        fields = info.get_text().split('|')
        # Pad short entries so a listing with missing fields cannot raise
        # IndexError and the parallel lists stay aligned.
        fields += [''] * (5 - len(fields))
        huxing.append(fields[0])
        size.append(_to_float(fields[1].replace("平米", "")))
        fangxiang.append(fields[2])
        zhuangxiu.append(fields[3])
        louceng.append(fields[4])

    return zip(loupan_all, danjia_all, price_all, huxing, size,
               fangxiang, zhuangxiu, louceng)


def save(result, path=CSV_PATH):
    """Append every row of *result* to the CSV file at *path*.

    Args:
        result: Iterable of row sequences, e.g. the value of paser_html().
        path: Destination file; defaults to CSV_PATH.
    """
    with open(path, "a", newline='', encoding='utf-8') as f:
        csv.writer(f).writerows(result)


def _write_header_if_new(path=CSV_PATH):
    """Write the title row only when *path* is missing or empty.

    The file is opened in append mode on every run, so writing the header
    unconditionally would repeat it in the middle of the data.
    """
    if os.path.exists(path) and os.path.getsize(path) > 0:
        return
    with open(path, "a", newline='', encoding='utf-8') as f:
        csv.writer(f).writerow(T_HEAD)


def main():
    """Ask for a page range, then fetch, parse and store each page."""
    _write_header_if_new()
    start_ye = int(input("请输入起始页:"))
    end_ye = int(input("请输入终止页:"))
    # + 1 so that the page entered as the end page is crawled as well.
    for y in range(start_ye, end_ye + 1):
        url = 'https://cs.lianjia.com/ershoufang/pg{}/'.format(y)
        try:
            html = qingqiu(url)
        except requests.RequestException as exc:
            # Report the failed page and keep going with the remaining ones.
            print("第{}页请求失败:{}".format(y, exc))
            continue
        save(paser_html(html))
        print("爬取第{}页".format(y))


if __name__ == '__main__':
    main()
运行后保存的数据如下表所示: