源码如下:
import asyncio
import aiohttp
from lxml import etree
import logging
import datetime
import openpyxl

# Create the output workbook and write the column-header row to its active sheet.
wb = openpyxl.Workbook()
sheet = wb.active
sheet.append(['房源', '房子信息', '所在区域', '单价', '关注人数和发布时间', '标签'])

# Root logger: INFO and above, each record prefixed with a timestamp and level name.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s: %(message)s')

# Timestamp taken when the module is loaded.
# NOTE(review): presumably used later to report total run time — confirm against the rest of the file.
start = datetime.datetime.now()
class Spider(object):def __init__(self):self.semaphore = asyncio.Semaphore(6) # 信号量,控制协程数,防止爬的过快被反爬self.header = {"Host": "sh.lianjia.com","Referer": "https://sh.lianjia.com/ershoufang/","User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) C