正则表达式还是很好玩的，下面用它来爬取红牛官网的分公司信息。
import os
import re

import pandas as pd
import requests
# Scrape the branch-office listing from the Red Bull China site and export it
# to an Excel workbook, using regular expressions to pull each field out of
# the page HTML.

# A timeout keeps the script from hanging forever on an unresponsive server.
response = requests.get(url="http://www.redbull.com.cn/about/branch", timeout=10)
# Fail loudly on 4xx/5xx instead of parsing an error page into empty columns.
response.raise_for_status()
# When the server declares no charset, requests decodes text responses as
# ISO-8859-1, which garbles Chinese text; fall back to the detected encoding.
if "charset" not in response.headers.get("Content-Type", "").lower():
    response.encoding = response.apparent_encoding
page_html = response.text

# One non-greedy capture per field; each call returns a list of the matched
# inner texts in document order.
company = re.findall(r"<h2>(.*?)</h2>", page_html)
add = re.findall(r"<p class='mapIco'>(.*?)</p>", page_html)
mail = re.findall(r"<p class='mailIco'>(.*?)</p>", page_html)
tel = re.findall(r"<p class='telIco'>(.*?)</p>", page_html)

# The four lists become columns; pandas raises ValueError if their lengths
# differ (i.e. some branch entry is missing one of the fields).
df = pd.DataFrame({"company": company, "add": add, "mail": mail, "tel": tel})

# Create the target directory first so the export works on a fresh checkout.
output_path = "data/redbull_re.xlsx"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
# index=False omits the row index column from the sheet.
df.to_excel(excel_writer=output_path, index=False)

# Shows the first rows when run in a notebook/REPL; no visible effect when
# executed as a plain script.
df.head()
爬取结果