Today I'm sharing a web-scraping example that collects job-posting data from a big tech company's recruiting site. Working through the program, you can learn how to use pymysql: the script (developed in PyCharm) fetches the data and imports it into a MySQL database.
1 Import the required packages
import requests
import pymysql
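Both requests and pymysql are third-party packages; if they are not already available in your environment, pip install requests pymysql will fetch them (the package names match the imports above).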
2 Main code
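The whole scraper lives in one class: __init__ opens the MySQL connection and prepares the request URL, headers, and parameters; get_data POSTs the search payload for one page; parse_data pulls the needed fields out of the JSON response; save_data writes one row to the database; and run ties the steps together.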
class Baidu(object):
    def __init__(self):
        # Database connection and request settings
        self.db = pymysql.connect(host="127.0.0.1", user="root", password="88888888", db="test_db")
        self.cursor = self.db.cursor()
        self.url = 'https://talent.alibaba.com/position/search'
        self.headers = {
            'cookie': 'your own cookie',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/547.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/547.36'
        }
        self.params = {"_csrf": "09d5fe8f-08a2-4d3c-a43f"}

    def get_data(self, page):
        # POST the search payload for one page and return the parsed JSON
        data = {
            "channel": "group_official_site",
            "language": "zh",
            "batchId": "",
            "categories": "",
            "deptCodes": [],
            "key": "",
            "pageIndex": page,
            "pageSize": 19,
            "regions": "",
            "subCategories": ""
        }
        response = requests.post(url=self.url, params=self.params, headers=self.headers, json=data)
        return response.json()

    def parse_data(self, response):
        # Extract the fields we need from each position record
        data_list = response["content"]['datas']
        for node in data_list:
            workLocations = ','.join(node['workLocations'])
            name = node['name']
            requirement = node['requirement']
            self.save_data(workLocations, name, requirement)

    def create_table(self):
        # Create the target table if it does not exist yet
        sql = '''CREATE TABLE IF NOT EXISTS ali(
            id INT PRIMARY KEY AUTO_INCREMENT NOT NULL,
            workLocations VARCHAR(255) NOT NULL,
            name VARCHAR(255) NOT NULL,
            requirement TEXT)'''
        try:
            self.cursor.execute(sql)
            print("CREATE TABLE SUCCESS.")
        except Exception as ex:
            print(f"CREATE TABLE FAILED, CAUSE: {ex}")

    def save_data(self, workLocations, name, requirement):
        # Parameterized INSERT; id is filled in by AUTO_INCREMENT
        sql = 'INSERT INTO ali(workLocations, name, requirement) VALUES (%s, %s, %s)'
        try:
            self.cursor.execute(sql, (workLocations, name, requirement))
            self.db.commit()  # commit the transaction
            print('Row inserted successfully...')
        except Exception as e:
            print(f'Insert failed: {e}')
            self.db.rollback()  # roll back on error

    def run(self):
        self.create_table()
        for i in range(1, 19):  # pages 1 through 18
            response = self.get_data(i)
            self.parse_data(response)
        # Close the database connection
        self.db.close()
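For reference, parse_data assumes the endpoint returns JSON shaped roughly like the sketch below. This is a hand-written illustration of the fields the code reads, not a captured response, and all values are placeholders:

# Illustrative shape only -- placeholder values, not real data
response = {
    "content": {
        "datas": [
            {
                "workLocations": ["Hangzhou", "Beijing"],  # joined with ',' before saving
                "name": "Data Engineer",                   # position title
                "requirement": "Familiar with Python...",  # job requirements text
            },
            # ...one dict per position, up to pageSize entries
        ]
    }
}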
if __name__ == '__main__':
    baidu = Baidu()
    baidu.run()
3 Result
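To confirm that the rows actually landed in MySQL, you can read a few of them back. The sketch below assumes the same connection settings and the ali table created by the script:

import pymysql

db = pymysql.connect(host="127.0.0.1", user="root", password="88888888", db="test_db")
try:
    with db.cursor() as cursor:
        cursor.execute("SELECT id, workLocations, name FROM ali LIMIT 5")
        for row in cursor.fetchall():
            print(row)  # each row is a tuple: (id, workLocations, name)
finally:
    db.close()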