任务描述
本关为练习关卡,请按照编程要求完成任务,获取美国各州2010年的人口密度排名。
import pandas as pd
import numpy as np


def task3():
    """Print the 2010 population-density ranking of US states.

    Reads ``state-population.csv``, ``state-areas.csv`` and
    ``state-abbrevs.csv`` from ``./step3/``, joins them into one table,
    computes population per square mile for 2010 and prints the five densest
    and the five least dense entries to stdout.

    Density is computed from the ``ages == 'total'`` rows only. Those rows
    already include the under-18 population, so adding the ``under18`` rows
    on top would count minors twice.

    Returns:
        None. The ranking is written to stdout.
    """
    #********** Begin **********#
    # Read the three CSV files (read_csv already returns a DataFrame).
    population = pd.read_csv("./step3/state-population.csv")
    areas = pd.read_csv("./step3/state-areas.csv")
    abbrevs = pd.read_csv("./step3/state-abbrevs.csv")

    # Attach the full state name to every population row, then drop the
    # abbreviation column, which duplicates 'state/region'.
    merged = pd.merge(
        population,
        abbrevs,
        how="outer",
        left_on="state/region",
        right_on="abbreviation",
    )
    merged = merged.drop("abbreviation", axis=1)

    # Fill in the full names for the two region codes that are set by hand
    # (presumably because the abbreviation table has no entry for them).
    merged.loc[merged["state/region"] == "PR", "state"] = "Puerto Rico"
    merged.loc[merged["state/region"] == "USA", "state"] = "United States"

    # Attach the area of each state and discard rows with any missing value.
    merged = pd.merge(merged, areas, on="state", how="left")
    merged = merged.dropna()

    # Keep the 2010 whole-population rows and index them by state name.
    # set_index returns a new frame, so its result has to be kept.
    is_2010_total = (merged["year"] == 2010) & (merged["ages"] == "total")
    totals_2010 = merged.loc[is_2010_total].set_index("state")

    # Population density, highest first.
    density = totals_2010["population"] / totals_2010["area (sq. mi)"]
    density = density.sort_values(ascending=False)

    # to_string() renders the index header and the values without the
    # trailing "Name/dtype" footer, so no character-count slicing is needed;
    # the dtype line is then printed explicitly to keep the output format.
    print("前5名:")
    print(density.head(5).to_string())
    print("dtype: float64")
    print("后5名:")
    print(density.tail(5).to_string())
    print("dtype: float64")
    # ********** End **********#