import pandas as pd
pandas之Series创建
# Build a Series from a plain list; no index is given, so the default one is used.
t = pd.Series(data=[1, 2, 31, 12, 3, 4])
t
0 1
1 2
2 31
3 12
4 3
5 4
dtype: int64
type(t)
pandas.core.series.Series
series指定索引
# Same constructor, but with explicit string labels 'a'..'e' as the index.
t2 = pd.Series(
    [1, 23, 3, 2, 3],
    index=['a', 'b', 'c', 'd', 'e'],
)
t2
a 1
b 23
c 3
d 2
e 3
dtype: int64
import numpy as np
import string
# Ten consecutive integers labelled with the first ten uppercase ASCII letters.
first_ten_letters = list(string.ascii_uppercase[:10])
t2 = pd.Series(np.arange(10), index=first_ten_letters)
t2
A 0
B 1
C 2
D 3
E 4
F 5
G 6
H 7
I 8
J 9
dtype: int32
通过字典创建一个series
# Mixed-type record; the keys become the index labels when it is wrapped in a Series.
temp_dict = {
    'name': 'xiaohong',
    'age': 18,
    'tel': 10086,
}
temp_dict
{'name': 'xiaohong', 'age': 18, 'tel': 10086}
t3 = pd.Series(temp_dict)
t3
name xiaohong
age 18
tel 10086
dtype: object
t3.dtype
dtype('O')
Series的索引与切片
t3['age']
18
# Positional access: t3 is indexed by string labels, so a bare integer key
# relies on the deprecated "integer means position" fallback of Series[...].
# .iloc states the positional intent explicitly and returns the same element.
t3.iloc[0]
'xiaohong'
# Select the 2nd and 3rd elements by position. With a string-labelled index,
# integer keys inside [] are only positional through a deprecated fallback,
# so use .iloc for position-based selection.
t3.iloc[[1, 2]]
age 18
tel 10086
dtype: object
t3[:3]
name xiaohong
age 18
tel 10086
dtype: object
t3[['age','tel']]
age 18
tel 10086
dtype: object
t
0 1
1 2
2 31
3 12
4 3
5 4
dtype: int64
t[t>4]
2 31
3 12
dtype: int64
pandas取出索引
t3.index
Index(['name', 'age', 'tel'], dtype='object')
# An Index is iterable: walk its labels and print one per line.
for label in t3.index:
    print(label)
name
age
tel
type(t3.index)
pandas.core.indexes.base.Index
list(t3.index)[:2]
['name', 'age']
t3.values
array(['xiaohong', 18, 10086], dtype=object)
type(t3.values)
numpy.ndarray
读取文件
# Load can.csv from the current directory using read_csv's default settings.
# NOTE(review): the resulting column labels ('1', '20', '1.004', ...) look like a
# data record, so the file presumably has no header row and its first record is
# being consumed as the header. Confirm, and if so pass header=None plus names=.
df = pd.read_csv('./can.csv')
df
.dataframe tbody tr th {vertical-align: top;
}.dataframe thead th {text-align: right;
}
| 1 | 20 | 1.004 | 0.090 | -0.125 |
---|
0 | 1 | 20 | 1.004 | -0.043 | -0.125 |
---|
1 | 1 | 20 | 0.969 | 0.090 | -0.121 |
---|
2 | 1 | 20 | 0.973 | -0.012 | -0.137 |
---|
3 | 1 | 20 | 1.000 | -0.016 | -0.121 |
---|
4 | 1 | 20 | 0.961 | 0.082 | -0.121 |
---|
... | ... | ... | ... | ... | ... |
---|
152994 | 3 | 100 | 1.051 | 0.090 | -0.262 |
---|
152995 | 3 | 100 | 0.918 | 0.039 | -0.129 |
---|
152996 | 3 | 100 | 1.156 | -0.094 | -0.227 |
---|
152997 | 3 | 100 | 0.934 | 0.203 | -0.172 |
---|
152998 | 3 | 100 | 1.199 | -0.176 | 0.109 |
---|
152999 rows × 5 columns
df.head(10)
.dataframe tbody tr th {vertical-align: top;
}.dataframe thead th {text-align: right;
}
| 1 | 20 | 1.004 | 0.090 | -0.125 |
---|
0 | 1 | 20 | 1.004 | -0.043 | -0.125 |
---|
1 | 1 | 20 | 0.969 | 0.090 | -0.121 |
---|
2 | 1 | 20 | 0.973 | -0.012 | -0.137 |
---|
3 | 1 | 20 | 1.000 | -0.016 | -0.121 |
---|
4 | 1 | 20 | 0.961 | 0.082 | -0.121 |
---|
5 | 1 | 20 | 0.973 | -0.055 | -0.109 |
---|
6 | 1 | 20 | 1.000 | 0.012 | -0.133 |
---|
7 | 1 | 20 | 0.969 | -0.102 | -0.141 |
---|
8 | 1 | 20 | 0.973 | -0.059 | -0.125 |
---|
9 | 1 | 20 | 1.012 | 0.043 | -0.133 |
---|
import pandas as pd
import numpy as np
pd.DataFrame(np.arange(12).reshape(3,4))
.dataframe tbody tr th {vertical-align: top;
}.dataframe thead th {text-align: right;
}
pd.DataFrame(np.arange(12).reshape(3,4),index=list('abc'), columns=list('WXYZ'))
.dataframe tbody tr th {vertical-align: top;
}.dataframe thead th {text-align: right;
}
# Column-oriented input: each key is a column name, each list holds that column's values.
d1 = {
    'name': ['xiaoming', 'xiaogang'],
    'age': [12, 20],
}
d1
{'name': ['xiaoming', 'xiaogang'], 'age': [12, 20]}
t1 = pd.DataFrame(d1)
t1
.dataframe tbody tr th {vertical-align: top;
}.dataframe thead th {text-align: right;
}
| name | age |
---|
0 | xiaoming | 12 |
---|
1 | xiaogang | 20 |
---|
# Row-oriented input: one dict per record. The records deliberately do not all
# share the same keys, so the DataFrame built from them has missing cells.
d2 = [
    {'name': 'xioahong', 'age': 20, 'tel': 10020},
    {'name': 'xioaming', 'tel': 123231},
    {'name': 'xiaowang', 'age': 18},
]
d2
[{'name': 'xioahong', 'age': 20, 'tel': 10020},{'name': 'xioaming', 'tel': 123231},{'name': 'xiaowang', 'age': 18}]
t2 = pd.DataFrame(d2)
t2
.dataframe tbody tr th {vertical-align: top;
}.dataframe thead th {text-align: right;
}
| name | age | tel |
---|
0 | xioahong | 20.0 | 10020.0 |
---|
1 | xioaming | NaN | 123231.0 |
---|
2 | xiaowang | 18.0 | NaN |
---|
# Load jd.csv from the current directory using read_csv's default settings.
# NOTE(review): the column labels shown afterwards are a product title, a shop
# name, '764', '1099.00' and a URL, i.e. they look like a data record. The file
# presumably has no header row and its first record is being used as the header.
# Confirm, and if so pass header=None plus explicit names=.
df = pd.read_csv('./jd.csv')
print(df.head())
乐高(LEGO)积木 艺术系列ART 31202 米奇米妮 18岁+ 儿童玩具 马赛克像素画 男孩女孩成人情人节礼物 乐高京东自营旗舰店 \
0 林家铺子水果罐头 什锦罐头 200g*2罐 林家铺子官方旗舰店
1 羽生结弦:王者之路( 超人气花样滑冰冠军羽生结弦全新传记,全面展示羽生10年成长经历和心路历程!) 中信出版社
2 【话费慢充】全国电信话费充值手机特惠慢充话费200元 72小时内到账 200元 易士捷通讯充值拼购专营店
3 豪皇 潮汕牛肉丸500g*2包 火锅食材牛丸 烧烤丸串生鲜潮汕年夜饭火锅丸子 汕头手打牛肉丸 年货 邻家小厨生鲜专营店
4 伊利奶粉【全新升级】 金领冠系列 幼儿配方奶粉 3段1200克特惠三联装(1-3岁幼儿适用)... 伊利母婴京东自营旗舰店 764 1099.00 https://item.jd.com/100017067554.html
0 8274 6.90 https://item.jd.com/10029836000540.html
1 42023 49.00 https://item.jd.com/13598042.html
2 664 191.99 https://item.jd.com/200151598576.html
3 32 39.00 https://item.jd.com/65414277974.html
4 1480578 146.00 https://item.jd.com/1100526.html
print(df.info())
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6054 entries, 0 to 6053
Data columns (total 5 columns):
 # Column Non-Null Count Dtype
--- ------ -------------- -----
 0 乐高(LEGO)积木 艺术系列ART 31202 米奇米妮 18岁+ 儿童玩具 马赛克像素画 男孩女孩成人情人节礼物 6054 non-null object
 1 乐高京东自营旗舰店 5894 non-null object
 2 764 6054 non-null int64
 3 1099.00 6054 non-null float64
 4 https://item.jd.com/100017067554.html 6054 non-null object
dtypes: float64(1), int64(1), object(3)
memory usage: 236.6+ KB
None
print(df[:20])
乐高(LEGO)积木 艺术系列ART 31202 米奇米妮 18岁+ 儿童玩具 马赛克像素画 男孩女孩成人情人节礼物 \
0 林家铺子水果罐头 什锦罐头 200g*2罐
1 羽生结弦:王者之路( 超人气花样滑冰冠军羽生结弦全新传记,全面展示羽生10年成长经历和心路历程!)
2 【话费慢充】全国电信话费充值手机特惠慢充话费200元 72小时内到账 200元
3 豪皇 潮汕牛肉丸500g*2包 火锅食材牛丸 烧烤丸串生鲜潮汕年夜饭火锅丸子 汕头手打牛肉丸 年货
4 伊利奶粉【全新升级】 金领冠系列 幼儿配方奶粉 3段1200克特惠三联装(1-3岁幼儿适用)...
5 【欧洲进口】法国原瓶进口 Roux家族黑舰经典混酿干红葡萄酒红酒送礼佳品750ml*6瓶整箱
6 良品铺子 香酥脆灰枣 酥脆小枣即食无核脆枣红枣干蜜饯果干休闲零食量贩装400g
7 小黄鸭(B.Duck)小学生书包男童女童一三年级男孩儿童减负护脊双肩包 sbd80008黄色
8 稳健医用外科口罩一次性医用口罩成人儿童可选 稳健口罩 三层防护 透气薄款防细菌口罩医用 1盒...
9 嗨吃家 酸辣粉清真宽粉112g*12袋
10 日本进口 黛珂Cosme Decorte牛油果乳液150ml 补水保湿 软化肤质 改善粗糙 ...
11 嗨吃家正宗铁棍山药粉皮200g*5袋速食
12 小鹿蓝蓝_酸奶溶豆 宝宝零食益生菌享6个月食谱 4口味各1盒
13 嗨吃家热干面176g*6袋
14 【药房直售】康速达 痔立克痔疮膏冷敷凝胶内外混合痔疮肉球肛门瘙痒男女 (周期型)实发两盒
15 蒙牛 酸酸乳 原味250ml×24 礼盒装
16 蒂佳婷Dr.Jart+ 绿丸面膜贴片 舒缓镇静 补水保湿 水动力舒缓补水绿丸面膜25g*5片...
17 法国原瓶进口 杰朗克西里尔 赤霞珠 干红 葡萄酒 750ml 双支装
18 土土优选丹麦风味曲奇饼干 皇冠品质早餐网红休闲办公室零食年货72g/盒 十盒*(丹麦风味曲奇...
19 善存维生素C咀嚼片香橙口味补充维C120片 1盒 1盒*(15+15+90)片 乐高京东自营旗舰店 764 1099.00 \
0 林家铺子官方旗舰店 8274 6.90
1 中信出版社 42023 49.00
2 易士捷通讯充值拼购专营店 664 191.99
3 邻家小厨生鲜专营店 32 39.00
4 伊利母婴京东自营旗舰店 1480578 146.00
5 玫嘉官方旗舰店 540 298.00
6 良品铺子京东自营旗舰店 5747 17.90
7 尚喜屋母婴旗舰店 9 88.00
8 稳健官方旗舰店 53390 16.90
9 燕之北旗舰店 107 39.90
10 京东国际美妆自营跨境免税店 89640 289.00
11 燕之北旗舰店 10 39.90
12 小鹿蓝蓝旗舰店 1275 54.00
13 燕之北旗舰店 32 26.90
14 颐鹤堂大药房旗舰店 4553 69.00
15 蒙牛京东自营旗舰店 799422 44.90
16 蒂佳婷(Dr.Jart)海外京东自营专区 747604 98.00
17 禧家拾粮酒类旗舰店 27 39.90
18 土土优选官方旗舰店 14849 19.90
19 益尔益旗舰店 2040 49.00 https://item.jd.com/100017067554.html
0 https://item.jd.com/10029836000540.html
1 https://item.jd.com/13598042.html
2 https://item.jd.com/200151598576.html
3 https://item.jd.com/65414277974.html
4 https://item.jd.com/1100526.html
5 https://item.jd.com/22453030555.html
6 https://item.jd.com/100027854140.html
7 https://item.jd.com/10030112475646.html
8 https://item.jd.com/10021189665333.html
9 https://item.jd.com/10035393346580.html
10 https://item.jd.com/4972612.html
11 https://item.jd.com/10035809618060.html
12 https://item.jd.com/10038718351041.html
13 https://item.jd.com/10035527980479.html
14 https://item.jd.com/10033781790177.html
15 https://item.jd.com/1411416.html
16 https://item.jd.com/4858894.html
17 https://item.jd.com/10028867267738.html
18 https://item.jd.com/10026591565614.html
19 https://item.jd.com/47384323647.html
pandas取行或取列的注意点
方括号里写切片(如 df[:20]),表示取行,对行进行操作
方括号里写字符串(如 df['764']),表示取列索引,对列进行操作
print(df['764'])
0 8274
1 42023
2 664
3 32
4 1480578
...
6049 1952366
6050 769
6051 137
6052 21686
6053 276
Name: 764, Length: 6054, dtype: int64
print(type(df['764']))
<class 'pandas.core.series.Series'>
# 3x4 frame holding 0..11, with rows labelled a-c and columns labelled W-Z.
t3 = pd.DataFrame(
    np.arange(12).reshape(3, 4),
    index=['a', 'b', 'c'],
    columns=['W', 'X', 'Y', 'Z'],
)
t3
.dataframe tbody tr th {vertical-align: top;
}.dataframe thead th {text-align: right;
}
t3.loc['a','Z']
3
type(t3.loc['a','Z'])
numpy.int32
t3.loc['a']
W 0
X 1
Y 2
Z 3
Name: a, dtype: int32
t3.loc[:,'Y']
a 2
b 6
c 10
Name: Y, dtype: int32
t3.loc[['a','c'],:]
.dataframe tbody tr th {vertical-align: top;
}.dataframe thead th {text-align: right;
}
t3.loc[['a','c'],['W','Z']]
.dataframe tbody tr th {vertical-align: top;
}.dataframe thead th {text-align: right;
}
t3
.dataframe tbody tr th {vertical-align: top;
}.dataframe thead th {text-align: right;
}
t3.iloc[1]
W 4
X 5
Y 6
Z 7
Name: b, dtype: int32
t3.iloc[:,2]
a 2
b 6
c 10
Name: Y, dtype: int32
t3.iloc[:,[2,1]]
.dataframe tbody tr th {vertical-align: top;
}.dataframe thead th {text-align: right;
}
t3.iloc[1:,:2] = np.nan
t3
.dataframe tbody tr th {vertical-align: top;
}.dataframe thead th {text-align: right;
}
| W | X | Y | Z |
---|
a | 0.0 | 1.0 | 2 | 3 |
---|
b | NaN | NaN | 6 | 7 |
---|
c | NaN | NaN | 10 | 11 |
---|
df
.dataframe tbody tr th {vertical-align: top;
}.dataframe thead th {text-align: right;
}
| 乐高(LEGO)积木 艺术系列ART 31202 米奇米妮 18岁+ 儿童玩具 马赛克像素画 男孩女孩成人情人节礼物 | 乐高京东自营旗舰店 | 764 | 1099.00 | https://item.jd.com/100017067554.html |
---|
0 | 林家铺子水果罐头 什锦罐头 200g*2罐 | 林家铺子官方旗舰店 | 8274 | 6.90 | https://item.jd.com/10029836000540.html |
---|
1 | 羽生结弦:王者之路( 超人气花样滑冰冠军羽生结弦全新传记,全面展示羽生10年成长经历和心路历程!) | 中信出版社 | 42023 | 49.00 | https://item.jd.com/13598042.html |
---|
2 | 【话费慢充】全国电信话费充值手机特惠慢充话费200元 72小时内到账 200元 | 易士捷通讯充值拼购专营店 | 664 | 191.99 | https://item.jd.com/200151598576.html |
---|
3 | 豪皇 潮汕牛肉丸500g*2包 火锅食材牛丸 烧烤丸串生鲜潮汕年夜饭火锅丸子 汕头手打牛肉丸 年货 | 邻家小厨生鲜专营店 | 32 | 39.00 | https://item.jd.com/65414277974.html |
---|
4 | 伊利奶粉【全新升级】 金领冠系列 幼儿配方奶粉 3段1200克特惠三联装(1-3岁幼儿适用)... | 伊利母婴京东自营旗舰店 | 1480578 | 146.00 | https://item.jd.com/1100526.html |
---|
... | ... | ... | ... | ... | ... |
---|
6049 | 贝亲(Pigeon)宽口径玻璃奶瓶奶嘴套装 婴儿奶瓶240ml+自然实感婴儿奶嘴(L码+LL... | 贝亲(Pigeon)京东自营旗舰店 | 1952366 | 172.00 | https://item.jd.com/7639987.html |
---|
6050 | 尤果(YOUGUO)衣架子带晾衣夹子折叠晾衣架晒袜子架内衣架神器32夹子 可折叠加厚【1个3... | 尤果生活日用拼购旗舰店 | 769 | 15.90 | https://item.jd.com/10031481561764.html |
---|
6051 | 匹克态极闪现3代篮球鞋男2022春季新款耐磨缓震篮球运动鞋男鞋 大白-气泡配色 42 | 匹克官方旗舰店 | 137 | 669.00 | https://item.jd.com/10039423932347.html |
---|
6052 | 超能 洗衣凝珠 洗衣凝珠 100颗 防串色 浓缩 酵素 香水味 花香型 洗衣球 洗衣珠 | 超能京东自营官方旗舰店 | 21686 | 119.00 | https://item.jd.com/100011740813.html |
---|
6053 | 8册专注力训练书找不同迷宫书3-6岁儿童注意力观察记忆力智力开发全脑开发思维训练书籍 | 凤凰新华书店旗舰店 | 276 | 15.80 | https://item.jd.com/71219454726.html |
---|
6054 rows × 5 columns
df.index
RangeIndex(start=0, stop=6054, step=1)
df.columns
Index(['乐高(LEGO)积木 艺术系列ART 31202 米奇米妮 18岁+ 儿童玩具 马赛克像素画 男孩女孩成人情人节礼物','乐高京东自营旗舰店', '764', '1099.00', 'https://item.jd.com/100017067554.html'],dtype='object')
df.dtypes
乐高(LEGO)积木 艺术系列ART 31202 米奇米妮 18岁+ 儿童玩具 马赛克像素画 男孩女孩成人情人节礼物 object
乐高京东自营旗舰店 object
764 int64
1099.00 float64
https://item.jd.com/100017067554.html object
dtype: object
df.values
array([['林家铺子水果罐头 什锦罐头 200g*2罐', '林家铺子官方旗舰店', 8274, 6.9,'https://item.jd.com/10029836000540.html'],['羽生结弦:王者之路( 超人气花样滑冰冠军羽生结弦全新传记,全面展示羽生10年成长经历和心路历程!)', '中信出版社',42023, 49.0, 'https://item.jd.com/13598042.html'],['【话费慢充】全国电信话费充值手机特惠慢充话费200元 72小时内到账 200元', '易士捷通讯充值拼购专营店', 664,191.99, 'https://item.jd.com/200151598576.html'],...,['匹克态极闪现3代篮球鞋男2022春季新款耐磨缓震篮球运动鞋男鞋 大白-气泡配色 42', '匹克官方旗舰店', 137,669.0, 'https://item.jd.com/10039423932347.html'],['超能 洗衣凝珠 洗衣凝珠 100颗 防串色 浓缩 酵素 香水味 花香型 洗衣球 洗衣珠', '超能京东自营官方旗舰店',21686, 119.0, 'https://item.jd.com/100011740813.html'],['8册专注力训练书找不同迷宫书3-6岁儿童注意力观察记忆力智力开发全脑开发思维训练书籍', '凤凰新华书店旗舰店', 276,15.8, 'https://item.jd.com/71219454726.html']], dtype=object)
df
.dataframe tbody tr th {vertical-align: top;
}.dataframe thead th {text-align: right;
}
| 乐高(LEGO)积木 艺术系列ART 31202 米奇米妮 18岁+ 儿童玩具 马赛克像素画 男孩女孩成人情人节礼物 | 乐高京东自营旗舰店 | 764 | 1099.00 | https://item.jd.com/100017067554.html |
---|
0 | 林家铺子水果罐头 什锦罐头 200g*2罐 | 林家铺子官方旗舰店 | 8274 | 6.90 | https://item.jd.com/10029836000540.html |
---|
1 | 羽生结弦:王者之路( 超人气花样滑冰冠军羽生结弦全新传记,全面展示羽生10年成长经历和心路历程!) | 中信出版社 | 42023 | 49.00 | https://item.jd.com/13598042.html |
---|
2 | 【话费慢充】全国电信话费充值手机特惠慢充话费200元 72小时内到账 200元 | 易士捷通讯充值拼购专营店 | 664 | 191.99 | https://item.jd.com/200151598576.html |
---|
3 | 豪皇 潮汕牛肉丸500g*2包 火锅食材牛丸 烧烤丸串生鲜潮汕年夜饭火锅丸子 汕头手打牛肉丸 年货 | 邻家小厨生鲜专营店 | 32 | 39.00 | https://item.jd.com/65414277974.html |
---|
4 | 伊利奶粉【全新升级】 金领冠系列 幼儿配方奶粉 3段1200克特惠三联装(1-3岁幼儿适用)... | 伊利母婴京东自营旗舰店 | 1480578 | 146.00 | https://item.jd.com/1100526.html |
---|
... | ... | ... | ... | ... | ... |
---|
6049 | 贝亲(Pigeon)宽口径玻璃奶瓶奶嘴套装 婴儿奶瓶240ml+自然实感婴儿奶嘴(L码+LL... | 贝亲(Pigeon)京东自营旗舰店 | 1952366 | 172.00 | https://item.jd.com/7639987.html |
---|
6050 | 尤果(YOUGUO)衣架子带晾衣夹子折叠晾衣架晒袜子架内衣架神器32夹子 可折叠加厚【1个3... | 尤果生活日用拼购旗舰店 | 769 | 15.90 | https://item.jd.com/10031481561764.html |
---|
6051 | 匹克态极闪现3代篮球鞋男2022春季新款耐磨缓震篮球运动鞋男鞋 大白-气泡配色 42 | 匹克官方旗舰店 | 137 | 669.00 | https://item.jd.com/10039423932347.html |
---|
6052 | 超能 洗衣凝珠 洗衣凝珠 100颗 防串色 浓缩 酵素 香水味 花香型 洗衣球 洗衣珠 | 超能京东自营官方旗舰店 | 21686 | 119.00 | https://item.jd.com/100011740813.html |
---|
6053 | 8册专注力训练书找不同迷宫书3-6岁儿童注意力观察记忆力智力开发全脑开发思维训练书籍 | 凤凰新华书店旗舰店 | 276 | 15.80 | https://item.jd.com/71219454726.html |
---|
6054 rows × 5 columns
mean_data = df['1099.00']
mean_data
0 6.90
1 49.00
2 191.99
3 39.00
4 146.00
...
6049 172.00
6050 15.90
6051 669.00
6052 119.00
6053 15.80
Name: 1099.00, Length: 6054, dtype: float64
print('商品均价',mean_data.mean())
商品均价 332.4171737693964
df[mean_data==mean_data.min()]
.dataframe tbody tr th {vertical-align: top;
}.dataframe thead th {text-align: right;
}
| 乐高(LEGO)积木 艺术系列ART 31202 米奇米妮 18岁+ 儿童玩具 马赛克像素画 男孩女孩成人情人节礼物 | 乐高京东自营旗舰店 | 764 | 1099.00 | https://item.jd.com/100017067554.html |
---|
286 | 补运费专拍链接 | 熊出没官方旗舰店 | 0 | 1.0 | https://item.jd.com/10042346578090.html |
---|
1906 | 【京选99新】苹果iPhone 12 ProMax 256GB 石墨色5G全网通 S12 | 勇科手机 | 2 | 1.0 | https://item.jd.com/10040790836846.html |
---|
2047 | Yottoy 瑜伽入门学习教程 | yottoy京东自营旗舰店 | 11 | 1.0 | https://item.jd.com/100018075841.html |
---|
2791 | 运费补运费专用链接(请勿单独拍) 补运费专用链接 | 荷尔健康大药房旗舰店 | 7 | 1.0 | https://item.jd.com/10023059152178.html |
---|
3854 | 【准新机】【在保280天以上】iPhone13ProMax 5G全网通256G远峰蓝S18 | 勇科手机 | 1 | 1.0 | https://item.jd.com/10041957238447.html |
---|
4398 | 定金 别克昂科拉 试驾享原厂精美试驾礼 【新车汽车买车SUV】 具体车型请与线下经销商协定 | 上汽通用别克官方旗舰店 | 0 | 1.0 | https://item.jd.com/68629491955.html |
---|
4491 | 贵州茅台镇酱香型白酒整箱53度粮食窖藏老酒年货送礼酒水饮品江左盟大曲酱香酒 单瓶装 | 遵巡酒类专营店 | 2427 | 1.0 | https://item.jd.com/10028009269896.html |
---|
5888 | 【准新机】【在保280天以上】iPhone13ProMax 5G全网通256G金色 S11 | 勇科手机 | 0 | 1.0 | https://item.jd.com/10041957046439.html |
---|
print('总共有'+str(df['764'].count())+'个商品')
总共有6054个商品
t3
.dataframe tbody tr th {vertical-align: top;
}.dataframe thead th {text-align: right;
}
| W | X | Y | Z |
---|
a | 0.0 | 1.0 | 2 | 3 |
---|
b | NaN | NaN | 6 | 7 |
---|
c | NaN | NaN | 10 | 11 |
---|
t3[pd.notnull(t3['W'])]
.dataframe tbody tr th {vertical-align: top;
}.dataframe thead th {text-align: right;
}
t3.dropna(axis=0)
.dataframe tbody tr th {vertical-align: top;
}.dataframe thead th {text-align: right;
}
t3.dropna(axis=0,how='any',inplace=True)
t3
.dataframe tbody tr th {vertical-align: top;
}.dataframe thead th {text-align: right;
}
t2
.dataframe tbody tr th {vertical-align: top;
}.dataframe thead th {text-align: right;
}
| name | age | tel |
---|
0 | xioahong | 20.0 | 10020.0 |
---|
1 | xioaming | NaN | 123231.0 |
---|
2 | xiaowang | 18.0 | NaN |
---|
t2.fillna(0)
.dataframe tbody tr th {vertical-align: top;
}.dataframe thead th {text-align: right;
}
| name | age | tel |
---|
0 | xioahong | 20.0 | 10020.0 |
---|
1 | xioaming | 0.0 | 123231.0 |
---|
2 | xiaowang | 18.0 | 0.0 |
---|
# Fill the gaps in each numeric column with that column's mean.
# numeric_only=True keeps the string column 'name' out of the mean; without it,
# DataFrame.mean() on this mixed-type frame raises TypeError on pandas >= 2.0.
t2.fillna(t2.mean(numeric_only=True))
.dataframe tbody tr th {vertical-align: top;
}.dataframe thead th {text-align: right;
}
| name | age | tel |
---|
0 | xioahong | 20.0 | 10020.0 |
---|
1 | xioaming | 19.0 | 123231.0 |
---|
2 | xiaowang | 18.0 | 66625.5 |
---|
t2['age'].fillna(t2['age'].mean(0))
0 20.0
1 19.0
2 18.0
Name: age, dtype: float64
t3 = pd.DataFrame(np.arange(12).reshape(3,4),index=list('abc'),columns=list('WXYZ'))
t3
.dataframe tbody tr th {vertical-align: top;
}.dataframe thead th {text-align: right;
}
t3[t3==0] = np.nan
t3
.dataframe tbody tr th {vertical-align: top;
}.dataframe thead th {text-align: right;
}
| W | X | Y | Z |
---|
a | NaN | 1 | 2 | 3 |
---|
b | 4.0 | 5 | 6 | 7 |
---|
c | 8.0 | 9 | 10 | 11 |
---|
from matplotlib import pyplot as plt
import pandas as pd
file_path = './can.csv'
df = pd.read_csv(file_path)
df
.dataframe tbody tr th {vertical-align: top;
}.dataframe thead th {text-align: right;
}
| 1 | 20 | 1.004 | 0.090 | -0.125 |
---|
0 | 1 | 20 | 1.004 | -0.043 | -0.125 |
---|
1 | 1 | 20 | 0.969 | 0.090 | -0.121 |
---|
2 | 1 | 20 | 0.973 | -0.012 | -0.137 |
---|
3 | 1 | 20 | 1.000 | -0.016 | -0.121 |
---|
4 | 1 | 20 | 0.961 | 0.082 | -0.121 |
---|
... | ... | ... | ... | ... | ... |
---|
152994 | 3 | 100 | 1.051 | 0.090 | -0.262 |
---|
152995 | 3 | 100 | 0.918 | 0.039 | -0.129 |
---|
152996 | 3 | 100 | 1.156 | -0.094 | -0.227 |
---|
152997 | 3 | 100 | 0.934 | 0.203 | -0.172 |
---|
152998 | 3 | 100 | 1.199 | -0.176 | 0.109 |
---|
152999 rows × 5 columns
# info is a method and must be called to produce the summary (index range,
# per-column non-null counts and dtypes, memory usage); it prints the summary
# itself. Passing the uncalled attribute to print() only shows the
# bound-method repr, which embeds the frame's repr.
df.info()
<bound method DataFrame.info of 1 20 1.004 0.090 -0.125
0 1 20 1.004 -0.043 -0.125
1 1 20 0.969 0.090 -0.121
2 1 20 0.973 -0.012 -0.137
3 1 20 1.000 -0.016 -0.121
4 1 20 0.961 0.082 -0.121
... .. ... ... ... ...
152994 3 100 1.051 0.090 -0.262
152995 3 100 0.918 0.039 -0.129
152996 3 100 1.156 -0.094 -0.227
152997 3 100 0.934 0.203 -0.172
152998 3 100 1.199 -0.176 0.109[152999 rows x 5 columns]>
# Histogram of column "20" of the loaded frame.
runtime_data = df["20"].values
print(runtime_data)

# Number of bins = value range floor-divided by a bin width of 5.
min_runtime = runtime_data.min()
max_runtime = runtime_data.max()
num_bin = (max_runtime - min_runtime) // 5
print(num_bin)

plt.figure(figsize=(20, 8), dpi=80)
plt.hist(runtime_data, bins=num_bin)
plt.show()
[ 20 20 20 ... 100 100 100]
16
# Histogram with explicit (non-uniform) bin edges instead of a bin count.
runtime_data = np.array([8.1,7.0,7.3,7.2,6.2,6.1,8.3,6.4,7.1,7.5,8.4,9.9,7.5,7.9,9.8,6.5,7.8,8.9,6.8,7.8,9.8,7.8,6.7,8.9,7.8,7.8,9.7,6.5,6.7,6.4,6.8,9.8,8.1,7.0,7.3,7.2,6.2,6.1,8.3,6.4,7.1,7.5,8.4,9.9,7.5,7.9,9.8,6.5,7.8,8.9,6.8,7.8,9.8,7.8,6.7,8.9,7.8,7.8,9.7,6.5,6.7,6.4,6.8,9.8,8.1,7.0,7.3,7.2,6.2,6.1,8.3,6.4,7.1,7.5,8.4,9.9,7.5,7.9,9.8,6.5,7.8,8.9,6.8,7.8,9.8,7.8,6.7,8.9,7.8,7.8,9.7,6.5,6.7,6.4,6.8,9.8])
print(runtime_data)
max_runtime = runtime_data.max()
min_runtime = runtime_data.min()

# Bin edges: one wide first bin [1.9, 3.5), then 0.5-wide bins.
num_bin_list = [1.9, 3.5]
i = 3.5
# Increment before appending so the final edge lands at or above the maximum
# value, which keeps the largest observation inside the last bin.
while i <= max_runtime:
    i += 0.5
    num_bin_list.append(i)
print(num_bin_list)

plt.figure(figsize=(20, 8), dpi=80)
plt.hist(runtime_data, num_bin_list)
# Put one x tick on every bin edge so the bars line up with the labels.
plt.xticks(num_bin_list)
plt.show()
[8.1 7. 7.3 7.2 6.2 6.1 8.3 6.4 7.1 7.5 8.4 9.9 7.5 7.9 9.8 6.5 7.8 8.9
 6.8 7.8 9.8 7.8 6.7 8.9 7.8 7.8 9.7 6.5 6.7 6.4 6.8 9.8 8.1 7. 7.3 7.2
 6.2 6.1 8.3 6.4 7.1 7.5 8.4 9.9 7.5 7.9 9.8 6.5 7.8 8.9 6.8 7.8 9.8 7.8
 6.7 8.9 7.8 7.8 9.7 6.5 6.7 6.4 6.8 9.8 8.1 7. 7.3 7.2 6.2 6.1 8.3 6.4
 7.1 7.5 8.4 9.9 7.5 7.9 9.8 6.5 7.8 8.9 6.8 7.8 9.8 7.8 6.7 8.9 7.8 7.8
 9.7 6.5 6.7 6.4 6.8 9.8]
[1.9, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0, 8.5, 9.0, 9.5, 10.0]
import pandas as pd

# Small demo frame for grouping: three keys (A, B, C), each appearing three
# times, with an integer 'data' value per row.
df = pd.DataFrame({
    'key': ['A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C'],
    'data': [0, 5, 10, 5, 10, 15, 10, 15, 20],
})
df
.dataframe tbody tr th {vertical-align: top;
}.dataframe thead th {text-align: right;
}
| key | data |
---|
0 | A | 0 |
---|
1 | B | 5 |
---|
2 | C | 10 |
---|
3 | A | 5 |
---|
4 | B | 10 |
---|
5 | C | 15 |
---|
6 | A | 10 |
---|
7 | B | 15 |
---|
8 | C | 20 |
---|
# Manual "group by": for each key, pick its rows with a boolean mask and print
# the column-wise sums of that subset (string columns are concatenated by sum).
for key in ['A', 'B', 'C']:
    subset = df[df['key'] == key]
    print(key, subset.sum())
A key AAA
data 15
dtype: object
B key BBB
data 30
dtype: object
C key CCC
data 45
dtype: object
df
.dataframe tbody tr th {vertical-align: top;
}.dataframe thead th {text-align: right;
}
| key | data |
---|
0 | A | 0 |
---|
1 | B | 5 |
---|
2 | C | 10 |
---|
3 | A | 5 |
---|
4 | B | 10 |
---|
5 | C | 15 |
---|
6 | A | 10 |
---|
7 | B | 15 |
---|
8 | C | 20 |
---|
groupby方法
df.groupby('key').sum()
.dataframe tbody tr th {vertical-align: top;
}.dataframe thead th {text-align: right;
}
import numpy as np
# Per-key mean of the remaining columns. Use the string alias "mean": passing
# the NumPy callable (np.mean) to groupby aggregation is deprecated in recent
# pandas, while the alias dispatches to the same built-in groupby mean.
df.groupby('key').aggregate('mean')
.dataframe tbody tr th {vertical-align: top;
}.dataframe thead th {text-align: right;
}
df = pd.read_csv('./can.csv')
df
.dataframe tbody tr th {vertical-align: top;
}.dataframe thead th {text-align: right;
}
| 1 | 20 | 1.004 | 0.090 | -0.125 |
---|
0 | 1 | 20 | 1.004 | -0.043 | -0.125 |
---|
1 | 1 | 20 | 0.969 | 0.090 | -0.121 |
---|
2 | 1 | 20 | 0.973 | -0.012 | -0.137 |
---|
3 | 1 | 20 | 1.000 | -0.016 | -0.121 |
---|
4 | 1 | 20 | 0.961 | 0.082 | -0.121 |
---|
... | ... | ... | ... | ... | ... |
---|
152994 | 3 | 100 | 1.051 | 0.090 | -0.262 |
---|
152995 | 3 | 100 | 0.918 | 0.039 | -0.129 |
---|
152996 | 3 | 100 | 1.156 | -0.094 | -0.227 |
---|
152997 | 3 | 100 | 0.934 | 0.203 | -0.172 |
---|
152998 | 3 | 100 | 1.199 | -0.176 | 0.109 |
---|
152999 rows × 5 columns
df.groupby('1')['20'].mean()
1
1 60.000784
2 60.000000
3 60.000000
Name: 20, dtype: float64
df.groupby(by = '20').groups
{20: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, ...], 25: [2999, 3000, 3001, 3002, 3003, 3004, 3005, 3006, 3007, 3008, 3009, 3010, 3011, 3012, 3013, 3014, 3015, 3016, 3017, 3018, 3019, 3020, 3021, 3022, 3023, 3024, 3025, 3026, 3027, 3028, 3029, 3030, 3031, 3032, 3033, 3034, 3035, 3036, 3037, 3038, 3039, 3040, 3041, 3042, 3043, 3044, 3045, 3046, 3047, 3048, 3049, 3050, 3051, 3052, 3053, 3054, 3055, 3056, 3057, 3058, 3059, 3060, 3061, 3062, 3063, 3064, 3065, 3066, 3067, 3068, 3069, 3070, 3071, 3072, 3073, 3074, 3075, 3076, 3077, 3078, 3079, 3080, 3081, 3082, 3083, 3084, 3085, 3086, 3087, 3088, 3089, 3090, 3091, 3092, 3093, 3094, 3095, 3096, 3097, 3098, ...], 30: [5999, 6000, 6001, 6002, 6003, 6004, 6005, 6006, 6007, 6008, 6009, 6010, 6011, 6012, 6013, 6014, 6015, 6016, 6017, 6018, 6019, 6020, 6021, 6022, 6023, 6024, 6025, 6026, 6027, 6028, 6029, 6030, 6031, 6032, 6033, 6034, 6035, 6036, 6037, 6038, 6039, 6040, 6041, 6042, 6043, 6044, 6045, 6046, 6047, 6048, 6049, 6050, 6051, 6052, 6053, 6054, 6055, 6056, 6057, 6058, 6059, 6060, 6061, 6062, 6063, 6064, 6065, 6066, 6067, 6068, 6069, 6070, 6071, 6072, 6073, 6074, 6075, 6076, 6077, 6078, 6079, 6080, 6081, 6082, 6083, 6084, 6085, 6086, 6087, 6088, 6089, 6090, 6091, 6092, 6093, 6094, 6095, 6096, 6097, 6098, ...], 35: [8999, 9000, 9001, 9002, 9003, 9004, 9005, 9006, 9007, 9008, 9009, 9010, 9011, 9012, 9013, 9014, 9015, 9016, 9017, 9018, 9019, 9020, 9021, 9022, 9023, 9024, 9025, 9026, 9027, 9028, 9029, 9030, 9031, 9032, 9033, 9034, 9035, 9036, 9037, 9038, 9039, 9040, 9041, 9042, 9043, 9044, 9045, 9046, 9047, 9048, 9049, 9050, 9051, 9052, 9053, 9054, 9055, 9056, 9057, 9058, 9059, 
9060, 9061, 9062, 9063, 9064, 9065, 9066, 9067, 9068, 9069, 9070, 9071, 9072, 9073, 9074, 9075, 9076, 9077, 9078, 9079, 9080, 9081, 9082, 9083, 9084, 9085, 9086, 9087, 9088, 9089, 9090, 9091, 9092, 9093, 9094, 9095, 9096, 9097, 9098, ...], 40: [11999, 12000, 12001, 12002, 12003, 12004, 12005, 12006, 12007, 12008, 12009, 12010, 12011, 12012, 12013, 12014, 12015, 12016, 12017, 12018, 12019, 12020, 12021, 12022, 12023, 12024, 12025, 12026, 12027, 12028, 12029, 12030, 12031, 12032, 12033, 12034, 12035, 12036, 12037, 12038, 12039, 12040, 12041, 12042, 12043, 12044, 12045, 12046, 12047, 12048, 12049, 12050, 12051, 12052, 12053, 12054, 12055, 12056, 12057, 12058, 12059, 12060, 12061, 12062, 12063, 12064, 12065, 12066, 12067, 12068, 12069, 12070, 12071, 12072, 12073, 12074, 12075, 12076, 12077, 12078, 12079, 12080, 12081, 12082, 12083, 12084, 12085, 12086, 12087, 12088, 12089, 12090, 12091, 12092, 12093, 12094, 12095, 12096, 12097, 12098, ...], 45: [14999, 15000, 15001, 15002, 15003, 15004, 15005, 15006, 15007, 15008, 15009, 15010, 15011, 15012, 15013, 15014, 15015, 15016, 15017, 15018, 15019, 15020, 15021, 15022, 15023, 15024, 15025, 15026, 15027, 15028, 15029, 15030, 15031, 15032, 15033, 15034, 15035, 15036, 15037, 15038, 15039, 15040, 15041, 15042, 15043, 15044, 15045, 15046, 15047, 15048, 15049, 15050, 15051, 15052, 15053, 15054, 15055, 15056, 15057, 15058, 15059, 15060, 15061, 15062, 15063, 15064, 15065, 15066, 15067, 15068, 15069, 15070, 15071, 15072, 15073, 15074, 15075, 15076, 15077, 15078, 15079, 15080, 15081, 15082, 15083, 15084, 15085, 15086, 15087, 15088, 15089, 15090, 15091, 15092, 15093, 15094, 15095, 15096, 15097, 15098, ...], 50: [17999, 18000, 18001, 18002, 18003, 18004, 18005, 18006, 18007, 18008, 18009, 18010, 18011, 18012, 18013, 18014, 18015, 18016, 18017, 18018, 18019, 18020, 18021, 18022, 18023, 18024, 18025, 18026, 18027, 18028, 18029, 18030, 18031, 18032, 18033, 18034, 18035, 18036, 18037, 18038, 18039, 18040, 18041, 18042, 18043, 18044, 18045, 
18046, 18047, 18048, 18049, 18050, 18051, 18052, 18053, 18054, 18055, 18056, 18057, 18058, 18059, 18060, 18061, 18062, 18063, 18064, 18065, 18066, 18067, 18068, 18069, 18070, 18071, 18072, 18073, 18074, 18075, 18076, 18077, 18078, 18079, 18080, 18081, 18082, 18083, 18084, 18085, 18086, 18087, 18088, 18089, 18090, 18091, 18092, 18093, 18094, 18095, 18096, 18097, 18098, ...], 55: [20999, 21000, 21001, 21002, 21003, 21004, 21005, 21006, 21007, 21008, 21009, 21010, 21011, 21012, 21013, 21014, 21015, 21016, 21017, 21018, 21019, 21020, 21021, 21022, 21023, 21024, 21025, 21026, 21027, 21028, 21029, 21030, 21031, 21032, 21033, 21034, 21035, 21036, 21037, 21038, 21039, 21040, 21041, 21042, 21043, 21044, 21045, 21046, 21047, 21048, 21049, 21050, 21051, 21052, 21053, 21054, 21055, 21056, 21057, 21058, 21059, 21060, 21061, 21062, 21063, 21064, 21065, 21066, 21067, 21068, 21069, 21070, 21071, 21072, 21073, 21074, 21075, 21076, 21077, 21078, 21079, 21080, 21081, 21082, 21083, 21084, 21085, 21086, 21087, 21088, 21089, 21090, 21091, 21092, 21093, 21094, 21095, 21096, 21097, 21098, ...], 60: [23999, 24000, 24001, 24002, 24003, 24004, 24005, 24006, 24007, 24008, 24009, 24010, 24011, 24012, 24013, 24014, 24015, 24016, 24017, 24018, 24019, 24020, 24021, 24022, 24023, 24024, 24025, 24026, 24027, 24028, 24029, 24030, 24031, 24032, 24033, 24034, 24035, 24036, 24037, 24038, 24039, 24040, 24041, 24042, 24043, 24044, 24045, 24046, 24047, 24048, 24049, 24050, 24051, 24052, 24053, 24054, 24055, 24056, 24057, 24058, 24059, 24060, 24061, 24062, 24063, 24064, 24065, 24066, 24067, 24068, 24069, 24070, 24071, 24072, 24073, 24074, 24075, 24076, 24077, 24078, 24079, 24080, 24081, 24082, 24083, 24084, 24085, 24086, 24087, 24088, 24089, 24090, 24091, 24092, 24093, 24094, 24095, 24096, 24097, 24098, ...], 65: [26999, 27000, 27001, 27002, 27003, 27004, 27005, 27006, 27007, 27008, 27009, 27010, 27011, 27012, 27013, 27014, 27015, 27016, 27017, 27018, 27019, 27020, 27021, 27022, 27023, 27024, 27025, 27026, 
27027, 27028, 27029, 27030, 27031, 27032, 27033, 27034, 27035, 27036, 27037, 27038, 27039, 27040, 27041, 27042, 27043, 27044, 27045, 27046, 27047, 27048, 27049, 27050, 27051, 27052, 27053, 27054, 27055, 27056, 27057, 27058, 27059, 27060, 27061, 27062, 27063, 27064, 27065, 27066, 27067, 27068, 27069, 27070, 27071, 27072, 27073, 27074, 27075, 27076, 27077, 27078, 27079, 27080, 27081, 27082, 27083, 27084, 27085, 27086, 27087, 27088, 27089, 27090, 27091, 27092, 27093, 27094, 27095, 27096, 27097, 27098, ...], 70: [29999, 30000, 30001, 30002, 30003, 30004, 30005, 30006, 30007, 30008, 30009, 30010, 30011, 30012, 30013, 30014, 30015, 30016, 30017, 30018, 30019, 30020, 30021, 30022, 30023, 30024, 30025, 30026, 30027, 30028, 30029, 30030, 30031, 30032, 30033, 30034, 30035, 30036, 30037, 30038, 30039, 30040, 30041, 30042, 30043, 30044, 30045, 30046, 30047, 30048, 30049, 30050, 30051, 30052, 30053, 30054, 30055, 30056, 30057, 30058, 30059, 30060, 30061, 30062, 30063, 30064, 30065, 30066, 30067, 30068, 30069, 30070, 30071, 30072, 30073, 30074, 30075, 30076, 30077, 30078, 30079, 30080, 30081, 30082, 30083, 30084, 30085, 30086, 30087, 30088, 30089, 30090, 30091, 30092, 30093, 30094, 30095, 30096, 30097, 30098, ...], 75: [32999, 33000, 33001, 33002, 33003, 33004, 33005, 33006, 33007, 33008, 33009, 33010, 33011, 33012, 33013, 33014, 33015, 33016, 33017, 33018, 33019, 33020, 33021, 33022, 33023, 33024, 33025, 33026, 33027, 33028, 33029, 33030, 33031, 33032, 33033, 33034, 33035, 33036, 33037, 33038, 33039, 33040, 33041, 33042, 33043, 33044, 33045, 33046, 33047, 33048, 33049, 33050, 33051, 33052, 33053, 33054, 33055, 33056, 33057, 33058, 33059, 33060, 33061, 33062, 33063, 33064, 33065, 33066, 33067, 33068, 33069, 33070, 33071, 33072, 33073, 33074, 33075, 33076, 33077, 33078, 33079, 33080, 33081, 33082, 33083, 33084, 33085, 33086, 33087, 33088, 33089, 33090, 33091, 33092, 33093, 33094, 33095, 33096, 33097, 33098, ...], 80: [35999, 36000, 36001, 36002, 36003, 36004, 36005, 36006, 36007, 
36008, 36009, 36010, 36011, 36012, 36013, 36014, 36015, 36016, 36017, 36018, 36019, 36020, 36021, 36022, 36023, 36024, 36025, 36026, 36027, 36028, 36029, 36030, 36031, 36032, 36033, 36034, 36035, 36036, 36037, 36038, 36039, 36040, 36041, 36042, 36043, 36044, 36045, 36046, 36047, 36048, 36049, 36050, 36051, 36052, 36053, 36054, 36055, 36056, 36057, 36058, 36059, 36060, 36061, 36062, 36063, 36064, 36065, 36066, 36067, 36068, 36069, 36070, 36071, 36072, 36073, 36074, 36075, 36076, 36077, 36078, 36079, 36080, 36081, 36082, 36083, 36084, 36085, 36086, 36087, 36088, 36089, 36090, 36091, 36092, 36093, 36094, 36095, 36096, 36097, 36098, ...], 85: [38999, 39000, 39001, 39002, 39003, 39004, 39005, 39006, 39007, 39008, 39009, 39010, 39011, 39012, 39013, 39014, 39015, 39016, 39017, 39018, 39019, 39020, 39021, 39022, 39023, 39024, 39025, 39026, 39027, 39028, 39029, 39030, 39031, 39032, 39033, 39034, 39035, 39036, 39037, 39038, 39039, 39040, 39041, 39042, 39043, 39044, 39045, 39046, 39047, 39048, 39049, 39050, 39051, 39052, 39053, 39054, 39055, 39056, 39057, 39058, 39059, 39060, 39061, 39062, 39063, 39064, 39065, 39066, 39067, 39068, 39069, 39070, 39071, 39072, 39073, 39074, 39075, 39076, 39077, 39078, 39079, 39080, 39081, 39082, 39083, 39084, 39085, 39086, 39087, 39088, 39089, 39090, 39091, 39092, 39093, 39094, 39095, 39096, 39097, 39098, ...], 90: [41999, 42000, 42001, 42002, 42003, 42004, 42005, 42006, 42007, 42008, 42009, 42010, 42011, 42012, 42013, 42014, 42015, 42016, 42017, 42018, 42019, 42020, 42021, 42022, 42023, 42024, 42025, 42026, 42027, 42028, 42029, 42030, 42031, 42032, 42033, 42034, 42035, 42036, 42037, 42038, 42039, 42040, 42041, 42042, 42043, 42044, 42045, 42046, 42047, 42048, 42049, 42050, 42051, 42052, 42053, 42054, 42055, 42056, 42057, 42058, 42059, 42060, 42061, 42062, 42063, 42064, 42065, 42066, 42067, 42068, 42069, 42070, 42071, 42072, 42073, 42074, 42075, 42076, 42077, 42078, 42079, 42080, 42081, 42082, 42083, 42084, 42085, 42086, 42087, 42088, 42089, 
42090, 42091, 42092, 42093, 42094, 42095, 42096, 42097, 42098, ...], 95: [44999, 45000, 45001, 45002, 45003, 45004, 45005, 45006, 45007, 45008, 45009, 45010, 45011, 45012, 45013, 45014, 45015, 45016, 45017, 45018, 45019, 45020, 45021, 45022, 45023, 45024, 45025, 45026, 45027, 45028, 45029, 45030, 45031, 45032, 45033, 45034, 45035, 45036, 45037, 45038, 45039, 45040, 45041, 45042, 45043, 45044, 45045, 45046, 45047, 45048, 45049, 45050, 45051, 45052, 45053, 45054, 45055, 45056, 45057, 45058, 45059, 45060, 45061, 45062, 45063, 45064, 45065, 45066, 45067, 45068, 45069, 45070, 45071, 45072, 45073, 45074, 45075, 45076, 45077, 45078, 45079, 45080, 45081, 45082, 45083, 45084, 45085, 45086, 45087, 45088, 45089, 45090, 45091, 45092, 45093, 45094, 45095, 45096, 45097, 45098, ...], 100: [47999, 48000, 48001, 48002, 48003, 48004, 48005, 48006, 48007, 48008, 48009, 48010, 48011, 48012, 48013, 48014, 48015, 48016, 48017, 48018, 48019, 48020, 48021, 48022, 48023, 48024, 48025, 48026, 48027, 48028, 48029, 48030, 48031, 48032, 48033, 48034, 48035, 48036, 48037, 48038, 48039, 48040, 48041, 48042, 48043, 48044, 48045, 48046, 48047, 48048, 48049, 48050, 48051, 48052, 48053, 48054, 48055, 48056, 48057, 48058, 48059, 48060, 48061, 48062, 48063, 48064, 48065, 48066, 48067, 48068, 48069, 48070, 48071, 48072, 48073, 48074, 48075, 48076, 48077, 48078, 48079, 48080, 48081, 48082, 48083, 48084, 48085, 48086, 48087, 48088, 48089, 48090, 48091, 48092, 48093, 48094, 48095, 48096, 48097, 48098, ...]}
# 2x3 integer frame used to demonstrate axis-wise reductions.
df = pd.DataFrame(
    data=[[1, 2, 3], [4, 5, 6]],
    index=['a', 'b'],
    columns=['A', 'B', 'C'],
)
df
.dataframe tbody tr th {vertical-align: top;
}.dataframe thead th {text-align: right;
}
df.sum(axis=0)
A 5
B 7
C 9
dtype: int64
df.sum(axis=1)
a 6
b 15
dtype: int64
df.sum(axis='columns')
a 6
b 15
dtype: int64
df.max(axis=0)
A 4
B 5
C 6
dtype: int64
df.median(axis=0)
A 2.5
B 3.5
C 4.5
dtype: float64
二元统计
df = pd.read_csv('./can.csv')
df
.dataframe tbody tr th {vertical-align: top;
}.dataframe thead th {text-align: right;
}
| 1 | 20 | 1.004 | 0.090 | -0.125 |
---|
0 | 1 | 20 | 1.004 | -0.043 | -0.125 |
---|
1 | 1 | 20 | 0.969 | 0.090 | -0.121 |
---|
2 | 1 | 20 | 0.973 | -0.012 | -0.137 |
---|
3 | 1 | 20 | 1.000 | -0.016 | -0.121 |
---|
4 | 1 | 20 | 0.961 | 0.082 | -0.121 |
---|
... | ... | ... | ... | ... | ... |
---|
152994 | 3 | 100 | 1.051 | 0.090 | -0.262 |
---|
152995 | 3 | 100 | 0.918 | 0.039 | -0.129 |
---|
152996 | 3 | 100 | 1.156 | -0.094 | -0.227 |
---|
152997 | 3 | 100 | 0.934 | 0.203 | -0.172 |
---|
152998 | 3 | 100 | 1.199 | -0.176 | 0.109 |
---|
152999 rows × 5 columns
df.head()
.dataframe tbody tr th {vertical-align: top;
}.dataframe thead th {text-align: right;
}
| 1 | 20 | 1.004 | 0.090 | -0.125 |
---|
0 | 1 | 20 | 1.004 | -0.043 | -0.125 |
---|
1 | 1 | 20 | 0.969 | 0.090 | -0.121 |
---|
2 | 1 | 20 | 0.973 | -0.012 | -0.137 |
---|
3 | 1 | 20 | 1.000 | -0.016 | -0.121 |
---|
4 | 1 | 20 | 0.961 | 0.082 | -0.121 |
---|
df.cov()
.dataframe tbody tr th {vertical-align: top;
}.dataframe thead th {text-align: right;
}
| 1 | 20 | 1.004 | 0.090 | -0.125 |
---|
1 | 0.666669 | -0.000261 | -0.003833 | 0.003257 | 0.000941 |
---|
20 | -0.000261 | 599.997386 | 0.040354 | 0.052441 | 0.113526 |
---|
1.004 | -0.003833 | 0.040354 | 0.599015 | 0.012148 | -0.036479 |
---|
0.090 | 0.003257 | 0.052441 | 0.012148 | 0.551461 | -0.010641 |
---|
-0.125 | 0.000941 | 0.113526 | -0.036479 | -0.010641 | 0.267299 |
---|
df.corr()
.dataframe tbody tr th {vertical-align: top;
}.dataframe thead th {text-align: right;
}
| 1 | 20 | 1.004 | 0.090 | -0.125 |
---|
1 | 1.000000 | -0.000013 | -0.006065 | 0.005372 | 0.002228 |
---|
20 | -0.000013 | 1.000000 | 0.002129 | 0.002883 | 0.008964 |
---|
1.004 | -0.006065 | 0.002129 | 1.000000 | 0.021137 | -0.091164 |
---|
0.090 | 0.005372 | 0.002883 | 0.021137 | 1.000000 | -0.027716 |
---|
-0.125 | 0.002228 | 0.008964 | -0.091164 | -0.027716 | 1.000000 |
---|
df
.dataframe tbody tr th {vertical-align: top;
}.dataframe thead th {text-align: right;
}
| 1 | 20 | 1.004 | 0.090 | -0.125 |
---|
0 | 1 | 20 | 1.004 | -0.043 | -0.125 |
---|
1 | 1 | 20 | 0.969 | 0.090 | -0.121 |
---|
2 | 1 | 20 | 0.973 | -0.012 | -0.137 |
---|
3 | 1 | 20 | 1.000 | -0.016 | -0.121 |
---|
4 | 1 | 20 | 0.961 | 0.082 | -0.121 |
---|
... | ... | ... | ... | ... | ... |
---|
152994 | 3 | 100 | 1.051 | 0.090 | -0.262 |
---|
152995 | 3 | 100 | 0.918 | 0.039 | -0.129 |
---|
152996 | 3 | 100 | 1.156 | -0.094 | -0.227 |
---|
152997 | 3 | 100 | 0.934 | 0.203 | -0.172 |
---|
152998 | 3 | 100 | 1.199 | -0.176 | 0.109 |
---|
152999 rows × 5 columns
df['1.004'].value_counts()
0.980 4452
0.977 4358
0.996 4232
1.000 4194
0.984 4166
...
-3.547 1
5.844 1
4.988 1
6.816 1
-3.668 1
Name: 1.004, Length: 2733, dtype: int64
df['1.004'].value_counts(ascending=True)
-3.668 1
6.816 1
4.988 1
5.844 1
-3.547 1
...
0.984 4166
1.000 4194
0.996 4232
0.977 4358
0.980 4452
Name: 1.004, Length: 2733, dtype: int64
df['1.004'].value_counts(ascending=True, bins=5)
(-8.017, -4.801] 118
(4.797, 7.996] 635
(-4.801, -1.602] 1613
(1.598, 4.797] 10207
(-1.602, 1.598] 140426
Name: 1.004, dtype: int64
df['1'].value_counts(ascending=True)
1 50999
2 51000
3 51000
Name: 1, dtype: int64