1. 设置小数点位数
- ~~~python
df['a'] = df['a'].round(decimals=2)
1 2 3
| 2. ~~~python decimals = pd.Series([1, 0, 2], index=['A', 'B', 'C']) df.round(decimals)
|
2. pandas统计每行的个数
1 2
| zero_col_count = dict(df[0].value_counts()) three_row_count = dict(df.loc[3].value_counts())
|
🎵我喜欢的音乐-Ming
3. 全局变量
global
1 2 3 4 5 6 7
| i = 0 def add(a,b): global i print('第'+str(i)+'次计算') return a+b for i in range(10): print(add(i,i+1))
|
4. pandas中apply列的输入输出
1 to 1
1 2 3 4
| def judge(x): if x < 10: return 0 df['new_col'] = df['exist_col'].apply(judge)
|
N to 1
1 2 3 4
| def judge(x): if(x['old_col_1'] < 10 and x['old_col_2'] == 'boy'): return 'child' df['new_col'] = df.apply(judge,axis=1)
|
1 to N
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
| 就是针对输出的三列写三个函数即可,或者使用lambda分别处理就行 df_test = pd.DataFrame([ {'dir': '/Users/uname1', 'size': 994933}, {'dir': '/Users/uname2', 'size': 109338711}, ])
df_test['size_kb'] = df_test['size'].astype(int).apply(lambda x: locale.format_string("%.1f", x / 1024.0, grouping=True) + ' KB') df_test['size_mb'] = df_test['size'].astype(int).apply(lambda x: locale.format_string("%.1f", x / 1024.0 ** 2, grouping=True) + ' MB') df_test['size_gb'] = df_test['size'].astype(int).apply(lambda x: locale.format_string("%.1f", x / 1024.0 ** 3, grouping=True) + ' GB')
df_test
dir size size_kb size_mb size_gb 0 /Users/uname1 994933 971.6 KB 0.9 MB 0.0 GB 1 /Users/uname2 109338711 106,776.1 KB 104.3 MB 0.1 GB
|
5. 删除空值
1 2 3
| cond = df.isnull().any(axis = 1) index = df[cond].index df2 = df.drop(labels=index)
|
6. 根据条件删除值
1 2 3
| cond = (df < 60).any(axis=1) index = df[cond].index df3 = df.drop(labels=index)
|
7. 规范化到[0,1]
1 2 3 4 5 6
| from sklearn import preprocessing
min_max_scaler=preprocessing.MinMaxScaler() car_x=min_max_scaler.fit_transform(car_x) pd.DataFrame(car_x).to_csv('temp.csv', index=False) print(car_x)
|
8. pandas删除行、列
1 2 3 4 5 6 7 8 9 10 11
| print(df.drop('Charlie', axis=0)) print(df.drop('Charlie')) print(df.drop(index='Charlie'))
print(df.drop(['Bob', 'Dave', 'Frank'])) print(df.drop(index=['Bob', 'Dave', 'Frank']))
print(df.index[[1, 3, 5]]) print(df.drop(df.index[[1, 3, 5]])) print(df.drop(index=df.index[[1, 3, 5]]))
|
1 2 3 4 5 6 7 8 9 10
| print(df.drop('state', axis=1)) print(df.drop(columns='state'))
print(df.drop(['state', 'point'], axis=1)) print(df.drop(columns=['state', 'point']))
print(df.columns[[1, 2]]) print(df.drop(df.columns[[1, 2]], axis=1)) print(df.drop(columns=df.columns[[1, 2]]))
|
1 2 3 4 5 6 7 8 9
| print(df.drop(index=['Bob', 'Dave', 'Frank'], columns=['state', 'point']))
print(df.drop(index=df.index[[1, 3, 5]], columns=df.columns[[1, 2]]))
df_org = df.copy() df_org.drop(index=['Bob', 'Dave', 'Frank'], columns=['state', 'point'], inplace=True)
|
9. 删除缺失值的行
1
| df = df[~df['col'].isnull()]
|
10. 删除索引
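在读取的时候不要读取索引列!(注意:`index=False` 是 `to_excel`/`to_csv` 写出时的参数,`read_excel` 并不支持;读取时应通过 `index_col` 参数指定索引列,例如 `index_col=0`。)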
1
| qs = pd.read_excel('./2018-QS-World-University-Rankings-Top200.xlsx',index=False)
|
11. matplotlib画图
看这个就完事了
12. 你问我怎么添加图例
13. 设置行的索引为一列
1
| data['user_geohash'] = data._stat_axis.values.tolist()
|
14. pandas修改列名
1
| df1.rename(columns={'c':'D'},inplace=True)
|
15. pandas修改列顺序
1 2
| order = ['date', 'time', 'open', 'high', 'low', 'close', 'volumefrom', 'volumeto'] df = df[order]
|

16. pandas获取指定列中某个值(范围)所属的行
1
| df[df['colName'].isin([left,right])]
|
17. pandas合并表
1 2 3
| df.join() pd.concat() df.merge()
|
18. 三目运算符
19. 查看列名
plt.annotate('',xy = (6,30),xytext = (8,32),arrowprops={'arrowstyle':'->'})
~~~