A 8
B 9
C 10
D 11
Name: 2024-01-03 00:00:00, dtype: int32
df.loc[:,['A','B']]# keep every row, select only columns A and B
A
B
2024-01-01
0
1
2024-01-02
4
5
2024-01-03
8
9
2024-01-04
12
13
2024-01-05
16
17
2024-01-06
20
21
# Label-based lookup: the date string picks the 2024-01-02 row, restricted to columns A and B
df.loc['20240102',['A','B']]
A 4
B 5
Name: 2024-01-02 00:00:00, dtype: int32
select by position:iloc 纯位置筛选
df
A
B
C
D
2024-01-01
0
1
2
3
2024-01-02
4
5
6
7
2024-01-03
8
9
10
11
2024-01-04
12
13
14
15
2024-01-05
16
17
18
19
2024-01-06
20
21
22
23
df.iloc[3]# positions are 0-based, so 3 is the FOURTH row (2024-01-04)
A 12
B 13
C 14
D 15
Name: 2024-01-04 00:00:00, dtype: int32
df.iloc[3,1]# fourth row, second column (0-based positions 3 and 1) -> 13
13
# Positional slices exclude the end point: row positions 3-4, column positions 1-2 (B and C)
df.iloc[3:5,1:3]
B
C
2024-01-04
13
14
2024-01-05
17
18
# Explicit row positions 1, 3 and 5 combined with the column slice 1:3 (B and C)
df.iloc[[1,3,5],1:3]
B
C
2024-01-02
5
6
2024-01-04
13
14
2024-01-06
21
22
mixed selection:ix 既有标签又有位置筛选
df
A
B
C
D
2024-01-01
0
1
2
3
2024-01-02
4
5
6
7
2024-01-03
8
9
10
11
2024-01-04
12
13
14
15
2024-01-05
16
17
18
19
2024-01-06
20
21
22
23
df.ix[:3,['A','C']]# .ix was deprecated in pandas 0.20 and removed in pandas 1.0 (a pandas change, not an Anaconda one), hence the AttributeError; use df.loc[df.index[:3], ['A','C']] or df.iloc[:3, [0, 2]] instead
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Cell In[64], line 1
----> 1 df.ix[:3,['A','C']]
File D:\ProgramData\anaconda3\Lib\site-packages\pandas\core\generic.py:5902, in NDFrame.__getattr__(self, name)
5895 if (
5896 name not in self._internal_names_set
5897 and name not in self._metadata
5898 and name not in self._accessors
5899 and self._info_axis._can_hold_identifiers_and_holds_name(name)
5900 ):
5901 return self[name]
-> 5902 return object.__getattribute__(self, name)
AttributeError: 'DataFrame' object has no attribute 'ix'
C:\Users\43160\AppData\Local\Temp\ipykernel_15804\3917667868.py:1: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
res = df1.append(df2,ignore_index=True)
C:\Users\43160\AppData\Local\Temp\ipykernel_15804\3744420715.py:1: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
res = df1.append([df2,df3],ignore_index=True)
a
b
c
d
0
0.0
0.0
0.0
0.0
1
0.0
0.0
0.0
0.0
2
0.0
0.0
0.0
0.0
3
1.0
1.0
1.0
1.0
4
1.0
1.0
1.0
1.0
5
1.0
1.0
1.0
1.0
6
1.0
1.0
1.0
1.0
7
1.0
1.0
1.0
1.0
8
1.0
1.0
1.0
1.0
import pandas as pd  # no-op if pandas was already imported earlier in the notebook

# DataFrame.append is deprecated (see the FutureWarning it emits) and was
# removed in pandas 2.0. pd.concat is the supported replacement: it stacks the
# frames row-wise and, without ignore_index, keeps each frame's original index
# labels exactly as df1.append([df2, df3]) did.
res = pd.concat([df1, df2, df3])
res
C:\Users\43160\AppData\Local\Temp\ipykernel_15804\1214992729.py:1: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
res = df1.append([df2,df3])
C:\Users\43160\AppData\Local\Temp\ipykernel_15804\2713288841.py:1: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
res = df1.append(s1,ignore_index=True)
a
b
c
d
0
0.0
0.0
0.0
0.0
1
0.0
0.0
0.0
0.0
2
0.0
0.0
0.0
0.0
3
1.0
2.0
3.0
4.0
3.merge合并
import pandas as pd
# Two small frames that share a single join column, 'key'.
left = pd.DataFrame(
    {
        'key': ['K0', 'K1', 'K2', 'K3'],
        'A': ['A0', 'A1', 'A2', 'A3'],
        'B': ['B0', 'B1', 'B2', 'B3'],
    }
)
right = pd.DataFrame(
    {
        'key': ['K0', 'K1', 'K2', 'K3'],
        'C': ['C0', 'C1', 'C2', 'C3'],
        'D': ['D0', 'D1', 'D2', 'D3'],
    }
)
right
key
C
D
0
K0
C0
D0
1
K1
C1
D1
2
K2
C2
D2
3
K3
C3
D3
left
key
A
B
0
K0
A0
B0
1
K1
A1
B1
2
K2
A2
B2
3
K3
A3
B3
# Join the two frames on their shared 'key' column.
res = left.merge(right, on='key')
res
key
A
B
C
D
0
K0
A0
B0
C0
D0
1
K1
A1
B1
C1
D1
2
K2
A2
B2
C2
D2
3
K3
A3
B3
C3
D3
consider two keys
# Frames identified by a composite key (key1, key2); some key pairs exist in
# only one frame and ('K1', 'K0') appears twice on the right.
left = pd.DataFrame(
    {
        'key1': ['K0', 'K0', 'K1', 'K2'],
        'key2': ['K0', 'K1', 'K0', 'K1'],
        'A': ['A0', 'A1', 'A2', 'A3'],
        'B': ['B0', 'B1', 'B2', 'B3'],
    }
)
right = pd.DataFrame(
    {
        'key1': ['K0', 'K1', 'K1', 'K2'],
        'key2': ['K0', 'K0', 'K0', 'K0'],
        'C': ['C0', 'C1', 'C2', 'C3'],
        'D': ['D0', 'D1', 'D2', 'D3'],
    }
)
left
key1
key2
A
B
0
K0
K0
A0
B0
1
K0
K1
A1
B1
2
K1
K0
A2
B2
3
K2
K1
A3
B3
right
key1
key2
C
D
0
K0
K0
C0
D0
1
K1
K0
C1
D1
2
K1
K0
C2
D2
3
K2
K0
C3
D3
# Rows are matched only when BOTH key1 and key2 agree.
join_keys = ['key1', 'key2']
res = left.merge(right, on=join_keys)
res
key1
key2
A
B
C
D
0
K0
K0
A0
B0
C0
D0
1
K1
K0
A2
B2
C1
D1
2
K1
K0
A2
B2
C2
D2
# how='inner' is the default, so this matches the call without `how`:
# only key pairs present in both frames survive.
res = left.merge(right, how='inner', on=['key1', 'key2'])
res
key1
key2
A
B
C
D
0
K0
K0
A0
B0
C0
D0
1
K1
K0
A2
B2
C1
D1
2
K1
K0
A2
B2
C2
D2
# `how` can be 'inner', 'outer', 'left' or 'right'.
# 'outer' keeps key pairs found in either frame and fills the missing side with NaN.
res = left.merge(right, how='outer', on=['key1', 'key2'])
res
key1
key2
A
B
C
D
0
K0
K0
A0
B0
C0
D0
1
K0
K1
A1
B1
NaN
NaN
2
K1
K0
A2
B2
C1
D1
3
K1
K0
A2
B2
C2
D2
4
K2
K1
A3
B3
NaN
NaN
5
K2
K0
NaN
NaN
C3
D3
left
key1
key2
A
B
0
K0
K0
A0
B0
1
K0
K1
A1
B1
2
K1
K0
A2
B2
3
K2
K1
A3
B3
right
key1
key2
C
D
0
K0
K0
C0
D0
1
K1
K0
C1
D1
2
K1
K0
C2
D2
3
K2
K0
C3
D3
# 'left' keeps every row of `left`; unmatched rows get NaN in C and D.
res = left.merge(right, how='left', on=['key1', 'key2'])
res
key1
key2
A
B
C
D
0
K0
K0
A0
B0
C0
D0
1
K0
K1
A1
B1
NaN
NaN
2
K1
K0
A2
B2
C1
D1
3
K1
K0
A2
B2
C2
D2
4
K2
K1
A3
B3
NaN
NaN
# 'right' is the mirror image of 'left': the join is driven by the rows of `right`.
res = left.merge(right, how='right', on=['key1', 'key2'])
res
# indicator=True adds a '_merge' column that records where each row came from:
# left_only, right_only or both.
res = df1.merge(df2, how='outer', on='col1', indicator=True)
res
col1
col_left
col_right
_merge
0
0
a
NaN
left_only
1
1
b
2.0
both
2
2
NaN
2.0
right_only
3
2
NaN
2.0
right_only
# Passing a string to `indicator` uses it as the name of the provenance column
# in place of the default '_merge'.
res = df1.merge(df2, how='outer', on='col1', indicator='indicator_columns')
res
col1
col_left
col_right
indicator_columns
0
0
a
NaN
left_only
1
1
b
2.0
both
2
2
NaN
2.0
right_only
3
2
NaN
2.0
right_only
index
# Here the join keys live in the index rather than in a column.
left = pd.DataFrame(
    {
        'A': ['A0', 'A1', 'A2'],
        'B': ['B0', 'B1', 'B2'],
    },
    index=['K0', 'K1', 'K2'],
)
right = pd.DataFrame(
    {
        'C': ['C0', 'C2', 'C3'],
        'D': ['D0', 'D2', 'D3'],
    },
    index=['K0', 'K2', 'K3'],
)
left
A
B
K0
A0
B0
K1
A1
B1
K2
A2
B2
right
C
D
K0
C0
D0
K2
C2
D2
K3
C3
D3
# left_index / right_index: use the row labels of each frame as the join key.
# With how='outer', labels present in either index are kept.
res = left.merge(right, how='outer', left_index=True, right_index=True)
res
A
B
C
D
K0
A0
B0
C0
D0
K1
A1
B1
NaN
NaN
K2
A2
B2
C2
D2
K3
NaN
NaN
C3
D3
# Same index-based join, but 'inner' keeps only labels found in both indexes.
res = left.merge(right, how='inner', left_index=True, right_index=True)
res
A
B
C
D
K0
A0
B0
C0
D0
K2
A2
B2
C2
D2
# Handling overlapping column names: both frames carry an 'age' column.
boys = pd.DataFrame(
    {
        'k': ['K0', 'K1', 'K2'],
        'age': [1, 2, 3],
    }
)
girls = pd.DataFrame(
    {
        'k': ['K0', 'K0', 'K3'],
        'age': [4, 5, 6],
    }
)
boys
k
age
0
K0
1
1
K1
2
2
K2
3
girls
k
age
0
K0
4
1
K0
5
2
K3
6
# The two 'age' columns share a name but mean different things; `suffixes`
# renames them to age_boy / age_girl so both survive the join.
res = boys.merge(girls, on='k', how='inner', suffixes=['_boy', '_girl'])
res
k
age_boy
age_girl
0
K0
1
4
1
K0
1
5
pandas:数据可视化
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Data to plot
# Series: a one-dimensional array, here 1000 standard-normal samples indexed 0..999
data = pd.Series(
    np.random.randn(1000),
    index=np.arange(1000),
)
data