python索引

来源：尔游网

分层索引

分层索引提供了一种在更低维度的形式中处理更高维数据的方式。
话不多说，来看代码。

import numpy as np
import pandas as pd

# Build a Series with a two-level (hierarchical) index: the first list holds
# the outer labels and the second the inner labels, paired position by position.
# The values are random, so the numbers printed will differ from run to run.
data = pd.Series(
    np.random.randn(9),
    index=[
        ['a', 'a', 'a', 'b', 'b', 'c', 'c', 'c', 'd'],
        [1, 2, 3, 1, 3, 1, 2, 3, 3],
    ],
)
data
a  1   -1.958537
   2   -0.629011
   3   -0.025869
b  1    1.578471
   3   -0.444149
c  1    1.780159
   2   -0.633612
   3    0.269422
d  3    1.583292
dtype: float64

# Inspect the index; this Series was built with two label lists, so it has two levels.
data.index
# Select by an outer-level label: returns the entries under 'b', indexed by the inner level.
data['b']
# out: 
1    1.578471
3   -0.444149
dtype: float64

# Slice by outer-level labels; both endpoints ('b' and 'c') are included.
data['b': 'c']
# out:
b  1    1.578471
   3   -0.444149
c  1    1.780159
   2   -0.633612
   3    0.269422
dtype: float64

# Pass a list of outer-level labels to .loc to pick several groups at once.
data.loc[['a', 'b']]
# out:
a  1   -1.958537
   2   -0.629011
   3   -0.025869
b  1    1.578471
   3   -0.444149
dtype: float64

# Select on the inner level: keep every outer label that has an inner label of 2.
data.loc[:, 2]
# out:
a   -0.629011
c   -0.633612
dtype: float64

# Pivot the inner index level into columns; (outer, inner) pairs that do not exist become NaN.
data.unstack()
# out:
	1	2	3
a	-1.958537	-0.629011	-0.025869
b	1.578471	NaN	-0.444149
c	1.780159	-0.633612	0.269422
d	NaN	NaN	1.583292

# Inverse operation: stack the columns back into the inner index level.
data.unstack().stack()

DataFrame也类似：行索引和列索引都可以是分层索引。

# A 4x3 frame whose rows and columns both carry a two-level index.
df = pd.DataFrame(
    np.arange(12).reshape(4, 3),
    index=[['a', 'a', 'b', 'c'], [1, 2, 1, 2]],
    columns=[['ohio', 'ohio', 'colorado'], ['green', 'red', 'green']],
)
# Give each level of the row index and of the column index a name.
df.index = df.index.set_names(['index1', 'index2'])
df.columns = df.columns.set_names(['state', 'color'])

重排序和层级排序

# Swap the order of the two row-index levels.
df.swaplevel('index1', 'index2')
# Sort the rows by the first (outermost) index level.
df.sort_index(level = 0)

按照层级进行汇总统计

# Sum the columns that share the same 'state' label.
# DataFrame.sum(level=...) was deprecated in pandas 1.3 and removed in 2.0,
# so group on the transposed frame (column levels become row levels), sum,
# and transpose back to the original orientation.
df.T.groupby(level = 'state').sum().T

使用DataFrame的列进行索引

# Sample frame; columns 'c' and 'd' are used as index levels below.
df1 = pd.DataFrame(
    {
        'a': range(5),
        'b': np.random.randn(5),
        'c': ['one', 'two', 'two', 'two', 'three'],
        'd': [10, 11, 12, 13, 14],
    }
)
# Turn the values of columns 'c' and 'd' into a two-level row index.
df2 = df1.set_index(keys=['c', 'd'])

# Same index, but keep 'c' and 'd' as ordinary columns too.
df3 = df1.set_index(keys=['c', 'd'], drop=False)
# out:
		a	b	c	d
c	d				
one	10	0	-0.699494	one	10
two	11	1	0.185732	two	11
	12	2	0.346032	two	12
	13	3	0.666136	two	13
three	14	4	0.542233	three	14

# Reverse of set_index: move the index levels back into ordinary columns.
df2.reset_index()

因篇幅问题不能全部显示，请点此查看更多更全内容

查看全文