Pandas综合练习

ryluo 2020-06-14 01:29:22
Pandas系列

Datawhale组队学习pandas笔记

第一题

import pandas as pd
import numpy as np

data = pd.read_csv('数据集/2002年-2018年上海机动车拍照拍卖.csv')
data['Winning rate'] = data['Total number of license issued'] / data['Total number of applicants']
data.head()
data['years'] = data['Date'].apply(lambda x: 2000 + int(x.split('-')[0]))
data['months'] = data['Date'].apply(lambda x:x.split('-')[1])
months = ["Jan","Feb","Mar","Apr","May","Jun",
         "Jul","Aug","Sep","Oct","Nov","Dec"]

data['months'] = pd.Categorical(data['months'], categories=months, ordered=True)
data.head()
index = data[data['Winning rate']  < 0.05]['Date'].index
# 第一题
data.loc[index].sort_values(by=['years', 'months'])['Date'].head(1)

'''
OUT:
159    15-May
'''
# 第三题答案
data = data.reindex(columns=['years', 'months', 'Total number of license issued', 'lowest price ', 
                      'avg price', 'Total number of applicants', 'Winning rate'])
data.head()
# 第二题
df_2 = data.groupby('years')['lowest price '].agg(['max', 'mean'])
df_2['quantile_0.75'] = data.groupby('years')['lowest price '].agg(lambda x: x.quantile(0.75))
df_2.head()
L1 = ['Year']
L2 = ['Total number of license issued', 'lowest price ', 'avg price', 'Total number of applicants']
mul_index = pd.MultiIndex.from_product([L1,L2],names=('Upper', 'Lower'))

mul_index
data_cp = data.copy()
data_cp = data_cp.melt(id_vars=['years', 'months'], value_vars=\
                       ['Total number of license issued', 'lowest price ', 'avg price', 'Total number of applicants'])
data_cp
# 第四题
pd.pivot_table(data_cp, index=['years', 'variable'], columns='months',values='value').head()