import pandas as pd
import numpy as np

16 Aggregate
# Example table: one row per animal (the row label), with its 'class',
# 'order' and 'max_speed'. The monkey's max_speed is deliberately missing (NaN).
df = pd.DataFrame(
    {
        'class': ['bird', 'bird', 'mammal', 'mammal', 'mammal'],
        'order': ['Falconiformes', 'Psittaciformes', 'Carnivora',
                  'Primates', 'Carnivora'],
        'max_speed': [389.0, 24.0, 80.2, np.nan, 58],
    },
    index=['falcon', 'parrot', 'lion', 'monkey', 'leopard'],
)
df

| | class | order | max_speed |
|---|---|---|---|
| falcon | bird | Falconiformes | 389.0 |
| parrot | bird | Psittaciformes | 24.0 |
| lion | mammal | Carnivora | 80.2 |
| monkey | mammal | Primates | NaN |
| leopard | mammal | Carnivora | 58.0 |
16.0.1 groupby(): group by categorical
# Partition the rows of df by the value in the 'class' column.
grouped = df.groupby(by='class')
grouped.groups
{'bird': ['falcon', 'parrot'], 'mammal': ['lion', 'monkey', 'leopard']}
# Partition the rows of df by each distinct ('class', 'order') pair.
grouped2 = df.groupby(by=['class', 'order'])
grouped2.groups
{('bird', 'Falconiformes'): ['falcon'], ('bird', 'Psittaciformes'): ['parrot'], ('mammal', 'Carnivora'): ['lion', 'leopard'], ('mammal', 'Primates'): ['monkey']}
grouped.size()
class
bird 2
mammal 3
dtype: int64
df.groupby('class')['max_speed'].apply(np.mean)
class
bird 206.5
mammal 69.1
Name: max_speed, dtype: float64