16  Aggregate

import pandas as pd
import numpy as np
# Toy data set: one row per animal, labelled by the animal's name.
# Each key below becomes a column; the monkey's max_speed is missing (NaN).
df = pd.DataFrame(
    {
        'class': ['bird', 'bird', 'mammal', 'mammal', 'mammal'],
        'order': ['Falconiformes', 'Psittaciformes', 'Carnivora',
                  'Primates', 'Carnivora'],
        'max_speed': [389.0, 24.0, 80.2, np.nan, 58.0],
    },
    index=['falcon', 'parrot', 'lion', 'monkey', 'leopard'],
)
df
          class           order  max_speed
falcon     bird   Falconiformes      389.0
parrot     bird  Psittaciformes       24.0
lion     mammal       Carnivora       80.2
monkey   mammal        Primates        NaN
leopard  mammal       Carnivora       58.0

16.0.1 groupby(): group by categorical columns

# Group the rows of df by the value in their 'class' column.
grouped = df.groupby(by='class')
# Show which index labels fall into each group.
grouped.groups
{'bird': ['falcon', 'parrot'], 'mammal': ['lion', 'monkey', 'leopard']}
# Group by two keys at once: every distinct (class, order) pair is a group.
grouped2 = df.groupby(by=['class', 'order'])
# Group keys are now tuples, each mapped to the index labels of its rows.
grouped2.groups
{('bird', 'Falconiformes'): ['falcon'], ('bird', 'Psittaciformes'): ['parrot'], ('mammal', 'Carnivora'): ['lion', 'leopard'], ('mammal', 'Primates'): ['monkey']}
# Number of rows in each group; the monkey row is counted even though its
# max_speed is NaN.
grouped.size()
class
bird      2
mammal    3
dtype: int64
# Mean max_speed per class. The built-in GroupBy.mean() replaces
# apply(np.mean): the result is the same (NaN values are skipped), but it
# uses pandas' own aggregation instead of calling a Python function once
# per group.
df.groupby('class')['max_speed'].mean()
class
bird      206.5
mammal     69.1
Name: max_speed, dtype: float64