import pandas as pd
import re
13 Data I/O
13.1 Read CSV
iris = pd.read_csv('~/icloud/Data/iris.csv')
iris
| | Sepal.Length | Sepal.Width | Petal.Length | Petal.Width | Species |
|---|---|---|---|---|---|
| 0 | 5.1 | 3.5 | 1.4 | 0.2 | setosa | 
| 1 | 4.9 | 3.0 | 1.4 | 0.2 | setosa | 
| 2 | 4.7 | 3.2 | 1.3 | 0.2 | setosa | 
| 3 | 4.6 | 3.1 | 1.5 | 0.2 | setosa | 
| 4 | 5.0 | 3.6 | 1.4 | 0.2 | setosa | 
| ... | ... | ... | ... | ... | ... | 
| 145 | 6.7 | 3.0 | 5.2 | 2.3 | virginica | 
| 146 | 6.3 | 2.5 | 5.0 | 1.9 | virginica | 
| 147 | 6.5 | 3.0 | 5.2 | 2.0 | virginica | 
| 148 | 6.2 | 3.4 | 5.4 | 2.3 | virginica | 
| 149 | 5.9 | 3.0 | 5.1 | 1.8 | virginica | 
150 rows × 5 columns
13.2 Specify data types at read time
iris = pd.read_csv('~/icloud/Data/iris.csv',
    dtype = {'Species': 'category'})
iris.shape
(150, 5)
13.3 Drop duplicates
iris = iris.drop_duplicates()
iris.shape
(149, 5)
13.4 Clean column names
# Replace every literal dot in the column names with an underscore.
# The pattern is a raw string so the regex escape `\.` reaches `re` intact;
# in a plain literal '\.' is an invalid string escape and Python emits a
# SyntaxWarning for it.
iris.columns = [re.sub(r'\.', '_', col) for col in iris.columns]
iris.columns
<>:1: SyntaxWarning:
invalid escape sequence '\.'
<>:1: SyntaxWarning:
invalid escape sequence '\.'
/var/folders/rb/99nqfz7s2rb6d_p0d6yxtbxc0000gn/T/ipykernel_49981/2509700635.py:1: SyntaxWarning:
invalid escape sequence '\.'
Index(['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width',
       'Species'],
      dtype='object')
13.5 Write CSV
iris.to_csv('~/icloud/Data/irisp.csv')
13.6 Write parquet
iris.to_parquet('~/icloud/Data/irisp.parquet')