Plotting from Pandas and with Seaborn
Original Source: https://www.coursera.org/specializations/data-science-python
Pandas Visualization
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# set default figure size to (14, 8)
plt.rcParams['figure.figsize'] = (14.0, 8.0)
# see the pre-defined styles provided.
plt.style.available
['bmh',
'classic',
'dark_background',
'fast',
'fivethirtyeight',
'ggplot',
'grayscale',
'seaborn-bright',
'seaborn-colorblind',
'seaborn-dark-palette',
'seaborn-dark',
'seaborn-darkgrid',
'seaborn-deep',
'seaborn-muted',
'seaborn-notebook',
'seaborn-paper',
'seaborn-pastel',
'seaborn-poster',
'seaborn-talk',
'seaborn-ticks',
'seaborn-white',
'seaborn-whitegrid',
'seaborn',
'Solarize_Light2',
'tableau-colorblind10',
'_classic_test']
# use the colorblind-friendly seaborn style sheet.
# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old names, so fall back to the new name when the
# old one is not listed in plt.style.available.
style_name = 'seaborn-colorblind'
if style_name not in plt.style.available:
    style_name = 'seaborn-v0_8-colorblind'
plt.style.use(style_name)
DataFrame.plot
# seed the global RNG so the three random walks are reproducible
np.random.seed(123)
# one row per day, 365 days starting on 2017-01-01
days = pd.date_range('1/1/2017', periods=365)
# draw the walks in column order A, B, C so the random stream is consumed
# in the same order as the columns appear; B and C are shifted by +20 / -20
walk_a = np.random.randn(365).cumsum(0)
walk_b = np.random.randn(365).cumsum(0) + 20
walk_c = np.random.randn(365).cumsum(0) - 20
df = pd.DataFrame({'A': walk_a, 'B': walk_b, 'C': walk_c}, index=days)
df.head()
A | B | C | |
---|---|---|---|
2017-01-01 | -1.085631 | 20.059291 | -20.230904 |
2017-01-02 | -0.088285 | 21.803332 | -16.659325 |
2017-01-03 | 0.194693 | 20.835588 | -17.055481 |
2017-01-04 | -1.311601 | 21.255156 | -17.093802 |
2017-01-05 | -1.890202 | 21.462083 | -19.518638 |
df.plot(); # add a semi-colon to the end of the plotting call to suppress unwanted output
We can select which plot we want to use by passing it into the ‘kind’ parameter.
df.plot('A','B', kind = 'scatter');
You can also choose the plot kind by using the `DataFrame.plot.<kind>` methods instead of providing the `kind` keyword argument.
Accepted values for `kind`:
- 'line' : line plot (default)
- 'bar' : vertical bar plot
- 'barh' : horizontal bar plot
- 'hist' : histogram
- 'box' : boxplot
- 'kde' : Kernel Density Estimation plot
- 'density' : same as 'kde'
- 'area' : area plot
- 'pie' : pie plot
- 'scatter' : scatter plot
- 'hexbin' : hexbin plot
# create a scatter plot of columns 'A' and 'C', with changing color (c) and size (s) based on column 'B'
df.plot.scatter('A', 'C', c='B', s=df['B'], colormap='viridis')
<matplotlib.axes._subplots.AxesSubplot at 0x20d94e15630>
# plot.scatter returns the matplotlib Axes; keep it so it can be adjusted below
ax = df.plot.scatter('A', 'C', c='B', s=df['B'], colormap='viridis')
# draw one unit on the x axis at the same length as one unit on the y axis
ax.set_aspect('equal')
df.plot.box();
df.plot.hist(alpha=0.5);
Kernel density estimation plots are useful for deriving a smooth continuous function from a given sample.
df.plot.kde();
pandas.plotting
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
iris = load_iris()
# Build a single DataFrame from the loaded iris dataset.
# np.c_ stacks its arguments column-wise, so iris['target'] is appended as one
# extra column to the right of the iris['data'] measurements.
# The column labels are the iris['feature_names'] list plus a one-element list
# naming the appended column; 'target' is an arbitrary label (the original
# dataset would probably call it 'Species').
# NOTE: this rebinds `iris` from the load_iris() result to the DataFrame, so
# running this statement a second time without calling load_iris() again fails.
iris = pd.DataFrame(data= np.c_[iris['data'], iris['target']],
columns= iris['feature_names'] + ['target'])
iris.head()
sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | target | |
---|---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 | 0.0 |
1 | 4.9 | 3.0 | 1.4 | 0.2 | 0.0 |
2 | 4.7 | 3.2 | 1.3 | 0.2 | 0.0 |
3 | 4.6 | 3.1 | 1.5 | 0.2 | 0.0 |
4 | 5.0 | 3.6 | 1.4 | 0.2 | 0.0 |
# grid of pairwise scatter plots for the columns of iris
pd.plotting.scatter_matrix(iris);
# open a new figure before drawing the next plot
plt.figure()
# one line per row of iris across the feature axes, coloured by the 'target' column
pd.plotting.parallel_coordinates(iris, 'target');
Seaborn
import seaborn as sns
# set seaborn design
sns.set()
# fix the seed so both samples are reproducible
np.random.seed(1234)
n_samples = 1000
# v1: draws from a normal distribution with mean 0 and standard deviation 10
v1 = pd.Series(np.random.normal(loc=0, scale=10, size=n_samples), name='v1')
# v2: twice v1 plus independent normal noise (mean 60, standard deviation 15)
noise = np.random.normal(loc=60, scale=15, size=n_samples)
v2 = pd.Series(2 * v1 + noise, name='v2')
plt.figure()
# both histograms share the same bin edges (-50 to 145 in steps of 5) so their
# bars line up; alpha < 1 keeps the overlapping region visible
plt.hist(v1, alpha=0.7, bins=np.arange(-50,150,5), label='v1');
plt.hist(v2, alpha=0.7, bins=np.arange(-50,150,5), label='v2');
# show which colour belongs to which label
plt.legend();
# plot a kernel density estimation over a stacked histogram
plt.figure()
# density=True normalises the stacked histogram so it is drawn on a density
# scale, like the KDE curve plotted below
plt.hist([v1, v2], histtype='barstacked', density=True);
# pool both samples into a single array for the density estimate
v3 = np.concatenate((v1,v2))
sns.kdeplot(v3);
plt.figure()
# sns.distplot draws the histogram and the KDE curve in a single call
# NOTE(review): distplot is deprecated in newer seaborn releases in favour of
# histplot / displot -- confirm against the installed seaborn version
sns.distplot(v3);
# pass the two series by keyword: seaborn >= 0.12 no longer accepts x and y
# positionally, while the keyword form works on every release
sns.jointplot(x=v1, y=v2, alpha=0.4);
# keep the object returned by jointplot so its axes can be adjusted
grid = sns.jointplot(x=v1, y=v2, alpha=0.4);
# use the same scale on both axes of the central (joint) plot
grid.ax_joint.set_aspect('equal')
# hexagonal binning instead of individual scatter points
sns.jointplot(x=v1, y=v2, kind='hex');
# set the seaborn style for all the following plots
sns.set_style('white')
# x and y are passed by keyword (positional x/y are rejected by seaborn >= 0.12);
# space=0 removes the gap between the joint plot and the marginal plots
sns.jointplot(x=v1, y=v2, kind='kde', space=0);
# the `size` argument was renamed to `height` in seaborn 0.9 and later removed;
# each facet of the grid is 2 inches tall
sns.pairplot(iris, hue='target', diag_kind='kde', height=2);
# two side-by-side panels comparing petal length per target class
plt.figure(figsize=(8,6))
# left panel: one point per observation
plt.subplot(121)
# x and y are passed by keyword: seaborn >= 0.12 rejects them positionally,
# while the keyword form works on every release
sns.swarmplot(x='target', y='petal length (cm)', data=iris);
# right panel: the same data as a violin plot
plt.subplot(122)
sns.violinplot(x='target', y='petal length (cm)', data=iris);
Leave a Comment