Plotting from Pandas and with Seaborn

Original Source: https://www.coursera.org/specializations/data-science-python

Pandas Visualization

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

# Make every new figure 14 x 8 by default.
plt.rc('figure', figsize=(14.0, 8.0))
# Show the names of the pre-defined style sheets.
plt.style.available
['bmh',
 'classic',
 'dark_background',
 'fast',
 'fivethirtyeight',
 'ggplot',
 'grayscale',
 'seaborn-bright',
 'seaborn-colorblind',
 'seaborn-dark-palette',
 'seaborn-dark',
 'seaborn-darkgrid',
 'seaborn-deep',
 'seaborn-muted',
 'seaborn-notebook',
 'seaborn-paper',
 'seaborn-pastel',
 'seaborn-poster',
 'seaborn-talk',
 'seaborn-ticks',
 'seaborn-white',
 'seaborn-whitegrid',
 'seaborn',
 'Solarize_Light2',
 'tableau-colorblind10',
 '_classic_test']
# Use the colour-blind-friendly seaborn style sheet.
# matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# later dropped the old aliases, so fall back to the new name whenever the
# old one is not listed among the available styles.
style_name = 'seaborn-colorblind'
if style_name not in plt.style.available:
    style_name = 'seaborn-v0_8-colorblind'
plt.style.use(style_name)

DataFrame.plot

# Fix the seed so the three random walks below are reproducible.
np.random.seed(123)

# One row per day, 365 rows starting on 2017-01-01.
dates = pd.date_range('1/1/2017', periods=365)

# Draw the walks in column order (A, B, C) so the random stream is consumed
# exactly as before; B and C are shifted up/down by 20.
walk_a = np.random.randn(365).cumsum(0)
walk_b = np.random.randn(365).cumsum(0) + 20
walk_c = np.random.randn(365).cumsum(0) - 20

df = pd.DataFrame({'A': walk_a, 'B': walk_b, 'C': walk_c}, index=dates)
df.head()
A B C
2017-01-01 -1.085631 20.059291 -20.230904
2017-01-02 -0.088285 21.803332 -16.659325
2017-01-03 0.194693 20.835588 -17.055481
2017-01-04 -1.311601 21.255156 -17.093802
2017-01-05 -1.890202 21.462083 -19.518638
# Plot the DataFrame with the default plot kind. The trailing semicolon
# suppresses the unwanted textual output the notebook would otherwise
# print for the plotting call.
df.plot();

png

We can choose the type of plot by passing its name to the ‘kind’ parameter.

# Scatter plot of column 'B' against column 'A', selected via `kind`.
df.plot(x='A', y='B', kind='scatter');

png

You can also choose the plot kind by calling the corresponding DataFrame.plot.<kind> method (for example, DataFrame.plot.scatter) instead of providing the kind keyword argument.

kind :

  • 'line' : line plot (default)
  • 'bar' : vertical bar plot
  • 'barh' : horizontal bar plot
  • 'hist' : histogram
  • 'box' : boxplot
  • 'kde' : Kernel Density Estimation plot
  • 'density' : same as ‘kde’
  • 'area' : area plot
  • 'pie' : pie plot
  • 'scatter' : scatter plot
  • 'hexbin' : hexbin plot
# Scatter column 'C' against column 'A'; column 'B' drives both the marker
# colour (c) and the marker size (s).
df.plot.scatter(x='A', y='C', c='B', s=df['B'], colormap='viridis')
<matplotlib.axes._subplots.AxesSubplot at 0x20d94e15630>

png

# Same scatter plot, but keep the returned Axes so that its aspect ratio
# can be set to 'equal' afterwards.
ax = df.plot.scatter(x='A', y='C', c='B', s=df['B'], colormap='viridis')
ax.set_aspect('equal')

png

# Box plot of the DataFrame, using the DataFrame.plot.<kind> accessor form.
df.plot.box();

png

# Histogram of the DataFrame; alpha=0.5 draws the bars half-transparent.
df.plot.hist(alpha=0.5);

png

Kernel density estimation plots are useful for deriving a smooth continuous function from a given sample.

# Kernel Density Estimation plot of the DataFrame.
df.plot.kde();

png

pandas.plotting

Iris flower data set

import numpy as np
import pandas as pd
from sklearn.datasets import load_iris

# Keep the raw scikit-learn dataset under its own name so that `iris`
# only ever refers to the DataFrame built below.
iris_bunch = load_iris()

# Put the class labels next to the measurements as one extra column:
# column_stack turns the 1-D 'target' array into a column and appends it
# to the 2-D 'data' array.
iris_values = np.column_stack((iris_bunch['data'], iris_bunch['target']))

# Column names are the feature names plus one label for the class column.
# That label is free to choose; the original dataset would probably call
# it 'Species'.
iris_columns = iris_bunch['feature_names'] + ['target']

iris = pd.DataFrame(data=iris_values, columns=iris_columns)
iris.head()
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) target
0 5.1 3.5 1.4 0.2 0.0
1 4.9 3.0 1.4 0.2 0.0
2 4.7 3.2 1.3 0.2 0.0
3 4.6 3.1 1.5 0.2 0.0
4 5.0 3.6 1.4 0.2 0.0
# Scatter-plot matrix over the columns of the iris DataFrame
# (the 'target' column is part of the frame and is therefore included).
pd.plotting.scatter_matrix(iris);

png

# Start a fresh figure, then draw a parallel-coordinates plot with the
# 'target' column used as the class column.
plt.figure()
pd.plotting.parallel_coordinates(iris, class_column='target');

png

Seaborn

import seaborn as sns

# Switch to seaborn's default look and make the samples reproducible.
sns.set()
np.random.seed(1234)

# v1 is drawn from normal(0, 10); v2 is 2*v1 plus noise from normal(60, 15).
v1 = pd.Series(np.random.normal(0, 10, 1000), name='v1')
v2 = pd.Series(2*v1 + np.random.normal(60, 15, 1000), name='v2')

# Overlay both histograms on the same bins, labelled by series name.
plt.figure()
shared_bins = np.arange(-50, 150, 5)
for sample in (v1, v2):
    plt.hist(sample, alpha=0.7, bins=shared_bins, label=sample.name)
plt.legend();

png

# Draw a kernel density estimate on top of a stacked, density-normalised
# histogram; v3 pools both samples for the estimate.
plt.figure()
v3 = np.concatenate((v1, v2))
plt.hist([v1, v2], histtype='barstacked', density=True)
sns.kdeplot(v3);

png

plt.figure()
# sns.distplot does this in one line
# NOTE(review): distplot is deprecated from seaborn 0.11 on (histplot /
# displot replace it) - confirm against the installed seaborn version.
sns.distplot(v3);

png

# Joint plot of v1 against v2 with semi-transparent markers.
# x and y are passed by keyword: seaborn 0.12+ no longer accepts them
# positionally, while the keyword form works on older releases too.
sns.jointplot(x=v1, y=v2, alpha=0.4);

png

# Keep the returned grid so the aspect ratio of the central joint axes
# can be set to 'equal'. x and y are passed by keyword because
# seaborn 0.12+ rejects them as positional arguments.
grid = sns.jointplot(x=v1, y=v2, alpha=0.4)
grid.ax_joint.set_aspect('equal')

png

# Hexbin variant of the joint plot. x and y are passed by keyword because
# seaborn 0.12+ rejects them as positional arguments.
sns.jointplot(x=v1, y=v2, kind='hex');

png

# set the seaborn style for all the following plots
sns.set_style('white')

# KDE variant of the joint plot; space=0 is forwarded unchanged.
# x and y are passed by keyword because seaborn 0.12+ rejects them as
# positional arguments.
sns.jointplot(x=v1, y=v2, kind='kde', space=0);

png

# Pair plot of the iris columns, coloured by 'target', with KDEs on the
# diagonal. `height` replaces the deprecated `size` argument (renamed in
# seaborn 0.9), which only survives as a warning-emitting alias.
sns.pairplot(iris, hue='target', diag_kind='kde', height=2);

png

# Side-by-side comparison of petal length per target class:
# swarm plot in the left subplot, violin plot in the right one.
# x and y are passed by keyword: on seaborn 0.12+ the first positional
# argument is `data`, so the old positional form clashes with data=iris.
plt.figure(figsize=(8, 6))
plt.subplot(121)
sns.swarmplot(x='target', y='petal length (cm)', data=iris)
plt.subplot(122)
sns.violinplot(x='target', y='petal length (cm)', data=iris);

png

Leave a Comment