Week 11: Data Visualisation part 1¶

uploaded as given¶

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from matplotlib import style
plt.style.use(['dark_background'])

import seaborn as sns
sns.set(color_codes=True)

/usr/local/lib/python3.6/dist-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.
  import pandas.util.testing as tm

style.use('seaborn-ticks')
sns.set(color_codes=True)

Tabulation¶

# Endpoint that serves the per-state daily figures used in this section.
url = 'https://api.covid19india.org/states_daily.json'

import urllib.request

# Save the JSON payload to a local file so the cells below can re-read it.
urllib.request.urlretrieve(url, 'data.json');

# First attempt: let pandas parse the file directly.
covid_data = pd.read_json('data.json')

covid_data

import json

# Second attempt: load the raw JSON ourselves and keep only the records
# stored under the top-level 'states_daily' key.
with open('data.json') as f:
    data = json.load(f)

data = data['states_daily']

# Flatten the records so that every key becomes its own column.
covid_data = pd.json_normalize(data)

covid_data

# NOTE: this binds a second name to the same DataFrame (no copy is made), so
# the date conversion below is visible through covid_data as well.
df = covid_data

# Parse the date strings into pandas datetimes.
df.date = pd.to_datetime(df.date)

# Keep only the 'Confirmed' rows, then discard the now-constant status column.
# The drop is chained and returns a new frame: dropping in place on the
# filtered result is what makes pandas emit a SettingWithCopyWarning.
df = df[df.status == 'Confirmed'].drop('status', axis=1)

/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py:3997: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,

df.set_index('date', inplace=True)

df

df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 108 entries, 2020-03-14 to 2020-06-29
Data columns (total 39 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   an      108 non-null    object
 1   ap      108 non-null    object
 2   ar      108 non-null    object
 3   as      108 non-null    object
 4   br      108 non-null    object
 5   ch      108 non-null    object
 6   ct      108 non-null    object
 7   dd      108 non-null    object
 8   dl      108 non-null    object
 9   dn      108 non-null    object
 10  ga      108 non-null    object
 11  gj      108 non-null    object
 12  hp      108 non-null    object
 13  hr      108 non-null    object
 14  jh      108 non-null    object
 15  jk      108 non-null    object
 16  ka      108 non-null    object
 17  kl      108 non-null    object
 18  la      108 non-null    object
 19  ld      108 non-null    object
 20  mh      108 non-null    object
 21  ml      108 non-null    object
 22  mn      108 non-null    object
 23  mp      108 non-null    object
 24  mz      108 non-null    object
 25  nl      108 non-null    object
 26  or      108 non-null    object
 27  pb      108 non-null    object
 28  py      108 non-null    object
 29  rj      108 non-null    object
 30  sk      108 non-null    object
 31  tg      108 non-null    object
 32  tn      108 non-null    object
 33  tr      108 non-null    object
 34  tt      108 non-null    object
 35  un      108 non-null    object
 36  up      108 non-null    object
 37  ut      108 non-null    object
 38  wb      108 non-null    object
dtypes: object(39)
memory usage: 33.8+ KB

df.tn

date
2020-03-14       1
2020-03-15       0
2020-03-16       0
2020-03-17       0
2020-03-18       1
              ... 
2020-06-25    3509
2020-06-26    3645
2020-06-27    3713
2020-06-28    3940
2020-06-29    3949
Name: tn, Length: 108, dtype: object

pd.to_numeric(df.tn)

date
2020-03-14       1
2020-03-15       0
2020-03-16       0
2020-03-17       0
2020-03-18       1
              ... 
2020-06-25    3509
2020-06-26    3645
2020-06-27    3713
2020-06-28    3940
2020-06-29    3949
Name: tn, Length: 108, dtype: int64

df = df.apply(pd.to_numeric)

df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 108 entries, 2020-03-14 to 2020-06-29
Data columns (total 39 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   an      108 non-null    int64
 1   ap      108 non-null    int64
 2   ar      108 non-null    int64
 3   as      108 non-null    int64
 4   br      108 non-null    int64
 5   ch      108 non-null    int64
 6   ct      108 non-null    int64
 7   dd      108 non-null    int64
 8   dl      108 non-null    int64
 9   dn      108 non-null    int64
 10  ga      108 non-null    int64
 11  gj      108 non-null    int64
 12  hp      108 non-null    int64
 13  hr      108 non-null    int64
 14  jh      108 non-null    int64
 15  jk      108 non-null    int64
 16  ka      108 non-null    int64
 17  kl      108 non-null    int64
 18  la      108 non-null    int64
 19  ld      108 non-null    int64
 20  mh      108 non-null    int64
 21  ml      108 non-null    int64
 22  mn      108 non-null    int64
 23  mp      108 non-null    int64
 24  mz      108 non-null    int64
 25  nl      108 non-null    int64
 26  or      108 non-null    int64
 27  pb      108 non-null    int64
 28  py      108 non-null    int64
 29  rj      108 non-null    int64
 30  sk      108 non-null    int64
 31  tg      108 non-null    int64
 32  tn      108 non-null    int64
 33  tr      108 non-null    int64
 34  tt      108 non-null    int64
 35  un      108 non-null    int64
 36  up      108 non-null    int64
 37  ut      108 non-null    int64
 38  wb      108 non-null    int64
dtypes: int64(39)
memory usage: 33.8 KB

df.tail(7)

Styling tabulation¶

df = df.tail(7)

df.style

def colour_red_negative(x):
    """Return the CSS text-colour rule for a single cell value.

    Values below zero are coloured red; every other value is coloured white.
    """
    if x < 0:
        return 'color: red'
    return 'color: white'

df.style.applymap(colour_red_negative)

df.drop('un', axis=1, inplace=True)

df.style.applymap(colour_red_negative)

df.style.highlight_max(color='red')

df.drop(['dd', 'ld'], axis=1,inplace=True)

df.style.highlight_max(color='red').highlight_min(color='green')

df.drop('tt', axis=1, inplace=True)

def bold_max_value(x):
    """Return one CSS rule per element of ``x``, emboldening the maximum.

    Elements equal to ``x.max()`` map to ``'font-weight: bold'`` and all
    other elements map to the empty string; tied maxima are all emboldened.
    """
    peak = x.max()
    return ['font-weight: bold' if value == peak else '' for value in x]

df.style.apply(bold_max_value)

df.style.apply(bold_max_value).highlight_min(color='green')

df.style.apply(bold_max_value).highlight_min(color='green', axis=1)

df.style.apply(bold_max_value).highlight_max(color='red', axis=1)

df.style.background_gradient(cmap='Reds')

df.style.background_gradient(cmap='Reds', axis=1)

df.style.background_gradient(cmap='Reds', subset=['mh', 'tn', 'dl'])

df.style.bar()

df.style.bar(subset=['mh', 'tn', 'dl'])

df[['mh', 'tn', 'dl']].style.bar()

df[['mh', 'tn', 'dl']].style.bar(subset=['mh'], color='red').bar(subset=['tn'], color='orange').bar(subset=['dl'], color='yellow')

Distribution of data¶

Distribution of a single continuous variable¶

Histogram¶

x = np.random.normal(size=1000)

sns.distplot(x);

sns.distplot(x, kde=False);

sns.distplot(x, kde=False, rug=True);

sns.distplot(x, kde=False, rug=True, bins=50);

sns.kdeplot(x);

sns.kdeplot(x, shade=True);

y = np.random.uniform(size=1000)

sns.kdeplot(x,shade=True)
sns.kdeplot(y,shade=True);

d = sns.load_dataset('diamonds')

d

d.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 53940 entries, 0 to 53939
Data columns (total 10 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   carat    53940 non-null  float64
 1   cut      53940 non-null  object 
 2   color    53940 non-null  object 
 3   clarity  53940 non-null  object 
 4   depth    53940 non-null  float64
 5   table    53940 non-null  float64
 6   price    53940 non-null  int64  
 7   x        53940 non-null  float64
 8   y        53940 non-null  float64
 9   z        53940 non-null  float64
dtypes: float64(6), int64(1), object(3)
memory usage: 4.1+ MB

sns.distplot(d.carat);

sns.distplot(d.price);

sns.distplot(d.x);

sns.distplot(d.x, rug=True);

sns.distplot(d.sample(1000).x, rug=True, bins=50);

sns.kdeplot(d.x, shade=True)
sns.kdeplot(d.y, shade=True)
sns.kdeplot(d.z, shade=True);

Box plot¶

x = np.random.normal(size=1000)

sns.boxplot(x)

<matplotlib.axes._subplots.AxesSubplot at 0x7f0f1378fc18>

sns.kdeplot(x);

x = np.random.uniform(size=1000)

sns.boxplot(x);

sns.boxplot(x, whis=0.2)

<matplotlib.axes._subplots.AxesSubplot at 0x7f0f14d0c6a0>

x = np.random.normal(size=1000)

sns.boxplot(x, whis=0.5);

sns.boxplot(x, whis=0.5, fliersize=1);

sns.boxplot(x, whis=0.5, fliersize=1, orient='v');

sns.boxplot(d.price);

sns.kdeplot(d.price);

sns.boxplot(d.x);

sns.distplot(d.x);

sns.distplot(d.carat)

<matplotlib.axes._subplots.AxesSubplot at 0x7f0f14ca10b8>

sns.boxplot(d.carat)

<matplotlib.axes._subplots.AxesSubplot at 0x7f0f14f55908>

Boxen plots¶

sns.boxplot(d.sample(5000).carat);

sns.boxenplot(d.sample(5000).carat);

# The penguins dataset is first assigned to `p` much later in this notebook,
# so load it here; otherwise a top-to-bottom run fails with a NameError.
p = sns.load_dataset('penguins')
sns.boxenplot(x='island', y='body_mass_g', data=p);

Distribution of a categorical variable¶

Bar plots¶

c = d.groupby('cut')['cut'].count()

sns.barplot(x=c.index, y=c.values)

<matplotlib.axes._subplots.AxesSubplot at 0x7f0f14c31c50>

c = d.groupby('clarity')['clarity'].count()

sns.barplot(x=c.index, y=c.values);

c = d.groupby('color')['color'].count()

sns.barplot(x=c.index, y=c.values);

Joint distribution of two variables¶

Jointplot¶

x = np.random.normal(size=1000)
y = np.random.normal(size=1000)

df = pd.DataFrame({'x': x, 'y': y})

# x and y are passed by keyword throughout this section: seaborn 0.11
# deprecates passing them positionally and later releases accept only `data`
# as a positional argument.
sns.jointplot(x='x', y='y', data=df);

sns.jointplot(x='x', y='y', data=df, kind='kde');

# Rebuild the sample so that y is a noisy linear function of x.
x = np.random.normal(size=1000)
y = 3 * x + np.random.normal(size=1000)/5

df = pd.DataFrame({'x': x, 'y': y})

sns.jointplot(x='x', y='y', data=df, kind='kde');

sns.jointplot(x='carat', y='price', data=d, kind='kde');

sns.jointplot(x='carat', y='price', data=d.sample(500));

sns.jointplot(x='x', y='price', data=d.sample(500));

sns.jointplot(x='x', y='price', data=d.sample(500), kind='kde');

Swarm plot¶

sns.swarmplot(d.sample(1000).carat);

sns.swarmplot(d.sample(100).price);

d.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 53940 entries, 0 to 53939
Data columns (total 10 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   carat    53940 non-null  float64
 1   cut      53940 non-null  object 
 2   color    53940 non-null  object 
 3   clarity  53940 non-null  object 
 4   depth    53940 non-null  float64
 5   table    53940 non-null  float64
 6   price    53940 non-null  int64  
 7   x        53940 non-null  float64
 8   y        53940 non-null  float64
 9   z        53940 non-null  float64
dtypes: float64(6), int64(1), object(3)
memory usage: 4.1+ MB

sns.swarmplot(x='cut', y='price', data=d.sample(1000));

sns.swarmplot(x='color', y='price', data=d.sample(1000));

sns.swarmplot(x='clarity', y='price', data=d.sample(1000));

sns.swarmplot(x='clarity', y='price', data=d.sample(1000));

p = sns.load_dataset('penguins')

p

sns.swarmplot(x='species', y='body_mass_g', data=p);

sns.swarmplot(x='island', y='body_mass_g', data=p);

sns.swarmplot(x='body_mass_g', data=p);

Violin plot¶

sns.violinplot(x='body_mass_g', data=p);

sns.boxplot(x='body_mass_g', data=p);

sns.kdeplot(p.body_mass_g, shade=True);

fig, axs = plt.subplots(nrows=4)
sns.swarmplot(x='body_mass_g', data=p, ax=axs[0]);
sns.violinplot(x='body_mass_g', data=p, ax=axs[1]);
sns.boxplot(x='body_mass_g', data=p, ax=axs[2]);
sns.kdeplot(p.body_mass_g, shade=True, ax=axs[3]);

fig, axs = plt.subplots(nrows=4)
fig.set_size_inches(5, 10);
sns.swarmplot(x='body_mass_g', data=p, ax=axs[0]);
sns.violinplot(x='body_mass_g', data=p, ax=axs[1]);
sns.boxplot(x='body_mass_g', data=p, ax=axs[2]);
sns.kdeplot(p.body_mass_g, shade=True, ax=axs[3]);

fig, axs = plt.subplots(nrows=4)
fig.set_size_inches(5, 10);
p1 = sns.swarmplot(x='body_mass_g', data=p, ax=axs[0]);
p1.set(xlim=(2000, 7500));
p2 = sns.violinplot(x='body_mass_g', data=p, ax=axs[1]);
p2.set(xlim=(2000, 7500));
p3 = sns.boxplot(x='body_mass_g', data=p, ax=axs[2]);
p3.set(xlim=(2000, 7500));
p4 = sns.kdeplot(p.body_mass_g, shade=True, ax=axs[3]);
p4.set(xlim=(2000, 7500));

sns.violinplot(x='body_mass_g', data=p);

sns.violinplot(x='body_mass_g', data=p, orient='v');

sns.violinplot(x='species', y='body_mass_g', data=p);

p.head()

sns.violinplot(x='species', y='flipper_length_mm', data=p);

sns.violinplot(x='island', y='flipper_length_mm', data=p);

sns.violinplot(x='sex', y='flipper_length_mm', data=p);

sns.violinplot(x='island', y='flipper_length_mm', data=p);

sns.swarmplot(x='island', y='flipper_length_mm', data=p);

sns.swarmplot(x='island', y='flipper_length_mm', hue='sex', data=p);

sns.swarmplot(x='island', y='flipper_length_mm', hue='species', data=p);

sns.swarmplot(x='cut', y='price', data=d.sample(1000));

sns.swarmplot(x='cut', y='price', hue='color', data=d.sample(1000));

sns.violinplot(x='island', y='flipper_length_mm', data=p[p.sex=='MALE']);

sns.violinplot(x='island', y='flipper_length_mm', data=p[p.sex=='FEMALE']);

sns.violinplot(x='island', y='flipper_length_mm', hue='sex', split=True, data=p);

sns.violinplot(x='island', y='flipper_length_mm', 
               hue='sex', split=True, inner='quartile', data=p);

sns.violinplot(x='island', y='flipper_length_mm', 
               hue='species', split=True, inner='quartile', data=p);

sns.violinplot(x='island', y='flipper_length_mm', 
               hue='species', data=p);

p['binary_species'] = p.species.apply(lambda x: 0 if x == 'Gentoo' else 1)

p

sns.violinplot(x='island', y='flipper_length_mm', 
               hue='binary_species', split=True, inner='quartile', data=p);

p['binary_species'] = p.species.apply(lambda x: 'Gentoo' if x == 'Gentoo' else 'Adelie | Chinstrap')

sns.violinplot(x='island', y='flipper_length_mm', 
               hue='binary_species', split=True, inner='quartile', data=p);

Faceted plotting¶

sns.kdeplot(p.flipper_length_mm, shade=True);

sns.kdeplot(p[p.species == 'Gentoo'].flipper_length_mm, shade=True);

sns.kdeplot(p[p.species == 'Gentoo'].flipper_length_mm, shade=True);
sns.kdeplot(p[p.species == 'Adelie'].flipper_length_mm, shade=True);
sns.kdeplot(p[p.species == 'Chinstrap'].flipper_length_mm, shade=True);

sns.kdeplot(p[p.species == 'Gentoo'].flipper_length_mm, shade=True);
sns.kdeplot(p[p.species == 'Adelie'].flipper_length_mm, shade=True);
sns.kdeplot(p[p.species == 'Chinstrap'].flipper_length_mm, shade=True);
plt.legend(title='Species', labels=['Gentoo', 'Adelie', 'Chinstrap']);

sns.boxplot(p[p.species == 'Gentoo'].flipper_length_mm);
sns.boxplot(p[p.species == 'Adelie'].flipper_length_mm);
sns.boxplot(p[p.species == 'Chinstrap'].flipper_length_mm);
plt.legend(title='Species', labels=['Gentoo', 'Adelie', 'Chinstrap']);

fig, axs = plt.subplots(nrows=3);
sns.kdeplot(p[p.species == 'Gentoo'].flipper_length_mm, shade=True, ax=axs[0]);
sns.kdeplot(p[p.species == 'Adelie'].flipper_length_mm, shade=True, ax=axs[1]);
sns.kdeplot(p[p.species == 'Chinstrap'].flipper_length_mm, shade=True, ax=axs[2]);
# plt.legend(title='Species', labels=['Gentoo', 'Adelie', 'Chinstrap']);

fig, axs = plt.subplots(nrows=3);
sns.kdeplot(p[p.species == 'Gentoo'].flipper_length_mm, shade=True, ax=axs[0]);
sns.kdeplot(p[p.species == 'Adelie'].flipper_length_mm, shade=True, ax=axs[1]);
sns.kdeplot(p[p.species == 'Chinstrap'].flipper_length_mm, shade=True, ax=axs[2]);
plt.tight_layout()
# plt.legend(title='Species', labels=['Gentoo', 'Adelie', 'Chinstrap']);

# Draw one shaded KDE of flipper length per distinct value of `column_name`,
# each on its own vertically stacked subplot titled with that value.
column_name = 'species'
categories = p[column_name].unique()
fig, axs = plt.subplots(nrows=len(categories));
for panel, category in zip(axs, categories):
    subset = p[p[column_name] == category]
    kde_axes = sns.kdeplot(subset.flipper_length_mm, shade=True, ax=panel);
    kde_axes.set_title(category);
plt.tight_layout()

g = sns.FacetGrid(p, row='species');
g.map(sns.kdeplot, 'flipper_length_mm', shade=True);

g = sns.FacetGrid(p, col='species');
g.map(sns.kdeplot, 'flipper_length_mm', shade=True);

g = sns.FacetGrid(p, col='island');
g.map(sns.kdeplot, 'flipper_length_mm', shade=True);

g = sns.FacetGrid(p, col='island');
g.map(sns.distplot, 'flipper_length_mm');

g = sns.FacetGrid(p, col='island', row='sex');
g.map(sns.distplot, 'flipper_length_mm');

g = sns.FacetGrid(p, col='island', row='sex');
g.map(sns.kdeplot, 'flipper_length_mm');

g = sns.FacetGrid(p, col='island', row='sex');
g.map(sns.violinplot, 'flipper_length_mm');

/usr/local/lib/python3.6/dist-packages/seaborn/axisgrid.py:723: UserWarning: Using the violinplot function without specifying `order` is likely to produce an incorrect plot.
  warnings.warn(warning)

Pair plot¶

# Pass the two vectors by keyword: positional x/y is deprecated in
# seaborn 0.11 and rejected by later releases.
sns.jointplot(x=p.body_mass_g, y=p.flipper_length_mm);

sns.jointplot(x=p.body_mass_g, y=p.culmen_depth_mm);

sns.pairplot(p);

sns.pairplot(p, hue='sex');

sns.pairplot(p, hue='species');

sns.pairplot(d.sample(1000));

sns.pairplot(d.sample(1000), hue='cut');

sns.pairplot(d.sample(1000), hue='cut', corner=True);

	states_daily
0	{'an': '0', 'ap': '1', 'ar': '0', 'as': '0', '...
1	{'an': '0', 'ap': '0', 'ar': '0', 'as': '0', '...
2	{'an': '0', 'ap': '0', 'ar': '0', 'as': '0', '...
3	{'an': '0', 'ap': '0', 'ar': '0', 'as': '0', '...
4	{'an': '0', 'ap': '0', 'ar': '0', 'as': '0', '...
...	...
319	{'an': '2', 'ap': '428', 'ar': '6', 'as': '274...
320	{'an': '0', 'ap': '12', 'ar': '0', 'as': '1', ...
321	{'an': '7', 'ap': '793', 'ar': '5', 'as': '302...
322	{'an': '0', 'ap': '324', 'ar': '1', 'as': '245...
323	{'an': '0', 'ap': '11', 'ar': '0', 'as': '0', ...

	an	ap	ar	as	br	ch	ct	dd	dl	dn	ga	gj	hp	hr	jh	jk	ka	kl	la	ld	mh	ml	mn	mp	mz	nl	or	pb	py	rj	sk	tg	tn	tr	tt	un	up	ut	wb
date
2020-06-23	2	462	10	203	157	7	83	0	3947	9	45	549	48	495	53	148	322	141	85	0	3214	1	23	183	0	50	167	162	19	395	1	879	2516	23	15656	183	571	133	370
2020-06-24	6	497	2	226	223	2	34	0	3788	13	42	572	31	490	26	186	397	152	0	0	3889	0	49	187	3	17	282	230	59	382	4	891	2865	0	16868	126	664	88	445
2020-06-25	2	553	12	364	215	3	37	0	3390	20	44	577	33	453	44	127	442	123	9	0	4842	0	86	147	0	8	210	142	41	287	2	920	3509	32	18205	352	636	68	475
2020-06-26	14	605	2	273	190	2	89	0	3460	15	44	580	25	421	31	213	445	150	5	0	5024	2	19	203	2	16	218	188	32	364	2	985	3645	35	18255	-370	750	34	542
2020-06-27	0	796	3	246	302	3	65	0	2948	15	89	615	30	543	45	204	918	195	14	0	6368	0	17	167	3	16	170	99	85	284	0	1087	3713	9	20142	-100	606	66	521
2020-06-28	11	813	5	327	244	3	84	0	2889	4	70	624	22	402	25	127	1267	118	3	0	5493	2	93	221	1	28	264	160	29	327	1	983	3940	12	19610	-184	598	32	572
2020-06-29	7	793	5	302	394	3	101	0	2084	15	53	626	26	381	62	144	1105	122	1	0	5257	1	42	184	0	36	245	202	42	389	0	975	3949	34	18339	-554	681	8	624

	an	ap	ar	as	br	ch	ct	dd	dl	dn	ga	gj	hp	hr	jh	jk	ka	kl	la	ld	mh	ml	mn	mp	mz	nl	or	pb	py	rj	sk	tg	tn	tr	tt	un	up	ut	wb
date
2020-06-23 00:00:00	2	462	10	203	157	7	83	0	3947	9	45	549	48	495	53	148	322	141	85	0	3214	1	23	183	0	50	167	162	19	395	1	879	2516	23	15656	183	571	133	370
2020-06-24 00:00:00	6	497	2	226	223	2	34	0	3788	13	42	572	31	490	26	186	397	152	0	0	3889	0	49	187	3	17	282	230	59	382	4	891	2865	0	16868	126	664	88	445
2020-06-25 00:00:00	2	553	12	364	215	3	37	0	3390	20	44	577	33	453	44	127	442	123	9	0	4842	0	86	147	0	8	210	142	41	287	2	920	3509	32	18205	352	636	68	475
2020-06-26 00:00:00	14	605	2	273	190	2	89	0	3460	15	44	580	25	421	31	213	445	150	5	0	5024	2	19	203	2	16	218	188	32	364	2	985	3645	35	18255	-370	750	34	542
2020-06-27 00:00:00	0	796	3	246	302	3	65	0	2948	15	89	615	30	543	45	204	918	195	14	0	6368	0	17	167	3	16	170	99	85	284	0	1087	3713	9	20142	-100	606	66	521
2020-06-28 00:00:00	11	813	5	327	244	3	84	0	2889	4	70	624	22	402	25	127	1267	118	3	0	5493	2	93	221	1	28	264	160	29	327	1	983	3940	12	19610	-184	598	32	572
2020-06-29 00:00:00	7	793	5	302	394	3	101	0	2084	15	53	626	26	381	62	144	1105	122	1	0	5257	1	42	184	0	36	245	202	42	389	0	975	3949	34	18339	-554	681	8	624

	an	ap	ar	as	br	ch	ct	dd	dl	dn	ga	gj	hp	hr	jh	jk	ka	kl	la	ld	mh	ml	mn	mp	mz	nl	or	pb	py	rj	sk	tg	tn	tr	tt	un	up	ut	wb
date
2020-06-23 00:00:00	2	462	10	203	157	7	83	0	3947	9	45	549	48	495	53	148	322	141	85	0	3214	1	23	183	0	50	167	162	19	395	1	879	2516	23	15656	183	571	133	370
2020-06-24 00:00:00	6	497	2	226	223	2	34	0	3788	13	42	572	31	490	26	186	397	152	0	0	3889	0	49	187	3	17	282	230	59	382	4	891	2865	0	16868	126	664	88	445
2020-06-25 00:00:00	2	553	12	364	215	3	37	0	3390	20	44	577	33	453	44	127	442	123	9	0	4842	0	86	147	0	8	210	142	41	287	2	920	3509	32	18205	352	636	68	475
2020-06-26 00:00:00	14	605	2	273	190	2	89	0	3460	15	44	580	25	421	31	213	445	150	5	0	5024	2	19	203	2	16	218	188	32	364	2	985	3645	35	18255	-370	750	34	542
2020-06-27 00:00:00	0	796	3	246	302	3	65	0	2948	15	89	615	30	543	45	204	918	195	14	0	6368	0	17	167	3	16	170	99	85	284	0	1087	3713	9	20142	-100	606	66	521
2020-06-28 00:00:00	11	813	5	327	244	3	84	0	2889	4	70	624	22	402	25	127	1267	118	3	0	5493	2	93	221	1	28	264	160	29	327	1	983	3940	12	19610	-184	598	32	572
2020-06-29 00:00:00	7	793	5	302	394	3	101	0	2084	15	53	626	26	381	62	144	1105	122	1	0	5257	1	42	184	0	36	245	202	42	389	0	975	3949	34	18339	-554	681	8	624

	an	ap	ar	as	br	ch	ct	dd	dl	dn	ga	gj	hp	hr	jh	jk	ka	kl	la	ld	mh	ml	mn	mp	mz	nl	or	pb	py	rj	sk	tg	tn	tr	tt	up	ut	wb
date
2020-06-23 00:00:00	2	462	10	203	157	7	83	0	3947	9	45	549	48	495	53	148	322	141	85	0	3214	1	23	183	0	50	167	162	19	395	1	879	2516	23	15656	571	133	370
2020-06-24 00:00:00	6	497	2	226	223	2	34	0	3788	13	42	572	31	490	26	186	397	152	0	0	3889	0	49	187	3	17	282	230	59	382	4	891	2865	0	16868	664	88	445
2020-06-25 00:00:00	2	553	12	364	215	3	37	0	3390	20	44	577	33	453	44	127	442	123	9	0	4842	0	86	147	0	8	210	142	41	287	2	920	3509	32	18205	636	68	475
2020-06-26 00:00:00	14	605	2	273	190	2	89	0	3460	15	44	580	25	421	31	213	445	150	5	0	5024	2	19	203	2	16	218	188	32	364	2	985	3645	35	18255	750	34	542
2020-06-27 00:00:00	0	796	3	246	302	3	65	0	2948	15	89	615	30	543	45	204	918	195	14	0	6368	0	17	167	3	16	170	99	85	284	0	1087	3713	9	20142	606	66	521
2020-06-28 00:00:00	11	813	5	327	244	3	84	0	2889	4	70	624	22	402	25	127	1267	118	3	0	5493	2	93	221	1	28	264	160	29	327	1	983	3940	12	19610	598	32	572
2020-06-29 00:00:00	7	793	5	302	394	3	101	0	2084	15	53	626	26	381	62	144	1105	122	1	0	5257	1	42	184	0	36	245	202	42	389	0	975	3949	34	18339	681	8	624

Consulting - Quality

Data Visualisations

Week 11 - Class Demo

Week 11: Data Visualisation part 1¶

uploaded as given¶

Tabulation¶

Styling tabulation¶

Distribution of data¶

Distribution of a single continuous variable¶

Histogram¶

Box plot¶

Boxen plots¶

Distribution of a categorical variable¶

Bar plots¶

Joint distribution of two variables¶

Jointplot¶

Swarm plot¶

Violin plot¶

Faceted plotting¶

Pair plot¶

	an	ap	ar	as	br	ch	ct	date	dd	dl	dn	ga	gj	hp	hr	jh	jk	ka	kl	la	ld	mh	ml	mn	mp	mz	nl	or	pb	py	rj	sk	status	tg	tn	tr	tt	un	up	ut	wb
0	0	1	0	0	0	0	0	14-Mar-20	0	7	0	0	0	0	14	0	2	6	19	0	0	14	0	0	0	0	0	0	1	0	3	0	Confirmed	1	1	0	81	0	12	0	0
1	0	0	0	0	0	0	0	14-Mar-20	0	1	0	0	0	0	0	0	0	0	3	0	0	0	0	0	0	0	0	0	0	0	1	0	Recovered	0	0	0	9	0	4	0	0
2	0	0	0	0	0	0	0	14-Mar-20	0	1	0	0	0	0	0	0	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	Deceased	0	0	0	2	0	0	0	0
3	0	0	0	0	0	0	0	15-Mar-20	0	0	0	0	0	0	0	0	0	0	5	0	0	18	0	0	0	0	0	0	0	0	1	0	Confirmed	2	0	0	27	0	1	0	0
4	0	0	0	0	0	0	0	15-Mar-20	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	2	0	Recovered	1	0	0	4	0	0	0	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
319	2	428	6	274	226	1	125	28-Jun-20	0	3306	13	58	391	17	445	69	91	220	42	32	0	2330	0	23	113	0	0	137	206	31	244	0	Recovered	244	1443	8	11628	0	593	106	404
320	0	12	0	1	4	0	0	28-Jun-20	0	65	0	1	19	0	5	0	1	16	0	0	0	156	0	0	7	0	0	3	5	1	8	0	Deceased	4	54	0	384	0	11	1	10
321	7	793	5	302	394	3	101	29-Jun-20	0	2084	15	53	626	26	381	62	144	1105	122	1	0	5257	1	42	184	0	36	245	202	42	389	0	Confirmed	975	3949	34	18339	-554	681	8	624
322	0	324	1	245	218	13	88	29-Jun-20	0	3628	6	46	440	38	585	56	269	176	79	30	0	2385	0	39	115	6	4	203	238	10	310	0	Recovered	410	2212	6	13497	0	698	93	526
323	0	11	0	0	1	0	0	29-Jun-20	0	57	0	0	19	0	9	3	1	19	1	0	0	181	0	0	7	0	0	2	5	0	6	0	Deceased	6	62	0	417	0	12	1	14

	an	ap	ar	as	br	ch	ct	dd	dl	dn	ga	gj	hp	hr	jh	jk	ka	kl	la	ld	mh	ml	mn	mp	mz	nl	or	pb	py	rj	sk	tg	tn	tr	tt	un	up	ut	wb
date
2020-03-14	0	1	0	0	0	0	0	0	7	0	0	0	0	14	0	2	6	19	0	0	14	0	0	0	0	0	0	1	0	3	0	1	1	0	81	0	12	0	0
2020-03-15	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	5	0	0	18	0	0	0	0	0	0	0	0	1	0	2	0	0	27	0	1	0	0
2020-03-16	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	1	1	3	0	0	6	0	0	0	0	0	1	0	1	0	0	1	0	0	15	0	0	1	0
2020-03-17	0	0	0	0	0	0	0	0	1	0	0	0	0	1	0	0	2	0	0	0	3	0	0	0	0	0	0	0	0	0	0	1	0	0	11	0	2	0	1
2020-03-18	0	0	0	0	0	0	0	0	2	0	0	0	0	1	0	1	5	0	8	0	3	0	0	0	0	0	1	1	0	3	0	8	1	0	37	0	2	1	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
2020-06-25	2	553	12	364	215	3	37	0	3390	20	44	577	33	453	44	127	442	123	9	0	4842	0	86	147	0	8	210	142	41	287	2	920	3509	32	18205	352	636	68	475
2020-06-26	14	605	2	273	190	2	89	0	3460	15	44	580	25	421	31	213	445	150	5	0	5024	2	19	203	2	16	218	188	32	364	2	985	3645	35	18255	-370	750	34	542
2020-06-27	0	796	3	246	302	3	65	0	2948	15	89	615	30	543	45	204	918	195	14	0	6368	0	17	167	3	16	170	99	85	284	0	1087	3713	9	20142	-100	606	66	521
2020-06-28	11	813	5	327	244	3	84	0	2889	4	70	624	22	402	25	127	1267	118	3	0	5493	2	93	221	1	28	264	160	29	327	1	983	3940	12	19610	-184	598	32	572
2020-06-29	7	793	5	302	394	3	101	0	2084	15	53	626	26	381	62	144	1105	122	1	0	5257	1	42	184	0	36	245	202	42	389	0	975	3949	34	18339	-554	681	8	624

	carat	cut	color	clarity	depth	table	price	x	y	z
0	0.23	Ideal	E	SI2	61.5	55.0	326	3.95	3.98	2.43
1	0.21	Premium	E	SI1	59.8	61.0	326	3.89	3.84	2.31
2	0.23	Good	E	VS1	56.9	65.0	327	4.05	4.07	2.31
3	0.29	Premium	I	VS2	62.4	58.0	334	4.20	4.23	2.63
4	0.31	Good	J	SI2	63.3	58.0	335	4.34	4.35	2.75
...	...	...	...	...	...	...	...	...	...	...
53935	0.72	Ideal	D	SI1	60.8	57.0	2757	5.75	5.76	3.50
53936	0.72	Good	D	SI1	63.1	55.0	2757	5.69	5.75	3.61
53937	0.70	Very Good	D	SI1	62.8	60.0	2757	5.66	5.68	3.56
53938	0.86	Premium	H	SI2	61.0	58.0	2757	6.15	6.12	3.74
53939	0.75	Ideal	D	SI2	62.2	55.0	2757	5.83	5.87	3.64

	species	island	culmen_length_mm	culmen_depth_mm	flipper_length_mm	body_mass_g	sex
0	Adelie	Torgersen	39.1	18.7	181.0	3750.0	MALE
1	Adelie	Torgersen	39.5	17.4	186.0	3800.0	FEMALE
2	Adelie	Torgersen	40.3	18.0	195.0	3250.0	FEMALE
3	Adelie	Torgersen	NaN	NaN	NaN	NaN	NaN
4	Adelie	Torgersen	36.7	19.3	193.0	3450.0	FEMALE
...	...	...	...	...	...	...	...
339	Gentoo	Biscoe	NaN	NaN	NaN	NaN	NaN
340	Gentoo	Biscoe	46.8	14.3	215.0	4850.0	FEMALE
341	Gentoo	Biscoe	50.4	15.7	222.0	5750.0	MALE
342	Gentoo	Biscoe	45.2	14.8	212.0	5200.0	FEMALE
343	Gentoo	Biscoe	49.9	16.1	213.0	5400.0	MALE