import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from matplotlib import style
plt.style.use(['dark_background'])

import urllib.request
import json

import seaborn as sns
sns.set(color_codes=True)

Plotting the composition of data

Static composition

Pie chart

# Load seaborn's 'penguins' example dataset into a DataFrame.
p = sns.load_dataset('penguins')

# Preview the first rows.
p.head()

# Count the non-null 'species' entries in each species group.
p.groupby('species')['species'].count()

species
Adelie       152
Chinstrap     68
Gentoo       124
Name: species, dtype: int64

# Per-species counts; the index (species names) is reused below as wedge labels.
c = p.groupby('species')['species'].count()

# Bare pie: wedge sizes only.
plt.pie(c);
plt.show()

# Label each wedge with its species name.
plt.pie(c, labels=c.index);
plt.show()

# Also print each wedge's share, formatted as a percentage with two decimals.
plt.pie(c, labels=c.index, autopct="%.2f%%");
plt.show()

# explode takes one offset per wedge; only the second wedge is pulled out.
plt.pie(c, labels=c.index, autopct="%.2f%%",
        explode=[0, 1, 0]);
plt.show()

# Same chart, with the first wedge starting at 180 degrees.
plt.pie(c, labels=c.index, autopct="%.2f%%",
        explode=[0, 1, 0], startangle=180);
plt.show()

# Same chart, with a drop shadow under the wedges.
plt.pie(c, labels=c.index, autopct="%.2f%%",
        explode=[0, 1, 0], startangle=180, shadow=True);
plt.show()

# Ten random integer wedge sizes drawn from [0, 10), to show a pie with many wedges.
plt.pie(np.random.randint(0, 10, 10));
plt.show()

Donut chart

# Giving the wedges a width smaller than the radius leaves a hole: a donut chart.
plt.pie(np.random.randint(0, 10, 10), wedgeprops=dict(width=0.3));
plt.show()

# Look up ten colours from the 'Accent' colormap by integer index.
# NOTE(review): 'Accent' presumably has fewer than ten entries, in which case
# the highest indices would repeat a colour — confirm, or use a larger map.
cmap = plt.get_cmap('Accent')
my_colours = cmap(np.arange(10))

# Donut of random sizes drawn with the hand-picked colours.
plt.pie(np.random.randint(0, 10, 10), 
        wedgeprops=dict(width=0.3),
        colors=my_colours);
plt.show()

# Donut of the penguin species counts, with labels and percentages.
plt.pie(c, labels=c.index, autopct="%.2f%%", wedgeprops=dict(width=0.3));
plt.show()

# Non-null 'island' entries per island.
c_i = p.groupby('island')['island'].count()

# Donut of the island counts.
plt.pie(c_i, labels=c_i.index, autopct="%.2f%%", wedgeprops=dict(width=0.3));
plt.show()

# Species x island contingency table (rows: species, columns: island).
c = pd.crosstab(p.species, p.island)

# Transpose so that rows are islands and columns are species.
c = c.T

c

# Outer ring: one wedge per island (row sums of the transposed table).
plt.pie(c.sum(axis=1), labels=c.index, 
        radius = 1, wedgeprops=dict(width=0.3));

# Add an inner ring with one wedge per (island, species) cell. flatten() walks
# the table row by row, so the inner wedges follow the outer ring's island order.
plt.pie(c.sum(axis=1), labels=c.index, 
        radius = 1, wedgeprops=dict(width=0.3));
plt.pie(c.values.flatten(), radius=0.7, 
        wedgeprops=dict(width=0.3));

# From 'tab20c', take indices 0/4/8 for the three outer wedges and the three
# indices following each of them for the nine inner wedges.
# (presumably lighter shades of the same hue — confirm against the colormap)
cmap = plt.get_cmap('tab20c')
outer_colors = cmap(np.array([0, 4, 8]))
inner_colors = cmap(np.array([1, 2, 3, 5, 6, 7, 9, 10, 11]))

# Nested donut drawn with the coordinated colours.
plt.pie(c.sum(axis=1), labels=c.index, 
        radius = 1, wedgeprops=dict(width=0.3), 
        colors=outer_colors);
plt.pie(c.values.flatten(), radius=0.7, 
        wedgeprops=dict(width=0.3),
        colors=inner_colors);

# Hand-written labels for the inner ring, in flattened (island, species) order.
# NOTE(review): the empty strings presumably mark zero-count cells and the
# letters species initials — verify against the table `c`.
plt.pie(c.sum(axis=1), labels=c.index, 
        radius = 1, wedgeprops=dict(width=0.3), 
        colors=outer_colors);
plt.pie(c.values.flatten(), radius=0.7, 
        labels = ['A', '', 'G', 'A', 'C', '', 'A', '', ''],
        wedgeprops=dict(width=0.3),
        colors=inner_colors);

# labeldistance below 1 places the inner labels inside the pie's radius.
plt.pie(c.sum(axis=1), labels=c.index, 
        radius = 1, wedgeprops=dict(width=0.3), 
        colors=outer_colors);
plt.pie(c.values.flatten(), radius=0.7, 
        labels = ['A', '', 'G', 'A', 'C', '', 'A', '', ''],
        wedgeprops=dict(width=0.3),
        colors=inner_colors,
        labeldistance=0.75);

# Same chart with the 'tab20b' colormap and white inner-label text.
cmap = plt.get_cmap('tab20b')
outer_colors = cmap(np.array([0, 4, 8]))
inner_colors = cmap(np.array([1, 2, 3, 5, 6, 7, 9, 10, 11]))
plt.pie(c.sum(axis=1), labels=c.index, 
        radius = 1, wedgeprops=dict(width=0.3), 
        colors=outer_colors);
plt.pie(c.values.flatten(), radius=0.7, 
        labels = ['A', '', 'G', 'A', 'C', '', 'A', '', ''],
        wedgeprops=dict(width=0.3),
        colors=inner_colors,
        labeldistance=0.75, textprops=dict(color='w'));

Stacked bar plot

# Download the states_daily JSON feed to a local file, 'data.json'.
# NOTE(review): this endpoint may no longer be served — confirm it still
# resolves, or point `url` at an archived copy of the file.
url = 'https://api.covid19india.org/states_daily.json'
urllib.request.urlretrieve(url, 'data.json')

# Parse the file and flatten the list stored under 'states_daily' into a DataFrame.
with open('data.json') as f:
  data = json.load(f)
data = data['states_daily']
df = pd.json_normalize(data)

df.head()

# Build a state x status table from the last three rows of the feed
# (presumably the latest day's Confirmed / Recovered / Deceased records —
# TODO confirm the feed's row order).
# Every step returns a new object, so nothing writes into a slice of `df`;
# the in-place drop/set_index on `df.tail(3)` raised SettingWithCopyWarning.
df_ = (
    df.tail(3)
    .drop(columns='date')
    .set_index('status')
    .T                       # rows become the per-state columns of `df`
    .apply(pd.to_numeric)    # make the counts numeric before plotting
    .drop(index='tt')        # 'tt' is presumably the national total — confirm
)

/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py:3997: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,

df_.head()

# One bar per row of df_, showing the Confirmed column.
plt.bar(df_.index, df_.Confirmed);

# Rotate the tick labels so they do not overlap.
plt.bar(df_.index, df_.Confirmed);
plt.xticks(rotation=90);

# Stack Recovered on top of Confirmed by starting each bar at the Confirmed height.
plt.bar(df_.index, df_.Confirmed);
plt.bar(df_.index, df_.Recovered, bottom=df_.Confirmed);
plt.xticks(rotation=90);

# Third layer: Deceased starts where Confirmed + Recovered ends.
plt.bar(df_.index, df_.Confirmed);
plt.bar(df_.index, df_.Recovered, bottom=df_.Confirmed);
plt.bar(df_.index, df_.Deceased, bottom=df_.Confirmed + df_.Recovered);
plt.xticks(rotation=90);

# Widen the current figure to 15 x 6 inches before drawing.
fig = plt.gcf();
fig.set_size_inches(15, 6);
plt.bar(df_.index, df_.Confirmed);
plt.bar(df_.index, df_.Recovered, bottom=df_.Confirmed);
plt.bar(df_.index, df_.Deceased, bottom=df_.Confirmed + df_.Recovered);
plt.xticks(rotation=90);

# Same chart with an explicit colour per status.
fig = plt.gcf();
fig.set_size_inches(15, 6);
plt.bar(df_.index, df_.Confirmed, color='Orange');
plt.bar(df_.index, df_.Recovered, bottom=df_.Confirmed, color='Green');
plt.bar(df_.index, df_.Deceased, bottom=df_.Confirmed + df_.Recovered, color='Red');
plt.xticks(rotation=90);

# Coloured stacked bars with the row total printed above each stack.
fig = plt.gcf();
fig.set_size_inches(15, 6);
plt.bar(df_.index, df_.Confirmed, color='Orange');
plt.bar(df_.index, df_.Recovered, bottom=df_.Confirmed, color='Green');
plt.bar(df_.index, df_.Deceased, bottom=df_.Confirmed + df_.Recovered, color='Red');
plt.xticks(rotation=90);

for i, val in enumerate(df_.index):
    total = df_.loc[val].sum()
    # The +100 only lifts the text clear of the bar; it must not leak into
    # the printed number (previously every label overstated the total by 100).
    plt.text(i, total + 100, str(total), ha="center")

# Coloured stacked bars; only the taller stacks get a total printed above them.
fig = plt.gcf();
fig.set_size_inches(15, 6);
plt.bar(df_.index, df_.Confirmed, color='Orange');
plt.bar(df_.index, df_.Recovered, bottom=df_.Confirmed, color='Green');
plt.bar(df_.index, df_.Deceased, bottom=df_.Confirmed + df_.Recovered, color='Red');
plt.xticks(rotation=90);

for i, val in enumerate(df_.index):
    total = df_.loc[val].sum()
    y = total + 100  # text position: just above the top of the stack
    # Same cut-off as before (tested on the label position), so the same
    # set of bars is labelled.
    if y > 1000:
        # Print the real total, not the offset position.
        plt.text(i, y, str(total), ha="center")

Relative stacked bar plots

df_.head()

# Row total over the three status columns only. Summing every column (after
# zero-filling 'Total') would also add any other numeric column present, and
# would silently double the total if 'Total' already held a value on re-run.
df_['Total'] = df_[['Confirmed', 'Recovered', 'Deceased']].sum(axis=1)

df_.head()

# Share of each status in the row total; the three fractions add up to 1
# wherever Total is the sum of the three status columns.
df_['ConfirmedFraction'] = df_['Confirmed'] / df_['Total']
df_['RecoveredFraction'] = df_['Recovered'] / df_['Total']
df_['DeceasedFraction'] = df_['Deceased'] / df_['Total']

df_.head()

# Relative stacked bars: each layer starts where the previous fractions end.
fig = plt.gcf();
fig.set_size_inches(15, 6);
plt.bar(df_.index, df_.ConfirmedFraction, color='Orange');
plt.bar(df_.index, df_.RecoveredFraction, bottom=df_.ConfirmedFraction, color='Green');
plt.bar(df_.index, df_.DeceasedFraction, bottom=df_.ConfirmedFraction + df_.RecoveredFraction, color='Red');
plt.xticks(rotation=90);

# Order the rows by descending confirmed share before re-plotting.
df_ = df_.sort_values('ConfirmedFraction', ascending=False)

fig = plt.gcf();
fig.set_size_inches(15, 6);
plt.bar(df_.index, df_.ConfirmedFraction, color='Orange');
plt.bar(df_.index, df_.RecoveredFraction, bottom=df_.ConfirmedFraction, color='Green');
plt.bar(df_.index, df_.DeceasedFraction, bottom=df_.ConfirmedFraction + df_.RecoveredFraction, color='Red');
plt.xticks(rotation=90);

# Order the rows by descending total, then redraw the absolute stacked bars.
df_ = df_.sort_values('Total', ascending=False)

fig = plt.gcf();
fig.set_size_inches(15, 6);
plt.bar(df_.index, df_.Confirmed, color='Orange');
plt.bar(df_.index, df_.Recovered, bottom=df_.Confirmed, color='Green');
plt.bar(df_.index, df_.Deceased, bottom=df_.Confirmed + df_.Recovered, color='Red');
plt.xticks(rotation=90);

for i, val in enumerate(df_.index):
    total = df_.loc[val, 'Total']
    y = total + 100  # text position: just above the top of the stack
    # Same cut-off as before (tested on the label position).
    if y > 1000:
        # Print the real total; the offset used to be included in the label.
        plt.text(i, y, str(total), ha="center")

# Sort by descending total (repeats the sort of the previous cell).
df_ = df_.sort_values('Total', ascending=False)

# Horizontal variant: barh stacks along x, so the offset argument is `left`
# rather than `bottom`.
fig = plt.gcf();
fig.set_size_inches(15, 6);
plt.barh(df_.index, df_.Confirmed, color='Orange');
plt.barh(df_.index, df_.Recovered, left=df_.Confirmed, color='Green');
plt.barh(df_.index, df_.Deceased, left=df_.Confirmed + df_.Recovered, color='Red');
# NOTE(review): in this orientation the x axis carries the counts, so this
# rotates the numeric tick labels — possibly a leftover from the vertical chart.
plt.xticks(rotation=90);

Time-varying composition of data

Stacked area plots

 df.head()

# Work on an explicit copy of the three columns: assigning into a bare column
# selection of `df` raises SettingWithCopyWarning and may not stick.
df_ = df[['mh', 'date', 'status']].copy()

df_.head()

# Convert the 'mh' values to numbers and the 'date' values to timestamps.
df_['mh'] = pd.to_numeric(df_['mh'])
df_['date'] = pd.to_datetime(df_['date'])

df_.head()

date | confirmed | recovered | deceased

2020-03-14 | 14 | 0 | 0

2020-03-14 | 18 | 0 | 0

pivot

# Reshape long -> wide: one row per date, one column per status value,
# cells taken from 'mh' (pivot_table averages duplicates by default).
df_ = df_.pivot_table(values="mh", columns="status", index="date")

df_.head()

# pandas' built-in stacked area plot.
df_.plot.area();

# The same chart through matplotlib, passing the layers explicitly.
plt.stackplot(df_.index, df_.Confirmed, df_.Recovered, df_.Deceased);

# Wider figure.
fig = plt.gcf();
fig.set_size_inches(15, 6);
plt.stackplot(df_.index, df_.Confirmed, df_.Recovered, df_.Deceased);

# Explicit colour per layer.
fig = plt.gcf();
fig.set_size_inches(15, 6);
plt.stackplot(df_.index, df_.Confirmed, df_.Recovered, df_.Deceased,
              colors=['orange', 'green', 'red']);

# Name the layers; the names are not shown until a legend is requested.
fig = plt.gcf();
fig.set_size_inches(15, 6);
plt.stackplot(df_.index, df_.Confirmed, df_.Recovered, df_.Deceased,
              labels=['Confirmed', 'Recovered', 'Deceased'],
              colors=['orange', 'green', 'red']);

# Same chart with the legend drawn.
fig = plt.gcf();
fig.set_size_inches(15, 6);
plt.stackplot(df_.index, df_.Confirmed, df_.Recovered, df_.Deceased,
              labels=['Confirmed', 'Recovered', 'Deceased'],
              colors=['orange', 'green', 'red']);
plt.legend();

Relative stacked area plot

# Relative stacked area: every layer is divided by that date's row sum, so the
# stack shows each status as a share of the day's total.
fig = plt.gcf()
fig.set_size_inches(15, 6)
plt.stackplot(
    df_.index,
    *[
        df_[status] / df_.sum(axis=1)
        for status in ('Confirmed', 'Recovered', 'Deceased')
    ],
    labels=['Confirmed', 'Recovered', 'Deceased'],
    colors=['orange', 'green', 'red'],
);
plt.legend();

def plot_stacked_area_by_state(state):
    """Draw a relative stacked area chart of case statuses for one state.

    Reads the module-level ``df`` and expects it to contain a column named
    *state* together with ``date`` and ``status`` columns, where ``status``
    includes the values ``Confirmed``, ``Recovered`` and ``Deceased``.
    The chart is drawn on the current matplotlib figure, resized to
    15 x 6 inches; nothing is returned.

    Args:
        state: Name of the ``df`` column to plot, e.g. ``'tn'``.
    """
    # Copy first: assigning into a bare column selection of `df` triggers
    # SettingWithCopyWarning and is not guaranteed to take effect.
    state_df = df[[state, 'date', 'status']].copy()
    state_df[state] = pd.to_numeric(state_df[state])
    state_df['date'] = pd.to_datetime(state_df['date'])

    # One row per date, one column per status.
    by_status = state_df.pivot_table(values=state, columns="status", index="date")

    # Row total, computed once. Dates whose total is zero produce NaN shares.
    daily_total = by_status.sum(axis=1)

    fig = plt.gcf()
    fig.set_size_inches(15, 6)
    plt.stackplot(by_status.index,
                  by_status.Confirmed / daily_total,
                  by_status.Recovered / daily_total,
                  by_status.Deceased / daily_total,
                  labels=['Confirmed', 'Recovered', 'Deceased'],
                  colors=['orange', 'green', 'red'])
    plt.legend()

plot_stacked_area_by_state('tn')

/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:4: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.

plot_stacked_area_by_state('wb')

/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:4: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.

plot_stacked_area_by_state('dl')

/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:4: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.

Plotting relationships between data

Scatter plot

# Load seaborn's 'tips' example dataset.
t = sns.load_dataset('tips')

t.head()

# Basic scatter plot of tip against total bill.
sns.scatterplot(x='total_bill', y='tip', data=t);

<matplotlib.axes._subplots.AxesSubplot at 0x7f0b9b9dc550>

# Tip as a fraction of the bill.
t['tip_fraction'] = t['tip']/t['total_bill']

sns.scatterplot(x='total_bill', y='tip_fraction', data=t);

# Colour the points by a third column; one cell per candidate column.
sns.scatterplot(x='total_bill', y='tip', data=t,
                hue='time');

sns.scatterplot(x='total_bill', y='tip', data=t,
                hue='sex');

sns.scatterplot(x='total_bill', y='tip', data=t,
                hue='smoker');

sns.scatterplot(x='total_bill', y='tip', data=t,
                hue='day');

sns.scatterplot(x='total_bill', y='tip', data=t,
                hue='size');

# Encode a second column through the marker shape.
sns.scatterplot(x='total_bill', y='tip', data=t,
                hue='size', style='sex');

# Three extra columns at once: colour, marker shape and marker size.
sns.scatterplot(x='total_bill', y='tip', data=t,
                hue='time', style='sex', size='size');

# Same plot, with the legend anchored just outside the right edge of the axes.
sns.scatterplot(x='total_bill', y='tip', data=t,
                hue='time', style='sex', size='size');
plt.legend(bbox_to_anchor=(1.05, 1));

# Scatter plot with a fitted regression line.
sns.regplot(x='total_bill', y='tip', data=t);

sns.regplot(x='total_bill', y='tip_fraction', data=t);

# Same regression plot with '+' markers.
sns.regplot(x='total_bill', y='tip_fraction', data=t, marker="+");

# Load seaborn's 'diamonds' example dataset.
d = sns.load_dataset('diamonds')

d.head()

# Plot a random sample of 1000 rows. x/y are passed by keyword: seaborn
# deprecated positional x/y in 0.11 and rejects them from 0.12 on.
sns.scatterplot(x='x', y='price', data=d.sample(1000));

# Linear regression fit on a fresh random sample.
sns.regplot(x='x', y='price', data=d.sample(1000));

# Second-order polynomial fit, drawn with '+' markers.
sns.regplot(x='x', y='price', data=d.sample(1000), order=2, marker="+");

Bar plots

# One bar per day, summarising 'tip' with barplot's default estimator.
sns.barplot(x="day", y ="tip", data=t);

sns.barplot(x="day", y ="tip_fraction", data=t);

sns.barplot(x="day", y ="tip", data=t);

# Replace the default estimator with the median.
sns.barplot(x="day", y ="tip", data=t, estimator=np.median);

def my_estimate(v):
    """Return the first quartile (0.25 quantile) of the values in *v*."""
    first_quartile = 0.25
    return np.quantile(v, q=first_quartile)

# Use the custom estimator defined above for the bar heights.
sns.barplot(x="day", y ="tip", data=t, estimator=my_estimate);

# Split every day's bar by a second column; one cell per candidate column.
sns.barplot(x="day", y ="tip", hue="sex", data=t, estimator=np.median);

sns.barplot(x="day", y ="tip", hue="smoker", data=t, estimator=np.median);

sns.barplot(x="day", y ="tip", hue="time", data=t, estimator=np.median);

sns.barplot(x="day", y ="tip_fraction", hue="time", data=t, estimator=np.median);

d.head()

# x/y are passed by keyword throughout: seaborn deprecated positional x/y in
# 0.11 and rejects them from 0.12 on.
sns.scatterplot(x='x', y='price', data=d.sample(1000));

# A bar plot over the raw 'x' values draws one bar per distinct value.
sns.barplot(x='x', y='price', data=d.sample(1000));

# Bin 'x' into 15 equal-width intervals; the bar labels are the intervals.
d['x_q'] = pd.cut(d['x'], bins=15);

d.head()

sns.barplot(x='x_q', y='price', data=d.sample(1000));

# labels=False replaces the intervals with integer bin codes, which gives
# shorter tick labels.
d['x_q'] = pd.cut(d['x'], bins=15, labels=False);

d.head()

sns.barplot(x='x_q', y='price', data=d.sample(1000));

Line plot

# Load seaborn's 'fmri' example dataset.
f = sns.load_dataset('fmri')

f.head()

# x/y are passed by keyword throughout: seaborn deprecated positional x/y in
# 0.11 and rejects them from 0.12 on.
# Default line plot: repeated observations at each timepoint are aggregated.
sns.lineplot(x='timepoint', y='signal', data=f);

# One line per value of a grouping column.
sns.lineplot(x='timepoint', y='signal', data=f, hue="region");

sns.lineplot(x='timepoint', y='signal', data=f, hue="event");

# Colour by event, line style by region.
sns.lineplot(x='timepoint', y='signal', data=f, hue="event", style="region");

# `marker` is forwarded to matplotlib, which expects a marker code. The old
# `marker=True` was only accepted because bool is an int; an explicit 'o'
# draws a visible point at every timepoint.
sns.lineplot(x='timepoint', y='signal', data=f, marker='o');

# Aggregate with the median instead of the default estimator.
sns.lineplot(x='timepoint', y='signal', data=f, marker='o', estimator=np.median);

# No aggregation: draw one line per subject.
sns.lineplot(x='timepoint', y='signal', data=f, units='subject', estimator=None);

# Restrict to a single region/event combination.
f_ = f[(f.region == "parietal") & (f.event == "cue")]

f_.head()

sns.lineplot(x='timepoint', y='signal', data=f_, hue='subject', estimator=None);

# Line plot from plain arrays instead of a DataFrame: y = x squared.
x = np.array([-3, -2, -1, 0, 1, 2, 3])

y = x * x

sns.lineplot(x=x, y=y);

# Reload the downloaded feed. The handle is named `fh` so that it does not
# overwrite `f`, which holds the fmri DataFrame.
with open('data.json', encoding='utf-8') as fh:
    data = json.load(fh)
data = data['states_daily']
df = pd.json_normalize(data)
df['date'] = pd.to_datetime(df['date'])

# Keep only the 'Confirmed' rows and smooth every remaining column with a
# 7-row rolling mean. Each step returns a new frame, which avoids the in-place
# drop on a filtered slice that raises SettingWithCopyWarning.
df = (
    df.drop(columns='tt')
    .set_index('date')
    .loc[lambda frame: frame['status'] == 'Confirmed']
    .drop(columns='status')
    .apply(pd.to_numeric)
    .rolling(7).mean()      # the first six rows are NaN (incomplete window)
    .reset_index()
)

df.head()

date | state | confirmed

2020-03-14 | an | 0

2020-03-14 | ap | 1

# Wide -> long: one row per (date, state) pair.
# list.remove() mutates in place and returns None, so the previous
# `list(df.columns).remove("date")` always passed value_vars=None and only
# worked because melt then falls back to every non-id column. Build the
# column list explicitly instead.
df_ = pd.melt(df, id_vars="date",
              value_vars=[col for col in df.columns if col != "date"],
              var_name="state", value_name="confirmed")

df_.head()

# x/y are passed by keyword throughout: seaborn deprecated positional x/y in
# 0.11 and rejects them from 0.12 on.
# All states aggregated into a single line.
sns.lineplot(x='date', y='confirmed', data=df_);

# One line per state.
sns.lineplot(x='date', y='confirmed', hue="state", data=df_);

# Keep a handful of states so the individual lines stay readable.
states = ['mh', 'tn', 'dl', 'wb', 'ka', 'gj']

df_ = df_[df_.state.isin(states)]

sns.lineplot(x='date', y='confirmed', hue="state", data=df_);

# Sequential palette.
sns.lineplot(x='date', y='confirmed', hue="state", data=df_,
             palette='Reds');

# Wider figure.
fig = plt.gcf();
fig.set_size_inches(15, 6);
sns.lineplot(x='date', y='confirmed', hue="state", data=df_,
             palette='Reds');

# hue_order fixes which state receives which palette step.
fig = plt.gcf();
fig.set_size_inches(15, 6);
sns.lineplot(x='date', y='confirmed', hue="state", data=df_,
             palette='Reds', hue_order = ['wb', 'gj', 'ka', 'dl', 'tn', 'mh']);

Heatmap

X [10 x 10]

X(i, j) -> value

# 10 x 10 matrix of uniform random values in [0, 1).
x = np.random.rand(10, 10)

x

array([[0.44147103, 0.86844671, 0.97218962, 0.6935189 , 0.8069725 ,
        0.5578699 , 0.09532397, 0.78801904, 0.32624519, 0.03927953],
       [0.07426231, 0.14167535, 0.4004343 , 0.89695182, 0.24108825,
        0.2987024 , 0.452844  , 0.79566103, 0.44566233, 0.93156571],
       [0.29919083, 0.14043667, 0.82908301, 0.18849677, 0.92083531,
        0.72199571, 0.23918676, 0.11534281, 0.71111679, 0.59196739],
       [0.35287507, 0.93144292, 0.09984819, 0.06610241, 0.08692311,
        0.97267655, 0.76874109, 0.24189725, 0.24577086, 0.88816116],
       [0.57603578, 0.17206053, 0.03900838, 0.235467  , 0.72562216,
        0.71056661, 0.77237925, 0.92932866, 0.29885186, 0.07685464],
       [0.15220609, 0.0629219 , 0.61211092, 0.24377965, 0.3031736 ,
        0.2807313 , 0.56958555, 0.41459585, 0.51102999, 0.78950917],
       [0.31998926, 0.72196797, 0.25143335, 0.05519168, 0.58413424,
        0.89281645, 0.96354152, 0.67733497, 0.41681635, 0.62967471],
       [0.80862028, 0.26639888, 0.09040902, 0.9266112 , 0.87010557,
        0.52039608, 0.66714736, 0.70316625, 0.7854821 , 0.16920598],
       [0.94350783, 0.36418735, 0.77478057, 0.6535443 , 0.16562476,
        0.50576092, 0.65717018, 0.5047263 , 0.68163753, 0.72439885],
       [0.23512086, 0.86044499, 0.51851838, 0.6618975 , 0.19513392,
        0.91105141, 0.79445037, 0.34064833, 0.97714602, 0.66702951]])

# Draw the matrix as a heatmap, one coloured cell per entry.
sns.heatmap(x)

<matplotlib.axes._subplots.AxesSubplot at 0x7f0b9b9edef0>

# Load seaborn's 'flights' example dataset.
fl = sns.load_dataset('flights')

fl.head()

# Ten random rows, to look beyond the first few.
fl.sample(10)

year    month   passengers

0 1949 January 112

1 1949 February 118

2 1949 March 132

year January February March

1949 112 118 132

1950

1951

# Long -> wide: one row per year, one column per month, cells = passengers.
fl_ = fl.pivot(index='year', columns='month', values='passengers');

fl_.head()

# Transposed, so months run down the rows and years across the columns.
sns.heatmap(fl_.T)

<matplotlib.axes._subplots.AxesSubplot at 0x7f0b9baf99e8>

# Larger figure, with every cell annotated by its value formatted as an integer.
fig = plt.gcf();
fig.set_size_inches(15, 10)
sns.heatmap(fl_.T, annot=True, fmt="d");

# Named sequential colormap.
fig = plt.gcf();
fig.set_size_inches(15, 10)
sns.heatmap(fl_.T, annot=True, fmt="d", cmap="YlGnBu");

# Diverging palette built from two hues, with 45 colour steps.
fig = plt.gcf();
fig.set_size_inches(15, 10)
sns.heatmap(fl_.T, annot=True, fmt="d", 
            cmap=sns.diverging_palette(50, 200, n=45));

# Centre the diverging palette on the value of one reference cell.
# NOTE(review): this assumes the month columns are labelled with full names
# such as 'January' — confirm against fl_.columns.
fig = plt.gcf();
fig.set_size_inches(15, 10)
sns.heatmap(fl_.T, annot=True, fmt="d", 
            cmap=sns.diverging_palette(250, 10, n=45),
            center=fl_.loc[1954, 'January']);

Task on open-ended visualisation

http://ml-india.org/datasets/

# Read the workbook 'ameo_2015.xlsx' from the working directory.
# NOTE(review): presumably one of the datasets from the page linked above —
# confirm the source, and that an Excel reader engine is installed.
df = pd.read_excel('ameo_2015.xlsx')

df.head()

	species	island	culmen_length_mm	culmen_depth_mm	flipper_length_mm	body_mass_g	sex
0	Adelie	Torgersen	39.1	18.7	181.0	3750.0	MALE
1	Adelie	Torgersen	39.5	17.4	186.0	3800.0	FEMALE
2	Adelie	Torgersen	40.3	18.0	195.0	3250.0	FEMALE
3	Adelie	Torgersen	NaN	NaN	NaN	NaN	NaN
4	Adelie	Torgersen	36.7	19.3	193.0	3450.0	FEMALE

	total_bill	tip	sex	smoker	day	time	size
0	16.99	1.01	Female	No	Sun	Dinner	2
1	10.34	1.66	Male	No	Sun	Dinner	3
2	21.01	3.50	Male	No	Sun	Dinner	3
3	23.68	3.31	Male	No	Sun	Dinner	2
4	24.59	3.61	Female	No	Sun	Dinner	4

	carat	cut	color	clarity	depth	table	price	x	y	z
0	0.23	Ideal	E	SI2	61.5	55.0	326	3.95	3.98	2.43
1	0.21	Premium	E	SI1	59.8	61.0	326	3.89	3.84	2.31
2	0.23	Good	E	VS1	56.9	65.0	327	4.05	4.07	2.31
3	0.29	Premium	I	VS2	62.4	58.0	334	4.20	4.23	2.63
4	0.31	Good	J	SI2	63.3	58.0	335	4.34	4.35	2.75

	carat	cut	color	clarity	depth	table	price	x	y	z
0	0.23	Ideal	E	SI2	61.5	55.0	326	3.95	3.98	2.43
1	0.21	Premium	E	SI1	59.8	61.0	326	3.89	3.84	2.31
2	0.23	Good	E	VS1	56.9	65.0	327	4.05	4.07	2.31
3	0.29	Premium	I	VS2	62.4	58.0	334	4.20	4.23	2.63
4	0.31	Good	J	SI2	63.3	58.0	335	4.34	4.35	2.75

	carat	cut	color	clarity	depth	table	price	x	y	z	x_q
0	0.23	Ideal	E	SI2	61.5	55.0	326	3.95	3.98	2.43	(3.58, 4.296]
1	0.21	Premium	E	SI1	59.8	61.0	326	3.89	3.84	2.31	(3.58, 4.296]
2	0.23	Good	E	VS1	56.9	65.0	327	4.05	4.07	2.31	(3.58, 4.296]
3	0.29	Premium	I	VS2	62.4	58.0	334	4.20	4.23	2.63	(3.58, 4.296]
4	0.31	Good	J	SI2	63.3	58.0	335	4.34	4.35	2.75	(4.296, 5.012]

Consulting - Quality

Data Visualisations - 2

Week 12 - Class Demo