# Difference makes the DIFFERENCE
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import style
plt.style.use(['dark_background'])
import urllib.request
import json
import seaborn as sns
sns.set(color_codes=True)
# Load the penguins sample dataset through seaborn and look at the first rows.
p = sns.load_dataset('penguins')
p.head()

# Row count per species; the index holds the species names.
c = p.groupby('species')['species'].count()
c

# --- Species pie chart, adding one option at a time -------------------------
plt.pie(c)
plt.show()

labelled = dict(labels=c.index)
plt.pie(c, **labelled)
plt.show()

with_pct = dict(labelled, autopct="%.2f%%")
plt.pie(c, **with_pct)
plt.show()

exploded = dict(with_pct, explode=[0, 1, 0])
plt.pie(c, **exploded)
plt.show()

rotated = dict(exploded, startangle=180)
plt.pie(c, **rotated)
plt.show()

plt.pie(c, shadow=True, **rotated)
plt.show()

# --- Random data: full pie, then ring (donut) variants ----------------------
ring_width = 0.3

plt.pie(np.random.randint(0, 10, 10))
plt.show()

plt.pie(np.random.randint(0, 10, 10), wedgeprops=dict(width=ring_width))
plt.show()

cmap = plt.get_cmap('Accent')
my_colours = cmap(np.arange(10))
plt.pie(
    np.random.randint(0, 10, 10),
    wedgeprops=dict(width=ring_width),
    colors=my_colours,
)
plt.show()

# --- Donut charts for species and for island --------------------------------
plt.pie(c, wedgeprops=dict(width=ring_width), **with_pct)
plt.show()

c_i = p.groupby('island')['island'].count()
plt.pie(
    c_i,
    labels=c_i.index,
    autopct="%.2f%%",
    wedgeprops=dict(width=ring_width),
)
plt.show()
# Cross-tabulate species against island and transpose, so that each row is an
# island and each column is a species.
c = pd.crosstab(p.species, p.island).T
c

# Outer ring: one wedge per island (row totals).
island_totals = c.sum(axis=1)
# Inner ring: one wedge per (island, species) cell, flattened row by row so
# the cells of an island sit underneath that island's outer wedge.
cell_counts = c.values.flatten()
# Label every non-empty inner wedge with the initial of its species, derived
# from the table itself rather than typed by hand.
inner_labels = [
    str(species)[0] if count else ''
    for row in c.values
    for species, count in zip(c.columns, row)
]

# Each figure is finished with plt.show() (as in the pie charts above) so that
# successive charts do not pile up on the same axes when run as a script.

# Outer ring only.
plt.pie(island_totals, labels=c.index,
        radius=1, wedgeprops=dict(width=0.3))
plt.show()

# Outer ring plus an unstyled inner ring.
plt.pie(island_totals, labels=c.index,
        radius=1, wedgeprops=dict(width=0.3))
plt.pie(cell_counts, radius=0.7,
        wedgeprops=dict(width=0.3))
plt.show()

# Pick one base colour per island for the outer ring and the neighbouring
# colourmap entries for that island's inner wedges.
cmap = plt.get_cmap('tab20c')
outer_colors = cmap(np.array([0, 4, 8]))
inner_colors = cmap(np.array([1, 2, 3, 5, 6, 7, 9, 10, 11]))

plt.pie(island_totals, labels=c.index,
        radius=1, wedgeprops=dict(width=0.3),
        colors=outer_colors)
plt.pie(cell_counts, radius=0.7,
        wedgeprops=dict(width=0.3),
        colors=inner_colors)
plt.show()

# Add species initials to the inner ring.
plt.pie(island_totals, labels=c.index,
        radius=1, wedgeprops=dict(width=0.3),
        colors=outer_colors)
plt.pie(cell_counts, radius=0.7,
        labels=inner_labels,
        wedgeprops=dict(width=0.3),
        colors=inner_colors)
plt.show()

# Pull the inner labels inside their wedges.
plt.pie(island_totals, labels=c.index,
        radius=1, wedgeprops=dict(width=0.3),
        colors=outer_colors)
plt.pie(cell_counts, radius=0.7,
        labels=inner_labels,
        wedgeprops=dict(width=0.3),
        colors=inner_colors,
        labeldistance=0.75)
plt.show()

# Same chart with a different colourmap and white inner labels.
cmap = plt.get_cmap('tab20b')
outer_colors = cmap(np.array([0, 4, 8]))
inner_colors = cmap(np.array([1, 2, 3, 5, 6, 7, 9, 10, 11]))
plt.pie(island_totals, labels=c.index,
        radius=1, wedgeprops=dict(width=0.3),
        colors=outer_colors)
plt.pie(cell_counts, radius=0.7,
        labels=inner_labels,
        wedgeprops=dict(width=0.3),
        colors=inner_colors,
        labeldistance=0.75, textprops=dict(color='w'))
plt.show()
# Download the state-wise daily COVID-19 counts and cache them as data.json.
# NOTE(review): confirm this endpoint is still being served before relying on
# the download; later sections re-read the cached data.json.
url = 'https://api.covid19india.org/states_daily.json'
urllib.request.urlretrieve(url, 'data.json')
with open('data.json') as f:
    data = json.load(f)
data = data['states_daily']
df = pd.json_normalize(data)
df.head()

# Build a per-state snapshot from the last three records: one row per state
# code, one numeric column per value of 'status'.
# The steps are chained without inplace=True because df.tail(3) is a slice of
# df; mutating it in place raises pandas' SettingWithCopyWarning.
df_ = (
    df.tail(3)
    .drop(columns='date')
    .set_index('status')
    .T
    .apply(pd.to_numeric)
    .drop('tt')  # after the transpose 'tt' is a row label
)
df_.head()
# Stacked bar chart of the per-state snapshot, built up one step at a time.
# Each figure ends with plt.show() so that, when run as a script, the next
# chart starts on a fresh figure instead of drawing over the previous one.

# Confirmed cases only.
plt.bar(df_.index, df_.Confirmed)
plt.show()

# Rotate the state codes so they do not overlap.
plt.bar(df_.index, df_.Confirmed)
plt.xticks(rotation=90)
plt.show()

# Stack Recovered on top of Confirmed.
plt.bar(df_.index, df_.Confirmed)
plt.bar(df_.index, df_.Recovered, bottom=df_.Confirmed)
plt.xticks(rotation=90)
plt.show()

# Stack Deceased on top of both.
plt.bar(df_.index, df_.Confirmed)
plt.bar(df_.index, df_.Recovered, bottom=df_.Confirmed)
plt.bar(df_.index, df_.Deceased, bottom=df_.Confirmed + df_.Recovered)
plt.xticks(rotation=90)
plt.show()

# Wider figure.
fig = plt.gcf()
fig.set_size_inches(15, 6)
plt.bar(df_.index, df_.Confirmed)
plt.bar(df_.index, df_.Recovered, bottom=df_.Confirmed)
plt.bar(df_.index, df_.Deceased, bottom=df_.Confirmed + df_.Recovered)
plt.xticks(rotation=90)
plt.show()

# Explicit colours per status.
fig = plt.gcf()
fig.set_size_inches(15, 6)
plt.bar(df_.index, df_.Confirmed, color='Orange')
plt.bar(df_.index, df_.Recovered, bottom=df_.Confirmed, color='Green')
plt.bar(df_.index, df_.Deceased, bottom=df_.Confirmed + df_.Recovered, color='Red')
plt.xticks(rotation=90)
plt.show()

# Vertical gap between the top of a bar and its text label.
label_offset = 100

# Annotate every bar with its total.
fig = plt.gcf()
fig.set_size_inches(15, 6)
plt.bar(df_.index, df_.Confirmed, color='Orange')
plt.bar(df_.index, df_.Recovered, bottom=df_.Confirmed, color='Green')
plt.bar(df_.index, df_.Deceased, bottom=df_.Confirmed + df_.Recovered, color='Red')
plt.xticks(rotation=90)
for i, val in enumerate(df_.index):
    total = df_.loc[val].sum()
    # The offset only moves the label; the text shows the real total.
    plt.text(i, total + label_offset, str(total), ha="center")
plt.show()

# Annotate only the tall bars to avoid clutter.
fig = plt.gcf()
fig.set_size_inches(15, 6)
plt.bar(df_.index, df_.Confirmed, color='Orange')
plt.bar(df_.index, df_.Recovered, bottom=df_.Confirmed, color='Green')
plt.bar(df_.index, df_.Deceased, bottom=df_.Confirmed + df_.Recovered, color='Red')
plt.xticks(rotation=90)
for i, val in enumerate(df_.index):
    total = df_.loc[val].sum()
    # Same cut-off as before: label position (total + offset) above 1000.
    if total + label_offset > 1000:
        plt.text(i, total + label_offset, str(total), ha="center")
plt.show()
df_.head()

# Per-state total over the three status columns. Summing the named columns
# (rather than the whole row) keeps the result the same if this is re-run
# after the derived columns below have been added.
status_columns = ['Confirmed', 'Recovered', 'Deceased']
df_['Total'] = df_[status_columns].sum(axis=1)
df_.head()

# Share of each status within the state's total.
for status in status_columns:
    df_[status + 'Fraction'] = df_[status] / df_['Total']
df_.head()

# Each figure ends with plt.show() so the next chart starts on a fresh figure
# when this file is run as a script.

# 100%-stacked bars.
fig = plt.gcf()
fig.set_size_inches(15, 6)
plt.bar(df_.index, df_.ConfirmedFraction, color='Orange')
plt.bar(df_.index, df_.RecoveredFraction, bottom=df_.ConfirmedFraction, color='Green')
plt.bar(df_.index, df_.DeceasedFraction, bottom=df_.ConfirmedFraction + df_.RecoveredFraction, color='Red')
plt.xticks(rotation=90)
plt.show()

# Same chart, states ordered by their confirmed share.
df_ = df_.sort_values('ConfirmedFraction', ascending=False)
fig = plt.gcf()
fig.set_size_inches(15, 6)
plt.bar(df_.index, df_.ConfirmedFraction, color='Orange')
plt.bar(df_.index, df_.RecoveredFraction, bottom=df_.ConfirmedFraction, color='Green')
plt.bar(df_.index, df_.DeceasedFraction, bottom=df_.ConfirmedFraction + df_.RecoveredFraction, color='Red')
plt.xticks(rotation=90)
plt.show()

# Absolute counts, states ordered by total, tall bars annotated.
label_offset = 100  # vertical gap between bar top and label
df_ = df_.sort_values('Total', ascending=False)
fig = plt.gcf()
fig.set_size_inches(15, 6)
plt.bar(df_.index, df_.Confirmed, color='Orange')
plt.bar(df_.index, df_.Recovered, bottom=df_.Confirmed, color='Green')
plt.bar(df_.index, df_.Deceased, bottom=df_.Confirmed + df_.Recovered, color='Red')
plt.xticks(rotation=90)
for i, val in enumerate(df_.index):
    total = df_.loc[val, 'Total']
    # The offset only positions the label; the text shows the real total.
    if total + label_offset > 1000:
        plt.text(i, total + label_offset, str(total), ha="center")
plt.show()

# Horizontal version of the stacked bars.
df_ = df_.sort_values('Total', ascending=False)
fig = plt.gcf()
fig.set_size_inches(15, 6)
plt.barh(df_.index, df_.Confirmed, color='Orange')
plt.barh(df_.index, df_.Recovered, left=df_.Confirmed, color='Green')
plt.barh(df_.index, df_.Deceased, left=df_.Confirmed + df_.Recovered, color='Red')
plt.xticks(rotation=90)
plt.show()
df.head()

# Keep only the 'mh' column with its date and status. Take an explicit copy:
# assigning columns on a slice of df raises pandas' SettingWithCopyWarning and
# may or may not write through to df.
df_ = df[['mh', 'date', 'status']].copy()
df_.head()

# The JSON values arrive as strings; convert them to proper dtypes.
df_['mh'] = pd.to_numeric(df_['mh'])
df_['date'] = pd.to_datetime(df_['date'])
df_.head()
# Target layout after the pivot (one row per date, one column per status):
# date       | confirmed | recovered | deceased |
# 2020-03-14 | 14        | 0         | 0
# 2020-03-14 | 18        | 0         | 0
# pivot
# Reshape to one row per date and one column per status value.
df_ = df_.pivot_table(values="mh", columns="status", index="date")
df_.head()

# Each figure ends with plt.show() so the next chart starts on a fresh figure
# when this file is run as a script.

# pandas' built-in stacked area plot.
df_.plot.area()
plt.show()

# The same chart with matplotlib directly.
plt.stackplot(df_.index, df_.Confirmed, df_.Recovered, df_.Deceased)
plt.show()

# Wider figure.
fig = plt.gcf()
fig.set_size_inches(15, 6)
plt.stackplot(df_.index, df_.Confirmed, df_.Recovered, df_.Deceased)
plt.show()

# Explicit colours.
fig = plt.gcf()
fig.set_size_inches(15, 6)
plt.stackplot(df_.index, df_.Confirmed, df_.Recovered, df_.Deceased,
              colors=['orange', 'green', 'red'])
plt.show()

# Labels (not visible until a legend is drawn).
fig = plt.gcf()
fig.set_size_inches(15, 6)
plt.stackplot(df_.index, df_.Confirmed, df_.Recovered, df_.Deceased,
              labels=['Confirmed', 'Recovered', 'Deceased'],
              colors=['orange', 'green', 'red'])
plt.show()

# Labels plus legend.
fig = plt.gcf()
fig.set_size_inches(15, 6)
plt.stackplot(df_.index, df_.Confirmed, df_.Recovered, df_.Deceased,
              labels=['Confirmed', 'Recovered', 'Deceased'],
              colors=['orange', 'green', 'red'])
plt.legend()
plt.show()

# 100%-stacked version: divide each status by the row total, computed once.
daily_total = df_.sum(axis=1)
fig = plt.gcf()
fig.set_size_inches(15, 6)
plt.stackplot(df_.index,
              df_.Confirmed / daily_total,
              df_.Recovered / daily_total,
              df_.Deceased / daily_total,
              labels=['Confirmed', 'Recovered', 'Deceased'],
              colors=['orange', 'green', 'red'])
plt.legend()
plt.show()
def plot_stacked_area_by_state(state):
    """Draw a 100%-stacked area chart of the status counts for one state.

    Reads the module-level ``df`` and uses its ``date`` and ``status``
    columns together with the column named by ``state``. The chart is drawn
    on the current matplotlib figure, which is resized to 15 x 6 inches; the
    function does not call ``plt.show()``.

    Args:
        state: Name of the column of ``df`` to plot (for example ``'tn'``).

    Raises:
        KeyError: If ``state`` is not a column of ``df``.
    """
    # Work on a copy so the column assignments below do not touch df and do
    # not trigger pandas' SettingWithCopyWarning.
    state_df = df[[state, 'date', 'status']].copy()
    state_df[state] = pd.to_numeric(state_df[state])
    state_df['date'] = pd.to_datetime(state_df['date'])

    # One row per date, one column per status value.
    by_status = state_df.pivot_table(values=state, columns="status", index="date")
    # Row total, computed once and reused for all three shares.
    daily_total = by_status.sum(axis=1)

    fig = plt.gcf()
    fig.set_size_inches(15, 6)
    plt.stackplot(by_status.index,
                  by_status.Confirmed / daily_total,
                  by_status.Recovered / daily_total,
                  by_status.Deceased / daily_total,
                  labels=['Confirmed', 'Recovered', 'Deceased'],
                  colors=['orange', 'green', 'red'])
    plt.legend()
# One chart per state. plt.show() after each call closes off the figure so
# the three states are not stacked onto the same axes when run as a script.
for state_code in ('tn', 'wb', 'dl'):
    plot_stacked_area_by_state(state_code)
    plt.show()
# Load the tips sample dataset through seaborn.
t = sns.load_dataset('tips')
t.head()

# Each figure ends with plt.show() so the next chart starts on a fresh figure
# when this file is run as a script.

# Tip against bill size.
sns.scatterplot(x='total_bill', y='tip', data=t)
plt.show()

# Tip as a share of the bill.
t['tip_fraction'] = t['tip'] / t['total_bill']
sns.scatterplot(x='total_bill', y='tip_fraction', data=t)
plt.show()

# Colour the points by one column at a time.
for hue_column in ('time', 'sex', 'smoker', 'day', 'size'):
    sns.scatterplot(x='total_bill', y='tip', data=t,
                    hue=hue_column)
    plt.show()

# Combine colour, marker style and marker size.
sns.scatterplot(x='total_bill', y='tip', data=t,
                hue='size', style='sex')
plt.show()

sns.scatterplot(x='total_bill', y='tip', data=t,
                hue='time', style='sex', size='size')
plt.show()

# Same chart with the legend moved outside the axes.
sns.scatterplot(x='total_bill', y='tip', data=t,
                hue='time', style='sex', size='size')
plt.legend(bbox_to_anchor=(1.05, 1))
plt.show()

# Scatter plots with a fitted regression line.
sns.regplot(x='total_bill', y='tip', data=t)
plt.show()

sns.regplot(x='total_bill', y='tip_fraction', data=t)
plt.show()

sns.regplot(x='total_bill', y='tip_fraction', data=t, marker="+")
plt.show()
# Load the diamonds sample dataset through seaborn.
d = sns.load_dataset('diamonds')
d.head()

# x and y are passed by keyword: recent seaborn releases no longer accept
# them positionally. Each chart uses a fresh random sample of 1000 rows and
# ends with plt.show() so charts do not overlap when run as a script.
sns.scatterplot(x='x', y='price', data=d.sample(1000))
plt.show()

# Straight-line fit.
sns.regplot(x='x', y='price', data=d.sample(1000))
plt.show()

# Second-order polynomial fit.
sns.regplot(x='x', y='price', data=d.sample(1000), order=2, marker="+")
plt.show()
# Bar plots of the tips data per day. Each figure ends with plt.show() so the
# next chart starts on a fresh figure when this file is run as a script.

# Default estimator.
sns.barplot(x="day", y="tip", data=t)
plt.show()

sns.barplot(x="day", y="tip_fraction", data=t)
plt.show()

sns.barplot(x="day", y="tip", data=t)
plt.show()

# Median instead of the default estimator.
sns.barplot(x="day", y="tip", data=t, estimator=np.median)
plt.show()
def my_estimate(v, q=0.25):
    """Return the ``q``-quantile of ``v`` (the first quartile by default).

    Meant to be passed as a custom ``estimator`` to seaborn plots, which call
    it with a single vector of values.

    Args:
        v: Array-like of numeric values.
        q: Quantile to compute, between 0 and 1 inclusive. Defaults to 0.25.

    Returns:
        The requested quantile as computed by ``np.quantile``.
    """
    return np.quantile(v, q)
# Each figure ends with plt.show() so the next chart starts on a fresh figure
# when this file is run as a script.

# Custom estimator.
sns.barplot(x="day", y="tip", data=t, estimator=my_estimate)
plt.show()

# Median tip per day, split by a second categorical column.
for hue_column in ("sex", "smoker", "time"):
    sns.barplot(x="day", y="tip", hue=hue_column, data=t, estimator=np.median)
    plt.show()

sns.barplot(x="day", y="tip_fraction", hue="time", data=t, estimator=np.median)
plt.show()

d.head()

# x and y are passed by keyword below: recent seaborn releases no longer
# accept them positionally.
sns.scatterplot(x='x', y='price', data=d.sample(1000))
plt.show()

# A bar plot over the raw continuous 'x' draws one bar per distinct value.
sns.barplot(x='x', y='price', data=d.sample(1000))
plt.show()

# Bin 'x' into 15 intervals first.
d['x_q'] = pd.cut(d['x'], bins=15)
d.head()
sns.barplot(x='x_q', y='price', data=d.sample(1000))
plt.show()

# Integer bin codes instead of interval labels give shorter tick labels.
d['x_q'] = pd.cut(d['x'], bins=15, labels=False)
d.head()
sns.barplot(x='x_q', y='price', data=d.sample(1000))
plt.show()
# Load the fmri sample dataset through seaborn.
f = sns.load_dataset('fmri')
f.head()

# x and y are passed by keyword throughout: recent seaborn releases no longer
# accept them positionally. Each figure ends with plt.show() so the next chart
# starts on a fresh figure when this file is run as a script.

sns.lineplot(x='timepoint', y='signal', data=f)
plt.show()

sns.lineplot(x='timepoint', y='signal', data=f, hue="region")
plt.show()

sns.lineplot(x='timepoint', y='signal', data=f, hue="event")
plt.show()

sns.lineplot(x='timepoint', y='signal', data=f, hue="event", style="region")
plt.show()

# Draw point markers on the line. An explicit marker style replaces the
# original boolean, which is not a documented marker specification.
sns.lineplot(x='timepoint', y='signal', data=f, marker="o")
plt.show()

sns.lineplot(x='timepoint', y='signal', data=f, marker="o", estimator=np.median)
plt.show()

# No aggregation: one line per subject.
sns.lineplot(x='timepoint', y='signal', data=f, units='subject', estimator=None)
plt.show()

# Restrict to one region/event combination and colour by subject.
f_ = f[(f.region == "parietal") & (f.event == "cue")]
f_.head()
sns.lineplot(x='timepoint', y='signal', data=f_, hue='subject', estimator=None)
plt.show()

# Line plot from plain arrays instead of a DataFrame.
x = np.array([-3, -2, -1, 0, 1, 2, 3])
y = x * x
sns.lineplot(x=x, y=y)
plt.show()
# Re-read the cached download and rebuild df from scratch.
with open('data.json') as f:
    data = json.load(f)
data = data['states_daily']
df = pd.json_normalize(data)
df['date'] = pd.to_datetime(df['date'])

# Keep only the 'Confirmed' rows, one numeric column per state, smoothed with
# a 7-row rolling mean. The steps are chained without inplace=True because
# dropping a column in place on a filtered slice raises pandas'
# SettingWithCopyWarning.
df = (
    df.drop(columns='tt')
    .set_index('date')
    .loc[lambda frame: frame['status'] == 'Confirmed']
    .drop(columns='status')
    .apply(pd.to_numeric)
    .rolling(7)
    .mean()
    .reset_index()
)
df.head()
# Target long format (one row per date and state):
# date       | state | confirmed
# 2020-03-14 | an    | 0
# 2020-03-14 | ap    | 1
# Reshape from one column per state to long format (date, state, confirmed).
# list.remove() returns None, so the column list is built with a
# comprehension; passing None only worked because melt then falls back to
# "every column that is not an id".
state_columns = [col for col in df.columns if col != "date"]
df_ = pd.melt(df, id_vars="date",
              value_vars=state_columns,
              var_name="state", value_name="confirmed")
df_.head()

# x and y are passed by keyword throughout: recent seaborn releases no longer
# accept them positionally. Each figure ends with plt.show() so the next chart
# starts on a fresh figure when this file is run as a script.

# All states aggregated into a single line.
sns.lineplot(x='date', y='confirmed', data=df_)
plt.show()

# One line per state.
sns.lineplot(x='date', y='confirmed', hue="state", data=df_)
plt.show()

# Restrict to a handful of states.
states = ['mh', 'tn', 'dl', 'wb', 'ka', 'gj']
df_ = df_[df_.state.isin(states)]
sns.lineplot(x='date', y='confirmed', hue="state", data=df_)
plt.show()

# Sequential palette.
sns.lineplot(x='date', y='confirmed', hue="state", data=df_,
             palette='Reds')
plt.show()

# Wider figure.
fig = plt.gcf()
fig.set_size_inches(15, 6)
sns.lineplot(x='date', y='confirmed', hue="state", data=df_,
             palette='Reds')
plt.show()

# Fix the order in which states are mapped onto the palette.
fig = plt.gcf()
fig.set_size_inches(15, 6)
sns.lineplot(x='date', y='confirmed', hue="state", data=df_,
             palette='Reds', hue_order=['wb', 'gj', 'ka', 'dl', 'tn', 'mh'])
plt.show()
# A heatmap draws a matrix X of shape [10 x 10]:
# X(i, j) -> value, shown as the colour of cell (i, j)
# Heatmap of a 10 x 10 matrix of uniform random numbers.
x = np.random.rand(10, 10)
x
sns.heatmap(x)
# Finish the figure so later heatmaps are not drawn over it in a script run.
plt.show()

# Load the flights sample dataset through seaborn and inspect it.
fl = sns.load_dataset('flights')
fl.head()
fl.sample(10)
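# Long format (as loaded): one row per (year, month)
#    year  month     passengers
# 0  1949  January   112
# 1  1949  February  118
# 2  1949  March     132
#
# Wide format (after the pivot): one row per year, one column per month
# year  January  February  March
# 1949  112      118       132
# 1950
# 1951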
# Reshape to one row per year and one column per month.
fl_ = fl.pivot(index='year', columns='month', values='passengers')
fl_.head()

# Each figure ends with plt.show() so the next heatmap starts on a fresh
# figure when this file is run as a script.

# Months on the rows, years on the columns.
sns.heatmap(fl_.T)
plt.show()

# Larger figure with the passenger count written in every cell.
fig = plt.gcf()
fig.set_size_inches(15, 10)
sns.heatmap(fl_.T, annot=True, fmt="d")
plt.show()

# Named sequential colourmap.
fig = plt.gcf()
fig.set_size_inches(15, 10)
sns.heatmap(fl_.T, annot=True, fmt="d", cmap="YlGnBu")
plt.show()

# Diverging palette.
fig = plt.gcf()
fig.set_size_inches(15, 10)
sns.heatmap(fl_.T, annot=True, fmt="d",
            cmap=sns.diverging_palette(50, 200, n=45))
plt.show()

# Diverging palette centred on the January 1954 value.
fig = plt.gcf()
fig.set_size_inches(15, 10)
sns.heatmap(fl_.T, annot=True, fmt="d",
            cmap=sns.diverging_palette(250, 10, n=45),
            center=fl_.loc[1954, 'January'])
plt.show()
# Read the workbook from the working directory into df (replacing the earlier
# frame of that name) and look at the first rows.
ameo_workbook = 'ameo_2015.xlsx'
df = pd.read_excel(ameo_workbook)
df.head()