matplotlib_pie

# Data Visualisation Part II - with MATPLOT LIB¶

## Pie Chart, Donut and Nested Donut charts¶

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use(['dark_background'])
import seaborn as sns
sns.set(color_codes = True)


## Plotting with composition of data¶

• for example, if there are a group of students who passed X class board exam, how many of them are boys and how many of them are girls
• considering Covid dataset, how many of them are recovered, deceased, suffering....

## Static composition¶

Pie Chart

In [2]:
pen = sns.load_dataset('penguins')

In [3]:
pen.head(2)

Out[3]:
species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
0 Adelie Torgersen 39.1 18.7 181.0 3750.0 Male
1 Adelie Torgersen 39.5 17.4 186.0 3800.0 Female
In [4]:
pen.groupby('species').count()

Out[4]:
island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
species
Adelie 152 151 151 151 151 146
Chinstrap 68 68 68 68 68 68
Gentoo 124 123 123 123 123 119
In [5]:
pen.groupby('species')['species'].count()

Out[5]:
species
Chinstrap     68
Gentoo       124
Name: species, dtype: int64
In [6]:
c = pen.groupby('species')['species'].count()

In [42]:
c

Out[42]:
species
Chinstrap     68
Gentoo       124
Name: species, dtype: int64
In [7]:
plt.pie(c)
plt.show()

In [8]:
plt.pie(c, labels = c.index, autopct="%.2f%%")
plt.show()

In [9]:
plt.pie(c, labels = c.index, autopct="%.2f%%", explode=[0,1,0], startangle = 180)
plt.show()

In [10]:
plt.pie(np.random.randint(0, 10, 10), autopct="%.2f%%", pctdistance=1);


# Donut chart¶

In [60]:
plt.pie(np.random.randint(0, 10, 10));
plt.show()

In [61]:
plt.pie(np.random.randint(1, 10, 10), wedgeprops=dict(width=0.25));


## Matplotlib Color Map ( - Qualitative Color Maps)¶

https://matplotlib.org/stable/tutorials/colors/colormaps.html

In [94]:
cmap = plt.get_cmap('Paired')
my_colors = cmap(np.arange(10))

In [95]:
plt.pie(np.random.randint(1, 10, 10),
wedgeprops=dict(width=0.25),
colors = my_colors);

In [96]:
c

Out[96]:
species
Chinstrap     68
Gentoo       124
Name: species, dtype: int64
In [97]:
plt.pie(c, labels = c.index,
autopct = "%.2f%%",
wedgeprops=dict(width=0.25), colors = my_colors);

In [98]:
c_i = pen.groupby('island')['island'].count()
c_i

Out[98]:
island
Biscoe       168
Dream        124
Torgersen     52
Name: island, dtype: int64
In [99]:
plt.pie(c_i, labels = c_i.index, autopct="%0.2f",
wedgeprops = dict(width=0.3),
colors = my_colors);

In [90]:
cmap = plt.get_cmap('tab10')
my_colors = cmap(np.arange(10))

In [93]:
plt.pie(c_i, autopct="%0.2f%%", labels = c_i.index,
wedgeprops=dict(width=0.3));


# Crosstab in Pandas¶

In [101]:
pd.crosstab(pen.species, pen.island)

Out[101]:
island Biscoe Dream Torgersen
species
Chinstrap 0 68 0
Gentoo 124 0 0
In [102]:
species_count = pd.crosstab(pen.species, pen.island)

In [103]:
species_count

Out[103]:
island Biscoe Dream Torgersen
species
Chinstrap 0 68 0
Gentoo 124 0 0

## to get islands on the outer rim¶

In [104]:
species_count = species_count.T

In [107]:
sc = species_count

In [110]:
sc # island as index and the corresponding cols as values

Out[110]:
island
Biscoe 44 0 124
Dream 56 68 0
Torgersen 52 0 0
In [112]:
plt.pie(sc.sum(axis=1), labels = sc.index,

In [114]:
plt.pie(sc.sum(axis=1), labels = sc.index,

In [116]:
cmap = plt.get_cmap('tab20c')
outer_colors = cmap(np.array([0, 4, 8]))
inner_colors = cmap(np.array([1,2,35,6,7, 9, 10, 11]))

In [118]:
plt.pie(sc.sum(axis=1), labels = sc.index,
radius = 1, wedgeprops=dict(width=.3), colors = outer_colors);
wedgeprops = dict(width=0.3), colors = inner_colors);

In [122]:
plt.pie(sc.sum(axis=1), labels = sc.index,
radius = 1, wedgeprops=dict(width=.3), colors = outer_colors);
labels = ['A,', 'C', 'G', 'A,', 'C', 'G', 'A,', 'C', 'G' ],
wedgeprops = dict(width=0.3), colors = inner_colors);

In [124]:
plt.pie(sc.sum(axis=1), labels = sc.index,
radius = 1, wedgeprops=dict(width=.3), colors = outer_colors);
labels = ['A,', '', 'G', 'A,', 'C', '', 'A,', '', '' ],
wedgeprops = dict(width=0.3), colors = inner_colors,
labeldistance = 0.8);

In [129]:
cmap = plt.get_cmap('tab20b')
outer_colors = cmap(np.array([0, 4, 8]))
inner_colors = cmap(np.array([1,2,35,6,7, 9, 10, 11]))

plt.pie(sc.sum(axis=1), labels = sc.index,
radius = 1, wedgeprops=dict(width=.3), colors = outer_colors);
labels = ['A,', '', 'G', 'A,', 'C', '', 'A,', '', '' ],
wedgeprops = dict(width=0.3), colors = inner_colors,
labeldistance = 0.8);

In [131]:
cmap = plt.get_cmap('tab20b')
outer_colors = cmap(np.array([0, 4, 8]))
inner_colors = cmap(np.array([1,2,35,6,7, 9, 10, 11]))

plt.pie(sc.sum(axis=1), labels = sc.index,
radius = 1, wedgeprops=dict(width=.3), colors = outer_colors);
labels = ['A,', '', 'G', 'A,', 'C', '', 'A,', '', '' ],
wedgeprops = dict(width=0.3), colors = inner_colors,
labeldistance = 0.8,
textprops = dict(color='w'));

In [19]:
import json
import urllib.request

In [20]:
url = 'https://api.covid19india.org/states_daily.json'
urllib.request.urlretrieve(url, 'data.json')

with open('data.json') as f:
data = data['states_daily']
df = pd.json_normalize(data)

In [24]:
df_ = df.tail(3)

In [25]:
df_

Out[25]:
an ap ar as br ch ct date dateymd dd ... sk status tg tn tr tt un up ut wb
1560 1 909 165 758 14 2 68 16-Aug-21 2021-08-16 0 ... 20 Confirmed 405 1851 52 24696 0 17 18 502
1561 0 1543 249 1014 42 3 224 16-Aug-21 2021-08-16 0 ... 147 Recovered 577 1911 223 36871 0 36 54 691
1562 0 13 0 10 0 0 1 16-Aug-21 2021-08-16 0 ... 0 Deceased 3 28 1 438 0 1 1 9

3 rows × 42 columns

In [28]:
df_.drop('dateymd', inplace=True, axis=1)

/usr/local/lib/python3.7/dist-packages/pandas/core/frame.py:4913: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
errors=errors,

In [29]:
df_.head(2)

Out[29]:
an ap ar as br ch ct date dd dl ... sk status tg tn tr tt un up ut wb
1560 1 909 165 758 14 2 68 16-Aug-21 0 27 ... 20 Confirmed 405 1851 52 24696 0 17 18 502
1561 0 1543 249 1014 42 3 224 16-Aug-21 0 73 ... 147 Recovered 577 1911 223 36871 0 36 54 691

2 rows × 41 columns

In [30]:
df_.drop('tt', inplace = True, axis=1)

/usr/local/lib/python3.7/dist-packages/pandas/core/frame.py:4913: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
errors=errors,

In [31]:
df_.head(2)

Out[31]:
an ap ar as br ch ct date dd dl ... rj sk status tg tn tr un up ut wb
1560 1 909 165 758 14 2 68 16-Aug-21 0 27 ... 11 20 Confirmed 405 1851 52 0 17 18 502
1561 0 1543 249 1014 42 3 224 16-Aug-21 0 73 ... 44 147 Recovered 577 1911 223 0 36 54 691

2 rows × 40 columns

In [32]:
df_.drop('date', axis=1, inplace=True)
df_.set_index('status', inplace = True)

/usr/local/lib/python3.7/dist-packages/pandas/core/frame.py:4913: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
errors=errors,

In [35]:
df_ = df_.T

In [36]:
df_

Out[36]:
an ap ar as br ch ct dd dl dn ... py rj sk tg tn tr un up ut wb
status
Confirmed 1 909 165 758 14 2 68 0 27 2 ... 49 11 20 405 1851 52 0 17 18 502
Recovered 0 1543 249 1014 42 3 224 0 73 0 ... 86 44 147 577 1911 223 0 36 54 691
Deceased 0 13 0 10 0 0 1 0 0 0 ... 0 0 0 3 28 1 0 1 1 9

3 rows × 38 columns

In [37]:
df_ = df_.apply(pd.to_numeric)

In [39]:
df_ = df_.T

In [40]:
df_.head(2)

Out[40]:
status Confirmed Recovered Deceased
an 1 0 0
ap 909 1543 13
In [134]:
!pip install nbconvert

In [ ]:
%shell jupyter nbconvert --to html /content/testfile.ipynb