Data Visualisations 
Week 11 - Class Demo
FDS_W11

Week 11: Data Visualisation part 1

uploaded as given

In [ ]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from matplotlib import style
plt.style.use(['dark_background'])

import seaborn as sns
sns.set(color_codes=True)
/usr/local/lib/python3.6/dist-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.
  import pandas.util.testing as tm
In [ ]:
style.use('seaborn-ticks')
sns.set(color_codes=True)

Tabulation

In [ ]:
url = 'https://api.covid19india.org/states_daily.json'
In [ ]:
import urllib.request
In [ ]:
urllib.request.urlretrieve(url, 'data.json');
In [ ]:
covid_data = pd.read_json('data.json')
In [ ]:
covid_data
Out[ ]:
states_daily
0 {'an': '0', 'ap': '1', 'ar': '0', 'as': '0', '...
1 {'an': '0', 'ap': '0', 'ar': '0', 'as': '0', '...
2 {'an': '0', 'ap': '0', 'ar': '0', 'as': '0', '...
3 {'an': '0', 'ap': '0', 'ar': '0', 'as': '0', '...
4 {'an': '0', 'ap': '0', 'ar': '0', 'as': '0', '...
... ...
319 {'an': '2', 'ap': '428', 'ar': '6', 'as': '274...
320 {'an': '0', 'ap': '12', 'ar': '0', 'as': '1', ...
321 {'an': '7', 'ap': '793', 'ar': '5', 'as': '302...
322 {'an': '0', 'ap': '324', 'ar': '1', 'as': '245...
323 {'an': '0', 'ap': '11', 'ar': '0', 'as': '0', ...

324 rows × 1 columns

In [ ]:
import json
In [ ]:
with open('data.json') as f:
    data = json.load(f)
In [ ]:
data = data['states_daily']
In [ ]:
covid_data = pd.json_normalize(data)
In [ ]:
covid_data
Out[ ]:
an ap ar as br ch ct date dd dl dn ga gj hp hr jh jk ka kl la ld mh ml mn mp mz nl or pb py rj sk status tg tn tr tt un up ut wb
0 0 1 0 0 0 0 0 14-Mar-20 0 7 0 0 0 0 14 0 2 6 19 0 0 14 0 0 0 0 0 0 1 0 3 0 Confirmed 1 1 0 81 0 12 0 0
1 0 0 0 0 0 0 0 14-Mar-20 0 1 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 1 0 Recovered 0 0 0 9 0 4 0 0
2 0 0 0 0 0 0 0 14-Mar-20 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 Deceased 0 0 0 2 0 0 0 0
3 0 0 0 0 0 0 0 15-Mar-20 0 0 0 0 0 0 0 0 0 0 5 0 0 18 0 0 0 0 0 0 0 0 1 0 Confirmed 2 0 0 27 0 1 0 0
4 0 0 0 0 0 0 0 15-Mar-20 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 Recovered 1 0 0 4 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
319 2 428 6 274 226 1 125 28-Jun-20 0 3306 13 58 391 17 445 69 91 220 42 32 0 2330 0 23 113 0 0 137 206 31 244 0 Recovered 244 1443 8 11628 0 593 106 404
320 0 12 0 1 4 0 0 28-Jun-20 0 65 0 1 19 0 5 0 1 16 0 0 0 156 0 0 7 0 0 3 5 1 8 0 Deceased 4 54 0 384 0 11 1 10
321 7 793 5 302 394 3 101 29-Jun-20 0 2084 15 53 626 26 381 62 144 1105 122 1 0 5257 1 42 184 0 36 245 202 42 389 0 Confirmed 975 3949 34 18339 -554 681 8 624
322 0 324 1 245 218 13 88 29-Jun-20 0 3628 6 46 440 38 585 56 269 176 79 30 0 2385 0 39 115 6 4 203 238 10 310 0 Recovered 410 2212 6 13497 0 698 93 526
323 0 11 0 0 1 0 0 29-Jun-20 0 57 0 0 19 0 9 3 1 19 1 0 0 181 0 0 7 0 0 2 5 0 6 0 Deceased 6 62 0 417 0 12 1 14

324 rows × 41 columns

In [ ]:
df = covid_data
In [ ]:
df.date = pd.to_datetime(df.date)
In [ ]:
df = df[df.status == 'Confirmed']
In [ ]:
df.drop('status', axis=1, inplace=True)
/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py:3997: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
In [ ]:
df.set_index('date', inplace=True)
In [ ]:
df
Out[ ]:
an ap ar as br ch ct dd dl dn ga gj hp hr jh jk ka kl la ld mh ml mn mp mz nl or pb py rj sk tg tn tr tt un up ut wb
date
2020-03-14 0 1 0 0 0 0 0 0 7 0 0 0 0 14 0 2 6 19 0 0 14 0 0 0 0 0 0 1 0 3 0 1 1 0 81 0 12 0 0
2020-03-15 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 0 0 18 0 0 0 0 0 0 0 0 1 0 2 0 0 27 0 1 0 0
2020-03-16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 3 0 0 6 0 0 0 0 0 1 0 1 0 0 1 0 0 15 0 0 1 0
2020-03-17 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 2 0 0 0 3 0 0 0 0 0 0 0 0 0 0 1 0 0 11 0 2 0 1
2020-03-18 0 0 0 0 0 0 0 0 2 0 0 0 0 1 0 1 5 0 8 0 3 0 0 0 0 0 1 1 0 3 0 8 1 0 37 0 2 1 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2020-06-25 2 553 12 364 215 3 37 0 3390 20 44 577 33 453 44 127 442 123 9 0 4842 0 86 147 0 8 210 142 41 287 2 920 3509 32 18205 352 636 68 475
2020-06-26 14 605 2 273 190 2 89 0 3460 15 44 580 25 421 31 213 445 150 5 0 5024 2 19 203 2 16 218 188 32 364 2 985 3645 35 18255 -370 750 34 542
2020-06-27 0 796 3 246 302 3 65 0 2948 15 89 615 30 543 45 204 918 195 14 0 6368 0 17 167 3 16 170 99 85 284 0 1087 3713 9 20142 -100 606 66 521
2020-06-28 11 813 5 327 244 3 84 0 2889 4 70 624 22 402 25 127 1267 118 3 0 5493 2 93 221 1 28 264 160 29 327 1 983 3940 12 19610 -184 598 32 572
2020-06-29 7 793 5 302 394 3 101 0 2084 15 53 626 26 381 62 144 1105 122 1 0 5257 1 42 184 0 36 245 202 42 389 0 975 3949 34 18339 -554 681 8 624

108 rows × 39 columns

In [ ]:
df.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 108 entries, 2020-03-14 to 2020-06-29
Data columns (total 39 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   an      108 non-null    object
 1   ap      108 non-null    object
 2   ar      108 non-null    object
 3   as      108 non-null    object
 4   br      108 non-null    object
 5   ch      108 non-null    object
 6   ct      108 non-null    object
 7   dd      108 non-null    object
 8   dl      108 non-null    object
 9   dn      108 non-null    object
 10  ga      108 non-null    object
 11  gj      108 non-null    object
 12  hp      108 non-null    object
 13  hr      108 non-null    object
 14  jh      108 non-null    object
 15  jk      108 non-null    object
 16  ka      108 non-null    object
 17  kl      108 non-null    object
 18  la      108 non-null    object
 19  ld      108 non-null    object
 20  mh      108 non-null    object
 21  ml      108 non-null    object
 22  mn      108 non-null    object
 23  mp      108 non-null    object
 24  mz      108 non-null    object
 25  nl      108 non-null    object
 26  or      108 non-null    object
 27  pb      108 non-null    object
 28  py      108 non-null    object
 29  rj      108 non-null    object
 30  sk      108 non-null    object
 31  tg      108 non-null    object
 32  tn      108 non-null    object
 33  tr      108 non-null    object
 34  tt      108 non-null    object
 35  un      108 non-null    object
 36  up      108 non-null    object
 37  ut      108 non-null    object
 38  wb      108 non-null    object
dtypes: object(39)
memory usage: 33.8+ KB
In [ ]:
df.tn
Out[ ]:
date
2020-03-14       1
2020-03-15       0
2020-03-16       0
2020-03-17       0
2020-03-18       1
              ... 
2020-06-25    3509
2020-06-26    3645
2020-06-27    3713
2020-06-28    3940
2020-06-29    3949
Name: tn, Length: 108, dtype: object
In [ ]:
pd.to_numeric(df.tn)
Out[ ]:
date
2020-03-14       1
2020-03-15       0
2020-03-16       0
2020-03-17       0
2020-03-18       1
              ... 
2020-06-25    3509
2020-06-26    3645
2020-06-27    3713
2020-06-28    3940
2020-06-29    3949
Name: tn, Length: 108, dtype: int64
In [ ]:
df = df.apply(pd.to_numeric)
In [ ]:
df.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 108 entries, 2020-03-14 to 2020-06-29
Data columns (total 39 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   an      108 non-null    int64
 1   ap      108 non-null    int64
 2   ar      108 non-null    int64
 3   as      108 non-null    int64
 4   br      108 non-null    int64
 5   ch      108 non-null    int64
 6   ct      108 non-null    int64
 7   dd      108 non-null    int64
 8   dl      108 non-null    int64
 9   dn      108 non-null    int64
 10  ga      108 non-null    int64
 11  gj      108 non-null    int64
 12  hp      108 non-null    int64
 13  hr      108 non-null    int64
 14  jh      108 non-null    int64
 15  jk      108 non-null    int64
 16  ka      108 non-null    int64
 17  kl      108 non-null    int64
 18  la      108 non-null    int64
 19  ld      108 non-null    int64
 20  mh      108 non-null    int64
 21  ml      108 non-null    int64
 22  mn      108 non-null    int64
 23  mp      108 non-null    int64
 24  mz      108 non-null    int64
 25  nl      108 non-null    int64
 26  or      108 non-null    int64
 27  pb      108 non-null    int64
 28  py      108 non-null    int64
 29  rj      108 non-null    int64
 30  sk      108 non-null    int64
 31  tg      108 non-null    int64
 32  tn      108 non-null    int64
 33  tr      108 non-null    int64
 34  tt      108 non-null    int64
 35  un      108 non-null    int64
 36  up      108 non-null    int64
 37  ut      108 non-null    int64
 38  wb      108 non-null    int64
dtypes: int64(39)
memory usage: 33.8 KB
In [ ]:
df.tail(7)
Out[ ]:
an ap ar as br ch ct dd dl dn ga gj hp hr jh jk ka kl la ld mh ml mn mp mz nl or pb py rj sk tg tn tr tt un up ut wb
date
2020-06-23 2 462 10 203 157 7 83 0 3947 9 45 549 48 495 53 148 322 141 85 0 3214 1 23 183 0 50 167 162 19 395 1 879 2516 23 15656 183 571 133 370
2020-06-24 6 497 2 226 223 2 34 0 3788 13 42 572 31 490 26 186 397 152 0 0 3889 0 49 187 3 17 282 230 59 382 4 891 2865 0 16868 126 664 88 445
2020-06-25 2 553 12 364 215 3 37 0 3390 20 44 577 33 453 44 127 442 123 9 0 4842 0 86 147 0 8 210 142 41 287 2 920 3509 32 18205 352 636 68 475
2020-06-26 14 605 2 273 190 2 89 0 3460 15 44 580 25 421 31 213 445 150 5 0 5024 2 19 203 2 16 218 188 32 364 2 985 3645 35 18255 -370 750 34 542
2020-06-27 0 796 3 246 302 3 65 0 2948 15 89 615 30 543 45 204 918 195 14 0 6368 0 17 167 3 16 170 99 85 284 0 1087 3713 9 20142 -100 606 66 521
2020-06-28 11 813 5 327 244 3 84 0 2889 4 70 624 22 402 25 127 1267 118 3 0 5493 2 93 221 1 28 264 160 29 327 1 983 3940 12 19610 -184 598 32 572
2020-06-29 7 793 5 302 394 3 101 0 2084 15 53 626 26 381 62 144 1105 122 1 0 5257 1 42 184 0 36 245 202 42 389 0 975 3949 34 18339 -554 681 8 624

Styling tabulation

In [ ]:
df = df.tail(7)
In [ ]:
df.style
Out[ ]:
an ap ar as br ch ct dd dl dn ga gj hp hr jh jk ka kl la ld mh ml mn mp mz nl or pb py rj sk tg tn tr tt un up ut wb
date
2020-06-23 00:00:00 2 462 10 203 157 7 83 0 3947 9 45 549 48 495 53 148 322 141 85 0 3214 1 23 183 0 50 167 162 19 395 1 879 2516 23 15656 183 571 133 370
2020-06-24 00:00:00 6 497 2 226 223 2 34 0 3788 13 42 572 31 490 26 186 397 152 0 0 3889 0 49 187 3 17 282 230 59 382 4 891 2865 0 16868 126 664 88 445
2020-06-25 00:00:00 2 553 12 364 215 3 37 0 3390 20 44 577 33 453 44 127 442 123 9 0 4842 0 86 147 0 8 210 142 41 287 2 920 3509 32 18205 352 636 68 475
2020-06-26 00:00:00 14 605 2 273 190 2 89 0 3460 15 44 580 25 421 31 213 445 150 5 0 5024 2 19 203 2 16 218 188 32 364 2 985 3645 35 18255 -370 750 34 542
2020-06-27 00:00:00 0 796 3 246 302 3 65 0 2948 15 89 615 30 543 45 204 918 195 14 0 6368 0 17 167 3 16 170 99 85 284 0 1087 3713 9 20142 -100 606 66 521
2020-06-28 00:00:00 11 813 5 327 244 3 84 0 2889 4 70 624 22 402 25 127 1267 118 3 0 5493 2 93 221 1 28 264 160 29 327 1 983 3940 12 19610 -184 598 32 572
2020-06-29 00:00:00 7 793 5 302 394 3 101 0 2084 15 53 626 26 381 62 144 1105 122 1 0 5257 1 42 184 0 36 245 202 42 389 0 975 3949 34 18339 -554 681 8 624
In [ ]:
def colour_red_negative(x):
    color = 'red' if x < 0 else 'white'
    return 'color: ' + color
In [ ]:
df.style.applymap(colour_red_negative)
Out[ ]:
an ap ar as br ch ct dd dl dn ga gj hp hr jh jk ka kl la ld mh ml mn mp mz nl or pb py rj sk tg tn tr tt un up ut wb
date
2020-06-23 00:00:00 2 462 10 203 157 7 83 0 3947 9 45 549 48 495 53 148 322 141 85 0 3214 1 23 183 0 50 167 162 19 395 1 879 2516 23 15656 183 571 133 370
2020-06-24 00:00:00 6 497 2 226 223 2 34 0 3788 13 42 572 31 490 26 186 397 152 0 0 3889 0 49 187 3 17 282 230 59 382 4 891 2865 0 16868 126 664 88 445
2020-06-25 00:00:00 2 553 12 364 215 3 37 0 3390 20 44 577 33 453 44 127 442 123 9 0 4842 0 86 147 0 8 210 142 41 287 2 920 3509 32 18205 352 636 68 475
2020-06-26 00:00:00 14 605 2 273 190 2 89 0 3460 15 44 580 25 421 31 213 445 150 5 0 5024 2 19 203 2 16 218 188 32 364 2 985 3645 35 18255 -370 750 34 542
2020-06-27 00:00:00 0 796 3 246 302 3 65 0 2948 15 89 615 30 543 45 204 918 195 14 0 6368 0 17 167 3 16 170 99 85 284 0 1087 3713 9 20142 -100 606 66 521
2020-06-28 00:00:00 11 813 5 327 244 3 84 0 2889 4 70 624 22 402 25 127 1267 118 3 0 5493 2 93 221 1 28 264 160 29 327 1 983 3940 12 19610 -184 598 32 572
2020-06-29 00:00:00 7 793 5 302 394 3 101 0 2084 15 53 626 26 381 62 144 1105 122 1 0 5257 1 42 184 0 36 245 202 42 389 0 975 3949 34 18339 -554 681 8 624
In [ ]:
df.drop('un', axis=1, inplace=True)
In [ ]:
df.style.applymap(colour_red_negative)
Out[ ]:
an ap ar as br ch ct dd dl dn ga gj hp hr jh jk ka kl la ld mh ml mn mp mz nl or pb py rj sk tg tn tr tt up ut wb
date
2020-06-23 00:00:00 2 462 10 203 157 7 83 0 3947 9 45 549 48 495 53 148 322 141 85 0 3214 1 23 183 0 50 167 162 19 395 1 879 2516 23 15656 571 133 370
2020-06-24 00:00:00 6 497 2 226 223 2 34 0 3788 13 42 572 31 490 26 186 397 152 0 0 3889 0 49 187 3 17 282 230 59 382 4 891 2865 0 16868 664 88 445
2020-06-25 00:00:00 2 553 12 364 215 3 37 0 3390 20 44 577 33 453 44 127 442 123 9 0 4842 0 86 147 0 8 210 142 41 287 2 920 3509 32 18205 636 68 475
2020-06-26 00:00:00 14 605 2 273 190 2 89 0 3460 15 44 580 25 421 31 213 445 150 5 0 5024 2 19 203 2 16 218 188 32 364 2 985 3645 35 18255 750 34 542
2020-06-27 00:00:00 0 796 3 246 302 3 65 0 2948 15 89 615 30 543 45 204 918 195 14 0 6368 0 17 167 3 16 170 99 85 284 0 1087 3713 9 20142 606 66 521
2020-06-28 00:00:00 11 813 5 327 244 3 84 0 2889 4 70 624 22 402 25 127 1267 118 3 0 5493 2 93 221 1 28 264 160 29 327 1 983 3940 12 19610 598 32 572
2020-06-29 00:00:00 7 793 5 302 394 3 101 0 2084 15 53 626 26 381 62 144 1105 122 1 0 5257 1 42 184 0 36 245 202 42 389 0 975 3949 34 18339 681 8 624
In [ ]:
df.style.highlight_max(color='red')
Out[ ]:
an ap ar as br ch ct dd dl dn ga gj hp hr jh jk ka kl la ld mh ml mn mp mz nl or pb py rj sk tg tn tr tt up ut wb
date
2020-06-23 00:00:00 2 462 10 203 157 7 83 0 3947 9 45 549 48 495 53 148 322 141 85 0 3214 1 23 183 0 50 167 162 19 395 1 879 2516 23 15656 571 133 370
2020-06-24 00:00:00 6 497 2 226 223 2 34 0 3788 13 42 572 31 490 26 186 397 152 0 0 3889 0 49 187 3 17 282 230 59 382 4 891 2865 0 16868 664 88 445
2020-06-25 00:00:00 2 553 12 364 215 3 37 0 3390 20 44 577 33 453 44 127 442 123 9 0 4842 0 86 147 0 8 210 142 41 287 2 920 3509 32 18205 636 68 475
2020-06-26 00:00:00 14 605 2 273 190 2 89 0 3460 15 44 580 25 421 31 213 445 150 5 0 5024 2 19 203 2 16 218 188 32 364 2 985 3645 35 18255 750 34 542
2020-06-27 00:00:00 0 796 3 246 302 3 65 0 2948 15 89 615 30 543 45 204 918 195 14 0 6368 0 17 167 3 16 170 99 85 284 0 1087 3713 9 20142 606 66 521
2020-06-28 00:00:00 11 813 5 327 244 3 84 0 2889 4 70 624 22 402 25 127 1267 118 3 0 5493 2 93 221 1 28 264 160 29 327 1 983 3940 12 19610 598 32 572
2020-06-29 00:00:00 7 793 5 302 394 3 101 0 2084 15 53 626 26 381 62 144 1105 122 1 0 5257 1 42 184 0 36 245 202 42 389 0 975 3949 34 18339 681 8 624
In [ ]:
df.drop(['dd', 'ld'], axis=1,inplace=True)
In [ ]:
df.style.highlight_max(color='red').highlight_min(color='green')
Out[ ]:
an ap ar as br ch ct dl dn ga gj hp hr jh jk ka kl la mh ml mn mp mz nl or pb py rj sk tg tn tr tt up ut wb
date
2020-06-23 00:00:00 2 462 10 203 157 7 83 3947 9 45 549 48 495 53 148 322 141 85 3214 1 23 183 0 50 167 162 19 395 1 879 2516 23 15656 571 133 370
2020-06-24 00:00:00 6 497 2 226 223 2 34 3788 13 42 572 31 490 26 186 397 152 0 3889 0 49 187 3 17 282 230 59 382 4 891 2865 0 16868 664 88 445
2020-06-25 00:00:00 2 553 12 364 215 3 37 3390 20 44 577 33 453 44 127 442 123 9 4842 0 86 147 0 8 210 142 41 287 2 920 3509 32 18205 636 68 475
2020-06-26 00:00:00 14 605 2 273 190 2 89 3460 15 44 580 25 421 31 213 445 150 5 5024 2 19 203 2 16 218 188 32 364 2 985 3645 35 18255 750 34 542
2020-06-27 00:00:00 0 796 3 246 302 3 65 2948 15 89 615 30 543 45 204 918 195 14 6368 0 17 167 3 16 170 99 85 284 0 1087 3713 9 20142 606 66 521
2020-06-28 00:00:00 11 813 5 327 244 3 84 2889 4 70 624 22 402 25 127 1267 118 3 5493 2 93 221 1 28 264 160 29 327 1 983 3940 12 19610 598 32 572
2020-06-29 00:00:00 7 793 5 302 394 3 101 2084 15 53 626 26 381 62 144 1105 122 1 5257 1 42 184 0 36 245 202 42 389 0 975 3949 34 18339 681 8 624
In [ ]:
df.drop('tt', axis=1, inplace=True)
In [ ]:
def bold_max_value(x):
    is_max = (x == x.max())
    return ['font-weight: bold' if y else '' for y in is_max]
In [ ]:
df.style.apply(bold_max_value)
Out[ ]:
an ap ar as br ch ct dl dn ga gj hp hr jh jk ka kl la mh ml mn mp mz nl or pb py rj sk tg tn tr up ut wb
date
2020-06-23 00:00:00 2 462 10 203 157 7 83 3947 9 45 549 48 495 53 148 322 141 85 3214 1 23 183 0 50 167 162 19 395 1 879 2516 23 571 133 370
2020-06-24 00:00:00 6 497 2 226 223 2 34 3788 13 42 572 31 490 26 186 397 152 0 3889 0 49 187 3 17 282 230 59 382 4 891 2865 0 664 88 445
2020-06-25 00:00:00 2 553 12 364 215 3 37 3390 20 44 577 33 453 44 127 442 123 9 4842 0 86 147 0 8 210 142 41 287 2 920 3509 32 636 68 475
2020-06-26 00:00:00 14 605 2 273 190 2 89 3460 15 44 580 25 421 31 213 445 150 5 5024 2 19 203 2 16 218 188 32 364 2 985 3645 35 750 34 542
2020-06-27 00:00:00 0 796 3 246 302 3 65 2948 15 89 615 30 543 45 204 918 195 14 6368 0 17 167 3 16 170 99 85 284 0 1087 3713 9 606 66 521
2020-06-28 00:00:00 11 813 5 327 244 3 84 2889 4 70 624 22 402 25 127 1267 118 3 5493 2 93 221 1 28 264 160 29 327 1 983 3940 12 598 32 572
2020-06-29 00:00:00 7 793 5 302 394 3 101 2084 15 53 626 26 381 62 144 1105 122 1 5257 1 42 184 0 36 245 202 42 389 0 975 3949 34 681 8 624
In [ ]:
df.style.apply(bold_max_value).highlight_min(color='green')
Out[ ]:
an ap ar as br ch ct dl dn ga gj hp hr jh jk ka kl la mh ml mn mp mz nl or pb py rj sk tg tn tr up ut wb
date
2020-06-23 00:00:00 2 462 10 203 157 7 83 3947 9 45 549 48 495 53 148 322 141 85 3214 1 23 183 0 50 167 162 19 395 1 879 2516 23 571 133 370
2020-06-24 00:00:00 6 497 2 226 223 2 34 3788 13 42 572 31 490 26 186 397 152 0 3889 0 49 187 3 17 282 230 59 382 4 891 2865 0 664 88 445
2020-06-25 00:00:00 2 553 12 364 215 3 37 3390 20 44 577 33 453 44 127 442 123 9 4842 0 86 147 0 8 210 142 41 287 2 920 3509 32 636 68 475
2020-06-26 00:00:00 14 605 2 273 190 2 89 3460 15 44 580 25 421 31 213 445 150 5 5024 2 19 203 2 16 218 188 32 364 2 985 3645 35 750 34 542
2020-06-27 00:00:00 0 796 3 246 302 3 65 2948 15 89 615 30 543 45 204 918 195 14 6368 0 17 167 3 16 170 99 85 284 0 1087 3713 9 606 66 521
2020-06-28 00:00:00 11 813 5 327 244 3 84 2889 4 70 624 22 402 25 127 1267 118 3 5493 2 93 221 1 28 264 160 29 327 1 983 3940 12 598 32 572
2020-06-29 00:00:00 7 793 5 302 394 3 101 2084 15 53 626 26 381 62 144 1105 122 1 5257 1 42 184 0 36 245 202 42 389 0 975 3949 34 681 8 624
In [ ]:
df.style.apply(bold_max_value).highlight_min(color='green', axis=1)
Out[ ]:
an ap ar as br ch ct dl dn ga gj hp hr jh jk ka kl la mh ml mn mp mz nl or pb py rj sk tg tn tr up ut wb
date
2020-06-23 00:00:00 2 462 10 203 157 7 83 3947 9 45 549 48 495 53 148 322 141 85 3214 1 23 183 0 50 167 162 19 395 1 879 2516 23 571 133 370
2020-06-24 00:00:00 6 497 2 226 223 2 34 3788 13 42 572 31 490 26 186 397 152 0 3889 0 49 187 3 17 282 230 59 382 4 891 2865 0 664 88 445
2020-06-25 00:00:00 2 553 12 364 215 3 37 3390 20 44 577 33 453 44 127 442 123 9 4842 0 86 147 0 8 210 142 41 287 2 920 3509 32 636 68 475
2020-06-26 00:00:00 14 605 2 273 190 2 89 3460 15 44 580 25 421 31 213 445 150 5 5024 2 19 203 2 16 218 188 32 364 2 985 3645 35 750 34 542
2020-06-27 00:00:00 0 796 3 246 302 3 65 2948 15 89 615 30 543 45 204 918 195 14 6368 0 17 167 3 16 170 99 85 284 0 1087 3713 9 606 66 521
2020-06-28 00:00:00 11 813 5 327 244 3 84 2889 4 70 624 22 402 25 127 1267 118 3 5493 2 93 221 1 28 264 160 29 327 1 983 3940 12 598 32 572
2020-06-29 00:00:00 7 793 5 302 394 3 101 2084 15 53 626 26 381 62 144 1105 122 1 5257 1 42 184 0 36 245 202 42 389 0 975 3949 34 681 8 624
In [ ]:
df.style.apply(bold_max_value).highlight_max(color='red', axis=1)
Out[ ]:
an ap ar as br ch ct dl dn ga gj hp hr jh jk ka kl la mh ml mn mp mz nl or pb py rj sk tg tn tr up ut wb
date
2020-06-23 00:00:00 2 462 10 203 157 7 83 3947 9 45 549 48 495 53 148 322 141 85 3214 1 23 183 0 50 167 162 19 395 1 879 2516 23 571 133 370
2020-06-24 00:00:00 6 497 2 226 223 2 34 3788 13 42 572 31 490 26 186 397 152 0 3889 0 49 187 3 17 282 230 59 382 4 891 2865 0 664 88 445
2020-06-25 00:00:00 2 553 12 364 215 3 37 3390 20 44 577 33 453 44 127 442 123 9 4842 0 86 147 0 8 210 142 41 287 2 920 3509 32 636 68 475
2020-06-26 00:00:00 14 605 2 273 190 2 89 3460 15 44 580 25 421 31 213 445 150 5 5024 2 19 203 2 16 218 188 32 364 2 985 3645 35 750 34 542
2020-06-27 00:00:00 0 796 3 246 302 3 65 2948 15 89 615 30 543 45 204 918 195 14 6368 0 17 167 3 16 170 99 85 284 0 1087 3713 9 606 66 521
2020-06-28 00:00:00 11 813 5 327 244 3 84 2889 4 70 624 22 402 25 127 1267 118 3 5493 2 93 221 1 28 264 160 29 327 1 983 3940 12 598 32 572
2020-06-29 00:00:00 7 793 5 302 394 3 101 2084 15 53 626 26 381 62 144 1105 122 1 5257 1 42 184 0 36 245 202 42 389 0 975 3949 34 681 8 624
In [ ]:
df.style.background_gradient(cmap='Reds')
Out[ ]:
an ap ar as br ch ct dl dn ga gj hp hr jh jk ka kl la mh ml mn mp mz nl or pb py rj sk tg tn tr up ut wb
date
2020-06-23 00:00:00 2 462 10 203 157 7 83 3947 9 45 549 48 495 53 148 322 141 85 3214 1 23 183 0 50 167 162 19 395 1 879 2516 23 571 133 370
2020-06-24 00:00:00 6 497 2 226 223 2 34 3788 13 42 572 31 490 26 186 397 152 0 3889 0 49 187 3 17 282 230 59 382 4 891 2865 0 664 88 445
2020-06-25 00:00:00 2 553 12 364 215 3 37 3390 20 44 577 33 453 44 127 442 123 9 4842 0 86 147 0 8 210 142 41 287 2 920 3509 32 636 68 475
2020-06-26 00:00:00 14 605 2 273 190 2 89 3460 15 44 580 25 421 31 213 445 150 5 5024 2 19 203 2 16 218 188 32 364 2 985 3645 35 750 34 542
2020-06-27 00:00:00 0 796 3 246 302 3 65 2948 15 89 615 30 543 45 204 918 195 14 6368 0 17 167 3 16 170 99 85 284 0 1087 3713 9 606 66 521
2020-06-28 00:00:00 11 813 5 327 244 3 84 2889 4 70 624 22 402 25 127 1267 118 3 5493 2 93 221 1 28 264 160 29 327 1 983 3940 12 598 32 572
2020-06-29 00:00:00 7 793 5 302 394 3 101 2084 15 53 626 26 381 62 144 1105 122 1 5257 1 42 184 0 36 245 202 42 389 0 975 3949 34 681 8 624
In [ ]:
df.style.background_gradient(cmap='Reds', axis=1)
Out[ ]:
an ap ar as br ch ct dl dn ga gj hp hr jh jk ka kl la mh ml mn mp mz nl or pb py rj sk tg tn tr up ut wb
date
2020-06-23 00:00:00 2 462 10 203 157 7 83 3947 9 45 549 48 495 53 148 322 141 85 3214 1 23 183 0 50 167 162 19 395 1 879 2516 23 571 133 370
2020-06-24 00:00:00 6 497 2 226 223 2 34 3788 13 42 572 31 490 26 186 397 152 0 3889 0 49 187 3 17 282 230 59 382 4 891 2865 0 664 88 445
2020-06-25 00:00:00 2 553 12 364 215 3 37 3390 20 44 577 33 453 44 127 442 123 9 4842 0 86 147 0 8 210 142 41 287 2 920 3509 32 636 68 475
2020-06-26 00:00:00 14 605 2 273 190 2 89 3460 15 44 580 25 421 31 213 445 150 5 5024 2 19 203 2 16 218 188 32 364 2 985 3645 35 750 34 542
2020-06-27 00:00:00 0 796 3 246 302 3 65 2948 15 89 615 30 543 45 204 918 195 14 6368 0 17 167 3 16 170 99 85 284 0 1087 3713 9 606 66 521
2020-06-28 00:00:00 11 813 5 327 244 3 84 2889 4 70 624 22 402 25 127 1267 118 3 5493 2 93 221 1 28 264 160 29 327 1 983 3940 12 598 32 572
2020-06-29 00:00:00 7 793 5 302 394 3 101 2084 15 53 626 26 381 62 144 1105 122 1 5257 1 42 184 0 36 245 202 42 389 0 975 3949 34 681 8 624
In [ ]:
df.style.background_gradient(cmap='Reds', subset=['mh', 'tn', 'dl'])
Out[ ]:
an ap ar as br ch ct dl dn ga gj hp hr jh jk ka kl la mh ml mn mp mz nl or pb py rj sk tg tn tr up ut wb
date
2020-06-23 00:00:00 2 462 10 203 157 7 83 3947 9 45 549 48 495 53 148 322 141 85 3214 1 23 183 0 50 167 162 19 395 1 879 2516 23 571 133 370
2020-06-24 00:00:00 6 497 2 226 223 2 34 3788 13 42 572 31 490 26 186 397 152 0 3889 0 49 187 3 17 282 230 59 382 4 891 2865 0 664 88 445
2020-06-25 00:00:00 2 553 12 364 215 3 37 3390 20 44 577 33 453 44 127 442 123 9 4842 0 86 147 0 8 210 142 41 287 2 920 3509 32 636 68 475
2020-06-26 00:00:00 14 605 2 273 190 2 89 3460 15 44 580 25 421 31 213 445 150 5 5024 2 19 203 2 16 218 188 32 364 2 985 3645 35 750 34 542
2020-06-27 00:00:00 0 796 3 246 302 3 65 2948 15 89 615 30 543 45 204 918 195 14 6368 0 17 167 3 16 170 99 85 284 0 1087 3713 9 606 66 521
2020-06-28 00:00:00 11 813 5 327 244 3 84 2889 4 70 624 22 402 25 127 1267 118 3 5493 2 93 221 1 28 264 160 29 327 1 983 3940 12 598 32 572
2020-06-29 00:00:00 7 793 5 302 394 3 101 2084 15 53 626 26 381 62 144 1105 122 1 5257 1 42 184 0 36 245 202 42 389 0 975 3949 34 681 8 624
In [ ]:
df.style.bar()
Out[ ]:
an ap ar as br ch ct dl dn ga gj hp hr jh jk ka kl la mh ml mn mp mz nl or pb py rj sk tg tn tr up ut wb
date
2020-06-23 00:00:00 2 462 10 203 157 7 83 3947 9 45 549 48 495 53 148 322 141 85 3214 1 23 183 0 50 167 162 19 395 1 879 2516 23 571 133 370
2020-06-24 00:00:00 6 497 2 226 223 2 34 3788 13 42 572 31 490 26 186 397 152 0 3889 0 49 187 3 17 282 230 59 382 4 891 2865 0 664 88 445
2020-06-25 00:00:00 2 553 12 364 215 3 37 3390 20 44 577 33 453 44 127 442 123 9 4842 0 86 147 0 8 210 142 41 287 2 920 3509 32 636 68 475
2020-06-26 00:00:00 14 605 2 273 190 2 89 3460 15 44 580 25 421 31 213 445 150 5 5024 2 19 203 2 16 218 188 32 364 2 985 3645 35 750 34 542
2020-06-27 00:00:00 0 796 3 246 302 3 65 2948 15 89 615 30 543 45 204 918 195 14 6368 0 17 167 3 16 170 99 85 284 0 1087 3713 9 606 66 521
2020-06-28 00:00:00 11 813 5 327 244 3 84 2889 4 70 624 22 402 25 127 1267 118 3 5493 2 93 221 1 28 264 160 29 327 1 983 3940 12 598 32 572
2020-06-29 00:00:00 7 793 5 302 394 3 101 2084 15 53 626 26 381 62 144 1105 122 1 5257 1 42 184 0 36 245 202 42 389 0 975 3949 34 681 8 624
In [ ]:
df.style.bar(subset=['mh', 'tn', 'dl'])
Out[ ]:
an ap ar as br ch ct dl dn ga gj hp hr jh jk ka kl la mh ml mn mp mz nl or pb py rj sk tg tn tr up ut wb
date
2020-06-23 00:00:00 2 462 10 203 157 7 83 3947 9 45 549 48 495 53 148 322 141 85 3214 1 23 183 0 50 167 162 19 395 1 879 2516 23 571 133 370
2020-06-24 00:00:00 6 497 2 226 223 2 34 3788 13 42 572 31 490 26 186 397 152 0 3889 0 49 187 3 17 282 230 59 382 4 891 2865 0 664 88 445
2020-06-25 00:00:00 2 553 12 364 215 3 37 3390 20 44 577 33 453 44 127 442 123 9 4842 0 86 147 0 8 210 142 41 287 2 920 3509 32 636 68 475
2020-06-26 00:00:00 14 605 2 273 190 2 89 3460 15 44 580 25 421 31 213 445 150 5 5024 2 19 203 2 16 218 188 32 364 2 985 3645 35 750 34 542
2020-06-27 00:00:00 0 796 3 246 302 3 65 2948 15 89 615 30 543 45 204 918 195 14 6368 0 17 167 3 16 170 99 85 284 0 1087 3713 9 606 66 521
2020-06-28 00:00:00 11 813 5 327 244 3 84 2889 4 70 624 22 402 25 127 1267 118 3 5493 2 93 221 1 28 264 160 29 327 1 983 3940 12 598 32 572
2020-06-29 00:00:00 7 793 5 302 394 3 101 2084 15 53 626 26 381 62 144 1105 122 1 5257 1 42 184 0 36 245 202 42 389 0 975 3949 34 681 8 624
In [ ]:
df[['mh', 'tn', 'dl']].style.bar()
Out[ ]:
mh tn dl
date
2020-06-23 00:00:00 3214 2516 3947
2020-06-24 00:00:00 3889 2865 3788
2020-06-25 00:00:00 4842 3509 3390
2020-06-26 00:00:00 5024 3645 3460
2020-06-27 00:00:00 6368 3713 2948
2020-06-28 00:00:00 5493 3940 2889
2020-06-29 00:00:00 5257 3949 2084
In [ ]:
df[['mh', 'tn', 'dl']].style.bar(subset=['mh'], color='red').bar(subset=['tn'], color='orange').bar(subset=['dl'], color='yellow')
Out[ ]:
mh tn dl
date
2020-06-23 00:00:00 3214 2516 3947
2020-06-24 00:00:00 3889 2865 3788
2020-06-25 00:00:00 4842 3509 3390
2020-06-26 00:00:00 5024 3645 3460
2020-06-27 00:00:00 6368 3713 2948
2020-06-28 00:00:00 5493 3940 2889
2020-06-29 00:00:00 5257 3949 2084

Distribution of data

Distribution of a single continuous variable

Histogram

In [ ]:
x = np.random.normal(size=1000)
In [ ]:
sns.distplot(x);
In [ ]:
sns.distplot(x, kde=False);
In [ ]:
 
In [ ]:
sns.distplot(x, kde=False, rug=True);
In [ ]:
sns.distplot(x, kde=False, rug=True, bins=50);
In [ ]:
sns.kdeplot(x);
In [ ]:
sns.kdeplot(x, shade=True);
In [ ]:
y = np.random.uniform(size=1000)
In [ ]:
sns.kdeplot(x,shade=True)
sns.kdeplot(y,shade=True);
In [ ]:
d = sns.load_dataset('diamonds')
In [ ]:
d
Out[ ]:
carat cut color clarity depth table price x y z
0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43
1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31
2 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31
3 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63
4 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75
... ... ... ... ... ... ... ... ... ... ...
53935 0.72 Ideal D SI1 60.8 57.0 2757 5.75 5.76 3.50
53936 0.72 Good D SI1 63.1 55.0 2757 5.69 5.75 3.61
53937 0.70 Very Good D SI1 62.8 60.0 2757 5.66 5.68 3.56
53938 0.86 Premium H SI2 61.0 58.0 2757 6.15 6.12 3.74
53939 0.75 Ideal D SI2 62.2 55.0 2757 5.83 5.87 3.64

53940 rows × 10 columns

In [ ]:
d.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 53940 entries, 0 to 53939
Data columns (total 10 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   carat    53940 non-null  float64
 1   cut      53940 non-null  object 
 2   color    53940 non-null  object 
 3   clarity  53940 non-null  object 
 4   depth    53940 non-null  float64
 5   table    53940 non-null  float64
 6   price    53940 non-null  int64  
 7   x        53940 non-null  float64
 8   y        53940 non-null  float64
 9   z        53940 non-null  float64
dtypes: float64(6), int64(1), object(3)
memory usage: 4.1+ MB
In [ ]:
sns.distplot(d.carat);
In [ ]:
sns.distplot(d.price);
In [ ]:
sns.distplot(d.x);
In [ ]:
sns.distplot(d.x, rug=True);
In [ ]:
sns.distplot(d.sample(1000).x, rug=True, bins=50);
In [ ]:
sns.kdeplot(d.x, shade=True)
sns.kdeplot(d.y, shade=True)
sns.kdeplot(d.z, shade=True);

Box plot

In [ ]:
x = np.random.normal(size=1000)
In [ ]:
sns.boxplot(x)
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f0f1378fc18>
In [ ]:
sns.kdeplot(x);
In [ ]:
x = np.random.uniform(size=1000)
In [ ]:
sns.boxplot(x);
In [ ]:
sns.boxplot(x, whis=0.2)
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f0f14d0c6a0>
In [ ]:
x = np.random.normal(size=1000)
In [ ]:
sns.boxplot(x, whis=0.5);
In [ ]:
sns.boxplot(x, whis=0.5, fliersize=1);
In [ ]:
sns.boxplot(x, whis=0.5, fliersize=1, orient='v');
In [ ]:
sns.boxplot(d.price);
In [ ]:
sns.kdeplot(d.price);