SEABORN BAR PLOTS
Padhai - Classwork
padhai_barplots

Categorical distribution - Seaborn - Bar Plots

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(color_codes = True)
In [2]:
d = sns.load_dataset('diamonds')
In [3]:
d
Out[3]:
carat cut color clarity depth table price x y z
0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43
1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31
2 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31
3 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63
4 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75
... ... ... ... ... ... ... ... ... ... ...
53935 0.72 Ideal D SI1 60.8 57.0 2757 5.75 5.76 3.50
53936 0.72 Good D SI1 63.1 55.0 2757 5.69 5.75 3.61
53937 0.70 Very Good D SI1 62.8 60.0 2757 5.66 5.68 3.56
53938 0.86 Premium H SI2 61.0 58.0 2757 6.15 6.12 3.74
53939 0.75 Ideal D SI2 62.2 55.0 2757 5.83 5.87 3.64

53940 rows × 10 columns

distribution of Categorical dataset in Diamonds

Bar plots

In [4]:
d.groupby('cut').count()
Out[4]:
carat color clarity depth table price x y z
cut
Ideal 21551 21551 21551 21551 21551 21551 21551 21551 21551
Premium 13791 13791 13791 13791 13791 13791 13791 13791 13791
Very Good 12082 12082 12082 12082 12082 12082 12082 12082 12082
Good 4906 4906 4906 4906 4906 4906 4906 4906 4906
Fair 1610 1610 1610 1610 1610 1610 1610 1610 1610
In [7]:
d.groupby('cut').count()
Out[7]:
carat color clarity depth table price x y z
cut
Ideal 21551 21551 21551 21551 21551 21551 21551 21551 21551
Premium 13791 13791 13791 13791 13791 13791 13791 13791 13791
Very Good 12082 12082 12082 12082 12082 12082 12082 12082 12082
Good 4906 4906 4906 4906 4906 4906 4906 4906 4906
Fair 1610 1610 1610 1610 1610 1610 1610 1610 1610

split - apply - aggregate - GrpoupBy Concept

In [9]:
# only for the cut type

d.groupby('cut')['cut'].count()
Out[9]:
cut
Ideal        21551
Premium      13791
Very Good    12082
Good          4906
Fair          1610
Name: cut, dtype: int64
In [11]:
c = d.groupby('cut')['cut'].count()
In [12]:
sns.barplot(x=c.index, y=c.values)
Out[12]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fa4d8712190>
In [13]:
d.head()
Out[13]:
carat cut color clarity depth table price x y z
0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43
1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31
2 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31
3 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63
4 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75
In [14]:
c = d.groupby('clarity')['clarity'].count()
In [15]:
sns.barplot(x=c.index, y=c.values)
Out[15]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fa4d857df50>
In [16]:
c = d.groupby('color')['color'].count()
In [17]:
sns.barplot(x=c.index, y=c.values)
Out[17]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fa4d86b4f90>
In [18]:
c = d.groupby('cut').count()
In [19]:
sns.barplot(x=c.index, y=c.values)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-19-5e3b9199463a> in <module>()
----> 1 sns.barplot(x=c.index, y=c.values)

/usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py in inner_f(*args, **kwargs)
     44             )
     45         kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 46         return f(**kwargs)
     47     return inner_f
     48 

/usr/local/lib/python3.7/dist-packages/seaborn/categorical.py in barplot(x, y, hue, data, order, hue_order, estimator, ci, n_boot, units, seed, orient, color, palette, saturation, errcolor, errwidth, capsize, dodge, ax, **kwargs)
   3183                           estimator, ci, n_boot, units, seed,
   3184                           orient, color, palette, saturation,
-> 3185                           errcolor, errwidth, capsize, dodge)
   3186 
   3187     if ax is None:

/usr/local/lib/python3.7/dist-packages/seaborn/categorical.py in __init__(self, x, y, hue, data, order, hue_order, estimator, ci, n_boot, units, seed, orient, color, palette, saturation, errcolor, errwidth, capsize, dodge)
   1583         """Initialize the plotter."""
   1584         self.establish_variables(x, y, hue, data, orient,
-> 1585                                  order, hue_order, units)
   1586         self.establish_colors(color, palette, saturation)
   1587         self.estimate_statistic(estimator, ci, n_boot, seed)

/usr/local/lib/python3.7/dist-packages/seaborn/categorical.py in establish_variables(self, x, y, hue, data, orient, order, hue_order, units)
    205                 # Group the numeric data
    206                 plot_data, value_label = self._group_longform(vals, groups,
--> 207                                                               group_names)
    208 
    209                 # Now handle the hue levels for nested ordering

/usr/local/lib/python3.7/dist-packages/seaborn/categorical.py in _group_longform(self, vals, grouper, order)
    248             else:
    249                 index = None
--> 250             vals = pd.Series(vals, index=index)
    251 
    252         # Group the val data

/usr/local/lib/python3.7/dist-packages/pandas/core/series.py in __init__(self, data, index, dtype, name, copy, fastpath)
    437                     data = data.copy()
    438             else:
--> 439                 data = sanitize_array(data, index, dtype, copy)
    440 
    441                 manager = get_option("mode.data_manager")

/usr/local/lib/python3.7/dist-packages/pandas/core/construction.py in sanitize_array(data, index, dtype, copy, raise_cast_failure, allow_2d)
    574                 subarr = maybe_infer_to_datetimelike(subarr)
    575 
--> 576     subarr = _sanitize_ndim(subarr, data, dtype, index, allow_2d=allow_2d)
    577 
    578     if isinstance(subarr, np.ndarray):

/usr/local/lib/python3.7/dist-packages/pandas/core/construction.py in _sanitize_ndim(result, data, dtype, index, allow_2d)
    625             if allow_2d:
    626                 return result
--> 627             raise ValueError("Data must be 1-dimensional")
    628         if is_object_dtype(dtype) and isinstance(dtype, ExtensionDtype):
    629             # i.e. PandasDtype("O")

ValueError: Data must be 1-dimensional
In [ ]: