Categorical distribution - Seaborn - Bar Plots

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default theme; color_codes=True asks seaborn to remap
# matplotlib's shorthand color codes ("b", "g", "r", ...) to its palette.
sns.set(color_codes=True)

# Load the "diamonds" example dataset through seaborn.
d = sns.load_dataset('diamonds')

# Display the diamonds DataFrame (a notebook renders the value of a cell's last expression).
d

Distribution of categorical variables in the Diamonds dataset

Bar plots

# Group the rows by 'cut' and count the entries of every other column per group.
# The result is a DataFrame: one row per cut grade, one count column per remaining column.
d.groupby('cut').count()

# Same grouped count again (this cell repeats the previous one unchanged).
d.groupby('cut').count()

Split - apply - aggregate - GroupBy concept

# Restrict the grouped count to the 'cut' column itself, so the result is a
# single Series (one count per cut grade) instead of a count for every column.
(
    d
    .groupby('cut')['cut']
    .count()
)

cut
Ideal        21551
Premium      13791
Very Good    12082
Good          4906
Fair          1610
Name: cut, dtype: int64

# Number of diamonds in each cut grade, as a Series indexed by cut.
c = (
    d.groupby('cut')['cut']
    .count()
)

# One bar per cut grade: categories on the x axis, counts on the y axis.
sns.barplot(
    x=c.index,
    y=c.values,
)

<matplotlib.axes._subplots.AxesSubplot at 0x7fa4d8712190>

# Preview the first rows of the dataset to see which columns are available.
d.head()

# Number of diamonds in each clarity grade, as a Series indexed by clarity.
c = (
    d.groupby('clarity')['clarity']
    .count()
)

# One bar per clarity grade: categories on the x axis, counts on the y axis.
sns.barplot(
    x=c.index,
    y=c.values,
)

<matplotlib.axes._subplots.AxesSubplot at 0x7fa4d857df50>

# Number of diamonds of each color grade, as a Series indexed by color.
c = (
    d.groupby('color')['color']
    .count()
)

# One bar per color grade: categories on the x axis, counts on the y axis.
sns.barplot(
    x=c.index,
    y=c.values,
)

<matplotlib.axes._subplots.AxesSubplot at 0x7fa4d86b4f90>

# Count diamonds per cut grade for plotting. The column selection matters:
# d.groupby('cut').count() on the whole frame returns a DataFrame with one
# count column per remaining column, so c.values would be 2-D and
# sns.barplot would fail with "ValueError: Data must be 1-dimensional".
c = d.groupby('cut')['cut'].count()

# c is now a 1-D Series: categories on the index, counts as the values.
sns.barplot(x=c.index, y=c.values)

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-19-5e3b9199463a> in <module>()
----> 1 sns.barplot(x=c.index, y=c.values)

/usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py in inner_f(*args, **kwargs)
     44             )
     45         kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 46         return f(**kwargs)
     47     return inner_f
     48 

/usr/local/lib/python3.7/dist-packages/seaborn/categorical.py in barplot(x, y, hue, data, order, hue_order, estimator, ci, n_boot, units, seed, orient, color, palette, saturation, errcolor, errwidth, capsize, dodge, ax, **kwargs)
   3183                           estimator, ci, n_boot, units, seed,
   3184                           orient, color, palette, saturation,
-> 3185                           errcolor, errwidth, capsize, dodge)
   3186 
   3187     if ax is None:

/usr/local/lib/python3.7/dist-packages/seaborn/categorical.py in __init__(self, x, y, hue, data, order, hue_order, estimator, ci, n_boot, units, seed, orient, color, palette, saturation, errcolor, errwidth, capsize, dodge)
   1583         """Initialize the plotter."""
   1584         self.establish_variables(x, y, hue, data, orient,
-> 1585                                  order, hue_order, units)
   1586         self.establish_colors(color, palette, saturation)
   1587         self.estimate_statistic(estimator, ci, n_boot, seed)

/usr/local/lib/python3.7/dist-packages/seaborn/categorical.py in establish_variables(self, x, y, hue, data, orient, order, hue_order, units)
    205                 # Group the numeric data
    206                 plot_data, value_label = self._group_longform(vals, groups,
--> 207                                                               group_names)
    208 
    209                 # Now handle the hue levels for nested ordering

/usr/local/lib/python3.7/dist-packages/seaborn/categorical.py in _group_longform(self, vals, grouper, order)
    248             else:
    249                 index = None
--> 250             vals = pd.Series(vals, index=index)
    251 
    252         # Group the val data

/usr/local/lib/python3.7/dist-packages/pandas/core/series.py in __init__(self, data, index, dtype, name, copy, fastpath)
    437                     data = data.copy()
    438             else:
--> 439                 data = sanitize_array(data, index, dtype, copy)
    440 
    441                 manager = get_option("mode.data_manager")

/usr/local/lib/python3.7/dist-packages/pandas/core/construction.py in sanitize_array(data, index, dtype, copy, raise_cast_failure, allow_2d)
    574                 subarr = maybe_infer_to_datetimelike(subarr)
    575 
--> 576     subarr = _sanitize_ndim(subarr, data, dtype, index, allow_2d=allow_2d)
    577 
    578     if isinstance(subarr, np.ndarray):

/usr/local/lib/python3.7/dist-packages/pandas/core/construction.py in _sanitize_ndim(result, data, dtype, index, allow_2d)
    625             if allow_2d:
    626                 return result
--> 627             raise ValueError("Data must be 1-dimensional")
    628         if is_object_dtype(dtype) and isinstance(dtype, ExtensionDtype):
    629             # i.e. PandasDtype("O")

ValueError: Data must be 1-dimensional

	carat	cut	color	clarity	depth	table	price	x	y	z
0	0.23	Ideal	E	SI2	61.5	55.0	326	3.95	3.98	2.43
1	0.21	Premium	E	SI1	59.8	61.0	326	3.89	3.84	2.31
2	0.23	Good	E	VS1	56.9	65.0	327	4.05	4.07	2.31
3	0.29	Premium	I	VS2	62.4	58.0	334	4.20	4.23	2.63
4	0.31	Good	J	SI2	63.3	58.0	335	4.34	4.35	2.75
...	...	...	...	...	...	...	...	...	...	...
53935	0.72	Ideal	D	SI1	60.8	57.0	2757	5.75	5.76	3.50
53936	0.72	Good	D	SI1	63.1	55.0	2757	5.69	5.75	3.61
53937	0.70	Very Good	D	SI1	62.8	60.0	2757	5.66	5.68	3.56
53938	0.86	Premium	H	SI2	61.0	58.0	2757	6.15	6.12	3.74
53939	0.75	Ideal	D	SI2	62.2	55.0	2757	5.83	5.87	3.64

	carat	cut	color	clarity	depth	table	price	x	y	z
0	0.23	Ideal	E	SI2	61.5	55.0	326	3.95	3.98	2.43
1	0.21	Premium	E	SI1	59.8	61.0	326	3.89	3.84	2.31
2	0.23	Good	E	VS1	56.9	65.0	327	4.05	4.07	2.31
3	0.29	Premium	I	VS2	62.4	58.0	334	4.20	4.23	2.63
4	0.31	Good	J	SI2	63.3	58.0	335	4.34	4.35	2.75

Consulting - Quality

SEABORN BAR PLOTS

Padhai - Classwork

Categorical distribution - Seaborn - Bar Plots

Distribution of categorical variables in the Diamonds dataset

Bar plots

Split - apply - aggregate - GroupBy concept

	carat	color	clarity	depth	table	price	x	y	z
cut
Ideal	21551	21551	21551	21551	21551	21551	21551	21551	21551
Premium	13791	13791	13791	13791	13791	13791	13791	13791	13791
Very Good	12082	12082	12082	12082	12082	12082	12082	12082	12082
Good	4906	4906	4906	4906	4906	4906	4906	4906	4906
Fair	1610	1610	1610	1610	1610	1610	1610	1610	1610