Difference makes the DIFFERENCE
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default theme; color_codes=True additionally remaps
# matplotlib's shorthand color codes ("b", "g", "r", ...) to the seaborn palette.
sns.set(color_codes = True)
# Load seaborn's example "diamonds" dataset (53,940 rows x 10 columns in the
# output recorded below).
d = sns.load_dataset('diamonds')
# Bare expression: in a notebook this displays the DataFrame.
d
carat | cut | color | clarity | depth | table | price | x | y | z | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 0.23 | Ideal | E | SI2 | 61.5 | 55.0 | 326 | 3.95 | 3.98 | 2.43 |
1 | 0.21 | Premium | E | SI1 | 59.8 | 61.0 | 326 | 3.89 | 3.84 | 2.31 |
2 | 0.23 | Good | E | VS1 | 56.9 | 65.0 | 327 | 4.05 | 4.07 | 2.31 |
3 | 0.29 | Premium | I | VS2 | 62.4 | 58.0 | 334 | 4.20 | 4.23 | 2.63 |
4 | 0.31 | Good | J | SI2 | 63.3 | 58.0 | 335 | 4.34 | 4.35 | 2.75 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
53935 | 0.72 | Ideal | D | SI1 | 60.8 | 57.0 | 2757 | 5.75 | 5.76 | 3.50 |
53936 | 0.72 | Good | D | SI1 | 63.1 | 55.0 | 2757 | 5.69 | 5.75 | 3.61 |
53937 | 0.70 | Very Good | D | SI1 | 62.8 | 60.0 | 2757 | 5.66 | 5.68 | 3.56 |
53938 | 0.86 | Premium | H | SI2 | 61.0 | 58.0 | 2757 | 6.15 | 6.12 | 3.74 |
53939 | 0.75 | Ideal | D | SI2 | 62.2 | 55.0 | 2757 | 5.83 | 5.87 | 3.64 |
53940 rows × 10 columns
# Count the non-null entries of every remaining column within each cut
# category; the result is a DataFrame indexed by cut, one count column per field.
d.groupby('cut').count()
carat | color | clarity | depth | table | price | x | y | z | |
---|---|---|---|---|---|---|---|---|---|
cut | |||||||||
Ideal | 21551 | 21551 | 21551 | 21551 | 21551 | 21551 | 21551 | 21551 | 21551 |
Premium | 13791 | 13791 | 13791 | 13791 | 13791 | 13791 | 13791 | 13791 | 13791 |
Very Good | 12082 | 12082 | 12082 | 12082 | 12082 | 12082 | 12082 | 12082 | 12082 |
Good | 4906 | 4906 | 4906 | 4906 | 4906 | 4906 | 4906 | 4906 | 4906 |
Fair | 1610 | 1610 | 1610 | 1610 | 1610 | 1610 | 1610 | 1610 | 1610 |
# Repeat of the previous cell: per-cut count of non-null entries in every column.
d.groupby('cut').count()
carat | color | clarity | depth | table | price | x | y | z | |
---|---|---|---|---|---|---|---|---|---|
cut | |||||||||
Ideal | 21551 | 21551 | 21551 | 21551 | 21551 | 21551 | 21551 | 21551 | 21551 |
Premium | 13791 | 13791 | 13791 | 13791 | 13791 | 13791 | 13791 | 13791 | 13791 |
Very Good | 12082 | 12082 | 12082 | 12082 | 12082 | 12082 | 12082 | 12082 | 12082 |
Good | 4906 | 4906 | 4906 | 4906 | 4906 | 4906 | 4906 | 4906 | 4906 |
Fair | 1610 | 1610 | 1610 | 1610 | 1610 | 1610 | 1610 | 1610 | 1610 |
# Restrict the per-cut count to the 'cut' column itself, which yields a single
# Series (one count per cut category) instead of a DataFrame of repeated counts.
d.groupby('cut')['cut'].count()
cut Ideal 21551 Premium 13791 Very Good 12082 Good 4906 Fair 1610 Name: cut, dtype: int64
# Tally the diamonds in each cut category, then draw one bar per category
# whose height is that tally.
c = d.groupby('cut')['cut'].count()
sns.barplot(
    x=c.index,
    y=c.to_numpy(),
)
<matplotlib.axes._subplots.AxesSubplot at 0x7fa4d8712190>
# Preview the first five rows to recall the available columns.
d.head()
carat | cut | color | clarity | depth | table | price | x | y | z | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 0.23 | Ideal | E | SI2 | 61.5 | 55.0 | 326 | 3.95 | 3.98 | 2.43 |
1 | 0.21 | Premium | E | SI1 | 59.8 | 61.0 | 326 | 3.89 | 3.84 | 2.31 |
2 | 0.23 | Good | E | VS1 | 56.9 | 65.0 | 327 | 4.05 | 4.07 | 2.31 |
3 | 0.29 | Premium | I | VS2 | 62.4 | 58.0 | 334 | 4.20 | 4.23 | 2.63 |
4 | 0.31 | Good | J | SI2 | 63.3 | 58.0 | 335 | 4.34 | 4.35 | 2.75 |
# Tally the diamonds in each clarity grade, then draw one bar per grade
# whose height is that tally.
c = d.groupby('clarity')['clarity'].count()
sns.barplot(
    x=c.index,
    y=c.to_numpy(),
)
<matplotlib.axes._subplots.AxesSubplot at 0x7fa4d857df50>
# Tally the diamonds in each color grade, then draw one bar per grade
# whose height is that tally.
c = d.groupby('color')['color'].count()
sns.barplot(
    x=c.index,
    y=c.to_numpy(),
)
<matplotlib.axes._subplots.AxesSubplot at 0x7fa4d86b4f90>
# groupby('cut').count() returns a DataFrame with one count column per
# remaining field, so passing c.values (a 2-D array) as y made sns.barplot
# raise "ValueError: Data must be 1-dimensional".
c = d.groupby('cut').count()
# Plot a single count column instead. In this dataset every column holds the
# same per-cut count (see the groupby output above), so 'carat' is used as a
# representative; c itself stays a DataFrame.
sns.barplot(x=c.index, y=c['carat'].values)
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-19-5e3b9199463a> in <module>() ----> 1 sns.barplot(x=c.index, y=c.values) /usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py in inner_f(*args, **kwargs) 44 ) 45 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)}) ---> 46 return f(**kwargs) 47 return inner_f 48 /usr/local/lib/python3.7/dist-packages/seaborn/categorical.py in barplot(x, y, hue, data, order, hue_order, estimator, ci, n_boot, units, seed, orient, color, palette, saturation, errcolor, errwidth, capsize, dodge, ax, **kwargs) 3183 estimator, ci, n_boot, units, seed, 3184 orient, color, palette, saturation, -> 3185 errcolor, errwidth, capsize, dodge) 3186 3187 if ax is None: /usr/local/lib/python3.7/dist-packages/seaborn/categorical.py in __init__(self, x, y, hue, data, order, hue_order, estimator, ci, n_boot, units, seed, orient, color, palette, saturation, errcolor, errwidth, capsize, dodge) 1583 """Initialize the plotter.""" 1584 self.establish_variables(x, y, hue, data, orient, -> 1585 order, hue_order, units) 1586 self.establish_colors(color, palette, saturation) 1587 self.estimate_statistic(estimator, ci, n_boot, seed) /usr/local/lib/python3.7/dist-packages/seaborn/categorical.py in establish_variables(self, x, y, hue, data, orient, order, hue_order, units) 205 # Group the numeric data 206 plot_data, value_label = self._group_longform(vals, groups, --> 207 group_names) 208 209 # Now handle the hue levels for nested ordering /usr/local/lib/python3.7/dist-packages/seaborn/categorical.py in _group_longform(self, vals, grouper, order) 248 else: 249 index = None --> 250 vals = pd.Series(vals, index=index) 251 252 # Group the val data /usr/local/lib/python3.7/dist-packages/pandas/core/series.py in __init__(self, data, index, dtype, name, copy, fastpath) 437 data = data.copy() 438 else: --> 439 data = sanitize_array(data, index, dtype, copy) 440 441 
manager = get_option("mode.data_manager") /usr/local/lib/python3.7/dist-packages/pandas/core/construction.py in sanitize_array(data, index, dtype, copy, raise_cast_failure, allow_2d) 574 subarr = maybe_infer_to_datetimelike(subarr) 575 --> 576 subarr = _sanitize_ndim(subarr, data, dtype, index, allow_2d=allow_2d) 577 578 if isinstance(subarr, np.ndarray): /usr/local/lib/python3.7/dist-packages/pandas/core/construction.py in _sanitize_ndim(result, data, dtype, index, allow_2d) 625 if allow_2d: 626 return result --> 627 raise ValueError("Data must be 1-dimensional") 628 if is_object_dtype(dtype) and isinstance(dtype, ExtensionDtype): 629 # i.e. PandasDtype("O") ValueError: Data must be 1-dimensional