SEABORN - SCATTER PLOT Padhai ClassWork
dvp2_scatter_plots

Scatter Plot and Regression plot with "tips" dataset from seaborn package

In [ ]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use(['dark_background'])
import seaborn as sns
sns.set(color_codes = True)

to place and size the graph to desired size and location

  • fig = plt.gcf()
  • fig.set_size_inches(8,6)
  • plt.legend(bbox_to_anchor = (1,1))
In [ ]:
t = sns.load_dataset('tips')
In [ ]:
t.head(2)
Out[ ]:
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
In [ ]:
sns.scatterplot(x='tip', y='total_bill', data = t)
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f975d668890>
In [ ]:
sns.scatterplot(x='total_bill', y='tip', data = t)
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f975d668b10>

the above plot conveys that tip is proportional to bill amount, apart from some outliners

In [ ]:
t.head(2)
Out[ ]:
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
In [ ]:
sns.scatterplot(x='tip', y='smoker', data = t)
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f975d06aed0>
In [ ]:
t.groupby(t['smoker']).count()
Out[ ]:
total_bill tip sex day time size
smoker
Yes 93 93 93 93 93 93
No 151 151 151 151 151 151
In [ ]:
t.head(2)
Out[ ]:
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
In [ ]:
sns.scatterplot(x='total_bill', y='size', data=t)
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f975b1d8610>
In [ ]:
size = t.groupby(['size']).count()
In [ ]:
size
Out[ ]:
total_bill tip sex smoker day time
size
1 4 4 4 4 4 4
2 156 156 156 156 156 156
3 38 38 38 38 38 38
4 37 37 37 37 37 37
5 5 5 5 5 5 5
6 4 4 4 4 4 4
In [ ]:
sns.scatterplot(x='tip', y='size', data=t)
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f975b0fcb90>
In [ ]:
t.info(); t.describe()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   total_bill  244 non-null    float64 
 1   tip         244 non-null    float64 
 2   sex         244 non-null    category
 3   smoker      244 non-null    category
 4   day         244 non-null    category
 5   time        244 non-null    category
 6   size        244 non-null    int64   
dtypes: category(4), float64(2), int64(1)
memory usage: 7.4 KB
Out[ ]:
total_bill tip size
count 244.000000 244.000000 244.000000
mean 19.785943 2.998279 2.569672
std 8.902412 1.383638 0.951100
min 3.070000 1.000000 1.000000
25% 13.347500 2.000000 2.000000
50% 17.795000 2.900000 2.000000
75% 24.127500 3.562500 3.000000
max 50.810000 10.000000 6.000000
In [ ]:
t.head(1)
Out[ ]:
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
In [ ]:
sns.scatterplot(x='total_bill', y='tip', hue='smoker', data=t)
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f975b08c2d0>
In [ ]:
sns.scatterplot(x='total_bill', y='tip', data = t)
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f975b0f0890>
In [ ]:
t['frac_tip'] = t['tip']/t['total_bill']
In [ ]:
t.head(2)
Out[ ]:
total_bill tip sex smoker day time size frac_tip
0 16.99 1.01 Female No Sun Dinner 2 0.059447
1 10.34 1.66 Male No Sun Dinner 3 0.160542
In [ ]:
sns.scatterplot(x='total_bill', y='frac_tip', data = t)
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f975ac14850>
In [ ]:
t['tTipFrac'] = (t['tip'] + t['total_bill']) / t['total_bill']
In [ ]:
t.head(2)
Out[ ]:
total_bill tip sex smoker day time size frac_tip tTipFrac
0 16.99 1.01 Female No Sun Dinner 2 0.059447 1.059447
1 10.34 1.66 Male No Sun Dinner 3 0.160542 1.160542
In [ ]:
sns.scatterplot(x = "total_bill", y = 'tTipFrac', data = t)
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f975aa2f490>
In [ ]:
t.head()
Out[ ]:
total_bill tip sex smoker day time size frac_tip tTipFrac
0 16.99 1.01 Female No Sun Dinner 2 0.059447 1.059447
1 10.34 1.66 Male No Sun Dinner 3 0.160542 1.160542
2 21.01 3.50 Male No Sun Dinner 3 0.166587 1.166587
3 23.68 3.31 Male No Sun Dinner 2 0.139780 1.139780
4 24.59 3.61 Female No Sun Dinner 4 0.146808 1.146808
In [ ]:
t.drop('tTipFrac', inplace = True, axis=1)
In [ ]:
t.head()
Out[ ]:
total_bill tip sex smoker day time size frac_tip
0 16.99 1.01 Female No Sun Dinner 2 0.059447
1 10.34 1.66 Male No Sun Dinner 3 0.160542
2 21.01 3.50 Male No Sun Dinner 3 0.166587
3 23.68 3.31 Male No Sun Dinner 2 0.139780
4 24.59 3.61 Female No Sun Dinner 4 0.146808
In [ ]:
sns.scatterplot(x = "total_bill", y = 'tip', data = t, hue = 'time')
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f975af4bdd0>
In [ ]:
sns.scatterplot(x = "total_bill", y = 'tip', data = t, hue = 'sex')
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f975a958590>
In [ ]:
sns.scatterplot(x = "total_bill", y = 'tip', data = t, hue = 'smoker')
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f975a990210>
In [ ]:
sns.scatterplot(x = "total_bill", y = 'tip', data = t, hue = 'day')
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f975aa2fc90>
In [ ]:
sns.scatterplot(x = "total_bill", y = 'tip', data = t, hue = 'size')
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f975a808810>
In [ ]:
sns.scatterplot(x = "total_bill", y = 'tip', data = t, hue = 'time', style='sex')
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f975a1ba150>
In [ ]:
fig = plt.gcf()
fig.set_size_inches(15, 6)
sns.scatterplot(x = "total_bill", y = 'tip', data = t, hue = 'time', style='sex', size='size')
plt.legend(bbox_to_anchor=(1, 1))
Out[ ]:
<matplotlib.legend.Legend at 0x7f9759ee3550>
In [ ]:
sns.regplot(x = "total_bill", y = 'tip', data = t)
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f9759edc050>
In [ ]:
t.head(2)
Out[ ]:
total_bill tip sex smoker day time size frac_tip
0 16.99 1.01 Female No Sun Dinner 2 0.059447
1 10.34 1.66 Male No Sun Dinner 3 0.160542
In [ ]:
sns.regplot(x = "total_bill", y = 'frac_tip', data = t)
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f975a0c2450>
In [ ]:
sns.regplot(x = "total_bill", y = 'tip', data = t, marker="+")
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f975a5f91d0>
In [ ]:
sns.regplot(x = "total_bill", y = 'tip', data = t, marker=".")
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f9759fde150>
In [ ]:
d = sns.load_dataset('diamonds')
In [ ]:
sns.scatterplot('x', 'price', data = d.sample(1000))
/usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
  FutureWarning
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f9759c55550>
In [ ]:
sns.regplot('x', 'price', data = d.sample(1000))
/usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
  FutureWarning
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f9759c9ee10>
In [ ]:
sns.regplot('x', 'price', data = d.sample(1000), order=2)
/usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
  FutureWarning
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f9759b8f650>
In [ ]:
ds = d.sample(1000)
In [ ]:
fig = plt.gcf()
fig.set_size_inches(15, 6)
sns.regplot('x', 'price', data = ds, order = 2, marker="+")
plt.legend(bbox_to_anchor=(1, 1))
/usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
  FutureWarning
No handles with labels found to put in legend.
Out[ ]:
<matplotlib.legend.Legend at 0x7f97599c1c90>
In [ ]:
fig = plt.gcf()
fig.set_size_inches(15, 6)
sns.regplot('x', 'price', data = ds, order=2)
plt.legend(bbox_to_anchor=(1, 1))
/usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
  FutureWarning
No handles with labels found to put in legend.
Out[ ]:
<matplotlib.legend.Legend at 0x7f97599d3850>
In [ ]:
fig = plt.gcf()
fig.set_size_inches(15, 6)
sns.regplot('x', 'price', data = d, order=2)
plt.legend(bbox_to_anchor=(1, 1))
/usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
  FutureWarning
No handles with labels found to put in legend.
Out[ ]:
<matplotlib.legend.Legend at 0x7f975998f710>
In [ ]:
ds = sns.load_dataset("tips")
g = sns.FacetGrid(ds, col="time")
In [ ]:
g = sns.FacetGrid(ds, col="time")
g.map(sns.histplot, "tip")
Out[ ]:
<seaborn.axisgrid.FacetGrid at 0x7f9759ce6a50>
In [ ]:
g = sns.FacetGrid(ds, col="sex", hue="smoker")
g.map(sns.scatterplot, "total_bill", "tip", alpha=.7)
g.add_legend()
Out[ ]:
<seaborn.axisgrid.FacetGrid at 0x7f9759b74f10>
In [ ]:
g = sns.FacetGrid(ds, col="smoker", row='time')
g.map(sns.scatterplot, "total_bill", "tip", alpha=.7)
g.add_legend()
Out[ ]:
<seaborn.axisgrid.FacetGrid at 0x7f9756a724d0>
In [ ]:
g = sns.FacetGrid(ds, col="day", height=5, aspect=.7)
g.map(sns.barplot, "sex", "total_bill", order=["Male", "Female"])
Out[ ]:
<seaborn.axisgrid.FacetGrid at 0x7f9756436ad0>
In [ ]:
sns.lmplot(x="size", y="tip", data=ds, x_estimator=np.mean);
In [ ]:
from mpl_toolkits.mplot3d import Axes3D

fig = plt.figure()
ax = fig.add_subplot(111, projection = '3d')

x = t['tip']
y = t['total_bill']
z = t['size']

ax.set_xlabel("tip")
ax.set_ylabel("total bill")
ax.set_zlabel("size")

ax.scatter(x, y, z)

plt.show()
In [ ]:
!pip install nbconvert
In [ ]:
%shell jupyter nbconvert --to html /content/testfile.ipynb