
Scatter plots and regression plots with the "tips" dataset from the seaborn package

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Apply Matplotlib's 'dark_background' style sheet.  In the original this call
# was fused onto the pyplot import line, which is a syntax error.
# NOTE(review): the call is reconstructed from the leftover
# "['dark_background'])" fragment -- confirm it matches the intended setup.
plt.style.use(['dark_background'])

# Apply seaborn's default theme and remap the single-letter colour codes.
sns.set(color_codes = True)

To give the graph the desired size and place its legend at the desired location:

  • fig = plt.gcf()
  • fig.set_size_inches(8,6)
  • plt.legend(bbox_to_anchor = (1,1))
# Load seaborn's example "tips" dataset into a DataFrame.
t = sns.load_dataset(name='tips')
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
# Tip against bill with the axes both ways round: tip on x first, then bill on x.
for x_col, y_col in (('tip', 'total_bill'), ('total_bill', 'tip')):
    sns.scatterplot(data=t, x=x_col, y=y_col)
The above plot shows that the tip is roughly proportional to the bill amount, apart from some outliers.

# Tip against the `smoker` column, then the bill against the `size` column.
sns.scatterplot(data=t, x='tip', y='smoker')
sns.scatterplot(data=t, x='total_bill', y='size')

# Number of non-null entries in every other column, per distinct `size` value.
size = t.groupby(['size']).count()

# Tip against the `size` column.
sns.scatterplot(data=t, x='tip', y='size')
# Column dtypes and non-null counts, followed by summary statistics of the
# numeric columns.  The stray "In [ ]:;" notebook prompt that preceded the
# call is not valid Python and has been dropped.
# NOTE(review): `t.info()` is inferred from the DataFrame.info() listing that
# follows this line in the export -- confirm it was part of the original cell.
t.info()
t.describe()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   total_bill  244 non-null    float64 
 1   tip         244 non-null    float64 
 2   sex         244 non-null    category
 3   smoker      244 non-null    category
 4   day         244 non-null    category
 5   time        244 non-null    category
 6   size        244 non-null    int64   
dtypes: category(4), float64(2), int64(1)
memory usage: 7.4 KB
total_bill tip size
count 244.000000 244.000000 244.000000
mean 19.785943 2.998279 2.569672
std 8.902412 1.383638 0.951100
min 3.070000 1.000000 1.000000
25% 13.347500 2.000000 2.000000
50% 17.795000 2.900000 2.000000
75% 24.127500 3.562500 3.000000
max 50.810000 10.000000 6.000000
# Bill against tip, first coloured by the `smoker` column, then uncoloured.
sns.scatterplot(data=t, x='total_bill', y='tip', hue='smoker')
sns.scatterplot(data=t, x='total_bill', y='tip')

# Tip expressed as a fraction of the bill.
t['frac_tip'] = t['tip'].div(t['total_bill'])
sns.scatterplot(data=t, x='total_bill', y='frac_tip')

# (tip + bill) relative to the bill alone.
t['tTipFrac'] = t['tip'].add(t['total_bill']).div(t['total_bill'])
sns.scatterplot(data=t, x='total_bill', y='tTipFrac')
2 21.01 3.50 Male No Sun Dinner 3 0.166587 1.166587
3 23.68 3.31 Male No Sun Dinner 2 0.139780 1.139780
4 24.59 3.61 Female No Sun Dinner 4 0.146808 1.146808
# Remove the `tTipFrac` column from the frame in place.
t.drop(columns='tTipFrac', inplace=True)

# Bill against tip, coloured by several columns in turn.
for hue_col in ('time', 'sex', 'smoker', 'day', 'size'):
    sns.scatterplot(data=t, x='total_bill', y='tip', hue=hue_col)

# Colour by `time` and vary the marker style by `sex`.
sns.scatterplot(data=t, x='total_bill', y='tip', hue='time', style='sex')

# Resize the current figure, add `size` as a marker-size encoding, and anchor
# the legend at (1, 1).
fig = plt.gcf()
fig.set_size_inches(15, 6)
sns.scatterplot(
    data=t, x='total_bill', y='tip', hue='time', style='sex', size='size'
)
plt.legend(bbox_to_anchor=(1, 1))

# Regression plots: tip and tip fraction against the bill, then the tip plot
# again with two alternative markers.
sns.regplot(data=t, x='total_bill', y='tip')
sns.regplot(data=t, x='total_bill', y='frac_tip')
for marker_glyph in ('+', '.'):
    sns.regplot(data=t, x='total_bill', y='tip', marker=marker_glyph)
# Load seaborn's example "diamonds" dataset.
d = sns.load_dataset('diamonds')

# x and y are passed by keyword, matching the other plotting calls in this
# file; recent seaborn releases no longer accept them positionally.
# Each `d.sample(1000)` draws its own random 1000 rows (no seed is set), so
# these three plots are drawn from different subsets.
sns.scatterplot(x='x', y='price', data=d.sample(1000))
sns.regplot(x='x', y='price', data=d.sample(1000))
sns.regplot(x='x', y='price', data=d.sample(1000), order=2)

# Keep one sample so the next two plots share the same rows.
ds = d.sample(1000)

# Second-order fit on the shared sample, "+" markers, on a 15x6 inch figure.
fig = plt.gcf()
fig.set_size_inches(15, 6)
sns.regplot(x='x', y='price', data=ds, order=2, marker="+")
plt.legend(bbox_to_anchor=(1, 1))

# Same fit on the shared sample with the default marker.
fig = plt.gcf()
fig.set_size_inches(15, 6)
sns.regplot(x='x', y='price', data=ds, order=2)
plt.legend(bbox_to_anchor=(1, 1))

# Same fit on the full dataset.
fig = plt.gcf()
fig.set_size_inches(15, 6)
sns.regplot(x='x', y='price', data=d, order=2)
plt.legend(bbox_to_anchor=(1, 1))
# Fresh copy of the "tips" dataset (rebinds `ds`, which previously held a
# diamonds sample).
ds = sns.load_dataset("tips")

# Empty grid with one column of axes per `time` value.
g = sns.FacetGrid(ds, col="time")

# NOTE(review): in the original, the four lines below were truncated to
# "FacetGrid(...), <args>)" -- the "g.map(<func>" part was lost, leaving
# unbalanced parentheses.  The column arguments and keywords are original; the
# plotting functions (plt.hist, plt.scatter, sns.barplot) are reconstructed
# from the argument shapes -- confirm against the original notebook.

# Distribution of `tip`, one facet per `time` value.
g = sns.FacetGrid(ds, col="time")
g.map(plt.hist, "tip")

# Bill against tip, faceted by `sex` and coloured by `smoker`.
g = sns.FacetGrid(ds, col="sex", hue="smoker")
g.map(plt.scatter, "total_bill", "tip", alpha=.7)

# Bill against tip on a `time` x `smoker` grid.
g = sns.FacetGrid(ds, col="smoker", row='time')
g.map(plt.scatter, "total_bill", "tip", alpha=.7)

# `total_bill` by `sex`, one facet per `day`, with a fixed category order.
g = sns.FacetGrid(ds, col="day", height=5, aspect=.7)
g.map(sns.barplot, "sex", "total_bill", order=["Male", "Female"])

# Regression of tip on size, with the points at each distinct size collapsed
# to their mean.
sns.lmplot(x="size", y="tip", data=ds, x_estimator=np.mean)
# Imported for its side effect only: older Matplotlib releases need it to make
# the '3d' projection available.  The name itself is not used below.
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

x = t['tip']
y = t['total_bill']
z = t['size']

# Label all three axes so each can be matched to its column (the original
# labelled only the y axis).
ax.set_xlabel("tip")
ax.set_ylabel("total bill")
ax.set_zlabel("size")

ax.scatter(x, y, z)
