# Difference makes the DIFFERENCE
import numpy as np
import pandas as pd
import seaborn as sns
# Load the seaborn "planets" example dataset. load_dataset already returns a
# pandas DataFrame, so it is not wrapped in pd.DataFrame(...) a second time.
planets = sns.load_dataset('planets')
# dfplanets is a second name for the same object (not a copy), so the
# in-place dropna below also changes `planets`.
dfplanets = planets
dfplanets.info()
# Discard every row that has a missing value in any column.
dfplanets.dropna(inplace=True)
dfplanets.info()

# NOTE: the bare expressions below only show output when run interactively
# (e.g. as notebook cells); in a plain script they are evaluated and discarded.
dfplanets.head(4)
dfplanets[dfplanets['year'] > 2010]
dfplanets[dfplanets['year'] == 2010]
dfplanets.head(3)
dfplanets[dfplanets['method'] == 'Radial Velocity']
dfplanets[dfplanets['method'] == "Transit"]

# The frame still contains the text column 'method'. numeric_only=True limits
# the statistics to the numeric columns; without it, pandas 2.0+ raises a
# TypeError instead of silently skipping the text column.
dfplanets.mean(numeric_only=True)
dfplanets.median(numeric_only=True)
dfplanets.quantile([.75, .85], numeric_only=True)
dfplanets.quantile(.75, numeric_only=True)
# A quantile of a single column is a scalar rather than a Series.
type(dfplanets['distance'].quantile(0.75))
dfplanets.quantile(0.75, numeric_only=True)
# Row-by-row version of the filter: keep planets found in 2010 or later, by
# the Radial Velocity or Transit method, and farther away than the 75th
# percentile of distance.
df_ = dfplanets.copy()
per75 = df_.distance.quantile(0.75)
per75
df_.info()

wanted_methods = ('Radial Velocity', 'Transit')
rows_to_drop = []
for i, r in df_.iterrows():
    # `<=` (not `<`) so that a row sitting exactly on the percentile is
    # removed, matching the `distance > per75` condition used by the
    # vectorized filters later in this file.
    if (
        r['year'] < 2010
        or r['method'] not in wanted_methods
        or r['distance'] <= per75
    ):
        rows_to_drop.append(i)

# Drop all rejected rows in one call after the loop instead of mutating the
# frame while it is being iterated (and paying for one drop per row).
df_.drop(rows_to_drop, inplace=True)
df_.info()
# Boolean-mask filter: start again from a fresh copy of dfplanets and keep
# only the rows that satisfy all three conditions at once.
df_ = dfplanets.copy()
df_.describe()
df_ = df_.loc[
    df_['year'].ge(2010)
    & df_['method'].isin(['Radial Velocity', 'Transit'])
    & df_['distance'].gt(per75)
]
df_.describe()
# The same filter expressed as a DataFrame.query string.
df_ = dfplanets.copy()
per75 = df_.distance.quantile(0.75)
per75
# A local Python variable is referenced inside a query string with a single
# "@" prefix; the original "@@per75" is not the documented form. Each
# comparison is parenthesized so the result does not depend on how the query
# parser ranks "|" against "==".
# NOTE(review): an earlier comment here said the query ran but returned
# incorrect results -- confirm the output now matches the boolean-mask filter.
df_ = df_.query(
    '(year >= 2010)'
    ' & ((method == "Radial Velocity") | (method == "Transit"))'
    ' & (distance > @per75)'
)
df_.describe()