Difference makes the DIFFERENCE
import numpy as np
import pandas as pd
arr = np.random.randint(0, 10, (5, 3))
arr
df = pd.DataFrame(arr)
df
df.values
df.index
df.columns
for c in df.columns:
print(c)
df.values[0]
df.index = ['R1', 'R2', 'R3', 'R4', 'R5']
df.columns = ['C1', 'C2', 'C3']
df
df.loc['R3', 'C2']
df.iloc[2, 1]
type(df.iloc[2:4, 1:3])
df.loc['R3':'R5', 'C2':'C3']
df.iloc[0]
df.iloc[:, 0]
df.shape
df.T
Task on creating dataframes
def create_df(nRows, nCols, maxRand=10):
arr = np.random.randint(0, maxRand, (nRows, nCols))
df = pd.DataFrame(arr)
df.index = ['R' + str(x) for x in np.arange(1, nRows+1)]
df.columns = ['C' + str(x) for x in np.arange(1, nCols+1)]
return df
create_df(5, 3)
create_df(2, 5)
mass = pd.Series([0.33, 4.87, 5.97, 0.642, 1898, 568, 86.8, 102, 0.0146],
index=['Mercury', 'Venus', 'Earth', 'Mars', 'Jupiter', 'Saturn', 'Uranus', 'Neptune', 'Pluto'])
diameter = pd.Series([4879, 12104, 12756, 3475, 6792, 142984, 120536, 51118, 49528, 2370],
index=['Mercury', 'Venus', 'Earth', 'Moon', 'Mars', 'Jupiter', 'Saturn', 'Uranus', 'Neptune', 'Pluto'])
mass
diameter
df = pd.DataFrame({'Mass': mass, 'Diameter': diameter})
df
df['Mass']
df['Diameter']
df['Earth']
df['Mass']['Earth']
df.Mass.Earth
df['pop'] = 0
df
df['pop']['Earth'] = 8000000000
df
df['Mass'] is df.Mass
df['pop'] is df.pop
df
df
df.loc['Earth']
df.loc[:,'Mass']
def create_mean_row(df):
# df.loc['Col_Mean'] = [np.mean(df[col]) for col in df.columns]
df.loc['Col_Mean'] = df.mean()
return df
df
df.loc['Col_Mean'] = 0
df
np.mean(df['Mass'])
df.drop('Col_Mean', inplace=True)
df.drop('pop', axis=1, inplace=True)
df
np.mean(df['Mass'])
df.loc['Col_Mean'] = [np.mean(df['Mass']), np.mean(df['Diameter'])]
df
df.drop('Col_Mean')
create_mean_row(df)
df = create_df(5, 3)
df
df.mean()
df.mean(axis=1)
df['Row_Mean'] = df.mean(axis=1)
df.loc['Col_Mean'] = df.mean()
df
df.median()
df.min()
df.max()
df.quantile(0.25)
df.drop('Row_Mean', axis=1)
df.describe()
mass = pd.Series([0.33, 4.87, 5.97, 0.642, 1898, 568, 86.8, 102, 0.0146],
index=['Mercury', 'Venus', 'Earth', 'Mars', 'Jupiter', 'Saturn', 'Uranus', 'Neptune', 'Pluto'])
diameter = pd.Series([4879, 12104, 12756, 3475, 6792, 142984, 120536, 51118, 49528, 2370],
index=['Mercury', 'Venus', 'Earth', 'Moon', 'Mars', 'Jupiter', 'Saturn', 'Uranus', 'Neptune', 'Pluto'])
planets = pd.DataFrame({'mass': mass, 'diameter': diameter})
planets.describe()
import seaborn as sns
df = sns.load_dataset('planets')
df.info()
df.head()
df.tail()
df.describe()
Go through each row of the dataframe and delete it (drop) if any of the columns is null
for r in df.index:
for c in df.columns:
if pd.isnull(df.loc[r, c]):
df.drop(r, inplace=True)
break
df.describe()
for i, r in df.iterrows():
print(i)
print(r)
break
for i, r in df.iterrows():
if pd.isnull(r).any():
df.drop(i, inplace=True)
df.describe()
df.dropna(inplace=True)
df.describe()
Filter and show only those rows which have planets that are found in the 2010s and method is 'Radial Velocity' or 'Transit' and distance is large (> 75 percentile)
df_ = df.copy()
per_75 = df.distance.quantile(0.75)
for i, r in df_.iterrows():
if r['year'] < 2010:
df_.drop(i, inplace=True)
continue
if r['method'] != 'Radial Velocity' and r['method'] != 'Transit':
df_.drop(i, inplace=True)
continue
if r['distance'] < per_75:
df_.drop(i, inplace=True)
continue
df_.describe()
df_.tail()
per_75
df_ = df.copy()
df_ = df_[
(df_['year'] >= 2010) &
((df_['method'] == 'Radial Velocity') | (df_['method'] == 'Transit')) &
(df_['distance'] > per_75)
]
df_.describe()
Modify the method column to have only the abbreviation of each method
df.method.unique()
s = 'Orbital Brightness Modulation'
''.join([x[0] for x in s.split(' ')])
short_names = {}
for s in df.method.unique():
short_names[s] = ''.join([x[0] for x in s.split(' ')])
print(short_names)
for i, r in df.iterrows():
df.loc[i, 'short_method'] = short_names.get(r['method'], r['method'])
df.tail()
df = sns.load_dataset('planets')
def shorten_method(s):
return short_names.get(s, s)
df['short_method'] = df['method'].apply(shorten_method)
df.head()
Count the number of planets discovered for each method type
d = {}
for m in df.method.unique():
d[m] = df[df.method == m]['method'].count()
print(d)
df.groupby('method')['method'].count()
d = {}
for m in df.method.unique():
d[m] = df[df.method == m]['distance'].mean()
print(d)
df.groupby('method')['distance'].mean()
Find out what fraction of planets have been found in the last decade (i.e., in 2010s) across each method type
s_2010s = df[df.year >= 2010].groupby('method')['method'].count()
s_allTime = df.groupby('method')['method'].count()
s_2010s/s_allTime
Find a dataset of Nifty numbers for 2018 and 2019 - daily numbers open, close, high, low
nifty50_2018 = pd.read_csv('NIFTY50_2018.csv')
nifty50_2018.head()
nifty50_2018 = pd.read_csv('NIFTY50_2018.csv', index_col=0)
nifty50_2018.head()
nifty50_2018.loc['31 Dec 2018']
nifty50_2018['Open']
nifty50_2019 = pd.read_csv('NIFTY50_2019.csv', index_col=0)
nifty50_2019.head()
print(nifty50_2018.shape, nifty50_2019.shape)
nifty50 = pd.concat([nifty50_2018, nifty50_2019])
nifty50.shape
niftynext50_2019 = pd.read_csv('NIFTYNext50_2019.csv', index_col = 0)
niftynext50_2019.head()
nifty_2019 = pd.concat([nifty50_2019, niftynext50_2019], axis=1)
nifty_2019.shape
nifty_2019.head()
nifty_2019 = pd.concat([nifty50_2019, niftynext50_2019], axis=1,
keys=['nifty50', 'niftynext50'])
nifty_2019.shape
nifty_2019.head()
nifty_2019['nifty50'].loc['31 Dec 2019']
Tasks on the NIFTY datasets:
In 2019, in how many days was the NIFTY50 volatile (high > 105% of low)
In 2019, in how many days was the NIFTYNEXT50 volatile (high > 105% of low)
In 2019, how many days belonged to the four classes NIFTY50 volatile / non-volatile and NIFTYNext50 volatile / non-volatile
Compute the mean, median, std, var of closing values for each weekday in NIFTY50 for 2019
Compute the mean, median, std, var of closing values for each month in NIFTY50 for 2019
On the days in which NIFTY50 closed higher than the open, what was the mean of (close - open) for NIFTYNext50
In 2019, how many days had the day's high lower than the previous day's low in NIFTY50
In 2019, on how many days did the day's close exceed the 30 day moving average in NIFTY50 (exclude first month)