# Difference makes the DIFFERENCE
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# --- Load the cleaned agriculture dataset and take a first look -------------
df = pd.read_csv('/content/clean_agri_ds.csv')
df.head(2)
df.describe()

# Drop the four "Len ..." helper columns; none of the queries below use them.
df.drop(columns=['Len State', 'Len Dist', 'Len Season', 'Len Crop'], inplace=True)
df.head(2)

# NOTE: set_index() returns a new frame. The result is not assigned, so `df`
# itself keeps its default index and these two calls only preview the
# re-indexed frame. 'ColIndex' must stay a regular column for the astype()
# call further down.
df.set_index(['Unnamed: 0'])
df.rename(columns={'Unnamed: 0': "ColIndex"}, inplace=True)
df.set_index(['ColIndex'])

# Store the year and the row id as strings: the query() expressions below
# compare Crop_Year against quoted literals such as "2011".
df['Crop_Year'] = df['Crop_Year'].astype(str)
df['ColIndex'] = df['ColIndex'].astype(str)
df.describe()

# Reference signature (was a bare line that raised NameError when executed):
#   DataFrame.query(expr, inplace=False, **kwargs)

# Three equivalent ways to list the districts of one state.
df.loc[df.State_Name == "Andhra Pradesh", 'District_Name'].unique()
df.loc[df['State_Name'] == 'Andhra Pradesh']['District_Name'].unique()
df.query('State_Name == "Andhra Pradesh"')['District_Name'].unique()

# Progressively narrower query() filters: state -> district -> season -> year -> crop.
df.query('State_Name == "Andhra Pradesh" and District_Name == "VIZIANAGARAM"')
df.query('State_Name == "Andhra Pradesh" and District_Name == "VIZIANAGARAM" and Season == "Rabi"')
df.query('State_Name == "Andhra Pradesh" and District_Name == "VIZIANAGARAM" and Season == "Rabi" and Crop_Year == "2011"')
df.query('State_Name == "Andhra Pradesh" and District_Name == "VIZIANAGARAM" and Crop_Year == "2011" and Season == "Rabi" and Crop == "Onion"')

# Keep the single-district slice for later use.
df_dist = df.query('State_Name == "Andhra Pradesh" and District_Name == "VIZIANAGARAM"')
df_dist.head(1)
def getDistNames_forState(strStateName, dfSource=None):
    """Print and return the distinct district names recorded for one state.

    The earlier version selected the ``Crop`` column even though the
    function is named for district names, and built its ``query()``
    expression by string concatenation, which broke on any state name
    containing a double quote.

    Args:
        strStateName: Exact ``State_Name`` value to match.
        dfSource: Frame with ``State_Name`` and ``District_Name`` columns.
            Defaults to the module-level ``df``.

    Returns:
        Array of unique ``District_Name`` values in first-seen order;
        empty when the state does not occur in the frame.
    """
    frame = df if dfSource is None else dfSource
    # A boolean mask needs no quoting, unlike a concatenated query string.
    arrDistricts = frame.loc[frame['State_Name'] == strStateName, 'District_Name'].unique()
    print(arrDistricts)
    return arrDistricts
# Try the helper on a single state.
getDistNames_forState('Andhra Pradesh')
# Show every distinct state name in the dataset.
# NOTE(review): `display` is not imported in this file; presumably the
# IPython/Jupyter built-in - confirm before running as a plain script.
display(df['State_Name'].unique())
def getDistrict_Names_for_State(strStateName, dfSource=None):
    """Print and return the distinct district names recorded for one state.

    Args:
        strStateName: Exact ``State_Name`` value to match.
        dfSource: Frame with ``State_Name`` and ``District_Name`` columns.
            Defaults to the module-level ``df``.

    Returns:
        Array of unique ``District_Name`` values in first-seen order;
        empty when the state does not occur in the frame.
    """
    frame = df if dfSource is None else dfSource
    # The former `.iloc[:]` selected every row again and has been dropped.
    arrDistricts = frame.loc[frame['State_Name'] == strStateName, "District_Name"].unique()
    print(arrDistricts)
    return arrDistricts
# Try the district helper on two different states.
getDistrict_Names_for_State("Arunachal Pradesh")
getDistrict_Names_for_State("Andhra Pradesh")
def getCropNames_for_State_District(strStateName, strDistName, dfSource=None):
    """Print and return the distinct crops recorded for one district of a state.

    The earlier version only printed, so callers that wrapped the call in
    ``print(...)`` or ``list(...)`` received ``None``; the latter raised
    ``TypeError``. The result is now returned as well as printed.

    Args:
        strStateName: Exact ``State_Name`` value to match.
        strDistName: Exact ``District_Name`` value to match.
        dfSource: Frame with ``State_Name``, ``District_Name`` and ``Crop``
            columns. Defaults to the module-level ``df``.

    Returns:
        Array of unique ``Crop`` values in first-seen order; empty when no
        row matches both the state and the district.
    """
    frame = df if dfSource is None else dfSource
    maskState = frame['State_Name'] == strStateName
    maskDistrict = frame['District_Name'] == strDistName
    arrCrops = frame.loc[maskState & maskDistrict, "Crop"].unique()
    print(arrCrops)
    return arrCrops
# Spot-check the crop helper on two state/district pairs.
getCropNames_for_State_District("Arunachal Pradesh", "WEST KAMENG")
getCropNames_for_State_District("Assam", "KARBI ANGLONG")

# Same district lookup done inline and through the helper.
df.loc[df['State_Name'] == "Andhra Pradesh", "District_Name"].unique()
getDistrict_Names_for_State("Arunachal Pradesh")
getDistrict_Names_for_State("Bihar")

# The helper prints the crops itself, so no outer print() is needed.
getCropNames_for_State_District("Bihar", "MADHEPURA")

# Build the crop list directly from the frame so it does not depend on the
# print-oriented helper's return value (list() over None raised TypeError).
# NOTE(review): the variable names say "ArPr" but the filter is Bihar /
# MADHEPURA - names kept because later cells may refer to them.
ArPr_Crops = list(
    df.loc[
        (df['State_Name'] == "Bihar") & (df['District_Name'] == "MADHEPURA"),
        "Crop",
    ].unique()
)
ArPr_Crops

# Independent copy of the crop list (replaces the element-by-element append loop).
lstArPrCrops = list(ArPr_Crops)
getCropNames_for_State_District("Arunachal Pradesh", 'DIBANG VALLEY')

# Raw (non-deduplicated) district values for one state as a NumPy array.
df.loc[df['State_Name'] == "Andhra Pradesh", "District_Name"].values

# Reference signature (was a bare line that raised NameError when executed):
#   DataFrame.filter(items=None, like=None, regex=None, axis=None)

# Select columns by exact label.
df.filter(["State_Name", "Production", "Area"])

# Total per crop year. numeric_only=True keeps the string column State_Name
# out of the aggregation instead of concatenating its values.
# NOTE(review): assumes Area is numeric - confirm against the CSV.
df.filter(["Crop_Year", "State_Name", "Area"]).groupby('Crop_Year').sum(numeric_only=True)

# Select every column whose label contains an upper- or lower-case "a".
df.filter(regex='[Aa]')
# --- Second dataset: top baby names, loaded straight from the URL -----------
url = "https://data.chhs.ca.gov/dataset/4a8cb74f-c4fa-458a-8ab1-5f2c0b2e22e3/resource/f3fe42ed-4441-4fd8-bf53-92fb80a246da/download/2021-06-18_topbabynames_1960-2019.csv"
ndf = pd.read_csv(url)
ndf.head()
ndf.describe()

# Row filters built from the vectorised string methods on the Name column.
ndf[ndf['Name'].str.startswith('AL')]
ndf[ndf["Name"].str.endswith('RA')]
ndf[ndf['Name'].str.contains("AS")]
ndf[ndf['Name'].str.contains("AS", case=False)]

# Regex match: names ending in IAN or INE, ignoring case.
ndf[ndf['Name'].str.contains("IAN$|INE$", case=False, regex=True)].head()
# NOTE(review): with regex=False the pattern is searched as the literal text
# "IAN$|INE$". Left unchanged as it looks like a deliberate contrast with the
# line above - confirm.
ndf[ndf['Name'].str.contains("IAN$|INE$", case=False, regex=False)]

# Two equivalent ways to keep rows for a fixed set of names.
# ('SPPHIA' was a typo for 'SOPHIA', so the first form matched PAUL only.)
ndf[(ndf['Name'] == 'SOPHIA') | (ndf["Name"] == "PAUL")].head()
ndf[ndf["Name"].isin(["SOPHIA", "PAUL"])].head(4)

# DataFrame.filter works on labels: columns with axis=1, index labels with axis=0.
ndf.filter(items=['Year', 'Name'], axis=1)
ndf.filter(like='17', axis=0)
# NOTE(review): '^5|8|3' anchors only the 5; labels containing 8 or 3 anywhere
# also match. Use '^(5|8|3)' if "starts with 5, 8 or 3" was intended.
ndf.filter(regex='^5|8|3', axis=0)

# Missing values per column. sum() counts the True cells; count() on the
# boolean frame just returned the number of rows for every column.
ndf.isnull().sum()