# Difference makes the DIFFERENCE
import numpy as np
import pandas as pd
# Load the raw agriculture dataset and preview the first rows.
df = pd.read_csv('/content/agriculture_ds.csv')
df.head(5)

# Missing-value inspection: rows with a NaN in any column, followed by
# per-column checks on State_Name, District_Name and Crop_Year.
df.loc[df.isna().any(axis=1)]
df.loc[df['State_Name'].isna()]
df.loc[df['District_Name'].isna()]
df.loc[df['Crop_Year'].isna()]

# Exploratory row filters. Values are matched exactly as they were loaded,
# so a label with surrounding whitespace will not match here.
df.loc[df['State_Name'].eq("Andhra Pradesh")]
# count() reports the number of non-null entries per column for that subset.
df.loc[df['State_Name'].eq("Andhra Pradesh")].count()
df.loc[df['State_Name'].eq("Bihar") & df['Season'].eq("Kharif")]
df.loc[df['State_Name'].eq("Andhra Pradesh") & df['Crop'].eq('Bajra')]
# Union of two conditions: every Andhra Pradesh row plus every Kharif row.
df.loc[df['State_Name'].eq("Andhra Pradesh") | df['Season'].eq('Kharif')]
df.loc[df['State_Name'].eq("Andhra Pradesh") & df['Season'].eq("Rabi")]
df.loc[df['State_Name'].eq("Andhra Pradesh") & df['Season'].eq("Kharif")]
df.loc[df['State_Name'].eq("Andhra Pradesh") & df['Season'].eq("Whole Year")]
# Remove leading/trailing whitespace from every Season value so that exact
# comparisons (e.g. against "Kharif") are made on the bare label.
df['Season'] = df['Season'].str.strip()
# Temporary helper column: character count of each stripped Season value.
df['Season Len'] = df['Season'].str.len()
df.head(5)

# Andhra Pradesh filters evaluated on the stripped Season values.
df.loc[df['State_Name'].eq("Andhra Pradesh") & df['Season'].eq("Kharif")]
df.loc[df['State_Name'].eq("Andhra Pradesh") & df['Season'].eq("Rabi")]
df.loc[:, ['State_Name', 'Season', 'Season Len']]

# The length column was only needed for inspection; remove it in place.
df.drop(columns='Season Len', inplace=True)
df.head(2)
# Add temporary character-count columns for the text fields Crop, State_Name
# and District_Name, and look at them next to the source columns.
# NOTE(review): presumably a check for padded / oddly sized labels -- confirm.
df['Crop Len'] = df['Crop'].str.len()
df.head(2)

df['State_Name len'] = df['State_Name'].str.len()
df.head(3)
df.loc[:, ['State_Name', 'State_Name len']]

df['Dist Len'] = df['District_Name'].str.len()
df.head(2)
df.loc[:, ['District_Name', 'Dist Len']]
df.head(2)

# Discard all three helper columns again; the frame keeps only its data columns.
df.drop(columns=['State_Name len', 'Dist Len', 'Crop Len'], inplace=True)
df.head(2)
# Rows whose Production field holds the literal "=" instead of a value.
df.loc[df['Production'].eq('=')]

# Reorder the frame in place, descending by Production.
# NOTE(review): Production is only converted with pd.to_numeric further down
# in the file, so this presumably orders the values as text -- confirm.
df.sort_values(by='Production', ascending=False, inplace=True)
df.head(5)
df.loc[df['Production'].eq("=")]

# Collect the row labels of the "=" placeholders and drop those rows in place.
index_names = df.loc[df['Production'].eq("=")].index
df.drop(index=index_names, inplace=True)
# Re-run the filter to check that no "=" rows are left.
df.loc[df['Production'].eq("=")]
df.head(2)
df

# Drop every row that still has a missing value in any column.
df.dropna(axis=0, how='any', inplace=True)
df
# Cast Crop_Year to str.
df['Crop_Year'] = df['Crop_Year'].astype(str)
df.head(2)
df.dtypes

# Parse Production into a numeric dtype. errors='raise' is the default:
# any value that cannot be parsed aborts the conversion with an exception.
df['Production'] = pd.to_numeric(df['Production'], errors='raise')
df.dtypes

# Write the cleaned frame to disk. index=True is the default, so the row
# labels are written as the first, unnamed column of the CSV.
df.to_csv('/content/clean_agri_ds.csv', index=True)

# Summary statistics and a final preview of the cleaned data.
df.describe()
df.head(3)