# Difference makes the DIFFERENCE
!pip3 install plotly_express
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import plotly_express as px
# Load the cleaned agriculture dataset from the notebook's /content directory.
df = pd.read_csv('/content/clean_agri_ds.csv')

# First look: distinct states, column dtypes and the leading rows.
df['State_Name'].unique()
df.dtypes
df.head(3)

# Data-quality probes: rows containing any NaN, and rows whose Production
# equals the literal string "=".
rows_with_nan = df.isna().any(axis=1)
df[rows_with_nan]
df[df['Production'] == "="]

# Discard the four helper "Len ..." columns.
df = df.drop(columns=['Len State', 'Len Dist', 'Len Season', 'Len Crop'])
df.head(3)
df.describe()

# Preview only: the re-indexed frame is not assigned, so df keeps its index.
df.set_index(['Unnamed: 0'])

# Give the unnamed leading column a proper name.
df = df.rename(columns={'Unnamed: 0': "ColIndex"})
df.head(2)

# Preview only again; ColIndex stays an ordinary column of df.
df.set_index(['ColIndex'])
df.head(2)

# Order the rows so the largest Production comes first.
df = df.sort_values(by=['Production'], ascending=False)
df.head(2)
# Andhra Pradesh subset. The explicit copy makes dfap independent of df, so
# the column assignments and sorts applied to dfap further down neither raise
# SettingWithCopyWarning nor depend on pandas' view-vs-copy behaviour.
dfap = df[df['State_Name'] == "Andhra Pradesh"].copy()
dfap.head(3)

# Per-district totals. numeric_only=True keeps the text columns out of the
# aggregation (pandas >= 2 would otherwise concatenate their strings).
dfap_group = dfap.groupby('District_Name').sum(numeric_only=True)
dfap_group
dfap.head(2)

# NOTE: 'Unnamed: 0' was already renamed to 'ColIndex' on df before the subset
# was taken, so no further rename is needed here.
# Preview only: the re-indexed frame is not assigned, ColIndex stays a column.
dfap.set_index('ColIndex')
dfap.describe()

# Store the year and the row id as strings so that describe() and the numeric
# plots treat them as labels instead of quantities.
dfap['Crop_Year'] = dfap['Crop_Year'].astype(str)
dfap['ColIndex'] = dfap['ColIndex'].astype(str)
dfap.describe()
# Seaborn draws on the current Axes, so every chart below starts a new figure.
# Without this the four charts pile up on one Axes whenever the statements run
# in the same cell or script.

# Area per district, one line per crop year.
plt.figure()
sns.lineplot(x="District_Name", y="Area", hue='Crop_Year', data=dfap)
plt.xticks(rotation=90)
plt.legend(bbox_to_anchor=[1, 1])  # anchor the legend at the top-right corner

# Production per district, one line per crop year.
plt.figure()
sns.lineplot(x="District_Name", y="Production", hue='Crop_Year', data=dfap)
plt.xticks(rotation=90)
plt.legend(bbox_to_anchor=[1, 1])

# Bar chart of Area per district (seaborn's default aggregation).
plt.figure()
sns.barplot(x="District_Name", y="Area", data=dfap)
plt.xticks(rotation=90)

# Bar chart of Production per district.
plt.figure()
sns.barplot(x="District_Name", y="Production", data=dfap)
plt.xticks(rotation=90)

# Re-inspect the subset: district labels, summary statistics, dtypes.
dfap['District_Name'].unique()
dfap.describe()
dfap.dtypes
# Source: https://pandas.pydata.org/docs/getting_started/intro_tutorials/04_plotting.html
# Line plot of every numeric column against the row index.
dfap.plot(figsize=(8, 4))

# Series.plot reuses the current Axes, so open a new figure first; otherwise
# the bars would be drawn on top of the previous chart.
plt.figure()
dfap.groupby('District_Name')['Area'].sum().plot.bar()

# Per-district totals as lines. numeric_only=True avoids summing (string
# concatenating) the text columns, which are not plotted anyway.
dfap.groupby('District_Name').sum(numeric_only=True).plot(kind='line')
plt.xticks(rotation=90)

# Reassign instead of sorting in place so this also works warning-free when
# dfap is still a filtered slice of df.
dfap = dfap.sort_values(by="District_Name")
dfap.plot.scatter(x='Area', y='Production')
dfap.plot.box()

# One subplot per numeric column. pandas builds the figure itself: handing it
# a single pre-made Axes together with subplots=True makes pandas clear that
# figure and emit a UserWarning. sharex=False matches the behaviour pandas
# uses when an Axes is supplied, so the saved image looks the same.
axs = dfap.plot.line(subplots=True, figsize=(12, 6), sharex=False, marker=".")
fig = axs[0].figure
fig.savefig('Area_Production.png')
dfap
# Sort ascending by Production before accumulating. Reassigning (rather than
# sorting in place) also works warning-free when dfap is a slice of df.
dfap = dfap.sort_values("Production", ascending=True)

# Series.plot draws on the current Axes, so each curve gets its own figure;
# otherwise it would land on the previously saved figure / the prior curve.
plt.figure()
dfaps = dfap['Production'].cumsum()
dfaps.plot()
plt.xticks(rotation=90)

plt.figure()
dfapa = dfap['Area'].cumsum()
dfapa.plot()
plt.xticks(rotation=90)

# Kernel-density estimate of Production.
plt.figure()
dfap.Production.plot(kind='density')
# Bar-chart variants on throw-away data: every variant draws a fresh 10x4
# table of uniform random numbers with columns a-d.
demo_columns = list('abcd')

# Grouped vertical bars.
df2 = pd.DataFrame(np.random.rand(10, 4), columns=demo_columns)
grouped_ax = df2.plot(kind='bar')

# Stacked vertical bars.
df2 = pd.DataFrame(np.random.rand(10, 4), columns=demo_columns)
stacked_ax = df2.plot(kind='bar', stacked=True)

# Stacked vertical bars with the legend anchored at the top-right corner,
# which places it outside the plotting area.
df2 = pd.DataFrame(np.random.rand(10, 4), columns=demo_columns)
legend_ax = df2.plot(kind='bar', stacked=True)
legend_ax.legend(bbox_to_anchor=[1, 1])

# Stacked horizontal bars.
df2 = pd.DataFrame(np.random.rand(10, 4), columns=demo_columns)
horizontal_ax = df2.plot(kind='barh', stacked=True)
# Histograms of row-to-row differences. Series.hist draws into the current
# figure, so each histogram opens its own one (the Production histogram
# previously landed on whatever figure was active).
plt.figure()
dfap['Production'].diff().hist()

# The Area histogram was present twice with identical arguments; one copy
# produces the same chart.
plt.figure()
dfap['Area'].diff().hist(color='k', bins=50)
# Box plot of per-season totals over a random sample of up to 1000 rows.
# DataFrame.sample raises ValueError when asked for more rows than exist, so
# the request is capped at the subset size. numeric_only=True keeps the text
# columns out of the sums.
season_sample = dfap.sample(min(1000, len(dfap)))
dfapb = season_sample.groupby('Season').sum(numeric_only=True)
dfapb.plot.box()
dfapb

# Box plot of the numeric columns for a random sample of up to 10 rows.
dfapb = dfap.sample(min(10, len(dfap)))
dfapb.boxplot()
# Grouped box plots over small random samples (a new sample for every chart).
# rot=90 rotates the tick labels on every subplot that boxplot creates;
# plt.xticks(rotation=90) only reached the current Axes, leaving the other
# subplot's labels horizontal. The sample size is capped so a subset with
# fewer than 10 rows does not raise ValueError.
sample_size = min(10, len(dfap))

dfap.sample(sample_size).boxplot(by='Crop_Year', figsize=(10, 3), rot=90)
dfap.sample(sample_size).boxplot(by='Season', figsize=(10, 3), rot=90)
dfap.sample(sample_size).boxplot(by='Crop', figsize=(10, 3), rot=90)
dfap.head(2)

# Same grouping by Season at the default figure size.
dfap.sample(sample_size).boxplot(by="Season", rot=90)

# Grouped by (Season, Crop), with the subplots stacked in two rows.
dfap.sample(sample_size).boxplot(
    by=["Season", 'Crop'],
    figsize=(8, 6),
    layout=(2, 1),
    rot=90,
)
# Production against Area: first with the default marker size, then with
# small markers (s=5).
dfap.plot(kind='scatter', x="Production", y='Area')
dfap.plot(kind='scatter', x="Production", y='Area', s=5)