Difference makes the DIFFERENCE
!pip3 install plotly_express
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import plotly_express as px
# Load the agriculture data set from its absolute path.
df = pd.read_csv('/content/clean_agri_ds.csv')

# First look: distinct states, column dtypes and a few rows.
df['State_Name'].unique()
df.dtypes
df.head(3)

# Rows that contain at least one missing value.
has_missing = df.isna().any(axis=1)
df[has_missing]

# Rows whose Production cell is the literal string "=".
df[df['Production'] == "="]

# Remove the four helper length columns.
df = df.drop(columns=['Len State', 'Len Dist', 'Len Season', 'Len Crop'])
df.head(3)
df.describe()

# Preview only: the re-indexed frame is not stored.
df.set_index(['Unnamed: 0'])

# Give the unnamed leading column a proper name.
df = df.rename(columns={'Unnamed: 0': "ColIndex"})
df.head(2)

# Preview only: the re-indexed frame is not stored.
df.set_index(['ColIndex'])
df.head(2)

# Largest Production values first.
df = df.sort_values(['Production'], ascending=False)
df.head(2)
# Andhra Pradesh subset. The explicit copy makes dfap an independent frame,
# so the column assignments below modify dfap itself instead of a filtered
# slice of df (the situation pandas reports as SettingWithCopyWarning).
dfap = df[df['State_Name'] == "Andhra Pradesh"].copy()
dfap.head(3)

# Per-district totals of the numeric columns. numeric_only=True keeps the
# text columns out of the sum; pandas >= 2.0 would otherwise concatenate
# their strings group by group.
dfap_group = dfap.groupby('District_Name').sum(numeric_only=True)
dfap_group
dfap.head(2)

# Guard rename: a no-op when the column is already called "ColIndex".
dfap = dfap.rename(columns={"Unnamed: 0": "ColIndex"})
dfap.set_index('ColIndex')  # preview only; the result is not stored
dfap.describe()

# Store the year and the row id as text; describe() afterwards no longer
# reports them as numeric columns.
dfap['Crop_Year'] = dfap['Crop_Year'].astype(str)
dfap['ColIndex'] = dfap['ColIndex'].astype(str)
dfap.describe()
# Area per district, one line per crop year; legend anchored at the
# upper-right corner of the axes.
sns.lineplot(data=dfap, x="District_Name", y="Area", hue='Crop_Year')
plt.xticks(rotation=90)
plt.legend(bbox_to_anchor=[1, 1])

# Same chart for Production.
sns.lineplot(data=dfap, x="District_Name", y="Production", hue='Crop_Year')
plt.xticks(rotation=90)
plt.legend(bbox_to_anchor=[1, 1])

# Bar charts of Area and Production per district.
sns.barplot(data=dfap, x="District_Name", y="Area")
plt.xticks(rotation=90)
sns.barplot(data=dfap, x="District_Name", y="Production")
plt.xticks(rotation=90)

# Reference output: district names, numeric summary and column dtypes.
dfap['District_Name'].unique()
dfap.describe()
dfap.dtypes
source: https://pandas.pydata.org/docs/getting_started/intro_tutorials/04_plotting.html
# Default line plot of the numeric columns.
dfap.plot(figsize=(8, 4))

# Total Area per district as bars.
dfap.groupby('District_Name')['Area'].sum().plot.bar()

# Per-district totals as lines. numeric_only=True restricts the sum to the
# numeric columns; without it pandas >= 2.0 also concatenates every text
# column per group before the plot drops them again.
dfap.groupby('District_Name').sum(numeric_only=True).plot(kind='line')
plt.xticks(rotation=90)

# Order rows by district. Reassigning avoids mutating a frame that may
# have been derived from a filtered slice of df.
dfap = dfap.sort_values(by="District_Name")

dfap.plot.scatter(x='Area', y='Production')
dfap.plot.box()
# One subplot per numeric column, saved to disk. pandas creates the figure
# and the axes itself here: passing a single pre-made Axes together with
# subplots=True makes pandas clear that figure and emit a UserWarning.
# dfap.plot.area(figsize = (12, 6), subplots = True)
axs = dfap.plot.line(subplots=True, figsize=(12, 6), marker=".")
fig = axs[0].get_figure()  # all subplots share this figure
fig.savefig('Area_Production.png')
dfap
# Smallest Production first, so the running totals below grow monotonically
# for non-negative values.
dfap.sort_values(by="Production", inplace=True)

# Running total of Production in that row order.
dfaps = dfap['Production'].cumsum()
dfaps.plot()
plt.xticks(rotation=90)

# Running total of Area in the same row order.
dfapa = dfap['Area'].cumsum()
dfapa.plot()
plt.xticks(rotation=90)

# Density estimate of Production on a new figure.
plt.figure()
dfap['Production'].plot.density()
# Bar-chart variants on throwaway random data (10 rows, columns a-d).
# A fresh random frame is drawn before every chart.
demo_columns = ['a', 'b', 'c', 'd']

# Grouped vertical bars.
df2 = pd.DataFrame(np.random.rand(10, 4), columns=demo_columns)
df2.plot(kind='bar');

# Stacked vertical bars.
df2 = pd.DataFrame(np.random.rand(10, 4), columns=demo_columns)
df2.plot(kind='bar', stacked=True);

# Stacked vertical bars with the legend anchored at the axes' upper-right
# corner so it sits outside the plot area.
df2 = pd.DataFrame(np.random.rand(10, 4), columns=demo_columns)
df2.plot(kind='bar', stacked=True);
plt.legend(bbox_to_anchor=[1, 1])

# Stacked horizontal bars.
df2 = pd.DataFrame(np.random.rand(10, 4), columns=demo_columns)
df2.plot(kind='barh', stacked=True);
# Histogram of row-to-row changes in Production.
production_steps = dfap['Production'].diff()
production_steps.hist()

# Histogram of row-to-row changes in Area (black, 50 bins), drawn twice,
# each time on its own new figure.
for _ in range(2):
    plt.figure()
    dfap['Area'].diff().hist(color='k', bins=50)
# Per-season totals of a random sample, shown as a box plot.
# - The sample size is capped at the row count: DataFrame.sample raises when
#   asked for more rows than exist (sampling without replacement).
# - numeric_only=True keeps text columns out of the sum; pandas >= 2.0 would
#   otherwise concatenate their strings per season.
sample_size = min(1000, len(dfap))
dfapb = dfap.sample(sample_size).groupby('Season').sum(numeric_only=True)
dfapb.plot.box()
dfapb

# Box plot of the numeric columns of ten random rows.
dfapb = dfap.sample(10)
dfapb.boxplot()

# Ten random rows grouped by one column at a time.
dfap.sample(10).boxplot(by='Crop_Year', figsize=(10, 3))
plt.xticks(rotation=90)
dfap.sample(10).boxplot(by='Season', figsize=(10, 3))
plt.xticks(rotation=90)
dfap.sample(10).boxplot(by='Crop', figsize=(10, 3))
plt.xticks(rotation=90)
dfap.head(2)

dfap.sample(10).boxplot(by="Season");
plt.xticks(rotation=90);

# Grouped by season and crop together, subplots stacked in two rows.
dfap.sample(10).boxplot(by=["Season", 'Crop'],
                        figsize=(8, 6), layout=(2, 1));
plt.xticks(rotation=90);
# Production against Area: default markers, then smaller ones, then with
# the axes swapped.
dfap.plot(kind='scatter', x="Production", y='Area')
dfap.plot(kind='scatter', x="Production", y='Area', s=5)
dfap.plot(kind='scatter', x="Area", y='Production', s=2)

# Hexagonal binning of twenty random rows.
dfap.sample(20).plot.hexbin(x='Area', y='Production', gridsize=15)

# Production of thirty random rows as a line, plus Area of another thirty
# random rows as dots on a secondary y axis.
dfap.sample(30)['Production'].plot();
dfap.sample(30)['Area'].plot(secondary_y=True, style="o")
plt.tight_layout()

# One subplot per numeric column for a hundred random rows.
dfap.sample(100).plot(subplots=True, figsize=(6, 6))
# Overall means and summary statistics of the two measures.
dfap['Production'].mean()
dfap['Area'].mean()
dfap[['Production', 'Area']].mean()
dfap[['Production', 'Area']].describe()

# A different set of aggregates for each measure.
dfap.agg(
    {
        "Production": ["min", "max", "median", "skew"],
        "Area": ["min", "max", "median", "mean"],
    }
)
dfap.head(2)

# Mean Area per season and per crop.
dfap[['Season', 'Area']].groupby('Season').mean()
dfap[['Crop', "Area"]].groupby('Crop').mean()

# Per-district means. dfap still holds text columns at this point, and on
# pandas >= 2.0 a bare .mean() raises TypeError for them, so the aggregation
# is restricted to numeric columns. It is computed once and reused.
district_means = dfap.groupby('District_Name').mean(numeric_only=True)
district_means
district_means.sort_values('Area', ascending=False)
district_means.sort_values('Production', ascending=False)

# Districts ordered by mean Production, plotted as lines.
dfaps = district_means.sort_values('Production', ascending=False)
dfaps.plot(figsize=(10, 5))
plt.xticks(rotation=90)
def _label_bar_heights(axes, make_label):
    """Annotate every patch on *axes* with a label built from its height.

    The label is placed at 1.05 times the patch's x position and 1.05 times
    its height.
    """
    for bar in axes.patches:
        height = bar.get_height()
        axes.annotate(make_label(height), (bar.get_x() * 1.05, height * 1.05))


def _round2(value):
    """Round *value* to two decimals with numpy."""
    return np.round(value, decimals=2)


# --- Area by season -------------------------------------------------------
area_by_season = dfap.groupby('Season')['Area']
dfap_season = area_by_season.mean()
dfap_season
dfap_season.plot.box()
dfap_season.plot.bar()

# Median Area per season, with value labels.
dfTest = area_by_season.median()
ax = dfTest.plot.bar(figsize=(10, 4))
_label_bar_heights(ax, _round2)

area_by_season.mean().plot.bar()

# --- Area by crop ---------------------------------------------------------
area_by_crop = dfap.groupby("Crop")['Area']
area_by_crop.mean().plot.bar(figsize=(10, 3))
area_by_crop.median().plot.bar(figsize=(10, 3))

# --- Area by district -----------------------------------------------------
area_by_district = dfap.groupby('District_Name')['Area']
dfTest = area_by_district.mean()
ax = dfTest.plot.bar(figsize=(10, 4))
_label_bar_heights(ax, _round2)
area_by_district.median().plot.bar(figsize=(10, 3))

# --- Area by district and season (both nesting orders) --------------------
dfap.groupby(["District_Name", 'Season'])['Area'].mean().plot.bar(figsize=(10, 3))
area_by_season_district = dfap.groupby(["Season", 'District_Name'])['Area']
area_by_season_district.mean().plot.bar(figsize=(10, 3))

# Labels showing the unrounded mean.
dfTest = area_by_season_district.mean()
ax = dfTest.plot.bar(figsize=(10, 4))
_label_bar_heights(ax, str)

# Same chart, wider, with labels rounded to two decimals.
dfTest = area_by_season_district.mean()
ax = dfTest.plot.bar(figsize=(12, 4))
_label_bar_heights(ax, _round2)

# Horizontal version: the label is written at the end of each bar.
dfTest = area_by_season_district.mean()
ax = dfTest.plot.barh(figsize=(4, 12))
for bar in ax.patches:
    bar_length = bar.get_width()
    plt.text(bar_length, bar.get_y() + .05,
             str(round(bar_length, 2)),
             fontsize=10, fontweight='bold',
             color='grey')
dfap.head()
df.head(2)
dfap.head(2)
dfap.dtypes

# Use the row id as the index from here on.
dfap = dfap.set_index("ColIndex")
dfap.head(2)

# Per-district totals, computed once and reused by every chart below (dfap
# does not change in between). numeric_only=True keeps the text columns out
# of the sum; pandas >= 2.0 would otherwise concatenate their strings.
dfapTest = dfap.groupby('District_Name').sum(numeric_only=True).reset_index()
dfapTest

# Total Production per district: vertical bars, then horizontal bars.
fig = plt.figure(figsize=(10, 4))
plt.bar(dfapTest['District_Name'], dfapTest['Production'])
plt.xticks(rotation=90)

fig = plt.figure(figsize=(10, 4))
plt.barh(dfapTest['District_Name'], dfapTest['Production'])
plt.xticks(rotation=90)

# Bar / scatter / line side by side, first for Production, then for Area.
# Every pass opens its own figure.
for measure in ('Production', 'Area'):
    plt.figure(figsize=(15, 3))
    plt.subplot(131)
    plt.bar(dfapTest['District_Name'], dfapTest[measure])
    plt.xticks(rotation=90)
    plt.subplot(132)
    plt.scatter(dfapTest['District_Name'], dfapTest[measure])
    plt.xticks(rotation=90)
    plt.subplot(133)
    plt.plot(dfapTest['District_Name'], dfapTest[measure], linewidth=4)
    plt.xticks(rotation=90)

# Total Production per crop, largest first.
dfapTest = dfap.groupby('Crop').sum(numeric_only=True).reset_index()
dfapTest = dfapTest.sort_values('Production', ascending=False)
plt.figure(figsize=(12, 4))
plt.bar(dfapTest['Crop'], dfapTest["Production"])
plt.xticks(rotation=90);
# Rows for every crop except those whose name contains "Coconut"
# (case-insensitive). The copy makes dfapNC independent of dfap.
coconut_mask = dfap['Crop'].str.contains('Coconut', case=False)
dfapNC = dfap.loc[~coconut_mask].copy()
dfapNC

# Per-crop totals. These are kept and plotted: plotting the raw rows would
# draw one bar per row on top of each other for every crop instead of the
# crop's total. numeric_only=True keeps text columns out of the sum.
dfapNC_totals = dfapNC.groupby('Crop').sum(numeric_only=True).reset_index()
dfapNC_totals

# The row-level frame is still used below, now with a fresh integer index.
dfapNC = dfapNC.reset_index()

plt.figure(figsize=(10, 4));
plt.bar(dfapNC_totals["Crop"], dfapNC_totals['Production']);
plt.xticks(rotation=90);

# Additionally drop crops whose name contains "Sugarcane" or "Rice".
dfapNcSc = dfapNC[~dfapNC['Crop'].str.contains('Sugarcane', case=False)]
dfapNcSc = dfapNcSc[~dfapNcSc['Crop'].str.contains('Rice', case=False)]
dfapNcSc
dfapNcSc = dfapNcSc.reset_index()

# Per-crop totals of the remaining crops.
dfapncsc = dfapNcSc.groupby('Crop').sum(numeric_only=True).reset_index()

plt.figure(figsize=(12, 4));
plt.bar(dfapncsc['Crop'], dfapncsc['Production']);
plt.xticks(rotation=90);
# plt.xlabel("Crop Names")
# plt.ylabel("Production")
# plt.title(label="excludes Coconut, Sugarcane, Rice", fontsize=20, color='green')

plt.figure(figsize=(12, 4));
plt.bar(dfapncsc['Crop'], dfapncsc['Area']);
plt.xticks(rotation=90);
dfapncsc
dfap_season

# Totals per season. numeric_only=True limits the sum to numeric columns;
# pandas >= 2.0 would otherwise concatenate the text columns per group.
dfapseason = dfap.groupby('Season').sum(numeric_only=True)
dfapseason
dfapseason = dfapseason.reset_index()
plt.bar(dfapseason['Season'], dfapseason['Production'])
plt.bar(dfapseason['Season'], dfapseason['Area'])

# Totals per district and season, exported for charting elsewhere. With
# numeric_only=True the workbook holds only the group keys and the summed
# numeric columns, not concatenated text.
dfapds = dfap.groupby(['District_Name', 'Season']).sum(numeric_only=True)
dfapds
dfapds = dfapds.reset_index()
dfapds.to_excel('dfapds_for_graph.xlsx')
# The row-id column is no longer needed on the full data set.
df.head()
df = df.drop(columns='ColIndex')
df.head()

# Same for the Andhra Pradesh subset: move the row id out of the index,
# then drop it.
dfap.head(2)
dfap = dfap.reset_index()
dfap.dtypes
dfap = dfap.drop(columns='ColIndex')
dfap.head(2)

# One row per (year, district, season, crop) with summed measures.
# numeric_only=True keeps the remaining text column out of the sum; pandas
# >= 2.0 would otherwise concatenate its strings per group.
group_keys = ['Crop_Year', 'District_Name', 'Season', 'Crop']
dfapgrp = dfap.groupby(group_keys).sum(numeric_only=True)
dfapgrp.head(2)
dfapgrp = dfapgrp.reset_index()
dfapgrp.shape
dfapgrp.head(2)
dfapgrp.to_excel('dfapgrp.xlsx')
dfapgrp.head(2)

# Area against crop year.
plt.bar(dfapgrp['Crop_Year'], dfapgrp['Area'])
plt.xticks(rotation=90)
# Total Area per crop year and season as vertical bars; each patch on the
# axes gets a label with its height rounded to two decimals.
year_season_keys = ['Crop_Year', 'Season']
dfaptest = dfapgrp.groupby(year_season_keys)['Area'].sum()
ax = dfaptest.plot.bar(figsize=(15, 4), fontsize=10)
for bar in ax.patches:
    bar_height = bar.get_height()
    ax.annotate(np.round(bar_height, decimals=2),
                (bar.get_x() * 1.05, bar_height * 1.05))

# Horizontal version; the label is written at the end of each bar.
dfaptest = dfapgrp.groupby(year_season_keys)['Area'].sum()
ax = dfaptest.plot.barh(figsize=(15, 20), fontsize=10)
for bar in ax.patches:
    bar_length = bar.get_width()
    plt.text(bar_length, bar.get_y() + .05,
             str(round(bar_length, 2)),
             fontsize=10, fontweight='bold',
             color='grey')

# Same aggregation restricted to the GUNTUR district.
is_guntur = dfapgrp['District_Name'] == 'GUNTUR'
dftest = dfapgrp.loc[is_guntur]
dftest.groupby(year_season_keys)['Area'].sum().plot.bar(figsize=(10, 4));
plt.xticks(rotation=90);
def plotDistGraph(distName):
    """Bar-chart the total Area per (Crop_Year, Season) for one district.

    Reads the module-level ``dfapgrp`` frame and keeps the rows whose
    ``District_Name`` equals *distName*.

    Args:
        distName: District name exactly as stored in
            ``dfapgrp['District_Name']``.
    """
    district_rows = dfapgrp.loc[dfapgrp['District_Name'] == distName]
    area_totals = district_rows.groupby(['Crop_Year', 'Season'])['Area'].sum()

    # A Series plot without an explicit ``ax`` draws on the current axes, so
    # consecutive calls would pile their bars onto one chart. Each call
    # therefore gets a figure of its own.
    _, ax = plt.subplots(figsize=(10, 4))
    area_totals.plot.bar(ax=ax)
    plt.xticks(rotation=90)
# Districts available in the grouped frame.
dfapgrp['District_Name'].unique()

# Area charts for two districts.
for district in ('VIZIANAGARAM', 'SRIKAKULAM'):
    plotDistGraph(district)

# Column-wise minimum and maximum of the grouped frame.
print(dfapgrp.min())
print(dfapgrp.max())

# Rabi-season rows of the ANANTAPUR district.
in_anantapur = dfapgrp['District_Name'] == "ANANTAPUR"
in_rabi = dfapgrp['Season'] == "Rabi"
dfTest = dfapgrp[in_anantapur & in_rabi]
dfTest
def plotDistGraph(distName, season):
    """Bar-chart total Production per (Crop_Year, Season) for one district/season.

    Reads the module-level ``dfapgrp`` frame. This definition rebinds the
    name ``plotDistGraph``; the one-argument version defined earlier in the
    file is no longer reachable afterwards.

    Args:
        distName: District name exactly as stored in
            ``dfapgrp['District_Name']``.
        season: Season label exactly as stored in ``dfapgrp['Season']``.
    """
    selected = dfapgrp[(dfapgrp['District_Name'] == distName) &
                       (dfapgrp['Season'] == season)]
    production_totals = (
        selected.groupby(['Crop_Year', 'Season'])['Production'].sum()
    )

    # A Series plot without an explicit ``ax`` draws on the current axes, so
    # consecutive calls would pile their bars onto one chart. Each call
    # therefore gets a figure of its own.
    _, ax = plt.subplots(figsize=(10, 4))
    production_totals.plot.bar(ax=ax)
    plt.xticks(rotation=90)
# Production charts for ANANTAPUR in two seasons.
for season_name in ('Rabi', 'Kharif'):
    plotDistGraph('ANANTAPUR', season_name)

# Districts available in the grouped frame.
dfapgrp['District_Name'].unique()

# Production charts for VIZIANAGARAM in two seasons.
for season_name in ('Rabi', 'Whole Year'):
    plotDistGraph('VIZIANAGARAM', season_name)
def plotDistGraph(distName, season):
    """Bar-chart the total Production per crop for one district and season.

    Reads the module-level ``dfapgrp`` frame. This definition rebinds the
    name ``plotDistGraph``; the versions defined earlier in the file are no
    longer reachable afterwards.

    Args:
        distName: District name exactly as stored in
            ``dfapgrp['District_Name']``.
        season: Season label exactly as stored in ``dfapgrp['Season']``.
    """
    selected = dfapgrp[(dfapgrp['District_Name'] == distName) &
                       (dfapgrp['Season'] == season)]
    production_by_crop = selected.groupby(['Crop'])['Production'].sum()

    # A Series plot without an explicit ``ax`` draws on the current axes, so
    # consecutive calls would pile their bars onto one chart. Each call
    # therefore gets a figure of its own.
    _, ax = plt.subplots(figsize=(10, 4))
    production_by_crop.plot.bar(ax=ax)
    plt.xticks(rotation=90)
# Districts available in the grouped frame.
dfapgrp['District_Name'].unique()

# Whole-year Production per crop for three districts.
for district in ('PRAKASAM', 'GUNTUR', 'KURNOOL'):
    plotDistGraph(district, 'Whole Year')
def plotGraph(stateName, distName, season):
    """Bar-chart the total Production per crop for a state, district and season.

    Reads the module-level ``df`` frame (all states) and keeps the rows that
    match all three arguments.

    Args:
        stateName: State name exactly as stored in ``df['State_Name']``.
        distName: District name exactly as stored in
            ``df['District_Name']``.
        season: Season label exactly as stored in ``df['Season']``.
    """
    selected = df[(df['State_Name'] == stateName) &
                  (df['District_Name'] == distName) &
                  (df['Season'] == season)]
    production_by_crop = selected.groupby(['Crop'])['Production'].sum()

    # A Series plot without an explicit ``ax`` draws on the current axes, so
    # consecutive calls would pile their bars onto one chart. Each call
    # therefore gets a figure of its own.
    _, ax = plt.subplots(figsize=(10, 4))
    production_by_crop.plot.bar(ax=ax)
    plt.xticks(rotation=90)
# States available in the full data set.
df['State_Name'].unique()

# Production per crop in GUNTUR (Andhra Pradesh), one chart per season.
plotGraph('Andhra Pradesh', 'GUNTUR', 'Rabi')
plotGraph('Andhra Pradesh', 'GUNTUR', 'Kharif')
plotGraph('Andhra Pradesh', 'GUNTUR', 'Whole Year')

# Per-crop totals behind the "Whole Year" chart. numeric_only=True keeps the
# text columns out of the sum; pandas >= 2.0 would otherwise concatenate
# their strings per crop.
guntur_whole_year = df[(df['State_Name'] == 'Andhra Pradesh') &
                       (df['District_Name'] == 'GUNTUR') &
                       (df['Season'] == 'Whole Year')]
dfTest = guntur_whole_year.groupby('Crop').sum(numeric_only=True)
dfTest