Difference makes the DIFFERENCE
! pip3 install plotly==5.7.0
! pip3 install chart_studio
import pandas as pd
import numpy as np
import seaborn as sns
import plotly
import plotly.io as pio
import cufflinks as cf
from plotly.offline import download_plotlyjs, iplot, plot, init_notebook_mode
# Initialise Plotly's offline notebook mode.
# NOTE(review): connected=True presumably makes plotly.js load from the CDN
# rather than being embedded in the notebook — confirm against the plotly docs.
init_notebook_mode(connected=True)
# Put cufflinks into offline mode as well (cufflinks is imported as `cf` above).
cf.go_offline()
# ~~~~~~~~~~~~~~~~~~ Code for Plotly and CuffLinks ~~~~~~~~~~~~~~~~~~~~~
# reference: https://www.repath.in/Plotly-and-Cuff-Links/
# ~~~~~~~~~~~~~~~~ The following function helps Google Colab display graphs.
# It should be called in every cell where a graph needs to be plotted. ~~~~~~~~~~~~~~~~~~~
def configure_plotly_browser_state():
    """Display the RequireJS configuration used to render Plotly in a notebook cell.

    Emits an HTML snippet that loads ``require.js`` from the notebook's
    static files and maps the ``plotly`` module name to the Plotly CDN
    build. Takes no arguments and returns ``None``; the only effect is the
    displayed output of the current cell.
    """
    # Import both names explicitly from the public `IPython.display` module:
    # relying on the `display` global only works inside a notebook kernel
    # that injects it, and `IPython.core.display` is an internal path.
    from IPython.display import HTML, display

    display(HTML('''
<script src="/static/components/requirejs/require.js"></script>
<script>
requirejs.config({
paths: {
base: '/static/base',
plotly: 'https://cdn.plot.ly/plotly-latest.min.js?noext',
},
});
</script>
'''))
# configure_plotly_browser_state()
# df.plot()
# --- First pass: load the raw CSV and inspect it --------------------------
df = pd.read_csv('/content/agriculture_ds.csv')
df.Production.isnull().sum()
# `shape` is a tuple attribute, not a method; `df.shape()` raises
# "TypeError: 'tuple' object is not callable".
df.shape
df.head()
sns.heatmap(df.isnull(), cmap='Blues_r')
df.head(1)
df.State_Name.unique()
df.Crop_Year.unique()
df.Season.unique()
df.Crop.unique()
df.dtypes
df.info()

# --- Second pass: re-read with "=" parsed as a missing value --------------
# NOTE(review): presumably "=" is the placeholder this file uses for missing
# Production figures — confirm against the raw data.
df = pd.read_csv('/content/agriculture_ds.csv', na_values="=")
df.Production.isnull().sum()
sns.heatmap(df.isnull(), cmap='Blues_r')
df.dtypes
df.shape

# Share of rows whose Production value is missing, computed from the data
# instead of hard-coded counts so it stays correct if the file changes.
missing_production = df.Production.isnull().sum()
total_rows = df.Production.isnull().count()
print("Null values percentage : ", (missing_production / total_rows) * 100)

# Drop every row that has a missing value in any column.
df.dropna(inplace=True)
df.shape
Watch: Hans Rosling's TED talk on data visualisation
# Distribution of the Production and Area columns.
sns.kdeplot(df.Production)

# The column is passed by keyword so the plot does not depend on how the
# installed seaborn release interprets a lone positional argument.
sns.boxplot(x=df.Production)

# One box per state. `hue` on its own (without a categorical axis to group
# on) does not split the boxes, so State_Name is used as the y grouping.
sns.boxplot(x='Production', y='State_Name', data=df)

sns.boxplot(x=df.Area)
sns.kdeplot(df.Area)
# Rows and district names for a single state.
df[df.State_Name == 'Karnataka']
df[df.State_Name == "Karnataka"]['District_Name'].unique()

# Rows whose district name, then state name, contains "total"
# (case-insensitive).
df_total = df.loc[df['District_Name'].str.contains('Total', case=False)]
df_total.count()
df_total = df.loc[df['State_Name'].str.contains('Total', case=False)]
df_total

# Crop-name filters (case-insensitive substring match).
df_rice = df.loc[df['Crop'].str.contains('rice', case=False)]
df_rice
df_rice_wheat = df.loc[
    df['Crop'].str.contains("wheat", case=False)
    | df['Crop'].str.contains('rice', case=False)
]
df_rice_wheat

# Aggregations. `numeric_only=True` restricts the sum to the numeric columns;
# without it the remaining text columns are summed too, which on recent
# pandas means concatenating their strings.
df.groupby(['State_Name', 'Crop', 'Crop_Year']).sum(numeric_only=True)
df[df.State_Name == "West Bengal"]['Crop'].unique()
df.groupby(['State_Name', 'Crop_Year']).sum(numeric_only=True)
df_ = df.groupby(['State_Name', 'Crop_Year']).sum(numeric_only=True)
df_.head()
df_.reset_index(inplace=True)

# Number of (state, year) rows per state, and number of crop records per state.
df_[['State_Name', 'Crop_Year']].groupby('State_Name').count()
df[['State_Name', 'Crop']].groupby('State_Name').count()

df_ani = df.loc[df['State_Name'].str.contains('Andaman and Nicobar Islands', case=False)]
df_ani
import matplotlib.pyplot as plt

# Area over the crop years, one state at a time.
for state in ('Andaman and Nicobar Islands', 'Odisha', 'Andhra Pradesh'):
    sns.lineplot(x='Crop_Year', y='Area', data=df[df.State_Name == state])

# Production over the crop years for Andhra Pradesh.
andhra_rows = df[df.State_Name == 'Andhra Pradesh']
sns.lineplot(x='Crop_Year', y='Production', data=andhra_rows)

# Production per year from the per-state aggregate, one line per state.
per_state_lines = dict(x='Crop_Year', y='Production', data=df_, hue='State_Name')

# Default legend placement.
sns.lineplot(**per_state_lines)

# Same plot with the legend anchored at the axes' (1, 1) corner.
sns.lineplot(**per_state_lines)
plt.legend(bbox_to_anchor=[1, 1])

sns.lineplot(**per_state_lines)
plt.legend(bbox_to_anchor=(1, 1))
https://stackoverflow.com/questions/47230817/plotly-notebook-mode-with-google-colaboratory
import plotly.graph_objects as go

# Small bar chart shown through the 'colab' renderer.
bar_heights = [2, 1, 3, 5, 8, 9, 12, 11, 2, 4, 2, 3]
fig = go.Figure(
    data=[go.Bar(y=bar_heights)],
    layout_title_text="A Figure Displayed with the 'colab' Renderer",
)
fig.show(renderer="colab")
# Plotly Express ships inside the plotly package (installed at the top of
# this file), so the separate `plotly_express` package does not need to be
# installed or imported.
import plotly.express as px

df.head(1)

# Sum the numeric columns per state and crop year. `numeric_only=True` keeps
# the text columns out of the sum (recent pandas would otherwise concatenate
# their strings).
df_ = df.groupby(['State_Name', 'Crop_Year']).sum(numeric_only=True)
df_.head()
df_.reset_index(inplace=True)
df_.head()
df_[['State_Name', 'Crop_Year']].groupby('State_Name').count()
df_.head(2)

# Order the rows by year; Crop_Year is used as the animation frame in the
# charts that follow.
df_.sort_values('Crop_Year', inplace=True)
# Plotly Express functions already return a plotly Figure, so each chart is
# built directly (no go.Figure wrapper, no repeated import) and its title is
# passed through the `title` argument. Titles describe the encoding actually
# used by each chart.

# Animated scatter: Area vs Production, one frame per crop year, coloured by state.
fig = px.scatter(
    df_, x="Area", y="Production",
    animation_frame="Crop_Year", animation_group="State_Name",
    color="State_Name",
)
fig.show(renderer="colab")

# Same encoding as an animated bar chart.
fig = px.bar(
    df_, x="Area", y="Production",
    animation_frame="Crop_Year", animation_group="State_Name",
    color="State_Name",
    title="Area on X and Production on Y, Animation frame as Crop Year grouped by State Name, Colors indicates states",
)
fig.show(renderer="colab")

# Area per crop year, coloured by crop year.
fig = px.bar(
    df_, x="Crop_Year", y="Area", color="Crop_Year",
    title="Crop year on X and Area on Y, Colors indicate crop year",
)
fig.show(renderer="colab")

# Funnel of Area per state, coloured by state.
fig = px.funnel(
    df_, x="State_Name", y="Area", color="State_Name",
    title="State on X and Area on Y, Colors indicate states",
)
fig.show(renderer="colab")

# Funnel of Area per state, coloured by crop year.
fig = px.funnel(
    df_, x="State_Name", y="Area", color="Crop_Year",
    title="State on X and Area on Y, Colors indicate crop year",
)
fig.show(renderer="colab")

# Funnel of Area per state without a colour encoding.
fig = px.funnel(
    df_, x="State_Name", y="Area",
    title="State on X and Area on Y",
)
fig.show(renderer="colab")
# Plotly Express' bundled gapminder sample data.
# NOTE: this rebinds `df`; the agriculture CSV is read again further down.
df = px.data.gapminder()
df
# Keep only the rows for the year 2007.
df = px.data.gapminder().query("year == 2007")
df
df.head()

# The helper has to be called; a bare `configure_plotly_browser_state`
# is an expression statement that does nothing.
configure_plotly_browser_state()

# Strip plot of life expectancy, coloured by continent, with the country
# name shown on hover.
fig = px.strip(df, x="lifeExp", hover_name='country', color='continent')
fig.show(renderer="colab")
# Reload the agriculture data with "=" parsed as missing, then drop every
# row that has a missing value.
df = pd.read_csv('/content/agriculture_ds.csv', na_values="=")
df.head(1)
df.dropna(inplace=True)

# Kerala in 2000: the existing `df_` aggregate, then the raw rows sorted by
# Production.
df_[(df_.State_Name == 'Kerala') & (df_.Crop_Year == 2000)]
df[(df.State_Name == 'Kerala') & (df.Crop_Year == 2000)].sort_values('Production')

# Restrict to four crops and sum the numeric columns per state and year.
# `numeric_only=True` keeps the text columns out of the sum (recent pandas
# would otherwise concatenate their strings).
df_ = df[df.Crop.isin(['Rice', 'Wheat', 'Maize', "Ragi"])]
df_
df_ = (
    df[df.Crop.isin(['Rice', 'Wheat', 'Maize', "Ragi"])]
    .groupby(['State_Name', 'Crop_Year'])
    .sum(numeric_only=True)
)
df_
df_.reset_index(inplace=True)
df_.head(1)
df_.sort_values('Crop_Year', inplace=True)

# Plotly Express already returns a Figure, so no go.Figure wrapper or
# repeated import is needed.
# Area vs Production, animated over the crop years.
fig = px.scatter(
    df_, x='Area', y='Production',
    animation_frame='Crop_Year', animation_group='State_Name',
    color='State_Name',
)
fig.show(renderer="colab")

# Production per unit of Area.
df_['Efficiency'] = df_['Production'] / df_['Area']
df_.head(2)

# Area vs Efficiency, marker size proportional to Production.
fig = px.scatter(
    df_, x='Area', y='Efficiency', size='Production',
    animation_frame="Crop_Year", animation_group="State_Name",
    color='State_Name',
)
fig.show(renderer="colab")
df_.head(2)

# Same chart with fixed axis ranges.
fig = px.scatter(
    df_, x='Area', y='Efficiency', size='Production',
    animation_frame="Crop_Year", animation_group="State_Name",
    range_y=[0.75, 5], range_x=[-1E6, 20E6],
    color='State_Name',
)
fig.show(renderer="colab")
# Raw Kerala rows for the year 2000, sorted by Production.
df[(df.State_Name == 'Kerala') & (df.Crop_Year == 2000)].sort_values('Production')

# Rows for the four selected crops.
df[df.Crop.isin(['Rice', "Wheat", "Ragi", "Maize"])]

# Sum the numeric columns per state and year. `numeric_only=True` keeps the
# text columns out of the sum (recent pandas would otherwise concatenate
# their strings).
df_ = (
    df[df.Crop.isin(['Rice', "Wheat", "Ragi", "Maize"])]
    .groupby(['State_Name', 'Crop_Year'])
    .sum(numeric_only=True)
)
df_.head(2)
df_.reset_index(inplace=True)
df_.sort_values('Crop_Year', inplace=True)
df_.head(2)

# Area vs Production, animated over the crop years.
fig = px.scatter(
    df_, x="Area", y='Production',
    animation_frame="Crop_Year", animation_group='State_Name',
    color="State_Name",
)
fig.show(renderer="colab")

# Production per unit of Area.
df_['Efficiency'] = df_['Production'] / df_['Area']

# Area vs Efficiency, marker size proportional to Production.
fig = px.scatter(
    df_, x="Area", y='Efficiency', size='Production',
    animation_frame="Crop_Year", animation_group='State_Name',
    color="State_Name",
)
fig.show(renderer="colab")
Address the values that are going beyond the graph area
# Area vs Efficiency bubble chart with both axes pinned to fixed ranges.
axis_limits = {"range_y": [0.75, 5], "range_x": [-1E6, 20E6]}
bubble_chart = px.scatter(
    df_,
    x="Area",
    y='Efficiency',
    size='Production',
    color="State_Name",
    animation_frame="Crop_Year",
    animation_group='State_Name',
    **axis_limits,
)
fig = go.Figure(bubble_chart)
fig.show(renderer="colab")
some other exercises like: