Agriculture OpenSource DataSet
 Visualisation with Plotly Express
agriculture_ds
In [ ]:
! pip3 install plotly==5.7.0
! pip3 install chart_studio
In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import plotly
import plotly.io as pio
In [3]:
import cufflinks as cf
from plotly.offline import download_plotlyjs, iplot, plot, init_notebook_mode
init_notebook_mode(connected=True)
cf.go_offline()
In [4]:
# ~~~~~~~~~~~~~~~~~~ Code for Plotly and CuffLinks ~~~~~~~~~~~~~~~~~~~~~
# reference: https://www.repath.in/Plotly-and-Cuff-Links/

#~~~~~~~~~~~~~~~~ the following function helps Google Colab to display graphs
# and should be used in every cell, where ever a graph needs to be plotted ~~~~~~~~~~~~~~~~~~~

def configure_plotly_browser_state():
  import IPython
  display(IPython.core.display.HTML('''
        <script src="/static/components/requirejs/require.js"></script>
        <script>
          requirejs.config({
            paths: {
              base: '/static/base',
              plotly: 'https://cdn.plot.ly/plotly-latest.min.js?noext',
            },
          });
        </script>
        '''))
  
# configure_plotly_browser_state()
# df.plot()
In [6]:
df = pd.read_csv('/content/agriculture_ds.csv')
In [7]:
df.Production.isnull().sum()
Out[7]:
1
In [ ]:
df.shape()
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-7-0e566b70f572> in <module>()
----> 1 df.shape()

TypeError: 'tuple' object is not callable
In [8]:
df.shape
Out[8]:
(38264, 7)
In [9]:
df.head()
Out[9]:
State_Name District_Name Crop_Year Season Crop Area Production
0 Andaman and Nicobar Islands NICOBARS 2000 Kharif Arecanut 1254.0 2000
1 Andaman and Nicobar Islands NICOBARS 2000 Kharif Other Kharif pulses 2.0 1
2 Andaman and Nicobar Islands NICOBARS 2000 Kharif Rice 102.0 321
3 Andaman and Nicobar Islands NICOBARS 2000 Whole Year Banana 176.0 641
4 Andaman and Nicobar Islands NICOBARS 2000 Whole Year Cashewnut 720.0 165
In [10]:
sns.heatmap(df.isnull(), cmap = 'Blues_r')
Out[10]:
<matplotlib.axes._subplots.AxesSubplot at 0x7efc0fab3ed0>
In [12]:
df.head(1)
Out[12]:
State_Name District_Name Crop_Year Season Crop Area Production
0 Andaman and Nicobar Islands NICOBARS 2000 Kharif Arecanut 1254.0 2000
In [13]:
df.State_Name.unique()
Out[13]:
array(['Andaman and Nicobar Islands', 'Andhra Pradesh',
       'Arunachal Pradesh', 'Assam', 'Bihar'], dtype=object)
In [ ]:
df.Crop_Year.unique()
Out[ ]:
array([2000, 2001, 2002, 2003, 2004, 2005, 2006, 2010, 1997, 1998, 1999,
       2007, 2008, 2009, 2011, 2012, 2013, 2014, 2015])
In [ ]:
df.Season.unique()
Out[ ]:
array(['Kharif     ', 'Whole Year ', 'Autumn     ', 'Rabi       ',
       'Summer     ', 'Winter     '], dtype=object)
In [ ]:
df.Crop.unique()
Out[ ]:
array(['Arecanut', 'Other Kharif pulses', 'Rice', 'Banana', 'Cashewnut',
       'Coconut ', 'Dry ginger', 'Sugarcane', 'Sweet potato', 'Tapioca',
       'Black pepper', 'Dry chillies', 'other oilseeds', 'Turmeric',
       'Maize', 'Moong(Green Gram)', 'Urad', 'Arhar/Tur', 'Groundnut',
       'Sunflower', 'Bajra', 'Castor seed', 'Cotton(lint)', 'Horse-gram',
       'Jowar', 'Korra', 'Ragi', 'Tobacco', 'Gram', 'Wheat', 'Masoor',
       'Sesamum', 'Linseed', 'Safflower', 'Onion', 'other misc. pulses',
       'Samai', 'Small millets', 'Coriander', 'Potato',
       'Other  Rabi pulses', 'Soyabean', 'Beans & Mutter(Vegetable)',
       'Bhindi', 'Brinjal', 'Citrus Fruit', 'Cucumber', 'Grapes', 'Mango',
       'Orange', 'other fibres', 'Other Fresh Fruits', 'Other Vegetables',
       'Papaya', 'Pome Fruit', 'Tomato', 'Rapeseed &Mustard', 'Mesta',
       'Cowpea(Lobia)', 'Lemon', 'Pome Granet', 'Sapota', 'Cabbage',
       'Peas  (vegetable)', 'Niger seed', 'Bottle Gourd', 'Sannhamp',
       'Varagu', 'Garlic', 'Ginger', 'Oilseeds total', 'Pulses total',
       'Jute', 'Peas & beans (Pulses)', 'Blackgram', 'Paddy', 'Pineapple',
       'Barley', 'Khesari', 'Guar seed', 'Moth',
       'Other Cereals & Millets', 'Cond-spcs other', 'Turnip', 'Carrot',
       'Redish', 'Arcanut (Processed)', 'Atcanut (Raw)',
       'Cashewnut Processed', 'Cashewnut Raw', 'Cardamom', 'Rubber',
       'Bitter Gourd', 'Drum Stick', 'Jack Fruit', 'Snak Guard',
       'Pump Kin', 'Tea', 'Coffee', 'Cauliflower', 'Other Citrus Fruit',
       'Water Melon', 'Total foodgrain', 'Kapas', 'Colocosia', 'Lentil',
       'Bean', 'Jobster', 'Perilla', 'Rajmash Kholar',
       'Ricebean (nagadal)', 'Ash Gourd', 'Beet Root', 'Lab-Lab',
       'Ribed Guard', 'Yam', 'Apple', 'Peach', 'Pear', 'Plums', 'Litchi',
       'Ber', 'Other Dry Fruit', 'Jute & mesta'], dtype=object)
In [14]:
df.dtypes
Out[14]:
State_Name        object
District_Name     object
Crop_Year          int64
Season            object
Crop              object
Area             float64
Production        object
dtype: object
In [15]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 38264 entries, 0 to 38263
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   State_Name     38264 non-null  object 
 1   District_Name  38264 non-null  object 
 2   Crop_Year      38264 non-null  int64  
 3   Season         38264 non-null  object 
 4   Crop           38263 non-null  object 
 5   Area           38263 non-null  float64
 6   Production     38263 non-null  object 
dtypes: float64(1), int64(1), object(5)
memory usage: 2.0+ MB
In [16]:
df = pd.read_csv('/content/agriculture_ds.csv', na_values = "=")
In [17]:
df.Production.isnull().sum()
Out[17]:
2567

the following heatmap helps to identify the null values in the dataframa

  • all the white horizontal bars, which are white in color are null values
  • color bar cbar is not requred though
In [19]:
sns.heatmap(df.isnull(), cmap = 'Blues_r')
Out[19]:
<matplotlib.axes._subplots.AxesSubplot at 0x7efc0cbf11d0>
In [ ]:
df.dtypes
Out[ ]:
State_Name        object
District_Name     object
Crop_Year          int64
Season            object
Crop              object
Area             float64
Production       float64
dtype: object
In [ ]:
df.shape
Out[ ]:
(246091, 7)
In [20]:
df.Production.isnull().count()
Out[20]:
131297
In [ ]:
df.Production.isnull().sum()
In [ ]:
print ("Null values percentage : " , (3727/246091)*100)
Null values percentage :  1.5144804157811542

dropna()

  • deletes all rows, even if the row contains atleast a single null value (ie., across any column)
  • Threshold is another parameter that can check the number of null values
  • for example if there be a need to delete rows that has more than 4 null values
In [21]:
df.dropna(inplace = True)
In [22]:
df.shape
Out[22]:
(128730, 7)

Question: Which state has done well and which are not?

watch - Hans Roslng video from Ted on Data Visualisation

In [23]:
sns.kdeplot(df.Production)
Out[23]:
<matplotlib.axes._subplots.AxesSubplot at 0x7efc0c9da650>
In [24]:
sns.boxplot(df.Production)
/usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning:

Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.

Out[24]:
<matplotlib.axes._subplots.AxesSubplot at 0x7efc0a95cb50>
In [25]:
sns.boxplot(df.Production, hue=df.State_Name)
/usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning:

Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.

Out[25]:
<matplotlib.axes._subplots.AxesSubplot at 0x7efc0c9daa90>
In [ ]:
sns.boxplot(df.Area)
/usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning:

Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.

Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe5dadb3b50>
In [ ]:
sns.kdeplot(df.Area)
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe5dad9bad0>
In [ ]:
df[df.State_Name == 'Karnataka']
Out[ ]:
State_Name District_Name Crop_Year Season Crop Area Production
76865 Karnataka BAGALKOT 1998 Kharif Arhar/Tur 6154.0 2602.0
76866 Karnataka BAGALKOT 1998 Kharif Bajra 48855.0 52375.0
76867 Karnataka BAGALKOT 1998 Kharif Castor seed 71.0 61.0
76868 Karnataka BAGALKOT 1998 Kharif Cotton(lint) 15225.0 22129.0
76869 Karnataka BAGALKOT 1998 Kharif Groundnut 16368.0 7734.0
... ... ... ... ... ... ... ...
97981 Karnataka YADGIR 2014 Summer Onion 7.0 34.0
97982 Karnataka YADGIR 2014 Summer Rice 15205.0 35029.0
97983 Karnataka YADGIR 2014 Summer Sunflower 14.0 5.0
97984 Karnataka YADGIR 2014 Whole Year Coconut 168.0 1499.0
97986 Karnataka YADGIR 2014 Whole Year Sugarcane 862.0 78614.0

21079 rows × 7 columns

In [ ]:
df[df.State_Name == "Karnataka"]['District_Name'].unique()
Out[ ]:
array(['BAGALKOT', 'BANGALORE RURAL', 'BELGAUM', 'BELLARY',
       'BENGALURU URBAN', 'BIDAR', 'BIJAPUR', 'CHAMARAJANAGAR',
       'CHIKBALLAPUR', 'CHIKMAGALUR', 'CHITRADURGA', 'DAKSHIN KANNAD',
       'DAVANGERE', 'DHARWAD', 'GADAG', 'GULBARGA', 'HASSAN', 'HAVERI',
       'KODAGU', 'KOLAR', 'KOPPAL', 'MANDYA', 'MYSORE', 'RAICHUR',
       'RAMANAGARA', 'SHIMOGA', 'TUMKUR', 'UDUPI', 'UTTAR KANNAD',
       'YADGIR'], dtype=object)
In [ ]:
df_total = df.loc[df['District_Name'].str.contains('Total', case = False)]
In [ ]:
df_total.count()
Out[ ]:
State_Name       0
District_Name    0
Crop_Year        0
Season           0
Crop             0
Area             0
Production       0
dtype: int64

to create a subset with crop type to be only Rice Crop

In [ ]:
df_total = df.loc[df['State_Name'].str.contains('Total', case = False)]
In [ ]:
df_total
Out[ ]:
State_Name District_Name Crop_Year Season Crop Area Production
In [ ]:
df_rice = df.loc[df['Crop'].str.contains('rice', case = False)]
In [ ]:
df_rice
Out[ ]:
State_Name District_Name Crop_Year Season Crop Area Production
2 Andaman and Nicobar Islands NICOBARS 2000 Kharif Rice 102.00 321.00
12 Andaman and Nicobar Islands NICOBARS 2001 Kharif Rice 83.00 300.00
18 Andaman and Nicobar Islands NICOBARS 2002 Kharif Rice 189.20 510.84
27 Andaman and Nicobar Islands NICOBARS 2003 Kharif Rice 52.00 90.17
36 Andaman and Nicobar Islands NICOBARS 2004 Kharif Rice 52.94 72.57
... ... ... ... ... ... ... ...
246049 West Bengal PURULIA 2013 Summer Rice 516.00 1274.00
246052 West Bengal PURULIA 2013 Winter Rice 302274.00 730136.00
246058 West Bengal PURULIA 2014 Autumn Rice 264.00 721.00
246086 West Bengal PURULIA 2014 Summer Rice 306.00 801.00
246089 West Bengal PURULIA 2014 Winter Rice 279151.00 597899.00

15092 rows × 7 columns

to create a subset with only Rice and Wheat as crops

In [ ]:
df_rice_wheat = df.loc[df['Crop'].str.contains("wheat", case = False) |
                       df['Crop'].str.contains('rice', case = False)]
In [ ]:
df_rice_wheat
Out[ ]:
State_Name District_Name Crop_Year Season Crop Area Production
2 Andaman and Nicobar Islands NICOBARS 2000 Kharif Rice 102.00 321.00
12 Andaman and Nicobar Islands NICOBARS 2001 Kharif Rice 83.00 300.00
18 Andaman and Nicobar Islands NICOBARS 2002 Kharif Rice 189.20 510.84
27 Andaman and Nicobar Islands NICOBARS 2003 Kharif Rice 52.00 90.17
36 Andaman and Nicobar Islands NICOBARS 2004 Kharif Rice 52.94 72.57
... ... ... ... ... ... ... ...
246052 West Bengal PURULIA 2013 Winter Rice 302274.00 730136.00
246058 West Bengal PURULIA 2014 Autumn Rice 264.00 721.00
246084 West Bengal PURULIA 2014 Rabi Wheat 1622.00 3663.00
246086 West Bengal PURULIA 2014 Summer Rice 306.00 801.00
246089 West Bengal PURULIA 2014 Winter Rice 279151.00 597899.00

22970 rows × 7 columns

In [26]:
df.groupby(['State_Name', 'Crop', 'Crop_Year']).sum()
Out[26]:
Area Production
State_Name Crop Crop_Year
Andaman and Nicobar Islands Arecanut 2000.0 4354.00 7200.00
2001.0 4354.00 7300.00
2002.0 4363.00 7350.00
2003.0 4379.00 6707.00
2004.0 4425.37 4781.05
... ... ... ... ...
Maharashtra Wheat 2012.0 335600.00 508000.00
2013.0 481800.00 692000.00
2014.0 492900.00 598200.00
other oilseeds 2003.0 5500.00 1400.00
2004.0 6100.00 2100.00

6763 rows × 2 columns

In [27]:
df[df.State_Name == "West Bengal"]['Crop'].unique()
Out[27]:
array([], dtype=object)
In [ ]:
df.groupby(['State_Name', 'Crop_Year']).sum()
Out[ ]:
Area Production
State_Name Crop_Year
Andaman and Nicobar Islands 2000 44518.00 89060914.00
2001 41163.00 89718700.00
2002 45231.40 94387137.67
2003 44799.40 95296454.67
2004 45308.77 87186497.63
... ... ... ...
West Bengal 2010 7246875.00 38308645.00
2011 7755360.00 36777774.00
2012 7850936.00 38918275.00
2013 7999815.00 37901281.00
2014 8058390.00 43584403.00

519 rows × 2 columns

In [28]:
df_ = df.groupby(['State_Name', 'Crop_Year']).sum()
In [29]:
df_.head()
Out[29]:
Area Production
State_Name Crop_Year
Andaman and Nicobar Islands 2000.0 44518.00 89060914.00
2001.0 41163.00 89718700.00
2002.0 45231.40 94387137.67
2003.0 44799.40 95296454.67
2004.0 45308.77 87186497.63
In [30]:
df_.reset_index(inplace = True)

to get the Crop_Year Count in the dataset

In [31]:
df_[['State_Name', 'Crop_Year']].groupby('State_Name').count()
Out[31]:
Crop_Year
State_Name
Andaman and Nicobar Islands 8
Andhra Pradesh 18
Arunachal Pradesh 18
Assam 18
Bihar 18
Chandigarh 13
Chhattisgarh 15
Dadra and Nagar Haveli 17
Goa 12
Gujarat 16
Haryana 16
Himachal Pradesh 12
Jammu and Kashmir 12
Jharkhand 7
Karnataka 18
Kerala 18
Madhya Pradesh 17
Maharashtra 18
In [ ]:
df[['State_Name', 'Crop']].groupby('State_Name').count()
Out[ ]:
Crop
State_Name
Andaman and Nicobar Islands 203
Andhra Pradesh 9561
Arunachal Pradesh 2545
Assam 14622
Bihar 18874
Chandigarh 89
Chhattisgarh 10368
Dadra and Nagar Haveli 263
Goa 207
Gujarat 8365
Haryana 4540
Himachal Pradesh 2456
Jammu and Kashmir 1632
Jharkhand 1266
Karnataka 21079
Kerala 4003
Madhya Pradesh 22605
Maharashtra 12496
Manipur 1266
Meghalaya 2867
Mizoram 954
Nagaland 3904
Odisha 13524
Puducherry 872
Punjab 3143
Rajasthan 12066
Sikkim 714
Tamil Nadu 13266
Telangana 5591
Tripura 1412
Uttar Pradesh 33189
Uttarakhand 4825
West Bengal 9597
In [ ]:
df_ani = df.loc[df['State_Name'].str.contains('Andaman and Nicobar Islands', case = False)]
In [ ]:
df_ani
Out[ ]:
State_Name District_Name Crop_Year Season Crop Area Production
0 Andaman and Nicobar Islands NICOBARS 2000 Kharif Arecanut 1254.0 2000.00
1 Andaman and Nicobar Islands NICOBARS 2000 Kharif Other Kharif pulses 2.0 1.00
2 Andaman and Nicobar Islands NICOBARS 2000 Kharif Rice 102.0 321.00
3 Andaman and Nicobar Islands NICOBARS 2000 Whole Year Banana 176.0 641.00
4 Andaman and Nicobar Islands NICOBARS 2000 Whole Year Cashewnut 720.0 165.00
... ... ... ... ... ... ... ...
198 Andaman and Nicobar Islands SOUTH ANDAMANS 2010 Rabi Turmeric 10.0 105.00
199 Andaman and Nicobar Islands SOUTH ANDAMANS 2010 Rabi Urad 34.0 15.05
200 Andaman and Nicobar Islands SOUTH ANDAMANS 2010 Whole Year Banana 360.0 5517.00
201 Andaman and Nicobar Islands SOUTH ANDAMANS 2010 Whole Year Coconut 3540.0 11000000.00
202 Andaman and Nicobar Islands SOUTH ANDAMANS 2010 Whole Year Tapioca 22.5 220.00

203 rows × 7 columns

In [ ]:
sns.lineplot(x='Crop_Year', y='Area', data = df[df.State_Name == 'Andaman and Nicobar Islands'])
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe5dad06b10>
In [ ]:
sns.lineplot(x='Crop_Year', y='Area', data = df[df.State_Name == 'Odisha'])
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe5dabede90>
In [ ]:
sns.lineplot(x='Crop_Year', y='Area', data = df[df.State_Name == 'Andhra Pradesh'])
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe5dabe2750>
In [ ]:
sns.lineplot(x='Crop_Year', y='Production', data = df[df.State_Name == 'Andhra Pradesh'])
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe5dab96bd0>
In [ ]:
sns.lineplot(x = 'Crop_Year', y = 'Production', data = df_, hue = 'State_Name')
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe5dcfd0250>

place legend beside the graph so that they may not overlap

In [ ]:
import matplotlib.pyplot as plt
sns.lineplot(x = 'Crop_Year', y = 'Production', data = df_, hue = 'State_Name')
plt.legend(bbox_to_anchor = [1,1])
Out[ ]:
<matplotlib.legend.Legend at 0x7fe5c23d2e90>
In [ ]:
# from matplotlib.cbook import boxplot_stats
sns.lineplot(x = 'Crop_Year', y = 'Production', data = df_, hue = 'State_Name')
plt.legend(bbox_to_anchor = (1, 1))
Out[ ]:
<matplotlib.legend.Legend at 0x7fe5c0514e50>

source for the following code:

https://stackoverflow.com/questions/47230817/plotly-notebook-mode-with-google-colaboratory

  • simply pass "colab" as the value for the parameter renderer in fig.show(renderer="colab")
In [ ]:
import plotly.graph_objects as go
fig = go.Figure(data=[go.Bar(y=[2, 1, 3, 5, 8, 9, 12, 11, 2, 4, 2, 3])],
    layout_title_text="A Figure Displayed with the 'colab' Renderer")
fig.show(renderer="colab")
In [32]:
!pip3 install plotly_express
Collecting plotly_express
  Downloading plotly_express-0.4.1-py2.py3-none-any.whl (2.9 kB)
Requirement already satisfied: plotly>=4.1.0 in /usr/local/lib/python3.7/dist-packages (from plotly_express) (5.7.0)
Requirement already satisfied: patsy>=0.5 in /usr/local/lib/python3.7/dist-packages (from plotly_express) (0.5.2)
Requirement already satisfied: pandas>=0.20.0 in /usr/local/lib/python3.7/dist-packages (from plotly_express) (1.3.5)
Requirement already satisfied: numpy>=1.11 in /usr/local/lib/python3.7/dist-packages (from plotly_express) (1.21.5)
Requirement already satisfied: statsmodels>=0.9.0 in /usr/local/lib/python3.7/dist-packages (from plotly_express) (0.10.2)
Requirement already satisfied: scipy>=0.18 in /usr/local/lib/python3.7/dist-packages (from plotly_express) (1.4.1)
Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.20.0->plotly_express) (2018.9)
Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.20.0->plotly_express) (2.8.2)
Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from patsy>=0.5->plotly_express) (1.15.0)
Requirement already satisfied: tenacity>=6.2.0 in /usr/local/lib/python3.7/dist-packages (from plotly>=4.1.0->plotly_express) (8.0.1)
Installing collected packages: plotly-express
Successfully installed plotly-express-0.4.1
In [33]:
import plotly_express as px
In [34]:
df.head(1)
Out[34]:
State_Name District_Name Crop_Year Season Crop Area Production
0 Andaman and Nicobar Islands NICOBARS 2000.0 Kharif Arecanut 1254.0 2000.0
In [35]:
df_ = df.groupby(['State_Name', 'Crop_Year']).sum()
In [36]:
df_.head()
Out[36]:
Area Production
State_Name Crop_Year
Andaman and Nicobar Islands 2000.0 44518.00 89060914.00
2001.0 41163.00 89718700.00
2002.0 45231.40 94387137.67
2003.0 44799.40 95296454.67
2004.0 45308.77 87186497.63
In [37]:
df_.reset_index(inplace = True)
In [38]:
df_.head()
Out[38]:
State_Name Crop_Year Area Production
0 Andaman and Nicobar Islands 2000.0 44518.00 89060914.00
1 Andaman and Nicobar Islands 2001.0 41163.00 89718700.00
2 Andaman and Nicobar Islands 2002.0 45231.40 94387137.67
3 Andaman and Nicobar Islands 2003.0 44799.40 95296454.67
4 Andaman and Nicobar Islands 2004.0 45308.77 87186497.63
In [ ]:
df_[['State_Name', 'Crop_Year']].groupby('State_Name').count()
Out[ ]:
Crop_Year
State_Name
Andaman and Nicobar Islands 203
Andhra Pradesh 9191
Arunachal Pradesh 2536
Assam 13532
Bihar 17762
Chandigarh 88
Chhattisgarh 9838
Dadra and Nagar Haveli 262
Goa 206
Gujarat 6641
Haryana 4197
Himachal Pradesh 2320
Jammu and Kashmir 1596
Jharkhand 1265
Karnataka 20398
Kerala 3914
Madhya Pradesh 20995
Maharashtra 9665
Manipur 1138
Meghalaya 2838
Mizoram 941
Nagaland 3279
Odisha 12785
Puducherry 797
Punjab 2192
Rajasthan 11058
Sikkim 707
Tamil Nadu 12131
Telangana 5405
Tripura 1401
Uttar Pradesh 29966
Uttarakhand 4559
West Bengal 9466

plotly scatter Graph

In [47]:
df_.head(2)
Out[47]:
State_Name Crop_Year Area Production
137 Gujarat 1997.0 9661100.0 27243227.0
253 Maharashtra 1997.0 6461787.0 6492161.0
In [55]:
df_.sort_values('Crop_Year', inplace = True)

plotly graph by Area

In [56]:
# px.scatter(df_, x = "Area", y='Production', animation_frame = 'Crop_Year',
#           animation_group = "State_Name", color = "State_Name")

import plotly.graph_objects as go
fig = go.Figure(px.scatter(df_, x = "Area", y = 'Production', 
                           animation_frame = 'Crop_Year',animation_group = "State_Name", 
                           color = "State_Name"))
# layout_title_text="Area on X and Production on Y, Animation frame as Crop Year grouped by State Name, Colors indicates states")
fig.show(renderer="colab")
In [57]:
import plotly.graph_objects as go
fig = go.Figure(px.bar(df_, x = "Area", y='Production', 
                           animation_frame = 'Crop_Year',animation_group = "State_Name", 
                           color = "State_Name"),
    layout_title_text="Area on X and Production on Y, Animation frame as Crop Year grouped by State Name, Colors indicates states"
)
fig.show(renderer="colab")