! pip3 install plotly==5.7.0
! pip3 install chart_studio

import pandas as pd
import numpy as np
import seaborn as sns
import plotly
import plotly.io as pio

import cufflinks as cf
from plotly.offline import download_plotlyjs, iplot, plot, init_notebook_mode
init_notebook_mode(connected=True)
cf.go_offline()

# ~~~~~~~~~~~~~~~~~~ Code for Plotly and CuffLinks ~~~~~~~~~~~~~~~~~~~~~
# reference: https://www.repath.in/Plotly-and-Cuff-Links/

#~~~~~~~~~~~~~~~~ the following function helps Google Colab display graphs
# and should be called in every cell wherever a graph needs to be plotted ~~~~~~~~~~~~~~~~~~~

def configure_plotly_browser_state():
  """Emit the require.js setup snippet for Plotly into the current cell output.

  The snippet loads require.js from the notebook's static path and maps the
  ``plotly`` module name to the Plotly CDN bundle. It only affects the output
  area of the cell in which it is called.

  Returns:
      None. The HTML snippet is handed to the notebook front end via
      ``display``.
  """
  # Import the public display API explicitly instead of relying on IPython
  # injecting ``display`` into builtins and on the internal
  # ``IPython.core.display`` module path.
  from IPython.display import HTML, display

  # NOTE(review): the ``plotly-latest`` CDN alias is reportedly frozen at an
  # old plotly.js release -- confirm it is compatible with the installed
  # plotly version before relying on this helper.
  snippet = HTML('''
        <script src="/static/components/requirejs/require.js"></script>
        <script>
          requirejs.config({
            paths: {
              base: '/static/base',
              plotly: 'https://cdn.plot.ly/plotly-latest.min.js?noext',
            },
          });
        </script>
        ''')
  display(snippet)
  
# configure_plotly_browser_state()
# df.plot()

df = pd.read_csv('/content/agriculture_ds.csv')

df.Production.isnull().sum()

1

df.shape()

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-7-0e566b70f572> in <module>()
----> 1 df.shape()

TypeError: 'tuple' object is not callable

df.shape

(38264, 7)

df.head()

sns.heatmap(df.isnull(), cmap = 'Blues_r')

<matplotlib.axes._subplots.AxesSubplot at 0x7efc0fab3ed0>

df.head(1)

df.State_Name.unique()

array(['Andaman and Nicobar Islands', 'Andhra Pradesh',
       'Arunachal Pradesh', 'Assam', 'Bihar'], dtype=object)

df.Crop_Year.unique()

array([2000, 2001, 2002, 2003, 2004, 2005, 2006, 2010, 1997, 1998, 1999,
       2007, 2008, 2009, 2011, 2012, 2013, 2014, 2015])

df.Season.unique()

array(['Kharif     ', 'Whole Year ', 'Autumn     ', 'Rabi       ',
       'Summer     ', 'Winter     '], dtype=object)

df.Crop.unique()

array(['Arecanut', 'Other Kharif pulses', 'Rice', 'Banana', 'Cashewnut',
       'Coconut ', 'Dry ginger', 'Sugarcane', 'Sweet potato', 'Tapioca',
       'Black pepper', 'Dry chillies', 'other oilseeds', 'Turmeric',
       'Maize', 'Moong(Green Gram)', 'Urad', 'Arhar/Tur', 'Groundnut',
       'Sunflower', 'Bajra', 'Castor seed', 'Cotton(lint)', 'Horse-gram',
       'Jowar', 'Korra', 'Ragi', 'Tobacco', 'Gram', 'Wheat', 'Masoor',
       'Sesamum', 'Linseed', 'Safflower', 'Onion', 'other misc. pulses',
       'Samai', 'Small millets', 'Coriander', 'Potato',
       'Other  Rabi pulses', 'Soyabean', 'Beans & Mutter(Vegetable)',
       'Bhindi', 'Brinjal', 'Citrus Fruit', 'Cucumber', 'Grapes', 'Mango',
       'Orange', 'other fibres', 'Other Fresh Fruits', 'Other Vegetables',
       'Papaya', 'Pome Fruit', 'Tomato', 'Rapeseed &Mustard', 'Mesta',
       'Cowpea(Lobia)', 'Lemon', 'Pome Granet', 'Sapota', 'Cabbage',
       'Peas  (vegetable)', 'Niger seed', 'Bottle Gourd', 'Sannhamp',
       'Varagu', 'Garlic', 'Ginger', 'Oilseeds total', 'Pulses total',
       'Jute', 'Peas & beans (Pulses)', 'Blackgram', 'Paddy', 'Pineapple',
       'Barley', 'Khesari', 'Guar seed', 'Moth',
       'Other Cereals & Millets', 'Cond-spcs other', 'Turnip', 'Carrot',
       'Redish', 'Arcanut (Processed)', 'Atcanut (Raw)',
       'Cashewnut Processed', 'Cashewnut Raw', 'Cardamom', 'Rubber',
       'Bitter Gourd', 'Drum Stick', 'Jack Fruit', 'Snak Guard',
       'Pump Kin', 'Tea', 'Coffee', 'Cauliflower', 'Other Citrus Fruit',
       'Water Melon', 'Total foodgrain', 'Kapas', 'Colocosia', 'Lentil',
       'Bean', 'Jobster', 'Perilla', 'Rajmash Kholar',
       'Ricebean (nagadal)', 'Ash Gourd', 'Beet Root', 'Lab-Lab',
       'Ribed Guard', 'Yam', 'Apple', 'Peach', 'Pear', 'Plums', 'Litchi',
       'Ber', 'Other Dry Fruit', 'Jute & mesta'], dtype=object)

df.dtypes

State_Name        object
District_Name     object
Crop_Year          int64
Season            object
Crop              object
Area             float64
Production        object
dtype: object

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 38264 entries, 0 to 38263
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   State_Name     38264 non-null  object 
 1   District_Name  38264 non-null  object 
 2   Crop_Year      38264 non-null  int64  
 3   Season         38264 non-null  object 
 4   Crop           38263 non-null  object 
 5   Area           38263 non-null  float64
 6   Production     38263 non-null  object 
dtypes: float64(1), int64(1), object(5)
memory usage: 2.0+ MB

df = pd.read_csv('/content/agriculture_ds.csv', na_values = "=")

df.Production.isnull().sum()

2567

the following heatmap helps to identify the null values in the dataframe¶

all the horizontal bars that are white in color are null values
the color bar (cbar) is not required though

sns.heatmap(df.isnull(), cmap = 'Blues_r')

<matplotlib.axes._subplots.AxesSubplot at 0x7efc0cbf11d0>

df.dtypes

State_Name        object
District_Name     object
Crop_Year          int64
Season            object
Crop              object
Area             float64
Production       float64
dtype: object

df.shape

(246091, 7)

# NOTE: .count() returns the length of the boolean mask (i.e. every row),
# not the number of nulls; use .sum() on the mask to count missing values.
df.Production.isnull().count()

131297

df.Production.isnull().sum()

# Derive the percentage from the loaded data instead of hard-coding the
# counts, so the figure stays correct when the dataset changes.
null_count = df.Production.isnull().sum()
print("Null values percentage : ", (null_count / len(df)) * 100)

Null values percentage :  1.5144804157811542

dropna()¶

deletes every row that contains at least one null value (i.e., in any column)
`thresh` is another parameter: it keeps only the rows that have at least that many non-null values
for example, with 7 columns, thresh=3 deletes the rows that have more than 4 null values

df.dropna(inplace = True)

df.shape

(128730, 7)

Question: Which state has done well and which are not?¶

watch - Hans Rosling's TED talk on Data Visualisation

sns.kdeplot(df.Production)

<matplotlib.axes._subplots.AxesSubplot at 0x7efc0c9da650>

# Pass the data vector by keyword: seaborn deprecates positional data vectors
# (FutureWarning) and treats them as an error from version 0.12.
sns.boxplot(x=df.Production)

/usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning:

Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.

<matplotlib.axes._subplots.AxesSubplot at 0x7efc0a95cb50>

# Pass the data vector by keyword: seaborn deprecates positional data vectors
# (FutureWarning) and treats them as an error from version 0.12.
sns.boxplot(x=df.Production, hue=df.State_Name)

/usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning:

Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.

<matplotlib.axes._subplots.AxesSubplot at 0x7efc0c9daa90>

# Pass the data vector by keyword: seaborn deprecates positional data vectors
# (FutureWarning) and treats them as an error from version 0.12.
sns.boxplot(x=df.Area)

/usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning:

Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.

<matplotlib.axes._subplots.AxesSubplot at 0x7fe5dadb3b50>

sns.kdeplot(df.Area)

<matplotlib.axes._subplots.AxesSubplot at 0x7fe5dad9bad0>

df[df.State_Name == 'Karnataka']

df[df.State_Name == "Karnataka"]['District_Name'].unique()

array(['BAGALKOT', 'BANGALORE RURAL', 'BELGAUM', 'BELLARY',
       'BENGALURU URBAN', 'BIDAR', 'BIJAPUR', 'CHAMARAJANAGAR',
       'CHIKBALLAPUR', 'CHIKMAGALUR', 'CHITRADURGA', 'DAKSHIN KANNAD',
       'DAVANGERE', 'DHARWAD', 'GADAG', 'GULBARGA', 'HASSAN', 'HAVERI',
       'KODAGU', 'KOLAR', 'KOPPAL', 'MANDYA', 'MYSORE', 'RAICHUR',
       'RAMANAGARA', 'SHIMOGA', 'TUMKUR', 'UDUPI', 'UTTAR KANNAD',
       'YADGIR'], dtype=object)

df_total = df.loc[df['District_Name'].str.contains('Total', case = False)]

df_total.count()

State_Name       0
District_Name    0
Crop_Year        0
Season           0
Crop             0
Area             0
Production       0
dtype: int64

to create a subset with crop type to be only Rice Crop¶

df_total = df.loc[df['State_Name'].str.contains('Total', case = False)]

df_total

df_rice = df.loc[df['Crop'].str.contains('rice', case = False)]

df_rice

to create a subset with only Rice and Wheat as crops¶

df_rice_wheat = df.loc[df['Crop'].str.contains("wheat", case = False) |
                       df['Crop'].str.contains('rice', case = False)]

df_rice_wheat

df.groupby(['State_Name', 'Crop', 'Crop_Year']).sum()

df[df.State_Name == "West Bengal"]['Crop'].unique()

array([], dtype=object)

df.groupby(['State_Name', 'Crop_Year']).sum()

# Sum only the numeric measures. Selecting them explicitly avoids relying on
# pandas silently dropping the text columns (District_Name, Season, Crop),
# which newer pandas versions no longer do.
df_ = df.groupby(['State_Name', 'Crop_Year'])[['Area', 'Production']].sum()

df_.head()

df_.reset_index(inplace = True)

to get the Crop_Year Count in the dataset¶

df_[['State_Name', 'Crop_Year']].groupby('State_Name').count()

df[['State_Name', 'Crop']].groupby('State_Name').count()

df_ani = df.loc[df['State_Name'].str.contains('Andaman and Nicobar Islands', case = False)]

df_ani

sns.lineplot(x='Crop_Year', y='Area', data = df[df.State_Name == 'Andaman and Nicobar Islands'])

<matplotlib.axes._subplots.AxesSubplot at 0x7fe5dad06b10>

sns.lineplot(x='Crop_Year', y='Area', data = df[df.State_Name == 'Odisha'])

<matplotlib.axes._subplots.AxesSubplot at 0x7fe5dabede90>

sns.lineplot(x='Crop_Year', y='Area', data = df[df.State_Name == 'Andhra Pradesh'])

<matplotlib.axes._subplots.AxesSubplot at 0x7fe5dabe2750>

sns.lineplot(x='Crop_Year', y='Production', data = df[df.State_Name == 'Andhra Pradesh'])

<matplotlib.axes._subplots.AxesSubplot at 0x7fe5dab96bd0>

sns.lineplot(x = 'Crop_Year', y = 'Production', data = df_, hue = 'State_Name')

<matplotlib.axes._subplots.AxesSubplot at 0x7fe5dcfd0250>

place legend beside the graph so that they may not overlap¶

import matplotlib.pyplot as plt
sns.lineplot(x = 'Crop_Year', y = 'Production', data = df_, hue = 'State_Name')
plt.legend(bbox_to_anchor = [1,1])

<matplotlib.legend.Legend at 0x7fe5c23d2e90>

# from matplotlib.cbook import boxplot_stats
sns.lineplot(x = 'Crop_Year', y = 'Production', data = df_, hue = 'State_Name')
plt.legend(bbox_to_anchor = (1, 1))

<matplotlib.legend.Legend at 0x7fe5c0514e50>

source for the following code:¶

https://stackoverflow.com/questions/47230817/plotly-notebook-mode-with-google-colaboratory

simply pass "colab" as the value for the parameter renderer in fig.show(renderer="colab")

import plotly.graph_objects as go
fig = go.Figure(data=[go.Bar(y=[2, 1, 3, 5, 8, 9, 12, 11, 2, 4, 2, 3])],
    layout_title_text="A Figure Displayed with the 'colab' Renderer")
fig.show(renderer="colab")

!pip3 install plotly_express

Collecting plotly_express
  Downloading plotly_express-0.4.1-py2.py3-none-any.whl (2.9 kB)
Requirement already satisfied: plotly>=4.1.0 in /usr/local/lib/python3.7/dist-packages (from plotly_express) (5.7.0)
Requirement already satisfied: patsy>=0.5 in /usr/local/lib/python3.7/dist-packages (from plotly_express) (0.5.2)
Requirement already satisfied: pandas>=0.20.0 in /usr/local/lib/python3.7/dist-packages (from plotly_express) (1.3.5)
Requirement already satisfied: numpy>=1.11 in /usr/local/lib/python3.7/dist-packages (from plotly_express) (1.21.5)
Requirement already satisfied: statsmodels>=0.9.0 in /usr/local/lib/python3.7/dist-packages (from plotly_express) (0.10.2)
Requirement already satisfied: scipy>=0.18 in /usr/local/lib/python3.7/dist-packages (from plotly_express) (1.4.1)
Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.20.0->plotly_express) (2018.9)
Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.20.0->plotly_express) (2.8.2)
Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from patsy>=0.5->plotly_express) (1.15.0)
Requirement already satisfied: tenacity>=6.2.0 in /usr/local/lib/python3.7/dist-packages (from plotly>=4.1.0->plotly_express) (8.0.1)
Installing collected packages: plotly-express
Successfully installed plotly-express-0.4.1

import plotly_express as px

df.head(1)

# Sum only the numeric measures. Selecting them explicitly avoids relying on
# pandas silently dropping the text columns (District_Name, Season, Crop),
# which newer pandas versions no longer do.
df_ = df.groupby(['State_Name', 'Crop_Year'])[['Area', 'Production']].sum()

df_.head()

df_.reset_index(inplace = True)

df_.head()

df_[['State_Name', 'Crop_Year']].groupby('State_Name').count()

plotly scatter Graph¶

df_.head(2)

df_.sort_values('Crop_Year', inplace = True)

plotly graph by Area¶

# px.scatter(df_, x = "Area", y='Production', animation_frame = 'Crop_Year',
#           animation_group = "State_Name", color = "State_Name")

import plotly.graph_objects as go
fig = go.Figure(px.scatter(df_, x = "Area", y = 'Production', 
                           animation_frame = 'Crop_Year',animation_group = "State_Name", 
                           color = "State_Name"))
# layout_title_text="Area on X and Production on Y, Animation frame as Crop Year grouped by State Name, Colors indicates states")
fig.show(renderer="colab")

import plotly.graph_objects as go
fig = go.Figure(px.bar(df_, x = "Area", y='Production', 
                           animation_frame = 'Crop_Year',animation_group = "State_Name", 
                           color = "State_Name"),
    layout_title_text="Area on X and Production on Y, Animation frame as Crop Year grouped by State Name, Colors indicates states"
)
fig.show(renderer="colab")

import plotly.graph_objects as go
fig = go.Figure(px.bar(df_, x = "Crop_Year", y='Area', color = "Crop_Year"),
    layout_title_text="Crop year on X and Area on Y, Crop Year as Colors indicates states"
)
fig.show(renderer="colab")

# Funnel of Area per state. The bars are coloured by State_Name, so the title
# describes the colours as states (the previous title named Crop Year).
import plotly.graph_objects as go
fig = go.Figure(px.funnel(df_, x = "State_Name", y='Area', color = "State_Name"),
    layout_title_text="State on X and Area on Y, Colors indicate states"
)
fig.show(renderer="colab")

import plotly.graph_objects as go
fig = go.Figure(px.funnel(df_, x = "State_Name", y='Area', color = "Crop_Year"),
    layout_title_text="State on X and Area on Y, Crop Year as Colors indicates states"
)
fig.show(renderer="colab")

import plotly.graph_objects as go
fig = go.Figure(px.funnel(df_, x = "State_Name", y='Area'),
    layout_title_text="State on X and Area on Y"
)
fig.show(renderer="colab")

Exercise from Plotly Webinar - Training session¶

df = px.data.gapminder()

df

df = px.data.gapminder().query("year == 2007")

df

df.head()

# The bare name `configure_plotly_browser_state` (without call parentheses)
# was a no-op expression and has been dropped; the figure is displayed through
# the "colab" renderer passed to fig.show(), as in the other plotting cells.
import plotly.graph_objects as go
fig = go.Figure(px.strip(df, x = "lifeExp", hover_name = 'country', color = 'continent'))
fig.show(renderer="colab")

df = pd.read_csv('/content/agriculture_ds.csv', na_values = "=")

df.head(1)

df.dropna(inplace = True)

df_[(df_.State_Name == 'Kerala') & (df_.Crop_Year == 2000)]

df[(df.State_Name == 'Kerala') & (df.Crop_Year == 2000)].sort_values('Production')

as the production weight recorded for coconut is far heavier than that of the other crops, coconut turns out to be an outlier.
so total production alone should not be considered; the data should be sliced into crops of a similar nature
eg: rice, wheat, maize, ragi, etc...

df_ = df[df.Crop.isin(['Rice', 'Wheat', 'Maize', "Ragi"])]

df_

# Keep the four cereal crops, then sum only the numeric measures per state and
# year. Selecting the columns explicitly avoids relying on pandas silently
# dropping the text columns, which newer pandas versions no longer do.
df_ = (
    df[df.Crop.isin(['Rice', 'Wheat', 'Maize', "Ragi"])]
    .groupby(['State_Name', 'Crop_Year'])[['Area', 'Production']]
    .sum()
)

df_

df_.reset_index(inplace = True)

df_.head(1)

df_.sort_values('Crop_Year', inplace = True)

import plotly.graph_objects as go
fig = go.Figure(px.scatter(df_, x='Area', y = 'Production',
           animation_frame = 'Crop_Year',
           animation_group = 'State_Name', color = 'State_Name'))
fig.show(renderer="colab")

df_['Efficiency'] = df_['Production'] / df_['Area']

df_.head(2)

import plotly.graph_objects as go
fig = go.Figure(px.scatter(df_, x = 'Area', y = 'Efficiency', size = 'Production',
           animation_frame = "Crop_Year", animation_group = "State_Name",
           color = 'State_Name'))
fig.show(renderer="colab")

df_.head(2)

fixing the points that are going out side the plot with range parameter¶

import plotly.graph_objects as go
fig = go.Figure(px.scatter(df_, x = 'Area', y = 'Efficiency', size = 'Production',
           animation_frame = "Crop_Year", animation_group = "State_Name", range_y = [0.75, 5], range_x = [-1E6, 20E6],
           color = 'State_Name'))
fig.show(renderer="colab")

df[(df.State_Name == 'Kerala') & (df.Crop_Year == 2000)].sort_values('Production')

df[df.Crop.isin(['Rice', "Wheat", "Ragi", "Maize"])]

# Keep the four cereal crops, then sum only the numeric measures per state and
# year. Selecting the columns explicitly avoids relying on pandas silently
# dropping the text columns, which newer pandas versions no longer do.
df_ = (
    df[df.Crop.isin(['Rice', "Wheat", "Ragi", "Maize"])]
    .groupby(['State_Name', 'Crop_Year'])[['Area', 'Production']]
    .sum()
)

df_.head(2)

df_.reset_index(inplace=True)

df_.sort_values('Crop_Year', inplace=True)

df_.head(2)

fig = go.Figure(px.scatter(df_, x="Area", y='Production', 
                           animation_frame = "Crop_Year", 
                           animation_group='State_Name', color = "State_Name"))
fig.show(renderer="colab")

A new column named Efficiency is created and plotted on the Y axis, with Production passed to the size parameter¶

df_['Efficiency'] = df_['Production'] / df_['Area']

fig = go.Figure(px.scatter(df_, x="Area", y='Efficiency', size='Production',
                           animation_frame = "Crop_Year", 
                           animation_group='State_Name', color = "State_Name"))
fig.show(renderer="colab")

address the values that are going beyond the graph area

fig = go.Figure(px.scatter(df_, x="Area", y='Efficiency', size='Production',
                           animation_frame = "Crop_Year", 
                           animation_group='State_Name', color = "State_Name",
                           range_y = [0.75, 5], range_x = [-1E6, 20E6]))
fig.show(renderer="colab")

some other exercises like:

single state analysis
two states analysis for comparative view - on a single crop or couple of more crops
by the price values - as the cost of rice and cost of oil seeds is not the same
only years for comparison
with more time, experiment with a comparative view of crop types and
seasons and their types.

			Area	Production
State_Name	Crop	Crop_Year
Andaman and Nicobar Islands	Arecanut	2000.0	4354.00	7200.00
		2001.0	4354.00	7300.00
		2002.0	4363.00	7350.00
		2003.0	4379.00	6707.00
		2004.0	4425.37	4781.05
...	...	...	...	...
Maharashtra	Wheat	2012.0	335600.00	508000.00
		2013.0	481800.00	692000.00
		2014.0	492900.00	598200.00
	other oilseeds	2003.0	5500.00	1400.00
	other oilseeds	2004.0	6100.00	2100.00

		Area	Production
State_Name	Crop_Year
Andaman and Nicobar Islands	2000.0	44518.00	89060914.00
	2001.0	41163.00	89718700.00
	2002.0	45231.40	94387137.67
	2003.0	44799.40	95296454.67
	2004.0	45308.77	87186497.63

	Crop
State_Name
Andaman and Nicobar Islands	203
Andhra Pradesh	9561
Arunachal Pradesh	2545
Assam	14622
Bihar	18874
Chandigarh	89
Chhattisgarh	10368
Dadra and Nagar Haveli	263
Goa	207
Gujarat	8365
Haryana	4540
Himachal Pradesh	2456
Jammu and Kashmir	1632
Jharkhand	1266
Karnataka	21079
Kerala	4003
Madhya Pradesh	22605
Maharashtra	12496
Manipur	1266
Meghalaya	2867
Mizoram	954
Nagaland	3904
Odisha	13524
Puducherry	872
Punjab	3143
Rajasthan	12066
Sikkim	714
Tamil Nadu	13266
Telangana	5591
Tripura	1412
Uttar Pradesh	33189
Uttarakhand	4825
West Bengal	9597

		Area	Production
State_Name	Crop_Year
Andaman and Nicobar Islands	2000.0	44518.00	89060914.00
	2001.0	41163.00	89718700.00
	2002.0	45231.40	94387137.67
	2003.0	44799.40	95296454.67
	2004.0	45308.77	87186497.63

	State_Name	Crop_Year	Area	Production
0	Andaman and Nicobar Islands	2000.0	44518.00	89060914.00
1	Andaman and Nicobar Islands	2001.0	41163.00	89718700.00
2	Andaman and Nicobar Islands	2002.0	45231.40	94387137.67
3	Andaman and Nicobar Islands	2003.0	44799.40	95296454.67
4	Andaman and Nicobar Islands	2004.0	45308.77	87186497.63

Consulting - Quality

Agriculture OpenSource DataSet

Visualisation with Plotly Express

the following heatmap helps to identify the null values in the dataframe¶

dropna()¶

Question: Which state has done well and which are not?¶

to create a subset with crop type to be only Rice Crop¶

to create a subset with only Rice and Wheat as crops¶

to get the Crop_Year Count in the dataset¶

place legend beside the graph so that they may not overlap¶

source for the following code:¶

plotly scatter Graph¶

plotly graph by Area¶

Exercise from Plotly Webinar - Training session¶

fixing the points that are going out side the plot with range parameter¶

New Col by name Efficiency is created, and added to the parameter Size¶

	State_Name	District_Name	Crop_Year	Season	Crop	Area	Production
0	Andaman and Nicobar Islands	NICOBARS	2000	Kharif	Arecanut	1254.0	2000
1	Andaman and Nicobar Islands	NICOBARS	2000	Kharif	Other Kharif pulses	2.0	1
2	Andaman and Nicobar Islands	NICOBARS	2000	Kharif	Rice	102.0	321
3	Andaman and Nicobar Islands	NICOBARS	2000	Whole Year	Banana	176.0	641
4	Andaman and Nicobar Islands	NICOBARS	2000	Whole Year	Cashewnut	720.0	165

	State_Name	District_Name	Crop_Year	Season	Crop	Area	Production
76865	Karnataka	BAGALKOT	1998	Kharif	Arhar/Tur	6154.0	2602.0
76866	Karnataka	BAGALKOT	1998	Kharif	Bajra	48855.0	52375.0
76867	Karnataka	BAGALKOT	1998	Kharif	Castor seed	71.0	61.0
76868	Karnataka	BAGALKOT	1998	Kharif	Cotton(lint)	15225.0	22129.0
76869	Karnataka	BAGALKOT	1998	Kharif	Groundnut	16368.0	7734.0
...	...	...	...	...	...	...	...
97981	Karnataka	YADGIR	2014	Summer	Onion	7.0	34.0
97982	Karnataka	YADGIR	2014	Summer	Rice	15205.0	35029.0
97983	Karnataka	YADGIR	2014	Summer	Sunflower	14.0	5.0
97984	Karnataka	YADGIR	2014	Whole Year	Coconut	168.0	1499.0
97986	Karnataka	YADGIR	2014	Whole Year	Sugarcane	862.0	78614.0

	State_Name	Crop_Year	Area	Production
137	Gujarat	1997.0	9661100.0	27243227.0
253	Maharashtra	1997.0	6461787.0	6492161.0

	country	continent	year	lifeExp	pop	gdpPercap	iso_alpha	iso_num
0	Afghanistan	Asia	1952	28.801	8425333	779.445314	AFG	4
1	Afghanistan	Asia	1957	30.332	9240934	820.853030	AFG	4
2	Afghanistan	Asia	1962	31.997	10267083	853.100710	AFG	4
3	Afghanistan	Asia	1967	34.020	11537966	836.197138	AFG	4
4	Afghanistan	Asia	1972	36.088	13079460	739.981106	AFG	4
...	...	...	...	...	...	...	...	...
1699	Zimbabwe	Africa	1987	62.351	9216418	706.157306	ZWE	716
1700	Zimbabwe	Africa	1992	60.377	10704340	693.420786	ZWE	716
1701	Zimbabwe	Africa	1997	46.809	11404948	792.449960	ZWE	716
1702	Zimbabwe	Africa	2002	39.989	11926563	672.038623	ZWE	716
1703	Zimbabwe	Africa	2007	43.487	12311143	469.709298	ZWE	716

	State_Name	District_Name	Crop_Year	Season	Crop	Area	Production
99868	Kerala	KOTTAYAM	2000	Kharif	Sesamum	6.0	1.0
100437	Kerala	MALAPPURAM	2000	Kharif	Ragi	4.0	3.0
99567	Kerala	KOLLAM	2000	Summer	Rice	4.0	4.0
98946	Kerala	KANNUR	2000	Kharif	Sesamum	10.0	6.0
98604	Kerala	IDUKKI	2000	Kharif	Ragi	8.0	7.0
...	...	...	...	...	...	...	...
101702	Kerala	THRISSUR	2000	Whole Year	Coconut	89472.0	540000000.0
98953	Kerala	KANNUR	2000	Whole Year	Coconut	96975.0	621000000.0
100445	Kerala	MALAPPURAM	2000	Whole Year	Coconut	110378.0	626000000.0
101425	Kerala	THIRUVANANTHAPURAM	2000	Whole Year	Coconut	88663.0	635000000.0
100162	Kerala	KOZHIKODE	2000	Whole Year	Coconut	128739.0	903000000.0

	State_Name	Crop_Year	Area	Production	Efficiency
153	Haryana	1997	2996000.0	10134000.0	3.382510
330	Odisha	1997	137600.0	117812.0	0.856192

	country	continent	year	lifeExp	pop	gdpPercap	iso_alpha	iso_num
11	Afghanistan	Asia	2007	43.828	31889923	974.580338	AFG	4
23	Albania	Europe	2007	76.423	3600523	5937.029526	ALB	8
35	Algeria	Africa	2007	72.301	33333216	6223.367465	DZA	12
47	Angola	Africa	2007	42.731	12420476	4797.231267	AGO	24
59	Argentina	Americas	2007	75.320	40301927	12779.379640	ARG	32
...	...	...	...	...	...	...	...	...
1655	Vietnam	Asia	2007	74.249	85262356	2441.576404	VNM	704
1667	West Bank and Gaza	Asia	2007	73.422	4018332	3025.349798	PSE	275
1679	Yemen, Rep.	Asia	2007	62.698	22211743	2280.769906	YEM	887
1691	Zambia	Africa	2007	42.384	11746035	1271.211593	ZMB	894
1703	Zimbabwe	Africa	2007	43.487	12311143	469.709298	ZWE	716

	country	continent	year	lifeExp	pop	gdpPercap	iso_alpha	iso_num
11	Afghanistan	Asia	2007	43.828	31889923	974.580338	AFG	4
23	Albania	Europe	2007	76.423	3600523	5937.029526	ALB	8
35	Algeria	Africa	2007	72.301	33333216	6223.367465	DZA	12
47	Angola	Africa	2007	42.731	12420476	4797.231267	AGO	24
59	Argentina	Americas	2007	75.320	40301927	12779.379640	ARG	32

		Area	Production
State_Name	Crop_Year
Andaman and Nicobar Islands	2000	10881.00	32184.00
	2001	9801.00	27333.00
	2002	10885.00	32111.66
	2003	10561.37	30850.87
	2004	10734.92	29192.23
...	...	...	...
West Bengal	2010	5361309.00	14630175.00
	2011	5855620.00	15851031.00
	2012	5898997.00	16315137.00
	2013	5999179.00	16936255.00
	2014	6023075.00	17527428.00