Agriculture DataSet
Bars, Boxes and Functions
work_with_clean_Agri_ds
In [1]:
!pip3 install plotly_express
Collecting plotly_express
  Downloading plotly_express-0.4.1-py2.py3-none-any.whl (2.9 kB)
Requirement already satisfied: patsy>=0.5 in /usr/local/lib/python3.7/dist-packages (from plotly_express) (0.5.2)
Requirement already satisfied: scipy>=0.18 in /usr/local/lib/python3.7/dist-packages (from plotly_express) (1.4.1)
Requirement already satisfied: pandas>=0.20.0 in /usr/local/lib/python3.7/dist-packages (from plotly_express) (1.3.5)
Requirement already satisfied: numpy>=1.11 in /usr/local/lib/python3.7/dist-packages (from plotly_express) (1.21.6)
Requirement already satisfied: plotly>=4.1.0 in /usr/local/lib/python3.7/dist-packages (from plotly_express) (5.5.0)
Requirement already satisfied: statsmodels>=0.9.0 in /usr/local/lib/python3.7/dist-packages (from plotly_express) (0.10.2)
Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.20.0->plotly_express) (2022.1)
Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.20.0->plotly_express) (2.8.2)
Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from patsy>=0.5->plotly_express) (1.15.0)
Requirement already satisfied: tenacity>=6.2.0 in /usr/local/lib/python3.7/dist-packages (from plotly>=4.1.0->plotly_express) (8.0.1)
Installing collected packages: plotly-express
Successfully installed plotly-express-0.4.1
In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import plotly_express as px
In [4]:
df = pd.read_csv('/content/clean_agri_ds.csv')
In [5]:
df.State_Name.unique()
Out[5]:
array(['Andhra Pradesh', 'Assam', 'Andaman and Nicobar Islands',
       'Arunachal Pradesh', 'Bihar'], dtype=object)
In [6]:
df.dtypes
Out[6]:
Unnamed: 0         int64
State_Name        object
District_Name     object
Crop_Year          int64
Season            object
Crop              object
Area             float64
Production       float64
Len State          int64
Len Dist           int64
Len Season         int64
Len Crop           int64
dtype: object
In [7]:
df.head(3)
Out[7]:
Unnamed: 0 State_Name District_Name Crop_Year Season Crop Area Production Len State Len Dist Len Season Len Crop
0 4351 Andhra Pradesh KRISHNA 2005 Kharif Moong(Green Gram) 12334.0 9991.0 14 7 6 17
1 21402 Assam KARBI ANGLONG 2013 Kharif Papaya 571.0 9991.0 5 13 6 6
2 2592 Andhra Pradesh GUNTUR 1998 Rabi Rice 29100.0 99900.0 14 6 4 4
In [8]:
df[df.isna().any(axis=1)]
Out[8]:
Unnamed: 0 State_Name District_Name Crop_Year Season Crop Area Production Len State Len Dist Len Season Len Crop
In [9]:
df[df['Production'] == "="]
Out[9]:
Unnamed: 0 State_Name District_Name Crop_Year Season Crop Area Production Len State Len Dist Len Season Len Crop
In [10]:
df.drop(['Len State', 'Len Dist', 'Len Season', 'Len Crop'], inplace = True, axis=1)
In [11]:
df.head(3)
Out[11]:
Unnamed: 0 State_Name District_Name Crop_Year Season Crop Area Production
0 4351 Andhra Pradesh KRISHNA 2005 Kharif Moong(Green Gram) 12334.0 9991.0
1 21402 Assam KARBI ANGLONG 2013 Kharif Papaya 571.0 9991.0
2 2592 Andhra Pradesh GUNTUR 1998 Rabi Rice 29100.0 99900.0
In [12]:
df.describe()
Out[12]:
Unnamed: 0 Crop_Year Area Production
count 38181.000000 38181.000000 38181.000000 3.818100e+04
mean 19153.164794 2005.863440 7285.304866 5.326191e+05
std 11039.090664 5.127556 27716.183577 1.407250e+07
min 0.000000 1997.000000 0.200000 0.000000e+00
25% 9610.000000 2002.000000 88.000000 1.070000e+02
50% 19160.000000 2006.000000 498.000000 7.810000e+02
75% 28710.000000 2010.000000 2698.000000 6.515000e+03
max 38262.000000 2014.000000 877029.000000 7.801620e+08
In [13]:
df.set_index(['Unnamed: 0'])
Out[13]:
State_Name District_Name Crop_Year Season Crop Area Production
Unnamed: 0
4351 Andhra Pradesh KRISHNA 2005 Kharif Moong(Green Gram) 12334.0 9991.0
21402 Assam KARBI ANGLONG 2013 Kharif Papaya 571.0 9991.0
2592 Andhra Pradesh GUNTUR 1998 Rabi Rice 29100.0 99900.0
181 Andaman and Nicobar Islands SOUTH ANDAMANS 2006 Whole Year Sugarcane 53.5 999.5
11244 Arunachal Pradesh PAPUM PARE 1998 Whole Year Sugarcane 107.0 999.0
... ... ... ... ... ... ... ...
6528 Andhra Pradesh SPSR NELLORE 2002 Whole Year Cucumber 85.0 0.0
6532 Andhra Pradesh SPSR NELLORE 2002 Whole Year Other Vegetables 525.0 0.0
7202 Andhra Pradesh SRIKAKULAM 2002 Whole Year Bottle Gourd 45.0 0.0
7204 Andhra Pradesh SRIKAKULAM 2002 Whole Year Cabbage 242.0 0.0
5818 Andhra Pradesh PRAKASAM 2003 Whole Year Cucumber 331.0 0.0

38181 rows × 7 columns

In [14]:
df.rename(columns = {'Unnamed: 0':"ColIndex"}, inplace = True)
In [15]:
df.head(2)
Out[15]:
ColIndex State_Name District_Name Crop_Year Season Crop Area Production
0 4351 Andhra Pradesh KRISHNA 2005 Kharif Moong(Green Gram) 12334.0 9991.0
1 21402 Assam KARBI ANGLONG 2013 Kharif Papaya 571.0 9991.0
In [16]:
df.set_index(['ColIndex'])
Out[16]:
State_Name District_Name Crop_Year Season Crop Area Production
ColIndex
4351 Andhra Pradesh KRISHNA 2005 Kharif Moong(Green Gram) 12334.0 9991.0
21402 Assam KARBI ANGLONG 2013 Kharif Papaya 571.0 9991.0
2592 Andhra Pradesh GUNTUR 1998 Rabi Rice 29100.0 99900.0
181 Andaman and Nicobar Islands SOUTH ANDAMANS 2006 Whole Year Sugarcane 53.5 999.5
11244 Arunachal Pradesh PAPUM PARE 1998 Whole Year Sugarcane 107.0 999.0
... ... ... ... ... ... ... ...
6528 Andhra Pradesh SPSR NELLORE 2002 Whole Year Cucumber 85.0 0.0
6532 Andhra Pradesh SPSR NELLORE 2002 Whole Year Other Vegetables 525.0 0.0
7202 Andhra Pradesh SRIKAKULAM 2002 Whole Year Bottle Gourd 45.0 0.0
7204 Andhra Pradesh SRIKAKULAM 2002 Whole Year Cabbage 242.0 0.0
5818 Andhra Pradesh PRAKASAM 2003 Whole Year Cucumber 331.0 0.0

38181 rows × 7 columns

In [17]:
df.head(2)
Out[17]:
ColIndex State_Name District_Name Crop_Year Season Crop Area Production
0 4351 Andhra Pradesh KRISHNA 2005 Kharif Moong(Green Gram) 12334.0 9991.0
1 21402 Assam KARBI ANGLONG 2013 Kharif Papaya 571.0 9991.0
In [18]:
df.sort_values(['Production'], inplace = True, ascending = False)
In [19]:
df.head(2)
Out[19]:
ColIndex State_Name District_Name Crop_Year Season Crop Area Production
3939 2543 Andhra Pradesh EAST GODAVARI 2014 Whole Year Coconut 46865.0 780162000.0
4954 2432 Andhra Pradesh EAST GODAVARI 2012 Whole Year Coconut 49516.0 729965000.0

Create subset for AP from India dataset

In [20]:
dfap = df[df['State_Name'] == "Andhra Pradesh"]
In [21]:
dfap.head(3)
Out[21]:
ColIndex State_Name District_Name Crop_Year Season Crop Area Production
3939 2543 Andhra Pradesh EAST GODAVARI 2014 Whole Year Coconut 46865.0 780162000.0
4954 2432 Andhra Pradesh EAST GODAVARI 2012 Whole Year Coconut 49516.0 729965000.0
5092 2488 Andhra Pradesh EAST GODAVARI 2013 Whole Year Coconut 49114.0 720895000.0
In [22]:
dfap_group = df[df['State_Name'] == "Andhra Pradesh"].groupby('District_Name').sum()
In [23]:
dfap_group
Out[23]:
ColIndex Crop_Year Area Production
District_Name
ANANTAPUR 467368 1572950 18471177.0 8.150159e+07
CHITTOOR 1082883 1568880 6102142.0 3.741366e+08
EAST GODAVARI 1633239 1516502 11593557.0 8.271057e+09
GUNTUR 1975435 1372144 13739734.0 8.479591e+07
KADAPA 2976731 1639233 7219613.0 2.566223e+07
KRISHNA 2763748 1267735 11134515.0 3.694278e+08
KURNOOL 4207466 1653083 16968338.0 3.494962e+07
PRAKASAM 4750501 1608867 9611563.0 3.770760e+07
SPSR NELLORE 4521747 1360080 6198964.0 1.314234e+08
SRIKAKULAM 5030712 1372127 7102905.0 2.141621e+09
VISAKHAPATANAM 6556844 1622967 5645769.0 1.010412e+09
VIZIANAGARAM 6526938 1474488 6516585.0 4.741696e+08
WEST GODAVARI 5476505 1151411 11202457.0 4.287727e+09
In [24]:
dfap.head(2)
Out[24]:
ColIndex State_Name District_Name Crop_Year Season Crop Area Production
3939 2543 Andhra Pradesh EAST GODAVARI 2014 Whole Year Coconut 46865.0 780162000.0
4954 2432 Andhra Pradesh EAST GODAVARI 2012 Whole Year Coconut 49516.0 729965000.0
In [25]:
dfap.rename(columns = {"Unnamed: 0":"ColIndex"}, inplace=True)
/usr/local/lib/python3.7/dist-packages/pandas/core/frame.py:5047: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
In [26]:
dfap.set_index('ColIndex')
Out[26]:
State_Name District_Name Crop_Year Season Crop Area Production
ColIndex
2543 Andhra Pradesh EAST GODAVARI 2014 Whole Year Coconut 46865.0 780162000.0
2432 Andhra Pradesh EAST GODAVARI 2012 Whole Year Coconut 49516.0 729965000.0
2488 Andhra Pradesh EAST GODAVARI 2013 Whole Year Coconut 49114.0 720895000.0
2378 Andhra Pradesh EAST GODAVARI 2011 Whole Year Coconut 50741.0 719961050.0
9829 Andhra Pradesh WEST GODAVARI 2014 Whole Year Coconut 21729.0 718991000.0
... ... ... ... ... ... ... ...
8711 Andhra Pradesh VIZIANAGARAM 2002 Whole Year Cabbage 147.0 0.0
8709 Andhra Pradesh VIZIANAGARAM 2002 Whole Year Bottle Gourd 102.0 0.0
1203 Andhra Pradesh CHITTOOR 2002 Whole Year Cabbage 4.0 0.0
5811 Andhra Pradesh PRAKASAM 2003 Whole Year Bottle Gourd 46.0 0.0
5818 Andhra Pradesh PRAKASAM 2003 Whole Year Cucumber 331.0 0.0

9561 rows × 7 columns

In [27]:
dfap.describe()
Out[27]:
ColIndex Crop_Year Area Production
count 9561.000000 9561.000000 9561.000000 9.561000e+03
mean 5017.269846 2006.115155 13754.556950 1.812006e+06
std 2779.343152 5.175459 47304.631455 2.794296e+07
min 203.000000 1997.000000 1.000000 0.000000e+00
25% 2613.000000 2002.000000 176.000000 2.140000e+02
50% 5020.000000 2006.000000 1133.000000 1.913000e+03
75% 7426.000000 2011.000000 6631.000000 1.396500e+04
max 9830.000000 2014.000000 877029.000000 7.801620e+08
In [28]:
dfap['Crop_Year'] = df['Crop_Year'].astype(str)
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
In [29]:
dfap['ColIndex'] = dfap['ColIndex'].astype(str)
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
In [30]:
dfap.describe()
Out[30]:
Area Production
count 9561.000000 9.561000e+03
mean 13754.556950 1.812006e+06
std 47304.631455 2.794296e+07
min 1.000000 0.000000e+00
25% 176.000000 2.140000e+02
50% 1133.000000 1.913000e+03
75% 6631.000000 1.396500e+04
max 877029.000000 7.801620e+08
In [31]:
sns.lineplot(x="District_Name", y="Area", hue='Crop_Year', data = dfap)
plt.xticks(rotation = 90)
plt.legend(bbox_to_anchor = [1,1])
Out[31]:
<matplotlib.legend.Legend at 0x7f0ba344a650>
In [32]:
sns.lineplot(x="District_Name", y="Production", hue='Crop_Year', data = dfap)
plt.xticks(rotation = 90)
plt.legend(bbox_to_anchor = [1,1])
Out[32]:
<matplotlib.legend.Legend at 0x7f0ba335c3d0>
In [33]:
sns.barplot(x="District_Name", y="Area", data = dfap)
plt.xticks(rotation = 90)
Out[33]:
(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12]),
 <a list of 13 Text major ticklabel objects>)
In [34]:
sns.barplot(x="District_Name", y="Production", data = dfap)
plt.xticks(rotation = 90)
Out[34]:
(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12]),
 <a list of 13 Text major ticklabel objects>)
In [35]:
dfap['District_Name'].unique()
Out[35]:
array(['EAST GODAVARI', 'WEST GODAVARI', 'SRIKAKULAM', 'VISAKHAPATANAM',
       'VIZIANAGARAM', 'KRISHNA', 'CHITTOOR', 'SPSR NELLORE', 'ANANTAPUR',
       'GUNTUR', 'PRAKASAM', 'KADAPA', 'KURNOOL'], dtype=object)
In [36]:
dfap.describe()
Out[36]:
Area Production
count 9561.000000 9.561000e+03
mean 13754.556950 1.812006e+06
std 47304.631455 2.794296e+07
min 1.000000 0.000000e+00
25% 176.000000 2.140000e+02
50% 1133.000000 1.913000e+03
75% 6631.000000 1.396500e+04
max 877029.000000 7.801620e+08
In [37]:
dfap.dtypes
Out[37]:
ColIndex          object
State_Name        object
District_Name     object
Crop_Year         object
Season            object
Crop              object
Area             float64
Production       float64
dtype: object

Working with Plot features in Pandas

  • The .plot.* methods are applicable on both Series and DataFrames
  • By default, each of the columns is plotted as a different element (line, boxplot,…)
  • Any plot created by pandas is a Matplotlib object.

source: https://pandas.pydata.org/docs/getting_started/intro_tutorials/04_plotting.html

In [38]:
dfap.plot(figsize = (8,4))
Out[38]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f0ba34f0310>
In [39]:
dfap.groupby('District_Name')['Area'].sum().plot.bar()
Out[39]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f0ba3391a90>
In [40]:
dfap.groupby('District_Name').sum().plot(kind='line')
plt.xticks(rotation=90)
Out[40]:
(array([-2.,  0.,  2.,  4.,  6.,  8., 10., 12., 14.]),
 <a list of 9 Text major ticklabel objects>)
In [41]:
dfap.sort_values(by="District_Name", inplace=True)
/usr/local/lib/python3.7/dist-packages/pandas/util/_decorators.py:311: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)
In [42]:
dfap.plot.scatter(x='Area', y='Production')
Out[42]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f0ba33f1190>
In [43]:
dfap.plot.box()
Out[43]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f0ba33e5890>
In [44]:
fig, axs = plt.subplots(figsize = (12, 6));
# dfap.plot.area(figsize = (12, 6), subplots = True)
dfap.plot.line(ax=axs, subplots = True, marker=".");
/usr/local/lib/python3.7/dist-packages/pandas/plotting/_matplotlib/__init__.py:71: UserWarning: To output multiple subplots, the figure containing the passed axes is being cleared
  plot_obj.generate()
In [45]:
fig.savefig('Area_Production.png')
In [46]:
dfap
Out[46]:
ColIndex State_Name District_Name Crop_Year Season Crop Area Production
28468 817 Andhra Pradesh ANANTAPUR 2012 Kharif Papaya 447.0 17197.0
24059 277 Andhra Pradesh ANANTAPUR 1999 Rabi Maize 455.0 2203.0
33419 896 Andhra Pradesh ANANTAPUR 2013 Rabi Cotton(lint) 62.0 127.0
16971 838 Andhra Pradesh ANANTAPUR 2012 Rabi Groundnut 20987.0 34419.0
33214 323 Andhra Pradesh ANANTAPUR 2000 Rabi Safflower 703.0 129.0
... ... ... ... ... ... ... ... ...
1615 9796 Andhra Pradesh WEST GODAVARI 2014 Kharif Cowpea(Lobia) 21.0 9.0
31245 9354 Andhra Pradesh WEST GODAVARI 2001 Kharif Dry chillies 40.0 145.0
1658 9671 Andhra Pradesh WEST GODAVARI 2010 Whole Year Arecanut 20.0 9.0
15576 9806 Andhra Pradesh WEST GODAVARI 2014 Kharif Sapota 500.0 3798.0
32297 9713 Andhra Pradesh WEST GODAVARI 2012 Kharif Arhar/Tur 173.0 136.0

9561 rows × 8 columns

In [47]:
dfap.sort_values("Production", ascending = True, inplace = True)
/usr/local/lib/python3.7/dist-packages/pandas/util/_decorators.py:311: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)
In [48]:
dfaps = dfap['Production'].cumsum()
In [49]:
dfaps.plot()
plt.xticks(rotation = 90)
Out[49]:
(array([-5000.,     0.,  5000., 10000., 15000., 20000., 25000., 30000.,
        35000., 40000., 45000.]), <a list of 11 Text major ticklabel objects>)
In [50]:
dfapa = dfap['Area'].cumsum()
In [51]:
dfapa.plot()
plt.xticks(rotation = 90)
Out[51]:
(array([-5000.,     0.,  5000., 10000., 15000., 20000., 25000., 30000.,
        35000., 40000., 45000.]), <a list of 11 Text major ticklabel objects>)
In [52]:
plt.figure()
dfap.Production.plot(kind='density')
Out[52]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f0ba357a810>
In [53]:
df2 = pd.DataFrame(np.random.rand(10, 4), columns = ['a', 'b', 'c', 'd'])
df2.plot.bar();
In [54]:
df2 = pd.DataFrame(np.random.rand(10, 4), columns = ['a', 'b', 'c', 'd'])
df2.plot.bar(stacked = True);
In [55]:
df2 = pd.DataFrame(np.random.rand(10, 4), columns = ['a', 'b', 'c', 'd'])
df2.plot.bar(stacked = True);
plt.legend(bbox_to_anchor = [1,1])      # to place legend outside the graph, towards the right top
Out[55]:
<matplotlib.legend.Legend at 0x7f0ba33e7350>
In [56]:
df2 = pd.DataFrame(np.random.rand(10, 4), columns = ['a', 'b', 'c', 'd'])
df2.plot.barh(stacked = True);
In [57]:
dfap['Production'].diff().hist()
Out[57]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f0b9e8d0610>
In [58]:
plt.figure()
dfap['Area'].diff().hist(color = 'k', bins=50)
Out[58]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f0b9e8f7ed0>
In [59]:
plt.figure()
dfap['Area'].diff().hist(color = 'k', bins=50)
Out[59]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f0b9e76c890>
In [60]:
dfapb = dfap.sample(1000).groupby('Season').sum()
dfapb.plot.box()
Out[60]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f0b9e5fe390>
In [61]:
dfapb
Out[61]:
Area Production
Season
Kharif 7861992.0 1.995853e+07
Rabi 4472667.0 8.505345e+06
Whole Year 1334646.0 1.113622e+09
In [62]:
dfapb = dfap.sample(10)
dfapb.boxplot()
Out[62]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f0b9e57d950>
In [63]:
dfap.sample(10).boxplot(by='Crop_Year', figsize=(10, 3))
plt.xticks(rotation=90)
/usr/local/lib/python3.7/dist-packages/matplotlib/cbook/__init__.py:1376: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.
  X = np.atleast_1d(X.T if isinstance(X, np.ndarray) else np.asarray(X))
/usr/local/lib/python3.7/dist-packages/matplotlib/cbook/__init__.py:1376: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.
  X = np.atleast_1d(X.T if isinstance(X, np.ndarray) else np.asarray(X))
Out[63]:
(array([1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]),
 <a list of 16 Text major ticklabel objects>)
In [64]:
dfap.sample(10).boxplot(by='Season', figsize=(10, 3))
plt.xticks(rotation=90)
/usr/local/lib/python3.7/dist-packages/matplotlib/cbook/__init__.py:1376: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.
  X = np.atleast_1d(X.T if isinstance(X, np.ndarray) else np.asarray(X))
/usr/local/lib/python3.7/dist-packages/matplotlib/cbook/__init__.py:1376: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.
  X = np.atleast_1d(X.T if isinstance(X, np.ndarray) else np.asarray(X))
Out[64]:
(array([1, 2, 3, 1, 2, 3]), <a list of 6 Text major ticklabel objects>)
In [65]:
dfap.sample(10).boxplot(by='Crop', figsize=(10, 3))
plt.xticks(rotation=90)
/usr/local/lib/python3.7/dist-packages/matplotlib/cbook/__init__.py:1376: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.
  X = np.atleast_1d(X.T if isinstance(X, np.ndarray) else np.asarray(X))
/usr/local/lib/python3.7/dist-packages/matplotlib/cbook/__init__.py:1376: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.
  X = np.atleast_1d(X.T if isinstance(X, np.ndarray) else np.asarray(X))
Out[65]:
(array([1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
 <a list of 18 Text major ticklabel objects>)
In [66]:
dfap.head(2)
Out[66]:
ColIndex State_Name District_Name Crop_Year Season Crop Area Production
38171 7919 Andhra Pradesh VISAKHAPATANAM 2002 Whole Year Bottle Gourd 37.0 0.0
38165 1263 Andhra Pradesh CHITTOOR 2003 Whole Year Cabbage 73.0 0.0
In [67]:
dfap.sample(10).boxplot(by="Season");
plt.xticks(rotation=90);
/usr/local/lib/python3.7/dist-packages/matplotlib/cbook/__init__.py:1376: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.
  X = np.atleast_1d(X.T if isinstance(X, np.ndarray) else np.asarray(X))
/usr/local/lib/python3.7/dist-packages/matplotlib/cbook/__init__.py:1376: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.
  X = np.atleast_1d(X.T if isinstance(X, np.ndarray) else np.asarray(X))
In [68]:
dfap.sample(10).boxplot(by=["Season", 'Crop'], 
                        figsize = (8,6), layout=(2, 1));
plt.xticks(rotation=90);
In [69]:
dfap.plot.scatter(x="Production", y='Area')
Out[69]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f0b9de77410>
In [70]:
dfap.plot.scatter(x="Production", y='Area', s=5)
Out[70]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f0b9dcbd150>
In [71]:
dfap.plot.scatter(x="Area", y='Production', s=2)
Out[71]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f0b9dc3e550>