PANDAS - SERIES
class work
PadhAI_Pandas_Series
In [ ]:
import pandas as pd
import numpy as np

Creating SERIES objects

All the values in a Series share a single dtype. If the input mixes types (e.g. one float among integers), pandas upcasts everything to a common dtype, as the float64 example below shows.

In [ ]:
s = pd.Series([0,1,1,2,3,5,8,13])
In [ ]:
print(s)
0     0
1     1
2     1
3     2
4     3
5     5
6     8
7    13
dtype: int64
In [ ]:
s = pd.Series([0.0,1,1,2,3,5,8,13])
In [ ]:
print(s)
0     0.0
1     1.0
2     1.0
3     2.0
4     3.0
5     5.0
6     8.0
7    13.0
dtype: float64
In [ ]:
s.values
Out[ ]:
array([ 0.,  1.,  1.,  2.,  3.,  5.,  8., 13.])
In [ ]:
s.index
Out[ ]:
RangeIndex(start=0, stop=8, step=1)
In [ ]:
print(s.index)
RangeIndex(start=0, stop=8, step=1)
In [ ]:
for v in s.values: print(v)
0.0
1.0
1.0
2.0
3.0
5.0
8.0
13.0
In [ ]:
for i in s.index: print (i)
0
1
2
3
4
5
6
7

zip pairs each value with its index label, yielding (value, index) tuples

In [ ]:
for item in zip(s.values, s.index): print (item)
(0.0, 0)
(1.0, 1)
(1.0, 2)
(2.0, 3)
(3.0, 4)
(5.0, 5)
(8.0, 6)
(13.0, 7)
In [ ]:
s[0]
Out[ ]:
0.0
In [ ]:
s[2]
Out[ ]:
1.0
In [ ]:
mercury = pd.Series([0.33, 57.9, 4222.6], ['mass', 'diameter', 'dayLength'])
In [ ]:
print(mercury)
mass            0.33
diameter       57.90
dayLength    4222.60
dtype: float64
pd.Series(data=None, index=None, dtype: ExtensionDtype | str | dtype | Type[str] | Type[float] | Type[int] | Type[complex] | Type[bool] | Type[object] | None = None, name=None, copy: bool = False, fastpath: bool = False) -> None
In [ ]:
print(mercury('mass'))
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-24-d6ae6c0636e3> in <module>()
----> 1 print(mercury('mass'))

TypeError: 'Series' object is not callable
In [ ]:
print(mercury['mass'])
0.33
In [ ]:
print(mercury['dayLength'])
4222.6
In [ ]:
mercury.mass
Out[ ]:
0.33
In [ ]:
mercury.dayLength
Out[ ]:
4222.6
In [ ]:
arr = np.random.randint(0, 10, 10)
In [ ]:
print(arr)
[5 3 9 5 4 3 5 9 3 6]
In [ ]:
arr
Out[ ]:
array([5, 3, 9, 5, 4, 3, 5, 9, 3, 6])
In [ ]:
rand_series = pd.Series(arr)
In [ ]:
print(rand_series)
0    5
1    3
2    9
3    5
4    4
5    3
6    5
7    9
8    3
9    6
dtype: int64
In [ ]:
ind = np.arange(10, 20)
In [ ]:
rand_seriess = pd.Series(arr, ind)
In [ ]:
print(rand_seriess)
10    5
11    3
12    9
13    5
14    4
15    3
16    5
17    9
18    3
19    6
dtype: int64
In [ ]:
## mercury = pd.Series([0.33, 57.9, 4222.6], ['mass', 'diameter', 'dayLength'])

# The same Mercury data as a dict: when a dict is passed to pd.Series,
# its keys become the index labels and its values become the data.
d = {
    'mass': 0.33,
    'diameter': 57.9,
    'dayLength': 4222.6,
}
In [ ]:
mercury = pd.Series(d)
In [ ]:
print(mercury)
mass            0.33
diameter       57.90
dayLength    4222.60
dtype: float64
In [ ]:
mercury = pd.Series(d, index = ['mass', 'diameter'])
In [ ]:
print(mercury)
mass         0.33
diameter    57.90
dtype: float64
In [ ]:
mercury = pd.Series(d, index = ['mass', 'diameter', 'dayLength'])
In [ ]:
print(mercury)
mass            0.33
diameter       57.90
dayLength    4222.60
dtype: float64

loc (explicit label-based location) and iloc (implicit integer-position location)

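iloc works like list indexing: positions start at 0, negative positions count from the end, and slices exclude the stop position. loc looks values up by their index label instead.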

In [ ]:
import pandas as pd
import numpy as np
In [ ]:
s = pd.Series([0.0,1,1,2,3,5,8,13], index=[1,2,3,4,5,6,7,8])
In [ ]:
print(s)
1     0.0
2     1.0
3     1.0
4     2.0
5     3.0
6     5.0
7     8.0
8    13.0
dtype: float64
In [ ]:
s.loc[4]
Out[ ]:
2.0
In [ ]:
s.iloc[4]
Out[ ]:
3.0
In [ ]:
s.iloc[0]
Out[ ]:
0.0

The following code raises a KeyError because loc looks for the label 0, and the index labels of this Series run from 1 to 8.

In [ ]:
s.loc[0]
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   3360             try:
-> 3361                 return self._engine.get_loc(casted_key)
   3362             except KeyError as err:

/usr/local/lib/python3.7/dist-packages/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

/usr/local/lib/python3.7/dist-packages/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()

KeyError: 0

The above exception was the direct cause of the following exception:

KeyError                                  Traceback (most recent call last)
<ipython-input-10-a1d5330d14f8> in <module>()
----> 1 s.loc[0]

/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py in __getitem__(self, key)
    929 
    930             maybe_callable = com.apply_if_callable(key, self.obj)
--> 931             return self._getitem_axis(maybe_callable, axis=axis)
    932 
    933     def _is_scalar_access(self, key: tuple):

/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py in _getitem_axis(self, key, axis)
   1162         # fall thru to straight lookup
   1163         self._validate_key(key, axis)
-> 1164         return self._get_label(key, axis=axis)
   1165 
   1166     def _get_slice_axis(self, slice_obj: slice, axis: int):

/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py in _get_label(self, label, axis)
   1111     def _get_label(self, label, axis: int):
   1112         # GH#5667 this will fail if the label is not present in the axis.
-> 1113         return self.obj.xs(label, axis=axis)
   1114 
   1115     def _handle_lowerdim_multi_index_axis0(self, tup: tuple):

/usr/local/lib/python3.7/dist-packages/pandas/core/generic.py in xs(self, key, axis, level, drop_level)
   3774                 raise TypeError(f"Expected label or tuple of labels, got {key}") from e
   3775         else:
-> 3776             loc = index.get_loc(key)
   3777 
   3778             if isinstance(loc, np.ndarray):

/usr/local/lib/python3.7/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   3361                 return self._engine.get_loc(casted_key)
   3362             except KeyError as err:
-> 3363                 raise KeyError(key) from err
   3364 
   3365         if is_scalar(key) and isna(key) and not self.hasnans:

KeyError: 0
In [ ]:
mercury.iloc[0]
Out[ ]:
0.33
In [ ]:
mercury.loc['mass']
Out[ ]:
0.33
In [ ]:
mercury.iloc[-1]
Out[ ]:
4222.6
In [ ]:
mercury.iloc[0:2]   # 2 is exclusive
Out[ ]:
mass         0.33
diameter    57.90
dtype: float64
In [ ]:
mercury.iloc[:2]
Out[ ]:
mass         0.33
diameter    57.90
dtype: float64
In [ ]:
mercury.iloc[2:]
Out[ ]:
dayLength    4222.6
dtype: float64
In [ ]:
mercury.loc['dayLength']
Out[ ]:
4222.6

Simple operations with Series

In [ ]:
mass = pd.Series([0.33, 4.87, 5.97, 0.642, 1898, 568, 86.8, 102, 0.0146],
                 index=['Mercury', 'Venus', 'Earth', 'Mars', 'Jupiter', 'Saturn', 'Uranus', 'Neptune', 'Pluto'])
In [ ]:
print(mass)
Mercury       0.3300
Venus         4.8700
Earth         5.9700
Mars          0.6420
Jupiter    1898.0000
Saturn      568.0000
Uranus       86.8000
Neptune     102.0000
Pluto         0.0146
dtype: float64
In [ ]:
mass.iloc[3:]
Out[ ]:
Mars          0.6420
Jupiter    1898.0000
Saturn      568.0000
Uranus       86.8000
Neptune     102.0000
Pluto         0.0146
dtype: float64
In [ ]:
mass.iloc[1]
Out[ ]:
4.87
In [ ]:
mass.loc['Earth']
Out[ ]:
5.97
In [ ]:
mass['Earth']
Out[ ]:
5.97
In [ ]:
mass['Earth':'Jupiter'] # here Jupiter is inclusive, unlike lists being exclusive
Out[ ]:
Earth         5.970
Mars          0.642
Jupiter    1898.000
dtype: float64
In [ ]:
mass.iloc[2:5]
Out[ ]:
Earth         5.970
Mars          0.642
Jupiter    1898.000
dtype: float64
In [ ]:
print(mass)
Mercury       0.3300
Venus         4.8700
Earth         5.9700
Mars          0.6420
Jupiter    1898.0000
Saturn      568.0000
Uranus       86.8000
Neptune     102.0000
Pluto         0.0146
dtype: float64
In [ ]:
mass > 100
Out[ ]:
Mercury    False
Venus      False
Earth      False
Mars       False
Jupiter     True
Saturn      True
Uranus     False
Neptune     True
Pluto      False
dtype: bool
In [ ]:
mass[mass > 100]
Out[ ]:
Jupiter    1898.0
Saturn      568.0
Neptune     102.0
dtype: float64
In [ ]:
mass_gr_100 = mass[mass > 100]
In [ ]:
print(mass_gr_100)
Jupiter    1898.0
Saturn      568.0
Neptune     102.0
dtype: float64
In [ ]:
mass[(mass > 100) & (mass < 1000)]
Out[ ]:
Saturn     568.0
Neptune    102.0
dtype: float64

Standard arithmetic operations and NumPy functions apply element-wise to a Series, just as they do to a NumPy array.

In [ ]:
mass * 2
Out[ ]:
Mercury       0.6600
Venus         9.7400
Earth        11.9400
Mars          1.2840
Jupiter    3796.0000
Saturn     1136.0000
Uranus      173.6000
Neptune     204.0000
Pluto         0.0292
dtype: float64
In [ ]:
mass / 10
Out[ ]:
Mercury      0.03300
Venus        0.48700
Earth        0.59700
Mars         0.06420
Jupiter    189.80000
Saturn      56.80000
Uranus       8.68000
Neptune     10.20000
Pluto        0.00146
dtype: float64
In [ ]:
np.mean(mass)
Out[ ]:
296.29184444444445
In [ ]:
np.median(mass)
Out[ ]:
5.97
In [ ]:
np.amin(mass)
Out[ ]:
0.0146
In [ ]:
np.amax(mass)
Out[ ]:
1898.0
In [ ]:
mass + mass
Out[ ]:
Mercury       0.6600
Venus         9.7400
Earth        11.9400
Mars          1.2840
Jupiter    3796.0000
Saturn     1136.0000
Uranus      173.6000
Neptune     204.0000
Pluto         0.0292
dtype: float64

If the indexes don't match

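Here the indices are aligned first and the operation is then performed label by label, unlike NumPy arrays, which operate by position. Labels present in only one of the two Series produce NaN in the result.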

In [ ]:
big_mass = mass[(mass > 100)]
In [ ]:
big_mass
Out[ ]:
Jupiter    1898.0
Saturn      568.0
Neptune     102.0
dtype: float64
In [ ]:
new_mass = mass + big_mass
In [ ]:
new_mass
Out[ ]:
Earth         NaN
Jupiter    3796.0
Mars          NaN
Mercury       NaN
Neptune     204.0
Pluto         NaN
Saturn     1136.0
Uranus        NaN
Venus         NaN
dtype: float64
In [ ]:
pd.isnull(new_mass)
Out[ ]:
Earth       True
Jupiter    False
Mars        True
Mercury     True
Neptune    False
Pluto       True
Saturn     False
Uranus      True
Venus       True
dtype: bool
In [ ]:
new_mass[~pd.isnull(new_mass)]
Out[ ]:
Jupiter    3796.0
Neptune     204.0
Saturn     1136.0
dtype: float64

to add another element to the existing series...

In [ ]:
mass['Moon'] = 0.7346
In [ ]:
mass
Out[ ]:
Mercury       0.3300
Venus         4.8700
Earth         5.9700
Mars          0.6420
Jupiter    1898.0000
Saturn      568.0000
Uranus       86.8000
Neptune     102.0000
Pluto         0.0146
Moon          0.7346
dtype: float64
In [ ]:
mass.drop(['Pluto'])
Out[ ]:
Mercury       0.3300
Venus         4.8700
Earth         5.9700
Mars          0.6420
Jupiter    1898.0000
Saturn      568.0000
Uranus       86.8000
Neptune     102.0000
Moon          0.7346
dtype: float64
In [ ]:
mass
Out[ ]:
Mercury       0.3300
Venus         4.8700
Earth         5.9700
Mars          0.6420
Jupiter    1898.0000
Saturn      568.0000
Uranus       86.8000
Neptune     102.0000
Pluto         0.0146
Moon          0.7346
dtype: float64
In [ ]:
mass.drop(['Moon'], inplace=True)
In [ ]:
mass
Out[ ]:
Mercury       0.3300
Venus         4.8700
Earth         5.9700
Mars          0.6420
Jupiter    1898.0000
Saturn      568.0000
Uranus       86.8000
Neptune     102.0000
Pluto         0.0146
dtype: float64

Collect the diameters of these planets and store them in a Series object. Then, given the two Series objects mass and diameter, compute the density of each planet.

  • density equals mass over volume: $d = \frac{m}{v}$
  • volume of a sphere of radius $r$: $v = \frac{4}{3}\pi r^{3}$
In [ ]:
print(0.330 * 4879)
1610.0700000000002
In [ ]:
# Planet diameters keyed by planet name, using the same labels as `mass`
# so the two Series align label by label in arithmetic.
# NOTE(review): the values look like kilometres — confirm the source and units.
# NOTE(review): Venus is entered as 120104, but its diameter is about 12,104 km,
# so this looks like a typo. It is left unchanged here because the recorded
# outputs below (radius, volume, density) were computed from this value.
diameter = pd.Series([4879, 120104, 12756, 6792, 142984, 120536, 51118, 49528, 2376],
                 index=['Mercury', 'Venus', 'Earth', 'Mars', 'Jupiter', 'Saturn', 'Uranus', 'Neptune', 'Pluto'])
In [ ]:
mass
Out[ ]:
Mercury       0.3300
Venus         4.8700
Earth         5.9700
Mars          0.6420
Jupiter    1898.0000
Saturn      568.0000
Uranus       86.8000
Neptune     102.0000
Pluto         0.0146
dtype: float64
In [ ]:
diameter
Out[ ]:
Mercury      4879
Venus      120104
Earth       12756
Mars         6792
Jupiter    142984
Saturn     120536
Uranus      51118
Neptune     49528
Pluto        2376
dtype: int64
In [ ]:
radius = diameter / 2
In [ ]:
print(radius)
Mercury     2439.5
Venus      60052.0
Earth       6378.0
Mars        3396.0
Jupiter    71492.0
Saturn     60268.0
Uranus     25559.0
Neptune    24764.0
Pluto       1188.0
dtype: float64
In [ ]:
# NOTE: this first attempt cubes the diameter rather than the radius, so the
# resulting volume is 8x too large; the recorded output below reflects that.
# The radius-based calculation is done in the later `dnsity` cell.
volume = 4/3 * ((3.14) * diameter ** 3)
In [ ]:
volume
Out[ ]:
Mercury    4.862514e+11
Venus      7.253386e+15
Earth      8.689843e+12
Mars       1.311781e+12
Jupiter    1.223857e+16
Saturn     7.331937e+15
Uranus     5.592293e+14
Neptune    5.086520e+14
Pluto      5.615749e+10
dtype: float64
In [ ]:
density = mass / volume
In [ ]:
density
Out[ ]:
Mercury    6.786613e-13
Venus      6.714106e-16
Earth      6.870090e-13
Mars       4.894109e-13
Jupiter    1.550835e-13
Saturn     7.746930e-14
Uranus     1.552136e-13
Neptune    2.005300e-13
Pluto      2.599831e-13
dtype: float64
In [ ]:
dnsity = mass / ((4/3) * ((3.14) * radius ** 3))
In [ ]:
dnsity
Out[ ]:
Mercury    5.429291e-12
Venus      5.371284e-15
Earth      5.496072e-12
Mars       3.915287e-12
Jupiter    1.240668e-12
Saturn     6.197544e-13
Uranus     1.241709e-12
Neptune    1.604240e-12
Pluto      2.079865e-12
dtype: float64

working with Nifty dataset

In [ ]:
# Load the CSV using its first column as the index, then pick the first
# remaining column by position so the result is a Series instead of a DataFrame.
# NOTE(review): "/content/..." is an absolute path — presumably a Colab upload
# location; adjust it when running elsewhere.
nift = pd.read_csv("/content/nifty.csv", index_col = 0).iloc[:,0]
In [ ]:
nift.head()
Out[ ]:
Date
01-Jan-2019    10910.10
02-Jan-2019    10792.50
03-Jan-2019    10672.25
04-Jan-2019    10727.35
07-Jan-2019    10771.80
Name: Close, dtype: float64
In [ ]:
nift.tail()
Out[ ]:
Date
24-Dec-2019    12214.55
26-Dec-2019    12126.55
27-Dec-2019    12245.80
30-Dec-2019    12255.85
31-Dec-2019    12168.45
Name: Close, dtype: float64
In [ ]:
# The first nine Fibonacci numbers.
data = [0,1,1,2,3,5,8,13,21]
s = pd.Series(data)
# diff() subtracts the previous element from each element; the first entry
# has no predecessor, so it is NaN and the result is float64.
b = s.diff()
# Position 5 holds 5 - 3.
print(b.iloc[5])
2.0
In [ ]:
# A scalar passed together with an explicit index is repeated for every label.
s = pd.Series("a", index=[1,2,3,4])
# loc looks up by label, so this reads the entry labelled 2 (not position 2).
print(s.loc[2])
a
In [2]:
!pip install nbconvert
Requirement already satisfied: nbconvert in /usr/local/lib/python3.7/dist-packages (5.6.1)
Requirement already satisfied: entrypoints>=0.2.2 in /usr/local/lib/python3.7/dist-packages (from nbconvert) (0.4)
Requirement already satisfied: nbformat>=4.4 in /usr/local/lib/python3.7/dist-packages (from nbconvert) (5.1.3)
Requirement already satisfied: pandocfilters>=1.4.1 in /usr/local/lib/python3.7/dist-packages (from nbconvert) (1.5.0)
Requirement already satisfied: testpath in /usr/local/lib/python3.7/dist-packages (from nbconvert) (0.6.0)
Requirement already satisfied: jupyter-core in /usr/local/lib/python3.7/dist-packages (from nbconvert) (4.9.2)
Requirement already satisfied: pygments in /usr/local/lib/python3.7/dist-packages (from nbconvert) (2.6.1)
Requirement already satisfied: bleach in /usr/local/lib/python3.7/dist-packages (from nbconvert) (4.1.0)
Requirement already satisfied: mistune<2,>=0.8.1 in /usr/local/lib/python3.7/dist-packages (from nbconvert) (0.8.4)
Requirement already satisfied: traitlets>=4.2 in /usr/local/lib/python3.7/dist-packages (from nbconvert) (5.1.1)
Requirement already satisfied: jinja2>=2.4 in /usr/local/lib/python3.7/dist-packages (from nbconvert) (2.11.3)
Requirement already satisfied: defusedxml in /usr/local/lib/python3.7/dist-packages (from nbconvert) (0.7.1)
Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.7/dist-packages (from jinja2>=2.4->nbconvert) (2.0.1)
Requirement already satisfied: ipython-genutils in /usr/local/lib/python3.7/dist-packages (from nbformat>=4.4->nbconvert) (0.2.0)
Requirement already satisfied: jsonschema!=2.5.0,>=2.4 in /usr/local/lib/python3.7/dist-packages (from nbformat>=4.4->nbconvert) (4.3.3)
Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /usr/local/lib/python3.7/dist-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.4->nbconvert) (0.18.1)
Requirement already satisfied: attrs>=17.4.0 in /usr/local/lib/python3.7/dist-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.4->nbconvert) (21.4.0)
Requirement already satisfied: importlib-resources>=1.4.0 in /usr/local/lib/python3.7/dist-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.4->nbconvert) (5.4.0)
Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.4->nbconvert) (3.10.0.2)
Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.4->nbconvert) (4.11.2)
Requirement already satisfied: zipp>=3.1.0 in /usr/local/lib/python3.7/dist-packages (from importlib-resources>=1.4.0->jsonschema!=2.5.0,>=2.4->nbformat>=4.4->nbconvert) (3.7.0)
Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.7/dist-packages (from bleach->nbconvert) (1.15.0)
Requirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from bleach->nbconvert) (21.3)
Requirement already satisfied: webencodings in /usr/local/lib/python3.7/dist-packages (from bleach->nbconvert) (0.5.1)
Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->bleach->nbconvert) (3.0.7)
In [ ]: