PANDAS DATAFRAME
Calculating Aging buckets
date based_Aging_Calculations
In [ ]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
In [ ]:
df = pd.read_excel('/content/ayaBook3.xlsx', sheet_name='book3_feb_21')
In [ ]:
df['Date_Diff'] = df['Open_as_on'] - df['Post_Date']
In [ ]:
# df.drop('DateDiff', inplace=True, axis=1)
In [ ]:
df.Date_Diff
Out[ ]:
0         25 days
1         24 days
2         24 days
3         24 days
4         25 days
           ...   
59658   3041 days
59659   2861 days
59660   2860 days
59661   2830 days
59662   2829 days
Name: Date_Diff, Length: 59663, dtype: timedelta64[ns]

to convert timedelta format to int16 format - makes calculations easier with f()

In [ ]:
# df['Date_Diff'] = pd.to_numeric(df['Date_Diff'])
# df['tdColumn'] = df['tdColumn'].dt.days.astype('int16')
df['Date_Diff'] = df['Date_Diff'].dt.days.astype('int16')
In [ ]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 59663 entries, 0 to 59662
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   Account               59663 non-null  int64         
 1   CoCd                  59663 non-null  int64         
 2   DocumentNo            59663 non-null  int64         
 3   Type                  59663 non-null  object        
 4   Payment reference     0 non-null      float64       
 5   Amount in local cur.  59663 non-null  float64       
 6   Amount in loc.curr.2  59663 non-null  float64       
 7   Pstng Date            59663 non-null  datetime64[ns]
 8   Clearing              0 non-null      float64       
 9   Open as of            59663 non-null  datetime64[ns]
 10  Date_Diff             59663 non-null  int16         
dtypes: datetime64[ns](2), float64(4), int16(1), int64(3), object(1)
memory usage: 4.7+ MB
In [ ]:
def aging(d_days):
    if d_days < 30:
        return "30 days"
    elif d_days < 60:
        return "60 days"
    elif d_days < 90:
        return "90 days"
    elif d_days < 120:
        return "120 days"
    elif d_days < 150:
        return "150 days"
    elif d_days < 180:
        return "180 days"
    elif  d_days < 360:
        return "360 days"
    else:
        return "more than year"
In [ ]:
df['aging'] = df['Date_Diff'].apply(aging)
In [ ]:
df.to_excel("aging.xlsx", sheet_name="aging")

Job Completed


In [ ]: