import pandas as pd
# Load the processed RBI payments CSV into a DataFrame.
df = pd.read_csv(r"C:\Users\lakshita rawat\Downloads\Jupyter_files\rbi_processed_2.csv")

# dropna() returns a new DataFrame rather than modifying in place, so the
# result has to be assigned back for the rows with missing values to be removed.
df = df.dropna()

print(df.shape) #total rows and columns
print()

print(df.columns)
print()

# Missing values per column
print(df.isnull().sum())
print()

# Number of fully duplicated rows
print(df.duplicated().sum())
print()

# Summary statistics of the numeric columns
print(df.describe())
print()

# info() writes its report itself and returns None; wrapping it in print()
# would add a stray "None" line after the report.
df.info()

(68, 8)

Index(['Month', 'Year', 'UPI Transactions-Volume(Lakh)',
       'UPI Transactions-Value(Crores)', 'Card Payments-Volume(Lakh)',
       'Card Payments-Value(Crores)', 'Cash Payemnts-Volume(Lakh)',
       'Cash Payemnts-Value(Crores)'],
      dtype='object')

Month                             0
Year                              0
UPI Transactions-Volume(Lakh)     0
UPI Transactions-Value(Crores)    0
Card Payments-Volume(Lakh)        0
Card Payments-Value(Crores)       0
Cash Payemnts-Volume(Lakh)        0
Cash Payemnts-Value(Crores)       0
dtype: int64

0

              Year  UPI Transactions-Volume(Lakh)  \
count    68.000000                      68.000000   
mean   2022.176471                   77995.191176   
std       1.683468                   55448.362412   
min    2019.000000                    9996.000000   
25%    2021.000000                   26131.250000   
50%    2022.000000                   66813.500000   
75%    2024.000000                  121277.750000   
max    2025.000000                  186775.000000   

       UPI Transactions-Value(Crores)  Card Payments-Volume(Lakh)  \
count                    6.800000e+01                   68.000000   
mean                     1.188774e+06                 5187.367647   
std                      7.381061e+05                  588.618720   
min                      1.511410e+05                 2822.000000   
25%                      4.925245e+05                 4950.500000   
50%                      1.094800e+06                 5225.000000   
75%                      1.831173e+06                 5517.500000   
max                      2.514297e+06                 6518.000000   

       Card Payments-Value(Crores)  Cash Payemnts-Volume(Lakh)  \
count                    68.000000                   68.000000   
mean                 168209.279412                 5394.573529   
std                   45516.696054                  604.287648   
min                   44015.000000                 2950.000000   
25%                  130040.250000                 5068.250000   
50%                  178453.500000                 5544.000000   
75%                  203238.250000                 5735.750000   
max                  248709.000000                 6533.000000   

       Cash Payemnts-Value(Crores)  
count                    68.000000  
mean                 261045.308824  
std                   25760.992143  
min                  129098.000000  
25%                  252337.750000  
50%                  265439.500000  
75%                  278022.750000  
max                  295573.000000  

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 68 entries, 0 to 67
Data columns (total 8 columns):
 #   Column                          Non-Null Count  Dtype 
---  ------                          --------------  ----- 
 0   Month                           68 non-null     object
 1   Year                            68 non-null     int64 
 2   UPI Transactions-Volume(Lakh)   68 non-null     int64 
 3   UPI Transactions-Value(Crores)  68 non-null     int64 
 4   Card Payments-Volume(Lakh)      68 non-null     int64 
 5   Card Payments-Value(Crores)     68 non-null     int64 
 6   Cash Payemnts-Volume(Lakh)      68 non-null     int64 
 7   Cash Payemnts-Value(Crores)     68 non-null     int64 
dtypes: int64(7), object(1)
memory usage: 4.4+ KB
None

# Shorten the volume column names.
# The cash column appears in this file under two spellings ("Payemnts" and
# "Payments"). rename() silently skips keys that are not present, so both
# spellings are mapped to make sure 'Cash-Volume' is always created.
df = df.rename(columns={
    'UPI Transactions-Volume(Lakh)': 'UPI-Volume',
    'Card Payments-Volume(Lakh)': 'Card-Volume',
    'Cash Payemnts-Volume(Lakh)': 'Cash-Volume',
    'Cash Payments-Volume(Lakh)': 'Cash-Volume'})
print(df.columns.tolist())

['Month', 'Year', 'UPI-Volume', 'UPI Transactions-Value(Crores)', 'Card-Volume', 'Card Payments-Value(Crores)', 'Cash-Volume', 'Cash Payemnts-Value(Crores)']

import pandas as pd
import matplotlib.pyplot as plt

# Yearly transaction volume per payment method and each method's percentage
# share of the combined yearly volume, shown as a table and a stacked area chart.
volume_columns = ['UPI-Volume', 'Card-Volume', 'Cash-Volume']
share_columns = ['UPI Share (%)', 'Card Share (%)', 'Cash Share (%)']

# One row per year, with the volumes summed over that year's rows
yearly = df.groupby('Year')[volume_columns].sum().reset_index()

# The combined volume of the three methods is the denominator for every share
yearly['Total'] = yearly[volume_columns].sum(axis=1)
for volume_col, share_col in zip(volume_columns, share_columns):
    yearly[share_col] = (yearly[volume_col] / yearly['Total']) * 100

print(yearly[['Year'] + share_columns])

# Stacked area chart: one layer per payment method, in the order UPI, Card, Cash
plt.figure(figsize=(10,6))
plt.stackplot(yearly['Year'],
              *[yearly[share_col] for share_col in share_columns],
              labels=['UPI', 'Card', 'Cash'],
              alpha=0.8)

plt.title("Share of UPI, Card, and Cash Payments (Volume %)")
plt.xlabel("Year")
plt.ylabel("Percentage Share")
plt.legend(loc="upper left")
plt.show()

   Year  UPI Share (%)  Card Share (%)  Cash Share (%)
0  2019      50.079264       24.627457       25.293278
1  2020      60.894343       19.199187       19.906470
2  2021      75.139287       12.111530       12.749183
3  2022      84.746680        7.339133        7.914186
4  2023      90.314988        4.498283        5.186729
5  2024      93.279271        3.364671        3.356058
6  2025      94.532605        2.980583        2.486811

import pandas as pd
import matplotlib.pyplot as plt

# Line chart comparing yearly cash volume against digital (UPI + Card) volume.
# Requires df to contain 'Year', 'UPI-Volume', 'Cash-Volume' and 'Card-Volume'.

# Sum the three volume columns per year
yearly_data = (
    df.groupby("Year")[["UPI-Volume", "Cash-Volume", "Card-Volume"]]
    .sum()
    .reset_index()
)

# Digital payments = UPI + Card
yearly_data["Digital-Volume"] = yearly_data["UPI-Volume"] + yearly_data["Card-Volume"]

plt.figure(figsize=(10,6))

# (column, legend label, extra line styling) for each plotted series;
# digital is drawn first, then cash as a red dashed line.
series_specs = [
    ("Digital-Volume", "Digital Payments (UPI + Card)", {}),
    ("Cash-Volume", "Cash Payments", {"linestyle": "--", "color": "red"}),
]

for column, legend_label, line_style in series_specs:
    plt.plot(yearly_data["Year"], yearly_data[column], label=legend_label,
             marker="o", linewidth=2, **line_style)
    # Annotate every point with its value, using thousands separators
    for x, y in zip(yearly_data["Year"], yearly_data[column]):
        plt.text(x, y, f"{y:,}", ha="center", va="bottom", fontsize=9)

plt.title("Cash Usage vs Digital Payments (Yearly)", fontsize=14, weight="bold")
plt.xlabel("Year", fontsize=12)
plt.ylabel("Transaction Volume", fontsize=12)
plt.legend()
plt.grid(True, linestyle="--", alpha=0.6)

plt.tight_layout()
plt.show()

# Shorten the value column names.
# The cash column appears in this file under two spellings ("Payemnts" and
# "Payments"). rename() silently skips keys that are not present, so both
# spellings are mapped to make sure 'Cash-Value' is always created.
df = df.rename(columns={
    'UPI Transactions-Value(Crores)': 'UPI-Value',
    'Card Payments-Value(Crores)': 'Card-Value',
    'Cash Payments-Value(Crores)': 'Cash-Value',
    'Cash Payemnts-Value(Crores)': 'Cash-Value'})
print(df.columns.tolist())

['Month', 'Year', 'UPI Transactions-Volume(Lakh)', 'UPI-Value', 'Card Payments-Volume(Lakh)', 'Card-Value', 'Cash Payments-Volume(Lakh)', 'Cash-Value']

import pandas as pd

# Per-row share of each payment method in the total transaction value,
# averaged per year.
# NOTE: the yearly figure is the mean of the per-row percentages, so every
# row counts equally regardless of its total value.

# Percentage column -> value column it is derived from
pct_sources = {
    "UPI-%": "UPI-Value",
    "Card-%": "Card-Value",
    "Cash-%": "Cash-Value",
}

# Total transaction value of each row (NaN if any component is NaN)
df["Total-Value"] = df["UPI-Value"] + df["Card-Value"] + df["Cash-Value"]

# Remove rows that contain any missing value
df = df.dropna()

# Each method's percentage of the row total
for pct_col, value_col in pct_sources.items():
    df[pct_col] = (df[value_col] / df["Total-Value"]) * 100

# Average the per-row percentages within each year
yearly = df.groupby("Year")[list(pct_sources)].mean().reset_index()

# Final yearly proportion table
print(yearly)

     Year      UPI-%     Card-%     Cash-%
0  2019.0  32.312683  20.355177  47.332140
1  2020.0  44.548253  16.473410  38.978337
2  2021.0  59.647277  13.701220  26.651503
3  2022.0  69.958237  11.567489  18.474274
4  2023.0  76.294980   9.859160  13.845860
5  2024.0  81.303223   8.415398  10.281379
6  2025.0  83.629999   7.774241   8.595761

import matplotlib.pyplot as plt

# Stacked bar chart of the yearly percentage columns held in `yearly`.

# Remove rows that contain any missing value
df = df.dropna()

plt.figure(figsize=(10,6))

# Draw the segments bottom-up; `stack_base` is the running height on which the
# next segment sits (None for the first segment, i.e. matplotlib's default).
stack_base = None
for pct_col, legend_label in (("UPI-%", "UPI"), ("Card-%", "Cards"), ("Cash-%", "Cash")):
    if stack_base is None:
        plt.bar(yearly["Year"], yearly[pct_col], label=legend_label)
        stack_base = yearly[pct_col]
    else:
        plt.bar(yearly["Year"], yearly[pct_col], bottom=stack_base, label=legend_label)
        stack_base = stack_base + yearly[pct_col]

plt.xlabel("Year")
plt.ylabel("Proportion (%)")
plt.title("Yearly Proportion of UPI, Cards, and Cash Transactions")
plt.legend()
plt.show()

import matplotlib.pyplot as plt
import seaborn as sns

# Box plot of transaction values per year, one box per payment method.
# Requires df to contain 'Year', 'UPI-Value', 'Cash-Value' and 'Card-Value'.

# Reshape wide -> long so that seaborn can colour the boxes by payment method:
# one row per (Year, Payment Method) observation.
value_columns = ["UPI-Value", "Cash-Value", "Card-Value"]
df_melted = df.melt(
    id_vars=["Year"],
    value_vars=value_columns,
    var_name="Payment Method",
    value_name="Transaction Value",
)

plt.figure(figsize=(12,6))

# Spread of values within each year, split by payment method
sns.boxplot(
    data=df_melted,
    x="Year",
    y="Transaction Value",
    hue="Payment Method",
)

plt.title("Spread of Transaction Value Across Years by Payment Method")
plt.xlabel("Year")
plt.ylabel("Transaction Value (in Crores)")
plt.legend(title="Payment Method")
plt.xticks(rotation=45)
plt.show()

	Month	Year	UPI Transactions-Volume(Lakh)	UPI Transactions-Value(Crores)	Card Payments-Volume(Lakh)	Card Payments-Value(Crores)	Cash Payments-Volume(Lakh)	Cash Payments-Value(Crores)
0	Jun	2025	183950	2403931	5673	218552	4382	229907
1	May	2025	186775	2514297	5827	226810	4605	246201
2	Apr	2025	178934	2394926	5674	222351	4603	244747
3	Mar	2025	183015	2477222	5790	240549	4984	263892
4	Feb	2025	161062	2196482	5052	201522	4498	235618
...	...	...	...	...	...	...	...	...
63	Mar	2020	12468	206462	5247	98783	5440	248574
64	Feb	2020	13257	222517	6129	120708	6185	283280
65	Jan	2020	13050	216243	6518	129444	6533	295573
66	Dec	2019	13084	202521	6485	128488	6491	290918
67	Nov	2019	12188	189229	5943	118339	6273	282631

UPI Payments VS Card Payments VS Cash Payments¶

Comparing the total count of UPI, Cash and Card transactions¶

Insights from the graph-¶

Examining whether cash usage is declining while digital payments (UPI + Card) are increasing¶

Insights from the graph-¶

Calculating the proportion (%) of UPI, Cards, and Cash in Total Transaction Values¶

Insights from the graph-¶

Analyzing the Spread of Transaction Values Across Years¶

Insights from the graph-¶