https://github.com/harish-siva/analyzing-personal-expenses

This project simulates a personal expense tracker using the Faker library. It generates realistic monthly expense data, processes and stores it in a SQL database, and runs SQL queries to derive insights into spending behavior. A Streamlit app visualizes these insights and showcases the results of the SQL queries.
https://github.com/harish-siva/analyzing-personal-expenses
data-visualization expense-tracking exploratory-data-analysis financial-analysis matplotlib pandas python seaborn sql sqlalchemy streamlit
Last synced: 7 months ago
JSON representation
Host: GitHub
URL: https://github.com/harish-siva/analyzing-personal-expenses
Owner: Harish-Siva
Created: 2025-01-25T14:31:03.000Z (9 months ago)
Default Branch: main
Last Pushed: 2025-01-25T15:07:27.000Z (9 months ago)
Last Synced: 2025-01-25T15:27:04.924Z (9 months ago)
Topics: data-visualization, expense-tracking, exploratory-data-analysis, financial-analysis, matplotlib, pandas, python, seaborn, sql, sqlalchemy, streamlit
Language: Python
Homepage:
Size: 0 Bytes
Stars: 0
Watchers: 1
Forks: 0
Open Issues: 0
Metadata Files:
- Readme: README.md
Awesome Lists containing this project

README

          import pandas as pd

import random

from faker import Faker

import calendar

# Initialize the Faker instance
# NOTE(review): `fake` is not referenced anywhere else in the visible source;
# confirm whether it is still needed.

fake = Faker()

# Expense categories mapped to the plausible descriptions a generated
# transaction of that category can carry.
category_descriptions = {
    "Food": [
        "Pizza order",
        "Grocery shopping at local market",
        "Dinner at Italian restaurant",
        "Coffee at cafe",
        "Bakery purchase",
    ],
    "Transportation": [
        "Taxi fare",
        "Monthly bus pass",
        "Train ticket to downtown",
        "Gas station refill",
        "Ride-sharing service payment",
    ],
    "Bills": [
        "Electricity bill payment",
        "Water utility bill",
        "Internet service provider charge",
        "Mobile phone bill",
        "Cable TV subscription",
    ],
    "Entertainment": [
        "Movie theater ticket",
        "Concert ticket purchase",
        "Streaming service subscription",
        "Amusement park entry fee",
        "Bookstore purchase",
    ],
    "Healthcare": [
        "Pharmacy medication purchase",
        "Doctor consultation fee",
        "Dental clinic payment",
        "Health insurance premium",
        "Optician eyewear purchase",
    ],
    "Groceries": [
        "Dairy products",
        "Meat and fish",
        "Kitchen items",
        "Snacks and Drinks",
        "Cooking Essentials",
    ],
    "Subscriptions": [
        "Amazon Prime Membership",
        "Netflix",
        "Spotify",
        "LinkedIn",
        "Disney Hotstar",
    ],
}

# Cashback rate per payment mode, expressed as a fraction of the amount paid
# (0.05 == 5%). The key order is kept as-is because the payment mode of each
# generated record is drawn from this mapping's keys.
cashback_rates = {
    "UPI": 0.05,
    "Online": 0.10,
    "Cash": 0.15,
    "CardTransaction": 0.05,
    "Netbanking": 0.05,
}

# Function to generate random dates within a specific month and year

def generate_random_dates(year, month, num_records):
    """Return ``num_records`` random ``YYYY-MM-DD`` strings inside one month.

    When the month has at least ``num_records`` days, every returned date is
    distinct; otherwise days are drawn independently and may repeat.
    """
    days_in_month = pd.Period(year=year, month=month, freq='M').days_in_month

    if num_records > days_in_month:
        # More records than calendar days: repeats are unavoidable.
        picked_days = [random.randint(1, days_in_month) for _ in range(num_records)]
    else:
        # Enough days available: sample without replacement.
        picked_days = random.sample(range(1, days_in_month + 1), num_records)

    return [f"{year}-{month:02d}-{day:02d}" for day in picked_days]

# Function to generate monthly expense data

def generate_monthly_data(year, month, num_records):
    """Build a DataFrame of ``num_records`` synthetic expenses for one month.

    Each record gets a random category, payment mode, a description that
    belongs to the chosen category, an amount between 10 and 500 (rounded to
    two decimals) and the cashback implied by the payment mode's rate.
    """
    record_dates = generate_random_dates(year, month, num_records)
    category_names = list(category_descriptions.keys())
    mode_names = list(cashback_rates.keys())

    picked_categories = []
    picked_modes = []
    picked_descriptions = []
    amounts = []
    cashbacks = []

    for _ in range(num_records):
        # The order of the random draws is kept: category, mode, description, amount.
        chosen_category = random.choice(category_names)
        chosen_mode = random.choice(mode_names)
        chosen_description = random.choice(category_descriptions[chosen_category])
        paid = round(random.uniform(10, 500), 2)

        picked_categories.append(chosen_category)
        picked_modes.append(chosen_mode)
        picked_descriptions.append(chosen_description)
        amounts.append(paid)
        cashbacks.append(round(paid * cashback_rates[chosen_mode], 2))

    return pd.DataFrame({
        'Date': record_dates,
        'Category': picked_categories,
        'Payment Mode': picked_modes,
        'Description': picked_descriptions,
        'Amount Paid': amounts,
        'Cashback': cashbacks,
    })

# One DataFrame of synthetic 2023 expenses per month, keyed by month name.
monthly_dataframes = {}

for month in range(1, 13):
    month_label = calendar.month_name[month]

    # 180 records per month, tagged with the month name for readability.
    monthly_data = generate_monthly_data(2023, month, num_records=180)
    monthly_data['Month'] = month_label

    monthly_dataframes[month_label] = monthly_data

# Quick sanity check: preview January's data.
print(monthly_dataframes['January'].head())

import os

# Write every month's DataFrame to expense_data/<Month>_2023.csv,
# creating the output directory on first use.
output_dir = 'expense_data'
os.makedirs(output_dir, exist_ok=True)

for month_name, month_data in monthly_dataframes.items():
    file_path = os.path.join(output_dir, f'{month_name}_2023.csv')
    month_data.to_csv(file_path, index=False, encoding='utf-8')
    print(f'Saved {month_name} data to {file_path}')

import mysql.connector

# Create a new database and table if they do not exist

def create_database_and_table():
    """Create the ``expenses_db`` database and its ``expenses`` table if absent.

    The cursor and the connection are released even when one of the
    statements fails, so a failed setup does not leak a server connection.

    NOTE(review): ``create_mysql_connection`` is not defined in the visible
    source — confirm where it comes from.
    NOTE(review): this function targets ``expenses_db`` while the insert and
    query code connect to ``expenses_tracker`` — confirm which name is intended.
    """
    connection = create_mysql_connection()
    try:
        cursor = connection.cursor()
        try:
            cursor.execute("CREATE DATABASE IF NOT EXISTS expenses_db")
            cursor.execute("USE expenses_db")
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS expenses (
                    date DATE,
                    category VARCHAR(255),
                    payment_mode VARCHAR(255),
                    description VARCHAR(255),
                    amount_paid FLOAT,
                    cashback FLOAT,
                    month VARCHAR(255)
                )
            ''')
            connection.commit()
        finally:
            cursor.close()
    finally:
        connection.close()

# Function to insert data into MySQL database

def insert_data_into_mysql(month_data):
    """Insert one month's expense rows into the ``expenses`` table.

    Args:
        month_data: DataFrame with the columns ``Date``, ``Category``,
            ``Payment Mode``, ``Description``, ``Amount Paid``, ``Cashback``
            and ``Month``.

    Database errors are reported on stdout and not raised (best-effort
    insert); nothing is committed when an error occurs. The connection is
    always closed before returning.
    """
    import pymysql

    insert_query = '''
        INSERT INTO expenses (date, category, payment_mode, description, amount_paid, cashback, month)
        VALUES (%s, %s, %s, %s, %s, %s, %s)
    '''
    source_columns = [
        'Date', 'Category', 'Payment Mode', 'Description',
        'Amount Paid', 'Cashback', 'Month',
    ]
    # Plain tuples, in the column order expected by the INSERT statement.
    rows = list(month_data[source_columns].itertuples(index=False, name=None))

    mydb = None
    try:
        # NOTE(review): credentials are hard-coded; consider reading them from
        # the environment or a config file.
        mydb = pymysql.connect(
            host="localhost",            # MySQL server host
            user="root",                 # MySQL username
            password="sql_1234h",        # MySQL password
            database="expenses_tracker"  # database must already exist
        )
        with mydb.cursor() as cursor:
            # One batched call instead of one round trip per row.
            cursor.executemany(insert_query, rows)
        mydb.commit()
    except pymysql.MySQLError as err:
        # The connection is a PyMySQL one, so PyMySQL's exception hierarchy
        # is the one that can actually be raised here.
        print(f"Error: {err}")
    finally:
        if mydb is not None:
            mydb.close()

    

# Insert all monthly data into the database
# NOTE: insert_data_into_mysql() returns nothing and only prints the errors it
# catches, so the confirmation below is printed unconditionally and does not
# prove the rows were written.

for month_name, month_data in monthly_dataframes.items():

    insert_data_into_mysql(month_data)

    print(f'Data for {month_name} inserted into MySQL database')

import pymysql

import pandas as pd

import seaborn as sns

import matplotlib.pyplot as plt

# Load the distinct rows of the expenses table into a DataFrame.
# NOTE(review): credentials are hard-coded; consider reading them from the
# environment or a config file.
mydb = pymysql.connect(
    host="localhost",
    user="root",
    password="sql_1234h",
    database="expenses_tracker"
)

try:
    with mydb.cursor() as mycursor:
        # Query the database to fetch data
        mycursor.execute("SELECT DISTINCT * FROM expenses")
        myresult = mycursor.fetchall()

        # Column names come from the cursor description of the executed query
        columns = [col[0] for col in mycursor.description]
finally:
    # Release the connection as well as the cursor once the rows are in memory
    mydb.close()

# Convert the data to a pandas DataFrame
df = pd.DataFrame(myresult, columns=columns)

# Display the last few rows of the DataFrame
print(df.tail())

# 1. Dataset overview: size, dtypes and summary statistics
row_count, column_count = df.shape
print(f"Observations (Rows): {row_count}")
print(f"Variables (Columns): {column_count}")

print("\nData Types:", df.dtypes, sep="\n")
print("\nBasic Statistics:", df.describe(include='all'), sep="\n")

# 2. Split the columns into categorical (object dtype) and numerical ones
categorical_columns = df.select_dtypes(include=['object']).columns
numerical_columns = df.select_dtypes(include=['float64', 'int64']).columns

print(f"Categorical Columns: {list(categorical_columns)}")
print(f"Numerical Columns: {list(numerical_columns)}")

# Distribution of every numerical column (histogram with KDE overlay)
for col in numerical_columns:
    sns.histplot(df[col], kde=True)
    plt.title(f'Distribution of {col}')
    plt.show()

# Frequency table of every categorical column
for col in categorical_columns:
    print(f"\n{col} Value Counts:", df[col].value_counts(), sep="\n")

# 3. Relationships between variables: pairwise scatter plots first
sns.pairplot(df)
plt.show()

# ...then the correlation matrix of the numerical columns as a heatmap
correlation_matrix = df[numerical_columns].corr()

heatmap_fig, heatmap_ax = plt.subplots(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f', ax=heatmap_ax)
heatmap_ax.set_title("Correlation Between Numerical Variables")
plt.show()

from scipy.stats import zscore

# 4. Outlier detection: a value is flagged when its Z-score is above 3 or
#    below -3 within its column
z_scores = df[numerical_columns].apply(zscore)
outliers = z_scores.abs() > 3

print("\nOutliers Detected (Per Column):", outliers.sum(), sep="\n")

# One boxplot per numerical column to eyeball the flagged values
for col in numerical_columns:
    plt.figure(figsize=(8, 4))
    sns.boxplot(data=df, x=col)
    plt.title(f'Boxplot for {col}')
    plt.show()

# 5. Missing values: per-column counts plus a heatmap of the null mask
missing_mask = df.isnull()
print("\nMissing Values Per Column:", missing_mask.sum(), sep="\n")

plt.figure(figsize=(8, 6))
sns.heatmap(missing_mask, cbar=False, cmap='viridis')
plt.title('Heatmap of Missing Values')
plt.show()

# 6. Duplicate rows: report how many exist, then drop them
duplicate_count = df.duplicated().sum()
print("\nNumber of Duplicate Rows:", duplicate_count)

df = df.drop_duplicates()
print("Duplicates removed. Updated row count:", df.shape[0])

#7. Ensure 'date' column is in datetime format, then plot the monthly totals
if 'date' in df.columns:
    df['date'] = pd.to_datetime(df['date'])

    # Extract year and month
    df['year_month'] = df['date'].dt.to_period('M')

    # Plot trends over time. The expenses table stores the amount in
    # 'amount_paid'; the previous check for an 'amount' column never matched,
    # so the trend was never plotted.
    if 'amount_paid' in df.columns:
        monthly_expenses = df.groupby('year_month')['amount_paid'].sum()
        monthly_expenses.plot(kind='line', marker='o', figsize=(10, 6))
        plt.title("Monthly Expense Trends")
        plt.xlabel("Year-Month")
        plt.ylabel("Total Amount")
        plt.grid()
        plt.show()

# 8. Spread of the numerical columns: standard deviation...
numeric_std = df[numerical_columns].std()
print("\nVariability (Standard Deviation) for Numerical Columns:", numeric_std, sep="\n")

# ...and coefficient of variation (standard deviation as a % of the mean)
cv = (numeric_std / df[numerical_columns].mean()) * 100
print("\nCoefficient of Variation (%):", cv, sep="\n")

#9. Example: Compare actual expenses with expected budget (if applicable)
# The amount column of the expenses table is 'amount_paid' (there is no
# 'amount' column). The comparison still only runs when a 'budget' column is
# present, which the expenses table created in this file does not define.
if 'amount_paid' in df.columns and 'budget' in df.columns:
    df['difference'] = df['amount_paid'] - df['budget']

    print("\nDifferences Between Actual and Budgeted Amounts:")
    print(df[['amount_paid', 'budget', 'difference']].head())

    # Visualize discrepancies
    plt.figure(figsize=(8, 6))
    sns.histplot(df['difference'], kde=True, color='red')
    plt.title("Distribution of Differences (Actual vs. Budget)")
    plt.show()

# 10. Mean 'amount_paid' per 'category', printed and drawn as a bar chart
if not {'category', 'amount_paid'}.issubset(df.columns):
    print("Either 'category' or 'amount_paid' column is missing.")
else:
    category_summary = df.groupby('category')['amount_paid'].mean()
    print("\nAverage Amount Paid by Category:", category_summary, sep="\n")

    category_summary.plot(kind='bar', figsize=(10, 6), color='teal')
    plt.title("Average Expense by Category")
    plt.xlabel("Category")
    plt.ylabel("Average Amount Paid")
    plt.xticks(rotation=45)
    plt.show()

import streamlit as st

import pandas as pd

import pymysql

import matplotlib.pyplot as plt

import seaborn as sns

import calendar

# Database connection shared by every dashboard query below.
# NOTE(review): credentials are hard-coded; consider reading them from the
# environment or Streamlit secrets.
mydb = pymysql.connect(host="localhost", user="root", password="sql_1234h", database="expenses_tracker")
mycursor = mydb.cursor()

# Streamlit app title
st.title("SQL-Based Data Visualizations")

# Query titles in menu order; the visible label of each entry is "<n>. <title>".
_query_titles = (
    "Total Spending by Category in All Months",
    "Monthly Spending Trends",
    "Spending by Payment Mode",
    "Total Cashback Earned",
    "Top 5 Spending Categories",
    "Highest Spending Month",
    "Spending Breakdown by Day of the Week",
    "Average Spending Per Transaction",
    "Spending by Category and Month",
    "Spending by Payment Mode and Month",
    "Spending Distribution Across Categories (Boxplot)",
    "Spending Count by Payment Mode",
    "Average Spending in All Months",
    "Total Cashback by Payment Mode",
    "Total Spending on Bills in May",
    "Spending by Category in the Last 5 Days of year",
    "Food Spending Across All Months",
    "Spending by Category with Cashback",
    "Entertainment Spending Across All Months",
    "Average Spending by Category",
    "Highest Spending Categories in December",
    "Spending by Category and Payment Mode",
    "Highest Spending Categories in January",
    "Average Cashback by Payment Mode",
    "Monthly Cashback Earned",
    "December Spending",
    "Average Spending Per Transaction by Category",
    "Monthly Average Spending",
    "Spending by Category Over Time",
    "Average Spending Per Transaction by Category",
)
option = [f"{number}. {title}" for number, title in enumerate(_query_titles, start=1)]

# Dropdown to select the query; the chosen label is echoed back to the page.
dv = st.selectbox("choose a option", option)
st.write(dv)

# Execute the query that matches the selected option.
#
# Every option follows the same sequence: sub-header, SQL query, optional
# chart, raw result table. The options are therefore described once in
# _QUERY_CATALOGUE as (sub-header, SQL, renderer) and run by a single
# dispatch at the bottom. A renderer draws its chart(s) and returns the
# DataFrame that is shown as the raw table.
#
# Behavioural fixes compared with the former if/elif chain:
#   * metric options no longer raise TypeError when the aggregate is NULL
#     (e.g. empty table); they show $0.00, as the "Bills in May" option did;
#   * month / year-month labels are zero-padded so they sort chronologically
#     ("01", "02", ... instead of "1", "10", "11", "12", "2");
#   * Matplotlib figures are closed after being handed to Streamlit so they
#     do not accumulate across reruns.


def _first_value_or_zero(frame, column):
    """Return the first value of *column*, or 0 when it is missing or NULL."""
    if frame.empty:
        return 0
    value = frame[column].iloc[0]
    return 0 if pd.isna(value) else value


def _table_only(frame):
    """Renderer for options that only show the raw result table."""
    return frame


def _bar(index, value):
    """Build a renderer drawing *value* as a bar chart indexed by *index*."""
    def render(frame):
        st.bar_chart(frame.set_index(index)[value])
        return frame
    return render


def _line(index, value):
    """Build a renderer drawing *value* as a line chart indexed by *index*."""
    def render(frame):
        st.line_chart(frame.set_index(index)[value])
        return frame
    return render


def _pie(value, label):
    """Build a renderer drawing *value* as a pie chart labelled by *label*."""
    def render(frame):
        fig, ax = plt.subplots()
        ax.pie(frame[value], labels=frame[label], autopct='%1.1f%%', startangle=90)
        ax.axis('equal')
        st.pyplot(fig)
        plt.close(fig)
        return frame
    return render


def _metric(label, column):
    """Build a renderer showing the single aggregate in *column* as a metric."""
    def render(frame):
        st.metric(label=label, value=f"${_first_value_or_zero(frame, column):,.2f}")
        return frame
    return render


def _pivot_total_spent(chart, index, columns):
    """Build a renderer charting ``total_spent`` pivoted by *index* x *columns*."""
    def render(frame):
        chart(frame.pivot(index=index, columns=columns, values='total_spent'))
        return frame
    return render


def _render_category_by_month(frame):
    """Bar chart of total spending per category, one series per month."""
    frame = frame.dropna(subset=['category', 'total_spent'])
    # Month as a zero-padded string: a proper label that also sorts correctly.
    frame = frame.assign(month=frame['month'].astype(str).str.zfill(2))
    st.bar_chart(frame.set_index(['category', 'month'])['total_spent'].unstack(fill_value=0))
    return frame


def _render_category_by_year_month(frame):
    """Bar chart of total spending per year-month, one series per category."""
    frame['year_month'] = (
        frame['year'].astype(str) + '-' + frame['month'].astype(str).str.zfill(2)
    )
    st.bar_chart(frame.pivot(index='year_month', columns='category', values='total_spent'))
    return frame


def _render_category_boxplot(frame):
    """Boxplot of the individual amounts paid, grouped by category."""
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.boxplot(x='category', y='amount_paid', data=frame, ax=ax)
    ax.set_title("Spending Distribution Across Categories")
    ax.set_xlabel("Category")
    ax.set_ylabel("Amount Paid")
    st.pyplot(fig)
    plt.close(fig)
    return frame


def _render_spending_and_cashback(frame):
    """Two bar charts per category: total spending, then total cashback."""
    by_category = frame.set_index('category')
    st.subheader("Total Spending by Category with Cashback (Bar Chart)")
    st.bar_chart(by_category['total_spent'])
    st.subheader("Total Cashback by Category (Bar Chart)")
    st.bar_chart(by_category['total_cashback'])
    return frame


# option label -> (sub-header, SQL, renderer)
_QUERY_CATALOGUE = {
    "1. Total Spending by Category in All Months": (
        "Total Spending by Category in All Months",
        """
        SELECT category, MONTH(date) AS month, SUM(amount_paid) AS total_spent
        FROM expenses
        GROUP BY category, MONTH(date)
        ORDER BY category, month;
        """,
        _render_category_by_month,
    ),
    "2. Monthly Spending Trends": (
        "Monthly Spending Trends",
        """
        SELECT MONTH(date) AS month, SUM(amount_paid) AS total_spent
        FROM expenses
        GROUP BY MONTH(date)
        ORDER BY month;
        """,
        _line('month', 'total_spent'),
    ),
    "3. Spending by Payment Mode": (
        "Spending by Payment Mode",
        """
        SELECT payment_mode, SUM(amount_paid) AS total_spent
        FROM expenses
        GROUP BY payment_mode;
        """,
        _pie('total_spent', 'payment_mode'),
    ),
    "4. Total Cashback Earned": (
        "Total Cashback Earned",
        """
        SELECT SUM(cashback) AS total_cashback
        FROM expenses
        WHERE cashback > 0;
        """,
        _metric("Total Cashback Earned", 'total_cashback'),
    ),
    "5. Top 5 Spending Categories": (
        "Top 5 Spending Categories",
        """
        SELECT category, SUM(amount_paid) AS total_spent
        FROM expenses
        GROUP BY category
        ORDER BY total_spent DESC
        LIMIT 5;
        """,
        _table_only,
    ),
    "6. Highest Spending Month": (
        "Highest Spending Month (Considering Year)",
        """
        SELECT EXTRACT(YEAR FROM date) AS year, EXTRACT(MONTH FROM date) AS month, SUM(amount_paid) AS total_spent
        FROM expenses
        GROUP BY EXTRACT(YEAR FROM date), EXTRACT(MONTH FROM date)
        ORDER BY total_spent DESC
        LIMIT 1;
        """,
        _table_only,
    ),
    "7. Spending Breakdown by Day of the Week": (
        "Spending Breakdown by Day of the Week",
        """
        SELECT DAYNAME(date) AS day, SUM(amount_paid) AS total_spent
        FROM expenses
        GROUP BY day
        ORDER BY FIELD(day, 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday');
        """,
        _bar('day', 'total_spent'),
    ),
    "8. Average Spending Per Transaction": (
        "Average Spending Per Transaction",
        """
        SELECT AVG(amount_paid) AS avg_spent
        FROM expenses;
        """,
        _metric("Average Transaction Amount", 'avg_spent'),
    ),
    "9. Spending by Category and Month": (
        "Spending by Category and Month (Considering Year)",
        """
        SELECT
            EXTRACT(YEAR FROM date) AS year,
            EXTRACT(MONTH FROM date) AS month,
            category,
            SUM(amount_paid) AS total_spent
        FROM
            expenses
        GROUP BY
            EXTRACT(YEAR FROM date), EXTRACT(MONTH FROM date), category
        ORDER BY
            year, month;
        """,
        _render_category_by_year_month,
    ),
    "10. Spending by Payment Mode and Month": (
        "Spending by Payment Mode and Month (Stacked Bar Chart)",
        """
        SELECT EXTRACT(MONTH FROM date) AS month, payment_mode, SUM(amount_paid) AS total_spent
        FROM expenses
        GROUP BY EXTRACT(MONTH FROM date), payment_mode
        ORDER BY EXTRACT(MONTH FROM date);
        """,
        _pivot_total_spent(st.bar_chart, 'month', 'payment_mode'),
    ),
    "11. Spending Distribution Across Categories (Boxplot)": (
        "Spending Distribution Across Categories (Boxplot)",
        """
        SELECT category, amount_paid
        FROM expenses;
        """,
        _render_category_boxplot,
    ),
    "12. Spending Count by Payment Mode": (
        "Spending Count by Payment Mode",
        """
        SELECT payment_mode, COUNT(*) AS transaction_count
        FROM expenses
        GROUP BY payment_mode
        ORDER BY transaction_count DESC;
        """,
        _bar('payment_mode', 'transaction_count'),
    ),
    "13. Average Spending in All Months": (
        "Average Spending in All Months",
        """
        SELECT MONTH(date) AS month, AVG(amount_paid) AS avg_spent
        FROM expenses
        GROUP BY MONTH(date)
        ORDER BY month;
        """,
        _line('month', 'avg_spent'),
    ),
    "14. Total Cashback by Payment Mode": (
        "Total Cashback by Payment Mode",
        """
        SELECT payment_mode, SUM(cashback) AS total_cashback
        FROM expenses
        WHERE cashback > 0
        GROUP BY payment_mode;
        """,
        _pie('total_cashback', 'payment_mode'),
    ),
    "15. Total Spending on Bills in May": (
        "Total Spending on Bills in May",
        """
        SELECT SUM(amount_paid) AS total_spent
        FROM expenses
        WHERE category = 'Bills' AND EXTRACT(MONTH FROM date) = 5;
        """,
        _metric("Total Spending on Bills in May", 'total_spent'),
    ),
    "16. Spending by Category in the Last 5 Days of year": (
        "Spending by Category in the Last 5 Days of year",
        """
        SELECT category, SUM(amount_paid) AS total_spent
        FROM expenses
        WHERE date BETWEEN '2023-12-27' AND '2023-12-31'
        GROUP BY category
        ORDER BY total_spent DESC;
        """,
        _bar('category', 'total_spent'),
    ),
    "17. Food Spending Across All Months": (
        "Food Spending Across All Months",
        """
        SELECT EXTRACT(MONTH FROM date) AS month, SUM(amount_paid) AS total_food_spent
        FROM expenses
        WHERE category = 'Food'
        GROUP BY EXTRACT(MONTH FROM date)
        ORDER BY EXTRACT(MONTH FROM date);
        """,
        _bar('month', 'total_food_spent'),
    ),
    "18. Spending by Category with Cashback": (
        "Spending by Category with Cashback",
        """
        SELECT category, SUM(amount_paid) AS total_spent, SUM(cashback) AS total_cashback
        FROM expenses
        WHERE cashback > 0
        GROUP BY category
        ORDER BY total_spent DESC;
        """,
        _render_spending_and_cashback,
    ),
    "19. Entertainment Spending Across All Months": (
        "Entertainment Spending Across All Months",
        """
        SELECT EXTRACT(MONTH FROM date) AS month, SUM(amount_paid) AS total_entertainment_spent
        FROM expenses
        WHERE category = 'Entertainment'
        GROUP BY EXTRACT(MONTH FROM date)
        ORDER BY EXTRACT(MONTH FROM date);
        """,
        _bar('month', 'total_entertainment_spent'),
    ),
    "20. Average Spending by Category": (
        "Average Spending by Category",
        """
        SELECT category, AVG(amount_paid) AS avg_spent
        FROM expenses
        GROUP BY category
        ORDER BY avg_spent DESC;
        """,
        _bar('category', 'avg_spent'),
    ),
    "21. Highest Spending Categories in December": (
        "Highest Spending Categories in December (Line Chart)",
        """
        SELECT category, SUM(amount_paid) AS total_spent
        FROM expenses
        WHERE EXTRACT(MONTH FROM date) = 12
        GROUP BY category
        ORDER BY total_spent DESC
        LIMIT 5;
        """,
        _line('category', 'total_spent'),
    ),
    "22. Spending by Category and Payment Mode": (
        "Spending by Category and Payment Mode (Stacked Bar Chart)",
        """
        SELECT category, payment_mode, SUM(amount_paid) AS total_spent
        FROM expenses
        GROUP BY category, payment_mode
        ORDER BY category;
        """,
        _pivot_total_spent(st.bar_chart, 'category', 'payment_mode'),
    ),
    "23. Highest Spending Categories in January": (
        "Highest Spending Categories in January (Bar Chart)",
        """
        SELECT category, SUM(amount_paid) AS total_spent
        FROM expenses
        WHERE EXTRACT(MONTH FROM date) = 1
        GROUP BY category
        ORDER BY total_spent DESC
        LIMIT 5;
        """,
        _bar('category', 'total_spent'),
    ),
    "24. Average Cashback by Payment Mode": (
        "Average Cashback by Payment Mode (Bar Chart)",
        """
        SELECT payment_mode, AVG(cashback) AS avg_cashback
        FROM expenses
        WHERE cashback > 0
        GROUP BY payment_mode
        ORDER BY avg_cashback DESC;
        """,
        _bar('payment_mode', 'avg_cashback'),
    ),
    "25. Monthly Cashback Earned": (
        "Monthly Cashback Earned (Line Chart)",
        """
        SELECT EXTRACT(MONTH FROM date) AS month, SUM(cashback) AS total_cashback
        FROM expenses
        WHERE cashback > 0
        GROUP BY EXTRACT(MONTH FROM date)
        ORDER BY month;
        """,
        _line('month', 'total_cashback'),
    ),
    "26. December Spending": (
        "Spending in December (Category-wise)",
        """
        SELECT category, SUM(amount_paid) AS total_spent
        FROM expenses
        WHERE EXTRACT(MONTH FROM date) = 12
        GROUP BY category
        ORDER BY total_spent DESC;
        """,
        _bar('category', 'total_spent'),
    ),
    "27. Average Spending Per Transaction by Category": (
        "Average Spending Per Transaction by Category",
        """
        SELECT category, AVG(amount_paid) AS avg_spent
        FROM expenses
        GROUP BY category
        ORDER BY avg_spent DESC;
        """,
        _bar('category', 'avg_spent'),
    ),
    "28. Monthly Average Spending": (
        "Monthly Average Spending (Line Chart)",
        """
        SELECT EXTRACT(MONTH FROM date) AS month, AVG(amount_paid) AS avg_spent
        FROM expenses
        GROUP BY EXTRACT(MONTH FROM date)
        ORDER BY month;
        """,
        _line('month', 'avg_spent'),
    ),
    "29. Spending by Category Over Time": (
        "Spending by Category Over Time (Line Chart)",
        """
        SELECT DATE(date) AS spending_date, category, SUM(amount_paid) AS total_spent
        FROM expenses
        GROUP BY spending_date, category
        ORDER BY spending_date;
        """,
        _pivot_total_spent(st.line_chart, 'spending_date', 'category'),
    ),
    "30. Average Spending Per Transaction by Category": (
        "Average Spending Per Transaction by Category",
        """
        SELECT category, AVG(amount_paid) AS avg_spent_per_transaction
        FROM expenses
        GROUP BY category
        ORDER BY avg_spent_per_transaction DESC;
        """,
        _bar('category', 'avg_spent_per_transaction'),
    ),
}

# Run the selected option: sub-header, query, chart(s), raw table.
_selected_entry = _QUERY_CATALOGUE.get(dv)
if _selected_entry is not None:
    _subheader, _sql, _render = _selected_entry
    st.subheader(_subheader)
    st.write(_render(pd.read_sql(_sql, mydb)))
ecosyste.ms

Data

Tools

Indexes

Applications

Experiments

Awesome

https://github.com/harish-siva/analyzing-personal-expenses

Awesome Lists containing this project

README