| import streamlit as st |
| import pdfplumber |
| import pandas as pd |
|
|
| |
# Spending categories and the merchant keywords that select them.
#
# classify_transaction() compares each keyword, case-insensitively, as a
# substring of the transaction description.  "Other" deliberately has no
# keywords: it is the fallback for descriptions that match nothing.
#
# NOTE: "Walmart" is listed under both "Groceries" and "Shopping"; the
# category declared first ("Groceries") is the one that takes effect.
CATEGORY_MAPPING = {
    "Groceries": [
        "Walmart", "Kroger", "Whole Foods", "Costco", "Trader Joe", "Safeway",
    ],
    "Dining": [
        "McDonald's", "Starbucks", "Chipotle", "Subway", "Domino", "Pizza",
        "Burger", "Restaurant",
    ],
    "Utilities": [
        "Verizon", "AT&T", "T-Mobile", "Sprint", "Comcast", "Xfinity",
        "Con Edison", "Electric", "Water", "Gas",
    ],
    "Rent": [
        "Apartment", "Rent", "Landlord", "Lease",
    ],
    "Entertainment": [
        "Netflix", "Spotify", "Amazon Prime", "Hulu", "Disney", "Cinema",
    ],
    "Transport": [
        "Uber", "Lyft", "MetroCard", "Gas Station", "Shell", "Chevron",
    ],
    "Healthcare": [
        "Pharmacy", "CVS", "Walgreens", "Doctor", "Hospital", "Dental",
    ],
    "Shopping": [
        "Amazon", "Best Buy", "Target", "Walmart", "Ebay", "Retail",
    ],
    "Other": [],
}
|
|
| |
def classify_transaction(description):
    """Return the spending category whose keyword best matches *description*.

    The description is converted to ``str`` and lower-cased, then every
    keyword in ``CATEGORY_MAPPING`` is tested as a case-insensitive
    substring.  When several keywords match, the longest one wins, so a
    specific keyword such as "Gas Station" is not shadowed by a shorter one
    such as "Gas" that happens to belong to a category declared earlier.
    Keywords of equal length resolve to the category declared first.

    Args:
        description: The transaction description; any value is accepted and
            stringified before matching.

    Returns:
        The matching category name, or ``"Other"`` when no keyword matches.
    """
    text = str(description).lower()
    best_category = "Other"
    best_length = 0
    for category, keywords in CATEGORY_MAPPING.items():
        for keyword in keywords:
            needle = keyword.lower()
            # Strict ">" keeps the earliest-declared category on ties and
            # ignores empty keywords, which would otherwise match anything.
            if len(needle) > best_length and needle in text:
                best_category = category
                best_length = len(needle)
    return best_category
|
|
| |
def _parse_transaction_line(line):
    """Split one statement line into ``(date, description, amount)`` strings.

    The first whitespace-separated token is taken as the date, the last as
    the amount, and everything in between as the description, so multi-word
    merchant names stay intact.  Returns ``None`` when the line has fewer
    than three tokens and therefore cannot hold all three fields.
    """
    tokens = line.split()
    if len(tokens) < 3:
        return None
    # Strip currency symbols and thousands separators so values such as
    # "$1,234.56" survive the later numeric conversion instead of becoming NaN.
    amount = tokens[-1].replace("$", "").replace(",", "")
    return tokens[0], " ".join(tokens[1:-1]), amount


def process_pdf(file):
    """Extract, categorize and summarize the transactions in a PDF statement.

    Every text line that contains at least one digit is treated as a
    candidate transaction and parsed as ``<date> <description...> <amount>``.
    Amounts that cannot be converted to a number become NaN (and are ignored
    by the per-category sum).

    Args:
        file: Whatever ``pdfplumber.open`` accepts; the app passes the object
            returned by Streamlit's file uploader.  May be ``None``.

    Returns:
        A ``(transactions, category_summary)`` tuple of DataFrames.
        ``transactions`` has the columns Date, Description, Amount and
        Category; ``category_summary`` has Category and the summed Amount.
        When *file* is ``None`` an error is shown in the Streamlit UI and
        ``(None, None)`` is returned so callers can always unpack two values.
    """
    if file is None:
        st.error("No file uploaded.")
        return None, None

    with pdfplumber.open(file) as pdf:
        # Extract each page once; the result is reused for both the
        # emptiness check and the join.
        page_texts = [page.extract_text() for page in pdf.pages]
    text = "\n".join(page_text for page_text in page_texts if page_text)

    rows = []
    for line in text.split("\n"):
        # Lines without any digit cannot carry a date or an amount.
        if not any(char.isdigit() for char in line):
            continue
        parsed = _parse_transaction_line(line)
        if parsed is not None:
            rows.append(parsed)

    df = pd.DataFrame(rows, columns=["Date", "Description", "Amount"])
    df["Amount"] = pd.to_numeric(df["Amount"], errors="coerce")
    df["Category"] = df["Description"].apply(classify_transaction)

    category_summary = df.groupby("Category")["Amount"].sum().reset_index()

    return df, category_summary
|
|
| |
# Streamlit page: upload a statement, then show the classified transactions
# and the per-category totals.
#
# NOTE(review): the emoji in these UI strings were mis-encoded in the source,
# and the success message's literal had been split across two lines (a syntax
# error).  The check mark and money bag follow from the surviving characters;
# the bar-chart emoji used for the title and the transactions heading is a
# best guess -- confirm against the intended design.
st.title("📊 Credit Card Statement Classifier")
st.write(
    "Upload a **PDF bank/credit card statement**, and this app will "
    "categorize transactions and show your spending summary."
)

uploaded_file = st.file_uploader("Upload PDF", type=["pdf"])

if uploaded_file is not None:
    st.success("✅ File uploaded successfully!")

    df_result, category_summary = process_pdf(uploaded_file)

    if df_result is not None:
        st.write("### 📊 Classified Transactions:")
        st.dataframe(df_result)

        st.write("### 💰 Spending Summary by Category:")
        st.dataframe(category_summary)
|
|
|
|