Assignment2 / app.py
Dun3Co's picture
Upload 2 files
59b7172 verified
# start by importing the necessary packages
#standard
import numpy as np
import pandas as pd
#plt packages
import seaborn as sns
import altair as alt
import matplotlib.pyplot as plt
#streamlit
import streamlit as st
# Load seaborn's "tips" example dataset; the rest of the script uses its
# 'total_bill', 'tip' and 'size' columns.
tips = sns.load_dataset("tips")
# Tip as a share of the bill, scaled to 0-100 so that the value shown under
# the "Tip percentage" label really is a percentage (e.g. 16.08) rather than
# a 0-1 fraction (0.16).
tips['percentage'] = tips['tip'] / tips['total_bill'] * 100

# Page header: title plus the question the dashboard is meant to answer.
st.title("Tips Dataset Analysis")
st.subheader("What is the effect of the group size on the tip in percentage and as amount and on total bill amount?")
# Sidebar controls: which group sizes to include and which measure to analyse.
# NOTE(review): the body of this `with` had lost its indentation; all widgets
# below are assumed to belong in the sidebar under the "Filters" heading.
with st.sidebar:
    st.subheader("Filters")

    # Distinct group sizes present in the data, in ascending order.
    grp_size = sorted(tips['size'].unique().tolist())
    # Nothing is pre-selected; the script treats an empty selection as
    # "no filter" and falls back to the full dataset.
    selected_size = st.multiselect("Select group size(s):", options=grp_size, default=None)

    # Column name -> human-readable label shown in the UI.
    feature_options = {
        'total_bill': 'Total bill',
        'tip': 'Tip',
        'percentage': 'Tip percentage',
    }
    # The selectbox returns the column name (dict key); format_func only
    # changes how each option is displayed.
    selected_feature = st.selectbox(
        "Select feature to analyze:",
        options=list(feature_options.keys()),
        format_func=lambda x: feature_options[x],
    )
# Restrict the data to the chosen group sizes; with nothing selected the
# whole dataset is used.
if selected_size:
    to_app = tips[tips['size'].isin(selected_size)]
else:
    to_app = tips

# Label and the two averages are reused by the text and both KPI tiles.
feature_label = feature_options[selected_feature]
filtered_mean = to_app[selected_feature].mean()
overall_mean = tips[selected_feature].mean()

# Dynamic text: filtered average next to the full-dataset average.
st.write(
    f"Average {feature_label} by Group Size is {filtered_mean:.2f} "
    f"compared to {overall_mean:.2f} which is the full dataset, "
    "at all times (independent of sliders)."
)

# KPI tiles: left = current selection, right = full dataset.
k1, k2 = st.columns(2)
kpi1 = k1.metric(f"Average {feature_label} per grp size(s)", f"{filtered_mean:.2f}")
kpi2 = k2.metric(f"Average {feature_label} with all data", f"{overall_mean:.2f}")
# --- Chart: average of the selected feature per group size ---
# One row per group size in the (possibly filtered) data, holding the mean
# of whichever feature the user selected.
summary = to_app.groupby('size', as_index=False)[selected_feature].mean()

# Bars and data labels use the same x/y encoding, so it is defined once and
# shared by both layers.
base = alt.Chart(summary).encode(
    x=alt.X('size:O', title='Group Size'),
    y=alt.Y(f'{selected_feature}:Q', title=f'Average {feature_options[selected_feature]}'),
)

chart = base.mark_bar().properties(
    width=400,
    height=300,
    title=f"Average {feature_options[selected_feature]} by Group Size",
)

# Data labels sit just above each bar. Two decimals match the KPI tiles and
# keep small values distinguishable (one decimal would collapse tip shares
# such as 0.14 and 0.16 into the same "0.1"/"0.2" label).
text = base.mark_text(
    align='center',
    baseline='bottom',
    dy=-2,  # nudge the label above the top of the bar
    fontSize=13,
).encode(
    text=alt.Text(f'{selected_feature}:Q', format='.2f'),
)

# Layer the labels over the bars and render at the container's width.
final_chart = chart + text
st.altair_chart(final_chart, use_container_width=True)
#st.write(f"Average {feature_options[selected_feature]} by Group Size is {to_app[selected_feature].mean():.2f} compared to {tips[selected_feature].mean():.2f} which is the full dataset, at all times (independent of sliders).")