"""Streamlit dashboard: effect of group size on total bill, tip and tip percentage.

Loads seaborn's "tips" dataset, lets the user filter by group size and pick a
feature in the sidebar, then shows the filtered average next to the
full-dataset average and a labelled bar chart of the average per group size.
"""

# standard
import numpy as np
import pandas as pd

# plotting packages
import seaborn as sns
import altair as alt
import matplotlib.pyplot as plt

# streamlit
import streamlit as st

# --- Data -------------------------------------------------------------------
tips = sns.load_dataset("tips")
# Store the tip share on a 0-100 scale so it really is a percentage; as a raw
# fraction (~0.16) the '.1f' chart labels collapse to "0.1"/"0.2" for every bar.
tips['percentage'] = tips['tip'] / tips['total_bill'] * 100

# Column name -> human-readable label used in widgets, text and chart titles.
feature_options = {
    'total_bill': 'Total bill',
    'tip': 'Tip',
    'percentage': 'Tip percentage',
}

# --- Page header ------------------------------------------------------------
st.title("Tips Dataset Analysis")
st.subheader("What is the effect of the group size on the tip in percentage and as amount and on total bill amount?")

# --- Sidebar filters --------------------------------------------------------
with st.sidebar:
    st.subheader("Filters")
    grp_size = sorted(tips['size'].unique().tolist())
    selected_size = st.multiselect(
        "Select group size(s):",
        options=grp_size,
        default=None,
    )
    selected_feature = st.selectbox(
        "Select feature to analyze:",
        options=list(feature_options.keys()),
        format_func=lambda x: feature_options[x],
    )

# An empty selection means "no filter": fall back to the full dataset.
to_app = tips[tips['size'].isin(selected_size)] if selected_size else tips

# Compute each average once and reuse it in the text and the KPI tiles.
feature_label = feature_options[selected_feature]
filtered_mean = to_app[selected_feature].mean()
overall_mean = tips[selected_feature].mean()

# --- Dynamic text -----------------------------------------------------------
st.write(f"Average {feature_label} by Group Size is {filtered_mean:.2f} compared to {overall_mean:.2f} which is the full dataset, at all times (independent of sliders).")

# --- KPIs -------------------------------------------------------------------
k1, k2 = st.columns(2)
k1.metric(f"Average {feature_label} per grp size(s)", f"{filtered_mean:.2f}")
k2.metric(f"Average {feature_label} with all data", f"{overall_mean:.2f}")

# --- Chart ------------------------------------------------------------------
# Average of the selected feature for each group size in the filtered data.
summary = to_app.groupby('size', as_index=False)[selected_feature].mean()

# Shared x/y encodings so the bars and their labels always line up.
base = alt.Chart(summary).encode(
    x=alt.X('size:O', title='Group Size'),
    y=alt.Y(f'{selected_feature}:Q', title=f'Average {feature_label}'),
)

# Bar chart of the average of the selected feature by group size.
chart = base.mark_bar().properties(
    width=400,
    height=300,
    title=f"Average {feature_label} by Group Size",
)

# Data labels drawn just above each bar.
text = base.mark_text(
    align='center',
    baseline='bottom',
    dy=-2,  # nudges the text above the bar
    fontSize=13,
).encode(
    text=alt.Text(f'{selected_feature}:Q', format='.1f'),
)

final_chart = chart + text
st.altair_chart(final_chart, use_container_width=True)