Spaces:

MasanneckLab
/

Withings_Normalization_App

Sleeping

Withings_Normalization_App / Z-Score_Calculator.py

Lars Masanneck

Update Z-Score_Calculator.py

fe8c16e 3 months ago

14.6 kB

	import streamlit as st
	import normalizer_model
	import numpy as np
	import pandas as pd
	import altair as alt
	import plotly.graph_objects as go
	from scipy.stats import norm

	# Page-level settings; issued ahead of every other Streamlit command in this file
	st.set_page_config(layout="wide", page_title="Smartwatch Normative Z-Score Calculator")


	@st.cache_data
	def load_norm_df(path: str):
	    """Load the normative table stored at *path*.

	    The actual parsing is delegated to
	    ``normalizer_model.load_normative_table``; the ``st.cache_data``
	    wrapper lets Streamlit reuse the result for the same *path*.
	    """
	    return normalizer_model.load_normative_table(path)


	# Normative dataset used by the widgets and computations below
	norm_df = load_norm_df("Table_1_summary_measure.csv")

	# Display names for the biomarker codes found in the normative table.
	# Codes missing from this mapping are shown title-cased by the UI code.
	BIOMARKER_LABELS = {
	    "nb_steps": "Number of Steps",
	    "max_steps": "Maximum Steps",
	    "mean_active_time": "Mean Active Time",
	    "sbp": "Systolic Blood Pressure",
	    "dbp": "Diastolic Blood Pressure",
	    "sleep_duration": "Sleep Duration",
	    "avg_night_hr": "Average Night Heart Rate",
	    "nb_moderate_active_minutes": "Moderate Active Minutes",
	    "nb_vigorous_active_minutes": "Vigorous Active Minutes",
	    "weight": "Weight",
	    "pwv": "Pulse Wave Velocity",
	    # further codes can be appended here
	}

	# Codes currently hidden from the biomarker selector.
	# Delete an entry from this set to offer that biomarker again.
	DISABLED_BIOMARKERS = {
	    "weight",
	    "sbp",
	    "dbp",
	    "pwv",
	    "nb_vigorous_active_minutes",
	}


	def _show_disclaimer_once():
	    """Show the scientific-use disclaimer the first time a session runs.

	    The ``disclaimer_shown`` flag stored in ``st.session_state`` keeps the
	    banner from being emitted again on later reruns of the same session.
	    """
	    if "disclaimer_shown" in st.session_state:
	        return
	    st.info(
	        "These calculations are dedicated for scientific purposes only. "
	        "For detailed questions regarding personal health data contact your "
	        "healthcare professionals."
	    )
	    st.session_state.disclaimer_shown = True


	def _collect_sidebar_inputs():
	    """Render the sidebar input widgets and return the selections.

	    Returns:
	        dict with the keys ``region``, ``gender``, ``age_param`` (age in
	        years, or an age-group label which may be ``""``), ``bmi_param``
	        (BMI value, or a category label which may be ``""``),
	        ``biomarker`` (code from the normative table), ``selected_label``
	        (its display name) and ``value`` (the measurement entered).
	    """
	    sidebar = st.sidebar
	    sidebar.header("Input Parameters")

	    # Region, preselecting Western Europe when the table contains it
	    regions = sorted(norm_df["area"].unique())
	    default_region = "Western Europe" if "Western Europe" in regions else regions[0]
	    region = sidebar.selectbox(
	        "Region",
	        regions,
	        index=regions.index(default_region),
	    )

	    # Gender selection
	    gender = sidebar.selectbox("Gender", sorted(norm_df["gender"].unique()))

	    # Age: either exact years or one of the table's age groups
	    sidebar.subheader("Age Input")
	    age_input_mode = sidebar.radio("Age input mode", ("Years", "Group"))
	    if age_input_mode == "Years":
	        age_param = sidebar.number_input(
	            "Age (years)",
	            min_value=0,
	            max_value=120,
	            value=30,
	            step=1,
	        )
	    else:
	        # Order the groups by the number in front of the dash
	        age_groups = sorted(
	            norm_df["Age"].unique(),
	            key=lambda x: int(x.split("-")[0]),
	        )
	        age_param = sidebar.selectbox("Age group", [""] + age_groups)

	    # BMI: either a numeric value or one of the table's categories
	    sidebar.subheader("BMI Input")
	    bmi_input_mode = sidebar.radio("BMI input mode", ("Value", "Category"))
	    if bmi_input_mode == "Value":
	        bmi_param = sidebar.number_input(
	            "BMI",
	            min_value=0.0,
	            max_value=100.0,
	            value=24.0,
	            step=0.1,
	            format="%.1f",
	        )
	    else:
	        bmi_cats = sorted(norm_df["Bmi"].unique())
	        bmi_param = sidebar.selectbox("BMI category", [""] + bmi_cats)

	    # Biomarker selection with friendly labels
	    codes = sorted(
	        c for c in norm_df["Biomarkers"].unique() if c not in DISABLED_BIOMARKERS
	    )
	    friendly = [BIOMARKER_LABELS.get(c, c.title()) for c in codes]
	    # Fall back to the first entry instead of raising ValueError when the
	    # preferred default is not offered (absent from the data or disabled).
	    if "Number of Steps" in friendly:
	        default_idx = friendly.index("Number of Steps")
	    else:
	        default_idx = 0
	    selected_label = sidebar.selectbox("Biomarker", friendly, index=default_idx)
	    biomarker = codes[friendly.index(selected_label)]

	    # Measurement value; all bounds are floats so the widget types agree
	    default_value = 6500.0 if biomarker == "nb_steps" else 0.0
	    mask = norm_df["Biomarkers"].str.lower() == biomarker.lower()
	    max_val = float(norm_df.loc[mask, "max"].max())
	    if np.isfinite(max_val):
	        # number_input rejects a default above its maximum, so clamp it
	        default_value = min(default_value, max_val)
	    else:
	        # No usable maximum in the table: leave the input unbounded above
	        max_val = None
	    value = sidebar.number_input(
	        f"{selected_label} value",
	        min_value=0.0,
	        max_value=max_val,
	        value=default_value,
	        step=1.0,
	    )

	    return {
	        "region": region,
	        "gender": gender,
	        "age_param": age_param,
	        "bmi_param": bmi_param,
	        "biomarker": biomarker,
	        "selected_label": selected_label,
	        "value": value,
	    }


	def _compute_position(inputs):
	    """Run the normative computation for *inputs*.

	    Returns the result of ``normalizer_model.compute_normative_position``,
	    or ``None`` after showing the exception text via ``st.error``.
	    """
	    try:
	        return normalizer_model.compute_normative_position(
	            value=inputs["value"],
	            biomarker=inputs["biomarker"],
	            age_group=inputs["age_param"],
	            region=inputs["region"],
	            gender=inputs["gender"],
	            bmi=inputs["bmi_param"],
	            normative_df=norm_df,
	        )
	    except Exception as e:  # UI boundary: report any model error to the user
	        st.error(f"Error: {e}")
	        return None


	def _render_summary(res, inputs):
	    """Show the headline metrics, the cohort description and the stats table."""
	    st.subheader("Results")
	    headline = [
	        ("Z-Score", f"{res['z_score']:.2f}"),
	        ("Percentile", f"{res['percentile']:.2f}"),
	        ("Mean", f"{res['mean']:.2f}"),
	        ("SD", f"{res['sd']:.2f}"),
	    ]
	    for column, (label, shown) in zip(
	        st.columns(5), headline + [("Sample Size", res["n"])]
	    ):
	        column.metric(label, shown)

	    # Resolve the age group and BMI category for the cohort summary
	    region = inputs["region"]
	    gender = inputs["gender"]
	    age_group_str = normalizer_model._categorize_age(inputs["age_param"], norm_df)
	    bmi_cat = normalizer_model.categorize_bmi(inputs["bmi_param"])
	    st.markdown(
	        f"Basis of calculation: Data from region {region}, "
	        f"gender {gender}, age group {age_group_str}, "
	        f"and BMI category **{bmi_cat}. "
	        f"Sample size: {res['n']}**."
	    )

	    # Detailed statistics table; keys other than the headline ones are
	    # read with .get and shown as "nan" when absent.
	    st.subheader("Detailed Statistics")
	    optional = (
	        ("Median", "median"),
	        ("Q1", "q1"),
	        ("Q3", "q3"),
	        ("IQR", "iqr"),
	        ("MAD", "mad"),
	        ("SE", "se"),
	        ("CI", "ci"),
	    )
	    rows = headline + [("Sample Size", res.get("n", "N/A"))]
	    rows += [(label, f"{res.get(key, float('nan')):.2f}") for label, key in optional]
	    st.table(pd.DataFrame(rows, columns=["Statistic", "Value"]))

	    # Normality assumption note
	    st.write(
	        "*Note: Percentile and z-score estimation assume a normal "
	        "distribution based on global Withings user data stratified by "
	        "the parameters entered.*"
	    )


	def _tail_flag(ratio):
	    """Return the flag text for an IQR/SD ratio (thresholds 1.2 and 1.5)."""
	    if ratio > 1.5:
	        return "Heavier tails"
	    if ratio < 1.2:
	        return "Lighter tails"
	    return "OK"


	def _skew_label(skew):
	    """Return the textual interpretation of a Pearson skewness value."""
	    magnitude = abs(skew)
	    if magnitude <= 0.1:
	        return "Symmetric (OK)"
	    side = "Right" if skew > 0 else "Left"
	    if magnitude <= 0.5:
	        return f"{side} slight skew (usually OK)"
	    if magnitude <= 1.0:
	        return f"{side} noticeable skew"
	    return f"{side} strong skew"


	def _render_normality_checks(res):
	    """Show the normality heuristics table, its legend and any warning."""
	    # Function-scope import: the module is only loaded once results are shown
	    import normality_checks as nc

	    R = nc.iqr_tail_heaviness(res["iqr"], res["sd"])
	    q1_z, q3_z = nc.quartile_z_scores(
	        res["mean"],
	        res["sd"],
	        res["q1"],
	        res["q3"],
	    )
	    skew = nc.pearson_skewness(res["mean"], res["median"], res["sd"])
	    st.subheader("Normality Heuristics")

	    norm_checks = pd.DataFrame(
	        {
	            "Check": [
	                "IQR/SD",
	                "Q1 z-score",
	                "Q3 z-score",
	                "Pearson Skewness",
	            ],
	            "Value": [
	                f"{R:.2f}",
	                f"{q1_z:.2f}",
	                f"{q3_z:.2f}",
	                f"{skew:.2f}",
	            ],
	            "Flag": [
	                _tail_flag(R),
	                # Quartile z-scores are compared against -0.6745 / +0.6745
	                "Deviation" if abs(q1_z + 0.6745) > 0.1 else "OK",
	                "Deviation" if abs(q3_z - 0.6745) > 0.1 else "OK",
	                _skew_label(skew),
	            ],
	        }
	    )
	    st.table(norm_checks)

	    # Skewness interpretation guide
	    st.markdown(
	        "\n".join(
	            [
	                "Pearson Skewness Interpretation:",
	                "- ≈ 0: Symmetric distribution",
	                "- ±0.1 to ±0.5: Slight/moderate skew",
	                "- ±0.5 to ±1: Noticeable skew",
	                "- larger than±1: Strong skew",
	                "",
	                "- Positive values: Right skew (longer tail on right)",
	                "- Negative values: Left skew (longer tail on left)",
	            ]
	        )
	    )

	    # Warn when any flag text lacks "OK"
	    if any("OK" not in flag for flag in norm_checks["Flag"]):
	        st.warning(
	            "Warning: Heuristic checks indicate possible deviations "
	            "from normality; interpret z-score and percentiles with "
	            "caution."
	        )


	def _render_skew_corrected(value, res):
	    """Show the optional skew-corrected z-score and percentile in an expander."""
	    with st.expander("Optional: Skew-Corrected Results"):
	        st.write("Adjusts for skew via Pearson Type III back-transform.")
	        # Plain pipes: "\|" is not a valid Python escape sequence
	        st.write("Error often <1 percentile point when |skew| ≤ 0.5.")
	        st.write("Usually more useful for stronger skewed distributions.")
	        st.write("Note: This is a heuristic and may not always be accurate.")
	        res_skew = normalizer_model.compute_skew_corrected_position(
	            value=value,
	            mean=res["mean"],
	            sd=res["sd"],
	            median=res["median"],
	        )
	        pct_skew = f"{res_skew['percentile_skew_corrected']:.2f}"
	        sc1, sc2 = st.columns(2)
	        sc1.metric(
	            "Skew-Corrected Z-Score",
	            f"{res_skew['z_skew_corrected']:.2f}",
	        )
	        sc2.metric("Skew-Corrected Percentile", pct_skew)


	def _render_visualizations(res):
	    """Draw the standard-normal chart and the bullet gauge for the z-score."""
	    st.markdown("---")
	    st.subheader("Visualizations")

	    # Standard normal density sampled on [-4, 4]
	    z_vals = np.linspace(-4, 4, 400)
	    df_chart = pd.DataFrame({"z": z_vals, "density": norm.pdf(z_vals)})

	    # Shaded area up to the observed z-score
	    area = (
	        alt.Chart(df_chart)
	        .mark_area(color="orange", opacity=0.3)
	        .transform_filter(alt.datum.z <= res["z_score"])
	        .encode(
	            x=alt.X("z:Q", title="z-score"),
	            y=alt.Y("density:Q", title="Density"),
	        )
	    )
	    # Density curve
	    line = (
	        alt.Chart(df_chart)
	        .mark_line(color="orange")
	        .encode(x="z:Q", y="density:Q")
	    )
	    # Vertical rule at the observed z-score
	    vline = (
	        alt.Chart(pd.DataFrame({"z": [res["z_score"]]}))
	        .mark_rule(color="orange")
	        .encode(x="z:Q")
	    )
	    chart = (area + line + vline).properties(
	        width=600,
	        height=300,
	        title="Standard Normal Distribution",
	    )
	    st.altair_chart(chart, use_container_width=True)

	    # Text summary
	    st.write(
	        f"Your value is z = {res['z_score']:.2f}, which places you in "
	        f"the {res['percentile']:.1f}th percentile of a normal "
	        f"distribution."
	    )

	    # Horizontal bullet gauge with an axis from -3 to 3 SD
	    bullet = go.Figure(
	        go.Indicator(
	            mode="number+gauge",
	            value=res["z_score"],
	            number={"suffix": " SD"},
	            gauge={
	                "shape": "bullet",
	                "axis": {
	                    "range": [-3, 3],
	                    "tickmode": "linear",
	                    "dtick": 0.5,
	                },
	                "bar": {"color": "orange"},
	            },
	        )
	    )
	    bullet.update_layout(
	        height=150,
	        margin={"t": 20, "b": 20, "l": 20, "r": 20},
	    )
	    st.plotly_chart(bullet, use_container_width=True)
	    st.write(f"Percentile: {res['percentile']:.1f}%")


	def _render_classification_guide():
	    """Show the z-score classification guide inside an expander."""
	    st.markdown("---")
	    # Unescaped pipes so the rows form a markdown table
	    guide_md = "\n".join(
	        [
	            "How to interpret Z-Scores:",
	            "",
	            "| Z-Score Range | Classification | Percentile Range |",
	            "|:-------------:|:--------------:|:----------------:|",
	            "| z < -2.0 | Very Low | < 2.3% |",
	            "| -2.0 ≤ z < -0.5 | Below Average | 2.3% - 30.9% |",
	            "| -0.5 ≤ z < 0.5 | Average | 30.9% - 69.1% |",
	            "| 0.5 ≤ z < 2.0 | Above Average | 69.1% - 97.7% |",
	            "| z ≥ 2.0 | Very High | > 97.7% |",
	            "",
	            "Context matters:",
	            "- For steps, sleep duration, and active minutes: Higher values are generally better ✓",
	            "- For heart rate: Lower resting values are generally better ✓",
	            "",
	            "A z-score of 0 means you are exactly at the population average for your demographic group.",
	        ]
	    )
	    with st.expander("📊 Z-Score Classification Guide"):
	        st.markdown(guide_md)


	def _render_footer():
	    """Show the attribution and publication footer."""
	    st.markdown("---")
	    st.markdown(
	        "Built with ❤️ in Düsseldorf. © Lars Masanneck 2026. "
	        "Thanks to Withings for sharing this data openly."
	    )
	    st.markdown(
	        "*This tool is part of the publication "
	        '"Population-Normalised Wearable Metrics Quantify Real-World Disability '
	        'in Multiple Sclerosis" currently in review.*'
	    )


	def main():
	    """Build the Z-score calculator page.

	    Renders the disclaimer, title and sidebar inputs. When the compute
	    button is pressed, the normative position is calculated and the
	    results, normality heuristics, skew-corrected values and charts are
	    shown; otherwise a hint is placed in the sidebar. The classification
	    guide and the footer are rendered on every run, including runs where
	    the computation reported an error.
	    """
	    _show_disclaimer_once()
	    st.title("Smartwatch Normative Z-Score Calculator")
	    inputs = _collect_sidebar_inputs()

	    if st.sidebar.button("Compute Normative Z-Score"):
	        res = _compute_position(inputs)
	        # On failure the error is already displayed; skip only the results
	        if res is not None:
	            _render_summary(res, inputs)
	            _render_normality_checks(res)
	            _render_skew_corrected(inputs["value"], res)
	            _render_visualizations(res)
	    else:
	        st.sidebar.info(
	            "Fill in all inputs and click Compute to get normative Z-score."
	        )

	    _render_classification_guide()
	    _render_footer()


	# Build the page when the file is executed as a script
	if __name__ == "__main__":
	    main()