Spaces:

fmegahed
/

timeseries_visualization

Running

App Files Files Community

timeseries_visualization / scripts /download_fred_data.py

fmegahed

Cleaning up the app

789e257 about 1 month ago

raw

history blame contribute delete

4.11 kB

	"""Download real FRED datasets for the time-series visualization app.

	Uses only ``urllib.request`` (stdlib) for the HTTP download; ``pandas`` is the only third-party dependency.

	Series
	------
	* OHUR — Ohio Unemployment Rate (%, Monthly, 1976–present)
	* OHMFG — Ohio Manufacturing Employment (1000s, SA, Monthly)
	* INMFG — Indiana Manufacturing Employment
	* MIMFG — Michigan Manufacturing Employment
	* TXMFG — Texas Manufacturing Employment
	* CAMFG — California Manufacturing Employment

	Run once locally, then commit the resulting CSVs::

	    python scripts/download_fred_data.py
	"""

	from __future__ import annotations

	import io
	import urllib.request
	from pathlib import Path

	import pandas as pd

	# All output paths hang off the project root, i.e. the directory that
	# contains scripts/ (two levels above this file).
	PROJECT_ROOT = Path(__file__).resolve().parent.parent
	DATA_DIR = PROJECT_ROOT.joinpath("data")
	# Created eagerly at import time so later writes never hit a missing directory.
	DATA_DIR.mkdir(exist_ok=True, parents=True)

	# URL template for FRED's CSV export; ``series_id`` is filled in per request.
	_FRED_CSV_URL = "https://fred.stlouisfed.org/graph/fredgraph.csv?id={series_id}"


	def _fetch_fred(series_id: str) -> pd.DataFrame:
	    """Download a single FRED series and return a two-column DataFrame.

	    Parameters
	    ----------
	    series_id : str
	        FRED series identifier (e.g. ``"OHUR"``); substituted into
	        ``_FRED_CSV_URL``.

	    Returns
	    -------
	    pd.DataFrame
	        Column ``date`` (parsed with ``pd.to_datetime``) and a column named
	        after ``series_id`` (numeric). Rows with a missing date or value
	        are dropped.

	    Raises
	    ------
	    ValueError
	        If the downloaded payload does not parse into exactly two columns,
	        e.g. when something other than the expected CSV is returned.
	    """
	    url = _FRED_CSV_URL.format(series_id=series_id)
	    print(f" Downloading {series_id} …")
	    with urllib.request.urlopen(url, timeout=30) as resp:  # noqa: S310
	        raw = resp.read().decode("utf-8")
	    df = pd.read_csv(io.StringIO(raw))

	    # Fail with a readable message instead of pandas' "Length mismatch"
	    # error when the payload is not the expected date/value CSV.
	    if df.shape[1] != 2:
	        raise ValueError(
	            f"Unexpected response for FRED series {series_id!r}: "
	            f"expected 2 columns, got {df.shape[1]} ({list(df.columns)!r})"
	        )

	    df.columns = ["date", series_id]
	    # FRED uses "." for missing values — coerce to NaN
	    df[series_id] = pd.to_numeric(df[series_id], errors="coerce")
	    df["date"] = pd.to_datetime(df["date"])
	    return df.dropna()


	# ---------------------------------------------------------------------------
	# 1. Single series: Ohio Unemployment Rate
	# ---------------------------------------------------------------------------
	def build_ohio_unemployment() -> pd.DataFrame:
	    """Fetch the ``OHUR`` series and expose its values as ``unemployment_rate``.

	    Returns
	    -------
	    pd.DataFrame
	        The frame produced by ``_fetch_fred("OHUR")`` with the ``OHUR``
	        column renamed to ``unemployment_rate``.
	    """
	    return _fetch_fred("OHUR").rename(columns={"OHUR": "unemployment_rate"})


	# ---------------------------------------------------------------------------
	# 2. Multi-series wide: Manufacturing Employment by State
	# ---------------------------------------------------------------------------
	# FRED series id -> column label used in the wide manufacturing table.
	# Insertion order determines both download order and column order.
	_MFG_SERIES: dict[str, str] = {
	    "OHMFG": "Ohio",
	    "INMFG": "Indiana",
	    "MIMFG": "Michigan",
	    "TXMFG": "Texas",
	    "CAMFG": "California",
	}


	def build_manufacturing_wide() -> pd.DataFrame:
	    """Assemble one wide table with a column per state in ``_MFG_SERIES``.

	    Each series is downloaded with ``_fetch_fred`` and its value column is
	    renamed to the state label. The per-state frames are then inner-joined
	    on ``date``, so only dates present in every series survive.

	    Returns
	    -------
	    pd.DataFrame
	        ``date`` plus one column per state, sorted by ``date`` with a
	        fresh 0..n-1 index.
	    """
	    per_state = [
	        _fetch_fred(sid).rename(columns={sid: state_name})
	        for sid, state_name in _MFG_SERIES.items()
	    ]

	    # Fold the remaining frames into the first one, keeping shared dates only.
	    merged = per_state[0]
	    for other in per_state[1:]:
	        merged = merged.merge(other, on="date", how="inner")

	    return merged.sort_values("date").reset_index(drop=True)


	# ---------------------------------------------------------------------------
	# 3. Multi-series long: same data melted
	# ---------------------------------------------------------------------------
	def build_manufacturing_long(wide: pd.DataFrame) -> pd.DataFrame:
	    """Reshape the wide manufacturing table into long (tidy) form.

	    Parameters
	    ----------
	    wide : pd.DataFrame
	        Table with a ``date`` column and one value column per state.

	    Returns
	    -------
	    pd.DataFrame
	        Columns ``date``, ``state`` and ``manufacturing_employment``,
	        ordered by ``date`` then ``state`` with a fresh 0..n-1 index.
	    """
	    return (
	        pd.melt(
	            wide,
	            id_vars="date",
	            var_name="state",
	            value_name="manufacturing_employment",
	        )
	        .sort_values(["date", "state"])
	        .reset_index(drop=True)
	    )


	# ---------------------------------------------------------------------------
	# Main
	# ---------------------------------------------------------------------------
	def main() -> None:
	    """Download every demo dataset and write each one as a CSV in ``DATA_DIR``.

	    Files written, in order: ``demo_ohio_unemployment.csv``,
	    ``demo_manufacturing_wide.csv`` and ``demo_manufacturing_long.csv``.
	    A row count and the destination path are printed after each write.
	    """

	    def _write_and_report(frame: pd.DataFrame, filename: str) -> None:
	        # Save without the index column, then echo what was written.
	        target = DATA_DIR / filename
	        frame.to_csv(target, index=False)
	        print(f" -> {len(frame)} rows => {target}\n")

	    print("=== Downloading FRED data ===\n")

	    # 1. Ohio Unemployment
	    _write_and_report(build_ohio_unemployment(), "demo_ohio_unemployment.csv")

	    # 2. Manufacturing wide
	    wide = build_manufacturing_wide()
	    _write_and_report(wide, "demo_manufacturing_wide.csv")

	    # 3. Manufacturing long (derived from the wide table, no extra download)
	    _write_and_report(build_manufacturing_long(wide), "demo_manufacturing_long.csv")

	    print("Done.")


	# Run the download only when executed as a script, never on import.
	if __name__ == "__main__":
	    main()