"""Download real FRED datasets for the time-series visualization app.

Downloads use only ``urllib.request`` (stdlib); pandas, used for parsing and
reshaping, is the sole third-party dependency.

Series
------
* OHUR — Ohio Unemployment Rate (%, Monthly, 1976–present)
* OHMFG — Ohio Manufacturing Employment (1000s, SA, Monthly)
* INMFG — Indiana Manufacturing Employment
* MIMFG — Michigan Manufacturing Employment
* TXMFG — Texas Manufacturing Employment
* CAMFG — California Manufacturing Employment

Run once locally, then commit the resulting CSVs::

    python scripts/download_fred_data.py
"""
|
|
from __future__ import annotations

import io
import urllib.parse
import urllib.request
from pathlib import Path

import pandas as pd
|
|
| |
# Two levels up from this file: the script's directory, then its parent.
PROJECT_ROOT = Path(__file__).resolve().parent.parent

# Output folder for the generated CSVs; created at import time if missing.
DATA_DIR = PROJECT_ROOT.joinpath("data")
DATA_DIR.mkdir(exist_ok=True, parents=True)

# URL template for a FRED CSV export; ``{series_id}`` is filled in per series.
_FRED_CSV_URL = "https://fred.stlouisfed.org/graph/fredgraph.csv?id={series_id}"
|
|
|
|
def _fetch_fred(series_id: str) -> pd.DataFrame:
    """Download a single FRED series and return a two-column DataFrame.

    Parameters
    ----------
    series_id:
        FRED series identifier (e.g. ``"OHUR"``). It is URL-quoted before
        being placed in the request and is also used as the name of the
        value column in the result.

    Returns
    -------
    pd.DataFrame
        Columns ``date`` (parsed with ``pd.to_datetime``) and ``<series_id>``
        (numeric). Rows with a missing date or a non-numeric value are
        dropped.

    Raises
    ------
    ValueError
        If the downloaded CSV does not have exactly two columns.

    Errors raised by ``urllib.request.urlopen`` (network failures, HTTP
    errors, timeouts) are not caught and propagate to the caller.
    """
    # Quote the id so characters such as "&", "#" or spaces cannot alter the
    # query string; plain alphanumeric ids are left unchanged.
    url = _FRED_CSV_URL.format(series_id=urllib.parse.quote(series_id, safe=""))
    print(f" Downloading {series_id} …")
    with urllib.request.urlopen(url, timeout=30) as resp:
        raw = resp.read().decode("utf-8")
    df = pd.read_csv(io.StringIO(raw))

    # Fail with a descriptive message instead of pandas' generic
    # "Length mismatch" when the payload is not a (date, value) table.
    if df.shape[1] != 2:
        raise ValueError(
            f"Expected 2 columns (date, value) for FRED series {series_id!r}, "
            f"got {df.shape[1]}: {list(df.columns)}"
        )

    df.columns = ["date", series_id]
    # Non-numeric placeholders become NaN here and are removed by dropna().
    df[series_id] = pd.to_numeric(df[series_id], errors="coerce")
    df["date"] = pd.to_datetime(df["date"])
    df = df.dropna()
    return df
|
|
|
|
| |
| |
| |
def build_ohio_unemployment() -> pd.DataFrame:
    """Fetch the ``OHUR`` series and expose its values as ``unemployment_rate``.

    Returns the frame produced by ``_fetch_fred("OHUR")`` with the value
    column renamed; the ``date`` column is left untouched.
    """
    return _fetch_fred("OHUR").rename(columns={"OHUR": "unemployment_rate"})
|
|
|
|
| |
| |
| |
# Maps each FRED manufacturing-employment series id to the state name used as
# its column label. Insertion order determines the download order and the
# column order of the wide table built from this mapping.
_MFG_SERIES: dict[str, str] = {
    "OHMFG": "Ohio",
    "INMFG": "Indiana",
    "MIMFG": "Michigan",
    "TXMFG": "Texas",
    "CAMFG": "California",
}
|
|
|
|
def build_manufacturing_wide() -> pd.DataFrame:
    """Build the wide manufacturing table: one ``date`` column plus one per state.

    Each series in ``_MFG_SERIES`` is downloaded in mapping order and its
    value column is renamed to the state name. The frames are inner-joined on
    ``date``, so only dates present in every series survive. The result is
    sorted by date and given a fresh 0-based index.
    """
    per_state = [
        _fetch_fred(series_id).rename(columns={series_id: state})
        for series_id, state in _MFG_SERIES.items()
    ]

    merged = per_state[0]
    for frame in per_state[1:]:
        merged = pd.merge(merged, frame, on="date", how="inner")

    ordered = merged.sort_values("date")
    return ordered.reset_index(drop=True)
|
|
|
|
| |
| |
| |
def build_manufacturing_long(wide: pd.DataFrame) -> pd.DataFrame:
    """Reshape the wide manufacturing table into long (tidy) form.

    Every non-``date`` column of *wide* becomes a set of rows with the column
    name in ``state`` and the cell value in ``manufacturing_employment``.
    Rows are ordered by ``date`` then ``state`` and re-indexed from zero.
    """
    tidy = pd.melt(
        wide,
        id_vars="date",
        var_name="state",
        value_name="manufacturing_employment",
    )
    ordered = tidy.sort_values(["date", "state"])
    return ordered.reset_index(drop=True)
|
|
|
|
| |
| |
| |
def _export_csv(frame: pd.DataFrame, filename: str) -> None:
    """Write *frame* to ``DATA_DIR / filename`` without its index and report the row count."""
    destination = DATA_DIR / filename
    frame.to_csv(destination, index=False)
    print(f" -> {len(frame)} rows => {destination}\n")


def main() -> None:
    """Download every dataset and write the three demo CSVs into ``DATA_DIR``.

    Files are produced in this order: the Ohio unemployment series, the wide
    manufacturing table, then the long manufacturing table derived from the
    wide one.
    """
    print("=== Downloading FRED data ===\n")

    _export_csv(build_ohio_unemployment(), "demo_ohio_unemployment.csv")

    wide_table = build_manufacturing_wide()
    _export_csv(wide_table, "demo_manufacturing_wide.csv")

    # The long table is derived from the wide one, so nothing is re-downloaded.
    _export_csv(build_manufacturing_long(wide_table), "demo_manufacturing_long.csv")

    print("Done.")
|
|
|
|
# Run the download only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|