| """ |
| QueryChat initialization and filtered DataFrame helpers. |
| |
| Provides convenience wrappers around the ``querychat`` library for |
| natural-language filtering of time-series DataFrames inside a Gradio |
| app. All functions degrade gracefully when the package or an API key |
| is unavailable. |
| """ |
|
|
| from __future__ import annotations |
|
|
| import os |
| from typing import List, Optional |
|
|
| import pandas as pd |
|
|
| try: |
| from querychat.gradio import QueryChat as _QueryChat |
|
|
| _QUERYCHAT_AVAILABLE = True |
| except ImportError: |
| _QUERYCHAT_AVAILABLE = False |
|
|
|
|
| |
| |
| |
|
|
def check_querychat_available() -> bool:
    """Report whether the QueryChat features can be offered to the user.

    Two conditions must hold: the optional *querychat* import at module
    load succeeded, and the ``OPENAI_API_KEY`` environment variable has a
    non-empty value. Callers can use the result to show or hide UI
    elements.

    Returns
    -------
    bool
        ``True`` only when both conditions are met.
    """
    api_key = os.environ.get("OPENAI_API_KEY")
    # Short-circuits to False when the package import failed.
    return _QUERYCHAT_AVAILABLE and bool(api_key)
|
|
|
|
| |
| |
| |
|
|
def _safe_table_name(name: str) -> str:
    """Turn a free-form dataset name into an unquoted-SQL-safe identifier."""
    import re  # function-scope import: no change to the file's import block

    # Spaces were always mapped to "_"; apply the same treatment to every
    # other character outside [A-Za-z0-9_] (hyphens, dots, slashes, ...).
    cleaned = re.sub(r"[^0-9A-Za-z_]", "_", name)
    # Identifiers should start with a letter; prefix when they do not
    # (leading digit/underscore, or an empty name).
    if not cleaned or not cleaned[0].isalpha():
        cleaned = f"t_{cleaned}"
    return cleaned


def create_querychat(
    df: pd.DataFrame,
    name: str = "dataset",
    date_col: str = "date",
    y_cols: Optional[List[str]] = None,
    freq_label: str = "",
    client: str = "openai/gpt-5.2-2025-12-11",
):
    """Create and return a QueryChat instance bound to *df*.

    Parameters
    ----------
    df:
        The pandas DataFrame to expose to the chat interface.
    name:
        A human-readable name for the dataset. It is shown verbatim in
        the description and greeting; the SQL table name is derived from
        it by replacing every character outside ``[A-Za-z0-9_]`` with
        ``_`` and prefixing ``t_`` when the result does not start with a
        letter.
    date_col:
        Name of the date/time column (mentioned in the description).
    y_cols:
        Names of the value (numeric) columns. If ``None`` or empty, the
        description says "none specified" and a generic filter example
        is used in the greeting. Labels are converted with ``str``.
    freq_label:
        Optional frequency label (e.g. ``"Monthly"``, ``"Daily"``).
    client:
        Value forwarded as the ``client`` argument of ``QueryChat``.
        Defaults to the model identifier this module has always used.

    Returns
    -------
    QueryChat instance
        The object returned by ``QueryChat()``.

    Raises
    ------
    RuntimeError
        If querychat is not installed.
    """
    if not _QUERYCHAT_AVAILABLE:
        raise RuntimeError(
            "The 'querychat' package is not installed. "
            "Install it with: pip install 'querychat[gradio]'"
        )

    # Column labels are not guaranteed to be strings; coerce so that the
    # join and the greeting example cannot fail on e.g. integer labels.
    value_cols = [str(col) for col in (y_cols or [])]

    value_cols_str = ", ".join(value_cols) if value_cols else "none specified"
    freq_part = f" Frequency: {freq_label}." if freq_label else ""

    data_description = (
        f"This dataset is named '{name}'. "
        f"It contains {len(df):,} rows. "
        f"The date column is '{date_col}'. "
        f"Value columns: {value_cols_str}."
        f"{freq_part}"
    )

    # Tailor the filter suggestion to a real column when one is known.
    if value_cols:
        filter_example = f'- "Filter where {value_cols[0]} > median"'
    else:
        filter_example = '- "Filter where value > 100"'

    greeting = (
        f"Hi! I can help you filter and explore the **{name}** dataset. "
        "Try asking me something like:\n"
        '- "Show only the last 5 years"\n'
        f"{filter_example}\n"
        '- "Show rows from January to March"'
    )

    return _QueryChat(
        data_source=df,
        table_name=_safe_table_name(name),
        client=client,
        data_description=data_description,
        greeting=greeting,
    )
|
|
|
|
| |
| |
| |
|
|
def get_filtered_pandas_df(qc, state_dict=None) -> pd.DataFrame:
    """Extract the currently filtered DataFrame from a QueryChat instance.

    ``qc.df()`` may return something other than a pandas DataFrame (for
    example a wrapper exposing ``to_pandas`` / ``to_native``). This helper
    converts the result to pandas. It is best-effort: it never raises;
    failures are logged and an **empty** DataFrame is returned instead.

    Parameters
    ----------
    qc:
        A QueryChat instance previously created via :func:`create_querychat`
        (any object with a compatible ``df`` method works).
    state_dict:
        The Gradio state dictionary from ``qc.ui()``. When given, it is
        passed to ``qc.df``; otherwise ``qc.df`` is called with no
        arguments.

    Returns
    -------
    pd.DataFrame
        The filtered data as a pandas DataFrame, or an empty DataFrame
        when the data could not be fetched or converted.
    """
    import logging  # function-scope import: no change to the file's import block

    log = logging.getLogger(__name__)

    # Fetch exactly once; a failed fetch is not retried.
    try:
        result = qc.df() if state_dict is None else qc.df(state_dict)
    except Exception:
        log.warning(
            "QueryChat did not return a filtered frame; using an empty DataFrame.",
            exc_info=True,
        )
        return pd.DataFrame()

    try:
        # Wrapper frames that know how to become pandas directly.
        if hasattr(result, "to_pandas"):
            return result.to_pandas()

        # Wrapper frames that only expose their native backend object.
        if hasattr(result, "to_native"):
            native = result.to_native()
            return native if isinstance(native, pd.DataFrame) else pd.DataFrame(native)

        if isinstance(result, pd.DataFrame):
            return result

        # Last resort: let the pandas constructor interpret the value.
        return pd.DataFrame(result)
    except Exception:
        log.warning(
            "Could not convert the QueryChat result to pandas.",
            exc_info=True,
        )

    # Conversion failed: reuse the already-fetched value if it is pandas.
    if isinstance(result, pd.DataFrame):
        return result
    return pd.DataFrame()
|
|