Deploy: data flow monitoring, email alerts, chatbot validation
Browse files- backend/api/main.py +34 -0
- backend/services/__init__.py +0 -0
- backend/services/data_flow_monitor.py +164 -0
- backend/services/email_alerter.py +96 -0
- config/settings.py +15 -0
- src/chatbot/guardrails.py +87 -0
- src/chatbot/vineyard_chatbot.py +36 -5
backend/api/main.py
CHANGED
|
@@ -149,6 +149,38 @@ async def _sensor_refresh_loop(interval_sec: int = 120):
|
|
| 149 |
await asyncio.sleep(interval_sec)
|
| 150 |
|
| 151 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
@asynccontextmanager
|
| 153 |
async def lifespan(app: FastAPI):
|
| 154 |
global _start_time
|
|
@@ -166,10 +198,12 @@ async def lifespan(app: FastAPI):
|
|
| 166 |
import asyncio
|
| 167 |
ims_task = asyncio.create_task(_ims_refresh_loop())
|
| 168 |
sensor_task = asyncio.create_task(_sensor_refresh_loop())
|
|
|
|
| 169 |
|
| 170 |
yield
|
| 171 |
ims_task.cancel()
|
| 172 |
sensor_task.cancel()
|
|
|
|
| 173 |
log.info("SolarWine API shutting down (uptime=%.0fs)", get_uptime())
|
| 174 |
|
| 175 |
|
|
|
|
| 149 |
await asyncio.sleep(interval_sec)
|
| 150 |
|
| 151 |
|
| 152 |
+
async def _data_flow_alert_loop(interval_sec: int = 300):
|
| 153 |
+
"""Background loop: check data flow health, send email alerts if red."""
|
| 154 |
+
import asyncio
|
| 155 |
+
from backend.services.data_flow_monitor import DataFlowMonitor
|
| 156 |
+
from backend.services.email_alerter import EmailAlerter
|
| 157 |
+
from backend.api.events import event_bus
|
| 158 |
+
|
| 159 |
+
await asyncio.sleep(60) # let startup finish
|
| 160 |
+
monitor = DataFlowMonitor()
|
| 161 |
+
alerter = EmailAlerter()
|
| 162 |
+
if alerter.enabled:
|
| 163 |
+
log.info("Email alerter active → %s", os.environ.get("ALERT_EMAIL_TO", ""))
|
| 164 |
+
else:
|
| 165 |
+
log.info("Email alerter disabled (set SMTP_HOST + ALERT_EMAIL_TO to enable)")
|
| 166 |
+
|
| 167 |
+
while True:
|
| 168 |
+
try:
|
| 169 |
+
from backend.api.deps import get_datahub
|
| 170 |
+
hub = get_datahub()
|
| 171 |
+
loop = asyncio.get_event_loop()
|
| 172 |
+
status = await loop.run_in_executor(None, monitor.check_all, hub)
|
| 173 |
+
# Notify SSE clients so the frontend status indicator updates
|
| 174 |
+
await event_bus.notify("health")
|
| 175 |
+
# Send email alerts for red sources
|
| 176 |
+
alerted = alerter.check_and_alert(status)
|
| 177 |
+
if alerted:
|
| 178 |
+
log.warning("Data flow alerts sent for: %s", ", ".join(alerted))
|
| 179 |
+
except Exception as exc:
|
| 180 |
+
log.error("Data flow alert check failed: %s", exc)
|
| 181 |
+
await asyncio.sleep(interval_sec)
|
| 182 |
+
|
| 183 |
+
|
| 184 |
@asynccontextmanager
|
| 185 |
async def lifespan(app: FastAPI):
|
| 186 |
global _start_time
|
|
|
|
| 198 |
import asyncio
|
| 199 |
ims_task = asyncio.create_task(_ims_refresh_loop())
|
| 200 |
sensor_task = asyncio.create_task(_sensor_refresh_loop())
|
| 201 |
+
alert_task = asyncio.create_task(_data_flow_alert_loop())
|
| 202 |
|
| 203 |
yield
|
| 204 |
ims_task.cancel()
|
| 205 |
sensor_task.cancel()
|
| 206 |
+
alert_task.cancel()
|
| 207 |
log.info("SolarWine API shutting down (uptime=%.0fs)", get_uptime())
|
| 208 |
|
| 209 |
|
backend/services/__init__.py
ADDED
|
File without changes
|
backend/services/data_flow_monitor.py
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Data flow monitor — checks IMS and ThingsBoard data freshness.
|
| 3 |
+
|
| 4 |
+
Returns per-source status (green / yellow / red) with age and messages.
|
| 5 |
+
Used by the /api/health/data-sources endpoint and the email alerter.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import logging
|
| 11 |
+
from datetime import datetime, timezone
|
| 12 |
+
from typing import Any
|
| 13 |
+
|
| 14 |
+
from config.settings import (
|
| 15 |
+
IMS_STALE_YELLOW_MIN,
|
| 16 |
+
IMS_STALE_RED_MIN,
|
| 17 |
+
TB_STALE_YELLOW_MIN,
|
| 18 |
+
TB_STALE_RED_MIN,
|
| 19 |
+
ENERGY_STALE_YELLOW_MIN,
|
| 20 |
+
ENERGY_STALE_RED_MIN,
|
| 21 |
+
)
|
| 22 |
+
from src.data.data_providers import DataHub
|
| 23 |
+
|
| 24 |
+
log = logging.getLogger("solarwine.monitor")
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def _classify(age_minutes: float | None, yellow: float, red: float) -> str:
|
| 28 |
+
"""Return green / yellow / red based on age thresholds."""
|
| 29 |
+
if age_minutes is None:
|
| 30 |
+
return "red"
|
| 31 |
+
if age_minutes < yellow:
|
| 32 |
+
return "green"
|
| 33 |
+
if age_minutes < red:
|
| 34 |
+
return "yellow"
|
| 35 |
+
return "red"
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def _status_message(source: str, status: str, age: float | None) -> str:
|
| 39 |
+
if status == "green":
|
| 40 |
+
return f"{source} data is fresh"
|
| 41 |
+
if age is not None:
|
| 42 |
+
return f"{source} data is {age:.0f} min old"
|
| 43 |
+
return f"{source} data is unavailable"
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
class DataFlowMonitor:
|
| 47 |
+
"""Computes per-source data flow status."""
|
| 48 |
+
|
| 49 |
+
def check_all(self, hub: DataHub) -> dict[str, Any]:
|
| 50 |
+
now_iso = datetime.now(timezone.utc).isoformat()
|
| 51 |
+
sources: dict[str, dict] = {}
|
| 52 |
+
|
| 53 |
+
# --- IMS Weather ---
|
| 54 |
+
sources["ims_weather"] = self._check_ims(hub)
|
| 55 |
+
|
| 56 |
+
# --- ThingsBoard Sensors ---
|
| 57 |
+
sources["tb_sensors"] = self._check_tb_sensors(hub)
|
| 58 |
+
|
| 59 |
+
# --- ThingsBoard Energy ---
|
| 60 |
+
sources["tb_energy"] = self._check_tb_energy(hub)
|
| 61 |
+
|
| 62 |
+
# Overall status: worst of all sources
|
| 63 |
+
statuses = [s["status"] for s in sources.values()]
|
| 64 |
+
if "red" in statuses:
|
| 65 |
+
overall = "red"
|
| 66 |
+
elif "yellow" in statuses:
|
| 67 |
+
overall = "yellow"
|
| 68 |
+
else:
|
| 69 |
+
overall = "green"
|
| 70 |
+
|
| 71 |
+
return {
|
| 72 |
+
"overall": overall,
|
| 73 |
+
"checked_at": now_iso,
|
| 74 |
+
"sources": sources,
|
| 75 |
+
}
|
| 76 |
+
|
| 77 |
+
def _check_ims(self, hub: DataHub) -> dict:
|
| 78 |
+
try:
|
| 79 |
+
wx = hub.weather.get_current()
|
| 80 |
+
if wx and "error" not in wx:
|
| 81 |
+
age = float(wx.get("age_minutes", -1))
|
| 82 |
+
if age < 0:
|
| 83 |
+
age = None
|
| 84 |
+
status = _classify(age, IMS_STALE_YELLOW_MIN, IMS_STALE_RED_MIN)
|
| 85 |
+
return {
|
| 86 |
+
"status": status,
|
| 87 |
+
"age_minutes": round(age, 1) if age is not None else None,
|
| 88 |
+
"last_reading": wx.get("timestamp_local") or wx.get("timestamp_utc"),
|
| 89 |
+
"message": _status_message("IMS weather", status, age),
|
| 90 |
+
}
|
| 91 |
+
return {
|
| 92 |
+
"status": "red",
|
| 93 |
+
"age_minutes": None,
|
| 94 |
+
"last_reading": None,
|
| 95 |
+
"message": f"IMS weather error: {wx.get('error', 'unavailable')}",
|
| 96 |
+
}
|
| 97 |
+
except Exception as exc:
|
| 98 |
+
log.warning("IMS health check failed: %s", exc)
|
| 99 |
+
return {
|
| 100 |
+
"status": "red",
|
| 101 |
+
"age_minutes": None,
|
| 102 |
+
"last_reading": None,
|
| 103 |
+
"message": f"IMS weather unreachable: {exc}",
|
| 104 |
+
}
|
| 105 |
+
|
| 106 |
+
def _check_tb_sensors(self, hub: DataHub) -> dict:
|
| 107 |
+
try:
|
| 108 |
+
snap = hub.vine_sensors.get_snapshot(light=True)
|
| 109 |
+
if snap and "error" not in snap:
|
| 110 |
+
stale = snap.get("staleness_minutes")
|
| 111 |
+
age = float(stale) if stale is not None else None
|
| 112 |
+
status = _classify(age, TB_STALE_YELLOW_MIN, TB_STALE_RED_MIN)
|
| 113 |
+
return {
|
| 114 |
+
"status": status,
|
| 115 |
+
"age_minutes": round(age, 1) if age is not None else None,
|
| 116 |
+
"last_reading": snap.get("timestamp"),
|
| 117 |
+
"message": _status_message("ThingsBoard sensors", status, age),
|
| 118 |
+
}
|
| 119 |
+
return {
|
| 120 |
+
"status": "red",
|
| 121 |
+
"age_minutes": None,
|
| 122 |
+
"last_reading": None,
|
| 123 |
+
"message": f"TB sensors error: {snap.get('error', 'unavailable')}",
|
| 124 |
+
}
|
| 125 |
+
except Exception as exc:
|
| 126 |
+
log.warning("TB sensors health check failed: %s", exc)
|
| 127 |
+
return {
|
| 128 |
+
"status": "red",
|
| 129 |
+
"age_minutes": None,
|
| 130 |
+
"last_reading": None,
|
| 131 |
+
"message": f"TB sensors unreachable: {exc}",
|
| 132 |
+
}
|
| 133 |
+
|
| 134 |
+
def _check_tb_energy(self, hub: DataHub) -> dict:
|
| 135 |
+
try:
|
| 136 |
+
en = hub.energy.get_current()
|
| 137 |
+
if en and "error" not in en:
|
| 138 |
+
power = en.get("power_kw")
|
| 139 |
+
# Energy doesn't always expose age_minutes — infer from cache TTL
|
| 140 |
+
age = float(en["age_minutes"]) if "age_minutes" in en else None
|
| 141 |
+
status = _classify(age, ENERGY_STALE_YELLOW_MIN, ENERGY_STALE_RED_MIN)
|
| 142 |
+
if age is None:
|
| 143 |
+
# If no age but we have data, assume green (just fetched)
|
| 144 |
+
status = "green"
|
| 145 |
+
return {
|
| 146 |
+
"status": status,
|
| 147 |
+
"age_minutes": round(age, 1) if age is not None else None,
|
| 148 |
+
"power_kw": round(float(power), 2) if power is not None else None,
|
| 149 |
+
"message": _status_message("Energy telemetry", status, age),
|
| 150 |
+
}
|
| 151 |
+
return {
|
| 152 |
+
"status": "red",
|
| 153 |
+
"age_minutes": None,
|
| 154 |
+
"power_kw": None,
|
| 155 |
+
"message": f"Energy error: {en.get('error', 'unavailable')}",
|
| 156 |
+
}
|
| 157 |
+
except Exception as exc:
|
| 158 |
+
log.warning("Energy health check failed: %s", exc)
|
| 159 |
+
return {
|
| 160 |
+
"status": "red",
|
| 161 |
+
"age_minutes": None,
|
| 162 |
+
"power_kw": None,
|
| 163 |
+
"message": f"Energy unreachable: {exc}",
|
| 164 |
+
}
|
backend/services/email_alerter.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Email alerter — sends notifications when data sources go stale.
|
| 3 |
+
|
| 4 |
+
Activated by setting env vars: SMTP_HOST, SMTP_PORT, SMTP_USER, SMTP_PASSWORD, ALERT_EMAIL_TO.
|
| 5 |
+
Respects a per-source cooldown to avoid spamming.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import logging
|
| 11 |
+
import os
|
| 12 |
+
import smtplib
|
| 13 |
+
import time
|
| 14 |
+
from email.mime.text import MIMEText
|
| 15 |
+
from typing import Any
|
| 16 |
+
|
| 17 |
+
from config.settings import ALERT_COOLDOWN_MIN
|
| 18 |
+
|
| 19 |
+
log = logging.getLogger("solarwine.alerter")
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
class EmailAlerter:
|
| 23 |
+
"""Sends email alerts when data sources are in 'red' status."""
|
| 24 |
+
|
| 25 |
+
def __init__(self):
|
| 26 |
+
self._last_alert: dict[str, float] = {} # source_name -> epoch of last alert
|
| 27 |
+
self._smtp_host = os.environ.get("SMTP_HOST", "")
|
| 28 |
+
self._smtp_port = int(os.environ.get("SMTP_PORT", "587"))
|
| 29 |
+
self._smtp_user = os.environ.get("SMTP_USER", "")
|
| 30 |
+
self._smtp_password = os.environ.get("SMTP_PASSWORD", "")
|
| 31 |
+
self._alert_to = os.environ.get("ALERT_EMAIL_TO", "")
|
| 32 |
+
self._alert_from = os.environ.get("ALERT_EMAIL_FROM", self._smtp_user)
|
| 33 |
+
|
| 34 |
+
@property
|
| 35 |
+
def enabled(self) -> bool:
|
| 36 |
+
return bool(self._smtp_host and self._alert_to)
|
| 37 |
+
|
| 38 |
+
def check_and_alert(self, status: dict[str, Any]) -> list[str]:
|
| 39 |
+
"""Check status and send alerts for red sources. Returns list of alerted sources."""
|
| 40 |
+
if not self.enabled:
|
| 41 |
+
return []
|
| 42 |
+
|
| 43 |
+
alerted: list[str] = []
|
| 44 |
+
sources = status.get("sources", {})
|
| 45 |
+
|
| 46 |
+
for source_name, info in sources.items():
|
| 47 |
+
if info.get("status") != "red":
|
| 48 |
+
# Source recovered — clear cooldown so next outage triggers immediately
|
| 49 |
+
self._last_alert.pop(source_name, None)
|
| 50 |
+
continue
|
| 51 |
+
|
| 52 |
+
# Check cooldown
|
| 53 |
+
now = time.time()
|
| 54 |
+
last = self._last_alert.get(source_name, 0)
|
| 55 |
+
if (now - last) < ALERT_COOLDOWN_MIN * 60:
|
| 56 |
+
continue
|
| 57 |
+
|
| 58 |
+
# Send alert
|
| 59 |
+
message = info.get("message", f"{source_name} is down")
|
| 60 |
+
age = info.get("age_minutes")
|
| 61 |
+
subject = f"[SolarWine] Data flow alert: {source_name}"
|
| 62 |
+
body = (
|
| 63 |
+
f"Data source: {source_name}\n"
|
| 64 |
+
f"Status: RED\n"
|
| 65 |
+
f"Age: {age:.0f} min\n" if age is not None else ""
|
| 66 |
+
f"Detail: {message}\n"
|
| 67 |
+
f"\nChecked at: {status.get('checked_at', 'unknown')}\n"
|
| 68 |
+
f"Overall system status: {status.get('overall', 'unknown')}\n"
|
| 69 |
+
f"\n---\nSolarWine Data Flow Monitor"
|
| 70 |
+
)
|
| 71 |
+
|
| 72 |
+
if self._send_email(subject, body):
|
| 73 |
+
self._last_alert[source_name] = now
|
| 74 |
+
alerted.append(source_name)
|
| 75 |
+
|
| 76 |
+
return alerted
|
| 77 |
+
|
| 78 |
+
def _send_email(self, subject: str, body: str) -> bool:
|
| 79 |
+
"""Send an email via SMTP. Returns True on success."""
|
| 80 |
+
try:
|
| 81 |
+
msg = MIMEText(body, "plain", "utf-8")
|
| 82 |
+
msg["Subject"] = subject
|
| 83 |
+
msg["From"] = self._alert_from
|
| 84 |
+
msg["To"] = self._alert_to
|
| 85 |
+
|
| 86 |
+
with smtplib.SMTP(self._smtp_host, self._smtp_port, timeout=10) as server:
|
| 87 |
+
server.starttls()
|
| 88 |
+
if self._smtp_user and self._smtp_password:
|
| 89 |
+
server.login(self._smtp_user, self._smtp_password)
|
| 90 |
+
server.sendmail(self._alert_from, self._alert_to.split(","), msg.as_string())
|
| 91 |
+
|
| 92 |
+
log.info("Alert email sent: %s → %s", subject, self._alert_to)
|
| 93 |
+
return True
|
| 94 |
+
except Exception as exc:
|
| 95 |
+
log.error("Failed to send alert email: %s", exc)
|
| 96 |
+
return False
|
config/settings.py
CHANGED
|
@@ -202,3 +202,18 @@ DP_BASE_CROP_VALUE = 0.10
|
|
| 202 |
SIMULATION_LOG_DIR = DATA_DIR / "simulation_logs"
|
| 203 |
SIMULATION_LOG_PATH = SIMULATION_LOG_DIR / "control_loop.parquet"
|
| 204 |
DAILY_PLAN_PATH = DATA_DIR / "daily_plan.json"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 202 |
SIMULATION_LOG_DIR = DATA_DIR / "simulation_logs"
|
| 203 |
SIMULATION_LOG_PATH = SIMULATION_LOG_DIR / "control_loop.parquet"
|
| 204 |
DAILY_PLAN_PATH = DATA_DIR / "daily_plan.json"
|
| 205 |
+
|
| 206 |
+
# ---------------------------------------------------------------------------
|
| 207 |
+
# Data Flow Monitoring
|
| 208 |
+
# ---------------------------------------------------------------------------
|
| 209 |
+
|
| 210 |
+
# Staleness thresholds (minutes) — green → yellow → red
|
| 211 |
+
IMS_STALE_YELLOW_MIN = 60 # IMS weather data older than this = yellow
|
| 212 |
+
IMS_STALE_RED_MIN = 180 # IMS weather data older than this = red
|
| 213 |
+
TB_STALE_YELLOW_MIN = 15 # ThingsBoard sensor data older than this = yellow
|
| 214 |
+
TB_STALE_RED_MIN = 60 # ThingsBoard sensor data older than this = red
|
| 215 |
+
ENERGY_STALE_YELLOW_MIN = 15 # Energy telemetry older than this = yellow
|
| 216 |
+
ENERGY_STALE_RED_MIN = 60 # Energy telemetry older than this = red
|
| 217 |
+
|
| 218 |
+
# Email alerts (activated when SMTP_HOST + ALERT_EMAIL_TO env vars are set)
|
| 219 |
+
ALERT_COOLDOWN_MIN = 60 # minimum minutes between repeat alerts for same source
|
src/chatbot/guardrails.py
CHANGED
|
@@ -360,4 +360,91 @@ def tag_tool_result(tool_name: str, tool_result: dict) -> dict:
|
|
| 360 |
"Warn the user that conditions may have changed."
|
| 361 |
)
|
| 362 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 363 |
return tagged
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 360 |
"Warn the user that conditions may have changed."
|
| 361 |
)
|
| 362 |
|
| 363 |
+
# Validate numeric ranges — flag physically impossible values
|
| 364 |
+
range_warnings = validate_numeric_ranges(tool_name, tool_result)
|
| 365 |
+
if range_warnings:
|
| 366 |
+
tagged["_range_warnings"] = range_warnings
|
| 367 |
+
|
| 368 |
return tagged
|
| 369 |
+
|
| 370 |
+
|
| 371 |
+
# ---------------------------------------------------------------------------
|
| 372 |
+
# 5. Numeric range validation — catch sensor faults & model errors
|
| 373 |
+
# ---------------------------------------------------------------------------
|
| 374 |
+
|
| 375 |
+
# Physical bounds for common fields (field_name → (min, max, unit))
|
| 376 |
+
_PHYSICAL_BOUNDS: dict[str, tuple[float, float, str]] = {
|
| 377 |
+
"air_temperature_c": (-10.0, 55.0, "°C"),
|
| 378 |
+
"ghi_w_m2": (0.0, 1400.0, "W/m²"),
|
| 379 |
+
"rh_percent": (0.0, 100.0, "%"),
|
| 380 |
+
"wind_speed_ms": (0.0, 50.0, "m/s"),
|
| 381 |
+
"A_net": (-5.0, 40.0, "µmol CO₂/m²/s"),
|
| 382 |
+
"power_kw": (0.0, 60.0, "kW"),
|
| 383 |
+
"daily_kwh": (0.0, 500.0, "kWh"),
|
| 384 |
+
"PAR": (0.0, 2500.0, "µmol/m²/s"),
|
| 385 |
+
"Tleaf": (-5.0, 60.0, "°C"),
|
| 386 |
+
"VPD": (0.0, 10.0, "kPa"),
|
| 387 |
+
"CO2": (200.0, 800.0, "ppm"),
|
| 388 |
+
"CWSI": (0.0, 1.0, ""),
|
| 389 |
+
"staleness_minutes": (0.0, 1440.0, "min"),
|
| 390 |
+
}
|
| 391 |
+
|
| 392 |
+
|
| 393 |
+
def validate_numeric_ranges(tool_name: str, result: dict) -> list[str]:
|
| 394 |
+
"""Check tool result values against physical bounds.
|
| 395 |
+
|
| 396 |
+
Returns a list of warning strings for out-of-range values.
|
| 397 |
+
"""
|
| 398 |
+
warnings: list[str] = []
|
| 399 |
+
|
| 400 |
+
for key, (lo, hi, unit) in _PHYSICAL_BOUNDS.items():
|
| 401 |
+
val = result.get(key)
|
| 402 |
+
if val is None:
|
| 403 |
+
continue
|
| 404 |
+
try:
|
| 405 |
+
v = float(val)
|
| 406 |
+
except (TypeError, ValueError):
|
| 407 |
+
continue
|
| 408 |
+
if v < lo or v > hi:
|
| 409 |
+
warnings.append(
|
| 410 |
+
f"{key}={v:.1f}{unit} is outside physical range "
|
| 411 |
+
f"[{lo:.0f}–{hi:.0f}] — possible sensor fault"
|
| 412 |
+
)
|
| 413 |
+
|
| 414 |
+
return warnings
|
| 415 |
+
|
| 416 |
+
|
| 417 |
+
# ---------------------------------------------------------------------------
|
| 418 |
+
# 6. Cross-source consistency check
|
| 419 |
+
# ---------------------------------------------------------------------------
|
| 420 |
+
|
| 421 |
+
def check_cross_source_consistency(
|
| 422 |
+
weather: Optional[dict],
|
| 423 |
+
sensors: Optional[dict],
|
| 424 |
+
) -> list[str]:
|
| 425 |
+
"""Compare IMS weather and TB sensor readings for consistency.
|
| 426 |
+
|
| 427 |
+
Returns a list of caveat strings when sources diverge significantly.
|
| 428 |
+
"""
|
| 429 |
+
caveats: list[str] = []
|
| 430 |
+
if not weather or not sensors:
|
| 431 |
+
return caveats
|
| 432 |
+
if "error" in weather or "error" in sensors:
|
| 433 |
+
return caveats
|
| 434 |
+
|
| 435 |
+
# Temperature: IMS air temp vs TB treatment air temp
|
| 436 |
+
ims_temp = weather.get("air_temperature_c")
|
| 437 |
+
tb_temp = sensors.get("treatment_air_temp_c")
|
| 438 |
+
if ims_temp is not None and tb_temp is not None:
|
| 439 |
+
try:
|
| 440 |
+
diff = abs(float(ims_temp) - float(tb_temp))
|
| 441 |
+
if diff > 5.0:
|
| 442 |
+
caveats.append(
|
| 443 |
+
f"IMS air temperature ({float(ims_temp):.1f}°C) and on-site sensor "
|
| 444 |
+
f"({float(tb_temp):.1f}°C) differ by {diff:.1f}°C — one source may "
|
| 445 |
+
f"be stale or malfunctioning."
|
| 446 |
+
)
|
| 447 |
+
except (TypeError, ValueError):
|
| 448 |
+
pass
|
| 449 |
+
|
| 450 |
+
return caveats
|
src/chatbot/vineyard_chatbot.py
CHANGED
|
@@ -26,6 +26,7 @@ from typing import Optional
|
|
| 26 |
from src.data_providers import DataHub
|
| 27 |
from src.genai_utils import extract_json_object, get_genai_client, get_google_api_key
|
| 28 |
from src.chatbot.guardrails import (
|
|
|
|
| 29 |
classify_query,
|
| 30 |
estimate_confidence,
|
| 31 |
get_source_label,
|
|
@@ -323,17 +324,30 @@ _RULE_KEYWORDS = {
|
|
| 323 |
_PINNED_RULES = {"no_shade_before_10", "energy_budget", "temperature_transition"}
|
| 324 |
|
| 325 |
|
| 326 |
-
def retrieve_relevant_rules(query: str, max_rules: int =
|
| 327 |
"""Retrieve the most relevant biology rules for a query.
|
| 328 |
|
| 329 |
Returns up to ``max_rules`` rule names, always including pinned rules.
|
| 330 |
-
Uses keyword matching
|
|
|
|
|
|
|
| 331 |
"""
|
| 332 |
query_lower = query.lower()
|
| 333 |
-
|
|
|
|
| 334 |
|
| 335 |
for rule_name, keywords in _RULE_KEYWORDS.items():
|
| 336 |
-
score =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 337 |
if score > 0:
|
| 338 |
scores[rule_name] = score
|
| 339 |
|
|
@@ -345,7 +359,6 @@ def retrieve_relevant_rules(query: str, max_rules: int = 5) -> list[str]:
|
|
| 345 |
break
|
| 346 |
selected.add(name)
|
| 347 |
|
| 348 |
-
# If we still have room, add remaining pinned rules
|
| 349 |
return [r for r in BIOLOGY_RULES if r in selected]
|
| 350 |
|
| 351 |
|
|
@@ -943,6 +956,24 @@ class VineyardChatbot:
|
|
| 943 |
f"Data is {data_age:.0f} minutes old — conditions may have changed."
|
| 944 |
)
|
| 945 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 946 |
# Build sources list
|
| 947 |
sources: list[str] = []
|
| 948 |
if tool_name:
|
|
|
|
| 26 |
from src.data_providers import DataHub
|
| 27 |
from src.genai_utils import extract_json_object, get_genai_client, get_google_api_key
|
| 28 |
from src.chatbot.guardrails import (
|
| 29 |
+
check_cross_source_consistency,
|
| 30 |
classify_query,
|
| 31 |
estimate_confidence,
|
| 32 |
get_source_label,
|
|
|
|
| 324 |
_PINNED_RULES = {"no_shade_before_10", "energy_budget", "temperature_transition"}
|
| 325 |
|
| 326 |
|
| 327 |
+
def retrieve_relevant_rules(query: str, max_rules: int = 6) -> list[str]:
|
| 328 |
"""Retrieve the most relevant biology rules for a query.
|
| 329 |
|
| 330 |
Returns up to ``max_rules`` rule names, always including pinned rules.
|
| 331 |
+
Uses weighted keyword matching with partial-match support:
|
| 332 |
+
- Exact keyword match: +2 points
|
| 333 |
+
- Partial word overlap: +1 point (e.g. "irrigat" matches "irrigation")
|
| 334 |
"""
|
| 335 |
query_lower = query.lower()
|
| 336 |
+
query_words = set(re.findall(r'\w+', query_lower))
|
| 337 |
+
scores: dict[str, float] = {}
|
| 338 |
|
| 339 |
for rule_name, keywords in _RULE_KEYWORDS.items():
|
| 340 |
+
score = 0.0
|
| 341 |
+
for kw in keywords:
|
| 342 |
+
if kw in query_lower:
|
| 343 |
+
# Exact substring match — strong signal
|
| 344 |
+
score += 2.0
|
| 345 |
+
else:
|
| 346 |
+
# Partial word overlap — weaker signal
|
| 347 |
+
kw_words = set(re.findall(r'\w+', kw))
|
| 348 |
+
overlap = kw_words & query_words
|
| 349 |
+
if overlap:
|
| 350 |
+
score += len(overlap) * 0.5
|
| 351 |
if score > 0:
|
| 352 |
scores[rule_name] = score
|
| 353 |
|
|
|
|
| 359 |
break
|
| 360 |
selected.add(name)
|
| 361 |
|
|
|
|
| 362 |
return [r for r in BIOLOGY_RULES if r in selected]
|
| 363 |
|
| 364 |
|
|
|
|
| 956 |
f"Data is {data_age:.0f} minutes old — conditions may have changed."
|
| 957 |
)
|
| 958 |
|
| 959 |
+
# Range validation warnings
|
| 960 |
+
if tool_result:
|
| 961 |
+
range_warnings = tool_result.get("_range_warnings") or (
|
| 962 |
+
tagged_result.get("_range_warnings") if tool_call else None
|
| 963 |
+
)
|
| 964 |
+
if range_warnings:
|
| 965 |
+
for rw in range_warnings:
|
| 966 |
+
caveats.append(rw)
|
| 967 |
+
|
| 968 |
+
# Cross-source consistency check (when we have both weather + sensors)
|
| 969 |
+
try:
|
| 970 |
+
wx_data = self.hub.weather.get_current()
|
| 971 |
+
sensor_data = self.hub.vine_sensors.get_snapshot(light=True)
|
| 972 |
+
consistency_caveats = check_cross_source_consistency(wx_data, sensor_data)
|
| 973 |
+
caveats.extend(consistency_caveats)
|
| 974 |
+
except Exception:
|
| 975 |
+
pass
|
| 976 |
+
|
| 977 |
# Build sources list
|
| 978 |
sources: list[str] = []
|
| 979 |
if tool_name:
|