api / src /models /phenology.py
Eli Safra
Deploy SolarWine API (FastAPI + Docker, port 7860)
938949f
"""
Phenology tracker for Semillon grapevines in the Negev (Sde Boker).
Three estimation methods (highest confidence wins):
1. **GDD-based** — accumulates Growing Degree Days from IMS temperature data
using base temperature 10°C and thresholds in config/settings.py.
2. **Camera-based** — sends live vineyard image to Gemini Vision for
visual phenological stage detection (optional, requires API key).
3. **Calendar-based** — fallback using month-to-stage mapping.
The public API is unchanged: ``estimate_stage_for_date()`` remains the
primary entry point. New functions ``estimate_stage_by_gdd()`` and
``detect_stage_from_camera()`` are available for higher-confidence estimates.
"""
from __future__ import annotations
import logging
from dataclasses import dataclass, field
from datetime import date, datetime
from pathlib import Path
from typing import List, Optional, Tuple
import numpy as np
import pandas as pd
from config.settings import PHENOLOGY_GDD_THRESHOLDS
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Base temperature (°C) for GDD accumulation (Semillon at Sde Boker).
# Used as the default ``base_temp`` of compute_gdd_from_ims().
GDD_BASE_TEMP_C = 10.0
# Camera URL for live vineyard feed.
# Used as the default ``camera_url`` of detect_stage_from_camera().
CAMERA_URL = "https://app.solarwine.ai/images/yerucham/last_view.jpg"
# ---------------------------------------------------------------------------
# Data containers
# ---------------------------------------------------------------------------
@dataclass(frozen=True)
class PhenologyStage:
    """Immutable record describing a single phenological stage.

    The dataclass is frozen, so instances compare by value, are hashable,
    and cannot be modified after construction.
    """

    id: str  # machine-readable stage key, e.g. "berry_growth"
    label: str  # short human-readable stage name
    description: str  # longer free-text explanation of the stage
@dataclass
class GDDState:
    """Snapshot of a Growing Degree Day accumulation run."""

    cumulative_gdd: float  # accumulated GDD total
    stage_id: str  # id of the stage associated with that total
    stage_label: str  # display label of that stage
    days_computed: int  # number of daily values behind the total
    season_start: str  # ISO date
    last_date: str  # ISO date
    # Per-day GDD values; every instance gets its own fresh list by default.
    daily_gdd: List[float] = field(default_factory=list)
@dataclass
class CameraDetection:
    """Outcome of one camera-based phenological stage detection."""

    detected_stage_id: str  # stage id reported for the image
    confidence: str  # "high", "medium", "low"
    observations: str  # what Gemini saw
    # Whether the detection agrees with the GDD estimate; None when there
    # was no GDD estimate to compare against.
    matches_gdd: Optional[bool] = None
# ---------------------------------------------------------------------------
# Stage definitions (shared across all estimation methods)
# ---------------------------------------------------------------------------
# Canonical stage catalogue, keyed by stage id and kept in seasonal order
# (dormancy -> budburst -> flowering -> berry growth -> veraison -> post-harvest).
_STAGE_DEFINITIONS = {
    stage.id: stage
    for stage in (
        PhenologyStage(
            id="winter_dormancy",
            label="Winter dormancy",
            description=(
                "No active canopy or fruit. Vines are resting and rebuilding "
                "reserves in trunk and roots. Panels track the sun at full "
                "astronomical tracking — maximum energy generation."
            ),
        ),
        PhenologyStage(
            id="budburst_vegetative",
            label="Budburst / early vegetative",
            description=(
                "New shoots and leaves are expanding. Vine is light-limited "
                "(RuBP regime) — every photon drives canopy growth. Panels at "
                "full tracking; energy and vine interests are fully aligned."
            ),
        ),
        PhenologyStage(
            id="flowering_fruit_set",
            label="Flowering / fruit set",
            description=(
                "Yield formation is highly sensitive — cluster number and berry "
                "set are determined now. Light demand is at its peak. Panels at "
                "full tracking; energy generation and vine needs are aligned."
            ),
        ),
        PhenologyStage(
            id="berry_growth",
            label="Berry growth",
            description=(
                "Canopy is substantial and berries are expanding rapidly. "
                "Water management is critical. Heat stress may begin to "
                "limit photosynthesis on the hottest afternoons (Rubisco regime)."
            ),
        ),
        PhenologyStage(
            id="veraison_ripening",
            label="Veraison / ripening",
            description=(
                "Berry ripening and flavour development dominate. Sugar loading "
                "depends on upper-canopy photosynthesis. Fruiting zone is most "
                "vulnerable to sunburn on hot afternoons (>35C)."
            ),
        ),
        PhenologyStage(
            id="post_harvest_reserves",
            label="Post-harvest reserve building",
            description=(
                "Berries mostly harvested. Canopy refills carbohydrate reserves "
                "for next season. Healthy leaves are essential for reserve "
                "accumulation. Energy generation is the primary output."
            ),
        ),
    )
}
# ---------------------------------------------------------------------------
# 1. Calendar-based estimation (original fallback)
# ---------------------------------------------------------------------------
def _estimate_stage_by_month(month: int) -> PhenologyStage:
    """Map a calendar month to the approximate Semillon stage in the Negev.

    Months 1-12 resolve to an entry of ``_STAGE_DEFINITIONS``; any other
    value yields a placeholder stage whose id is ``"unknown"``.
    """
    month_groups = (
        ((1, 2, 11, 12), "winter_dormancy"),
        ((3, 4), "budburst_vegetative"),
        ((5,), "flowering_fruit_set"),
        ((6, 7), "berry_growth"),
        ((8,), "veraison_ripening"),
        ((9, 10), "post_harvest_reserves"),
    )
    for months, stage_key in month_groups:
        if month in months:
            return _STAGE_DEFINITIONS[stage_key]
    # No group matched: the month is outside the 1-12 range.
    return PhenologyStage(
        id="unknown",
        label="Unknown",
        description="Phenological stage could not be determined.",
    )
# ---------------------------------------------------------------------------
# 2. GDD-based estimation
# ---------------------------------------------------------------------------
def compute_gdd_from_ims(
    ims_df: pd.DataFrame,
    season_start_month: int = 3,
    base_temp: float = GDD_BASE_TEMP_C,
) -> GDDState:
    """Accumulate Growing Degree Days from IMS 15-min temperature data.

    The season is anchored on the most recent timestamp in ``ims_df``: it
    starts on the 1st of ``season_start_month`` of that year, or of the
    previous year when the latest timestamp falls before that month.
    Daily GDD is ``max(0, (tmax + tmin) / 2 - base_temp)``.

    Parameters
    ----------
    ims_df : DataFrame
        IMS data with columns: timestamp_utc, tdmax_c, tdmin_c
        (or air_temperature_c for fallback). The input is copied, never
        mutated. Without a ``timestamp_utc`` column the existing index is
        used as-is.
        NOTE(review): that index presumably has to be a UTC-aware
        DatetimeIndex already -- confirm against callers.
    season_start_month : int
        Month when GDD accumulation starts (default: March).
    base_temp : float
        Base temperature for GDD calculation (default: 10°C).

    Returns
    -------
    GDDState
        Cumulative GDD (rounded to 1 decimal), the matching stage and the
        per-day GDD values (rounded to 2 decimals). A zero-GDD state is
        returned instead of raising when:

        * no rows fall inside the season (stage ``"winter_dormancy"``);
        * neither temperature column set is present, or no day has a
          valid reading after aggregation (stage ``"unknown"``).
    """
    df = ims_df.copy()
    if "timestamp_utc" in df.columns:
        df["timestamp_utc"] = pd.to_datetime(df["timestamp_utc"], utc=True)
        df = df.set_index("timestamp_utc")

    # Determine current year's season start
    now = df.index.max()
    year = now.year if now.month >= season_start_month else now.year - 1
    season_start = pd.Timestamp(f"{year}-{season_start_month:02d}-01", tz="UTC")

    def _zero_state(stage_id: str, stage_label: str) -> GDDState:
        """Build the 'nothing accumulated' result for the current season."""
        return GDDState(
            cumulative_gdd=0.0,
            stage_id=stage_id,
            stage_label=stage_label,
            days_computed=0,
            season_start=str(season_start.date()),
            last_date=str(now.date()),
        )

    df_season = df[df.index >= season_start]
    if df_season.empty:
        return _zero_state("winter_dormancy", "Winter dormancy")

    # Daily aggregation: use tdmax/tdmin if available, else air_temperature_c
    if "tdmax_c" in df_season.columns and "tdmin_c" in df_season.columns:
        daily = df_season.resample("D").agg({
            "tdmax_c": "max",
            "tdmin_c": "min",
        }).dropna()
        daily["tavg"] = (daily["tdmax_c"] + daily["tdmin_c"]) / 2.0
    elif "air_temperature_c" in df_season.columns:
        daily = df_season.resample("D").agg({
            "air_temperature_c": ["max", "min"],
        }).dropna()
        daily.columns = ["tmax", "tmin"]
        daily["tavg"] = (daily["tmax"] + daily["tmin"]) / 2.0
    else:
        return _zero_state("unknown", "Unknown")

    # dropna() removes every day lacking a valid max/min. If all readings
    # were NaN nothing is left, and ``.iloc[-1]`` below would raise IndexError.
    if daily.empty:
        return _zero_state("unknown", "Unknown")

    # GDD per day: max(0, tavg - base_temp)
    daily["gdd"] = np.maximum(0.0, daily["tavg"] - base_temp)
    daily["cumulative_gdd"] = daily["gdd"].cumsum()
    cumulative = float(daily["cumulative_gdd"].iloc[-1])
    stage = estimate_stage_by_gdd(cumulative)

    return GDDState(
        cumulative_gdd=round(cumulative, 1),
        stage_id=stage.id,
        stage_label=stage.label,
        days_computed=len(daily),
        season_start=str(season_start.date()),
        last_date=str(daily.index[-1].date()),
        daily_gdd=[round(g, 2) for g in daily["gdd"].tolist()],
    )
def estimate_stage_by_gdd(cumulative_gdd: float) -> PhenologyStage:
    """Translate a cumulative GDD total into a phenological stage.

    Thresholds come from ``PHENOLOGY_GDD_THRESHOLDS`` (config/settings.py).
    The stage of the highest threshold that has been reached wins; a total
    below every threshold means winter dormancy.
    """
    # Threshold name -> stage id. Names missing from this table fall back to
    # "budburst_vegetative" below.
    stage_for_threshold = {
        "harvest": "post_harvest_reserves",
        "veraison": "veraison_ripening",
        "fruit_set": "berry_growth",
        "flowering": "flowering_fruit_set",
        "budburst": "budburst_vegetative",
    }
    # Highest threshold first, so the first one reached is the most advanced.
    ranked = sorted(
        PHENOLOGY_GDD_THRESHOLDS.items(),
        key=lambda x: x[1],
        reverse=True,
    )
    reached = (
        stage_for_threshold.get(name, "budburst_vegetative")
        for name, minimum in ranked
        if cumulative_gdd >= minimum
    )
    for candidate in reached:
        if candidate in _STAGE_DEFINITIONS:
            return _STAGE_DEFINITIONS[candidate]
    # Below budburst threshold
    return _STAGE_DEFINITIONS["winter_dormancy"]
# ---------------------------------------------------------------------------
# 3. Camera-based detection (Gemini Vision)
# ---------------------------------------------------------------------------
def detect_stage_from_camera(
    camera_url: str = CAMERA_URL,
    gdd_stage_hint: Optional[str] = None,
    api_key: Optional[str] = None,
    timeout: float = 15.0,
) -> Optional[CameraDetection]:
    """Analyze live vineyard camera image for phenological stage using Gemini Vision.

    Every failure mode (missing dependency, fetch error, client init error,
    model call error, empty or unparseable answer) is logged as a warning
    and reported as ``None``; the function does not raise for them.

    Parameters
    ----------
    camera_url : str
        URL of the live camera image.
    gdd_stage_hint : str, optional
        Current GDD-based stage estimate (provides context to Gemini). When
        given, the result's ``matches_gdd`` says whether the detected stage
        id equals it.
    api_key : str, optional
        Google API key, forwarded to ``get_genai_client``. If None, it is
        expected to be resolved from environment/Streamlit secrets there.
    timeout : float
        HTTP timeout for fetching the camera image.

    Returns
    -------
    CameraDetection or None if the image cannot be fetched or analyzed.
    The stage id and confidence are stripped and lower-cased so that the
    exact-match comparisons made by callers are not defeated by casing.
    """
    # Fetch camera image. ``requests`` is imported inside the guard so that a
    # missing dependency degrades to "no detection", like any fetch error.
    try:
        import requests

        resp = requests.get(camera_url, timeout=timeout)
        resp.raise_for_status()
        image_bytes = resp.content
    except Exception as exc:
        logger.warning("Camera image fetch failed: %s", exc)
        return None

    # Initialize Gemini client
    try:
        from src.genai.utils import get_genai_client
        from google.genai import types

        client = get_genai_client(api_key)
    except Exception as exc:
        logger.warning("Gemini client init failed: %s", exc)
        return None

    # Build prompt
    hint_text = ""
    if gdd_stage_hint:
        hint_text = (
            f"\nCurrent GDD-based estimate: {gdd_stage_hint}. "
            "Does the visual evidence match this estimate?"
        )
    # NOTE(review): the prompt names Chenin Blanc / Yeruham while the module
    # docstring speaks of Semillon / Sde Boker -- confirm which is intended.
    prompt_text = (
        "You are a viticulture expert analyzing a live camera image from an "
        "agrivoltaic vineyard in Yeruham, Negev desert, Israel. "
        "The grape variety is Chenin Blanc trained on a VSP trellis under solar panels.\n\n"
        "Analyze the image and determine the current phenological (growth) stage. "
        "Look for:\n"
        "- Bare canes with no leaves → winter dormancy\n"
        "- Small green shoots emerging from buds → budburst\n"
        "- Tiny flower clusters (inflorescences) visible → flowering\n"
        "- Small green berries visible on clusters → fruit set / berry growth\n"
        "- Berries changing color (green to yellow/translucent) → veraison\n"
        "- Ripe colored berries, some leaf senescence → harvest / post-harvest\n"
        "- Full canopy with large green leaves but no visible fruit → vegetative growth\n"
        f"{hint_text}\n\n"
        "Respond in exactly this JSON format (no other text):\n"
        '{\n'
        ' "detected_stage": "one of: winter_dormancy, budburst_vegetative, '
        'flowering_fruit_set, berry_growth, veraison_ripening, post_harvest_reserves",\n'
        ' "confidence": "high, medium, or low",\n'
        ' "observations": "brief description of what you see in the image"\n'
        '}'
    )

    # Call Gemini with image
    try:
        image_part = types.Part.from_bytes(data=image_bytes, mime_type="image/jpeg")
        response = client.models.generate_content(
            model="gemini-2.5-flash",
            contents=[prompt_text, image_part],
        )
        text = response.text
    except Exception as exc:
        logger.warning("Gemini vision call failed: %s", exc)
        return None

    # The SDK may hand back no text at all. Slicing None in the handler below
    # would raise TypeError and escape, so bail out here.
    if not text:
        logger.warning("Gemini vision returned no text response")
        return None

    # Parse JSON response
    try:
        from src.genai.utils import extract_json_object

        result = extract_json_object(text)
    except Exception:
        logger.warning("Could not parse Gemini vision response: %s", text[:200])
        return None
    if not isinstance(result, dict):
        # Valid JSON of the wrong shape (e.g. a list) has no ``.get``.
        logger.warning("Could not parse Gemini vision response: %s", text[:200])
        return None

    # ``or`` also covers keys present with a null/empty value.
    detected_id = str(result.get("detected_stage") or "unknown").strip().lower()
    confidence = str(result.get("confidence") or "low").strip().lower()
    observations = str(result.get("observations") or "")

    matches_gdd = None
    if gdd_stage_hint:
        matches_gdd = detected_id == gdd_stage_hint

    return CameraDetection(
        detected_stage_id=detected_id,
        confidence=confidence,
        observations=observations,
        matches_gdd=matches_gdd,
    )
# ---------------------------------------------------------------------------
# Public API (backward-compatible)
# ---------------------------------------------------------------------------
def estimate_stage_for_date(
    d: date,
    ims_df: Optional[pd.DataFrame] = None,
) -> PhenologyStage:
    """Return the phenological stage for a calendar date.

    With non-empty IMS data the stage is derived from accumulated GDD;
    without it, when the GDD total is not positive, or when the GDD
    computation raises, the month-based calendar estimate is used.

    Parameters
    ----------
    d : datetime.date
        Local calendar date at the vineyard. Only its month is used, and
        only on the calendar path.
    ims_df : DataFrame, optional
        IMS temperature data for GDD computation. If None, uses calendar.
    """
    no_weather = ims_df is None or ims_df.empty
    if no_weather:
        return _estimate_stage_by_month(d.month)

    try:
        accumulated = compute_gdd_from_ims(ims_df).cumulative_gdd
        if accumulated > 0:
            return estimate_stage_by_gdd(accumulated)
    except Exception as exc:
        logger.warning("GDD estimation failed, falling back to calendar: %s", exc)

    # GDD was unavailable or not positive: use the calendar.
    return _estimate_stage_by_month(d.month)
def estimate_stage_for_timestamp(ts: datetime) -> PhenologyStage:
    """Return the phenological stage for a datetime (local or UTC).

    Only ``ts.date()`` is considered; the time of day and any timezone
    information do not influence the result.
    """
    calendar_day = ts.date()
    return estimate_stage_for_date(calendar_day)
def stage_id_and_description_for_date(d: date) -> Tuple[str, str]:
    """Return ``(stage_id, stage_description)`` for a date (Streamlit/UI helper)."""
    resolved = estimate_stage_for_date(d)
    return (resolved.id, resolved.description)
# Calendar blocks in year order: (first_month, last_month, stage_id, label).
# Winter dormancy is listed twice because it straddles the year boundary
# (Nov-Dec and Jan-Feb).
_STAGE_MONTH_RANGES = [
    (1, 2, "winter_dormancy", "Winter dormancy"),
    (3, 4, "budburst_vegetative", "Budburst / early vegetative"),
    (5, 5, "flowering_fruit_set", "Flowering / fruit set"),
    (6, 7, "berry_growth", "Berry growth"),
    (8, 8, "veraison_ripening", "Veraison / ripening"),
    (9, 10, "post_harvest_reserves", "Post-harvest reserve building"),
    (11, 12, "winter_dormancy", "Winter dormancy"),
]


def next_stage_for_date(d: date) -> Tuple[str, str, int]:
    """Return (next_stage_label, next_stage_id, days_until) for a given date.

    Purely calendar-based: finds the block of ``_STAGE_MONTH_RANGES`` that
    contains ``d.month`` and walks forward, wrapping past December, to the
    first block whose stage differs from the current one. Blocks with the
    same stage are skipped, so a date in November or December reports
    budburst on 1 March of the following year, not "winter dormancy" again.

    ``days_until`` counts the days from ``d`` to the 1st of the month in
    which the next stage starts. ``("Unknown", "unknown", 0)`` is returned
    if no block matches the month.
    """
    n_blocks = len(_STAGE_MONTH_RANGES)
    for i, (m_start, m_end, current_id, _label) in enumerate(_STAGE_MONTH_RANGES):
        if not m_start <= d.month <= m_end:
            continue
        # Visit each other block at most once, in calendar order.
        for step in range(1, n_blocks):
            nxt_start, _nxt_end, nxt_id, nxt_label = _STAGE_MONTH_RANGES[
                (i + step) % n_blocks
            ]
            if nxt_id == current_id:
                # Same stage continuing across the year boundary.
                continue
            # Less than one full cycle is walked, so a start month that is
            # not later than the current month must be in the next year.
            next_year = d.year if nxt_start > d.month else d.year + 1
            next_date = date(next_year, nxt_start, 1)
            return nxt_label, nxt_id, (next_date - d).days
        break
    # Fallback
    return "Unknown", "unknown", 0
# ---------------------------------------------------------------------------
# Combined estimation (all three methods)
# ---------------------------------------------------------------------------
def estimate_stage_combined(
    d: date,
    ims_df: Optional[pd.DataFrame] = None,
    use_camera: bool = False,
    api_key: Optional[str] = None,
) -> Tuple[PhenologyStage, dict]:
    """Estimate the phenological stage from every method that is available.

    Returns ``(stage, metadata)``. ``metadata`` holds:

    - method: "gdd", "camera", or "calendar" -- the method that produced
      the returned stage
    - calendar_stage: id of the month-based stage (always present)
    - gdd_state: GDDState, present when the GDD computation succeeded
    - camera_detection: CameraDetection, present when a detection came back

    Priority: camera (high confidence and a known stage id) > GDD (positive
    total) > calendar.
    """
    # Calendar estimate is the baseline every other method may override.
    calendar_stage = _estimate_stage_by_month(d.month)
    metadata: dict = {"method": "calendar", "calendar_stage": calendar_stage.id}
    best_stage = calendar_stage

    # GDD estimate, only when IMS data was supplied.
    gdd_state: Optional[GDDState] = None
    have_weather = ims_df is not None and not ims_df.empty
    if have_weather:
        try:
            gdd_state = compute_gdd_from_ims(ims_df)
            metadata["gdd_state"] = gdd_state
            if gdd_state.cumulative_gdd > 0:
                best_stage = estimate_stage_by_gdd(gdd_state.cumulative_gdd)
                metadata["method"] = "gdd"
        except Exception as exc:
            logger.warning("GDD estimation failed: %s", exc)

    if not use_camera:
        return best_stage, metadata

    # Camera estimate; the GDD stage (if any) is passed along as a hint.
    detection = detect_stage_from_camera(
        gdd_stage_hint=gdd_state.stage_id if gdd_state else None,
        api_key=api_key,
    )
    if not detection:
        return best_stage, metadata

    metadata["camera_detection"] = detection
    # The camera only overrides GDD/calendar for a high-confidence, known stage.
    trusted = (
        detection.confidence == "high"
        and detection.detected_stage_id in _STAGE_DEFINITIONS
    )
    if trusted:
        best_stage = _STAGE_DEFINITIONS[detection.detected_stage_id]
        metadata["method"] = "camera"
    return best_stage, metadata