"""
Phenology tracker for Semillon grapevines in the Negev (Sde Boker).

Three estimation methods (highest confidence wins):

1. **GDD-based** — accumulates Growing Degree Days from IMS temperature data
   using base temperature 10°C and thresholds in config/settings.py.
2. **Camera-based** — sends live vineyard image to Gemini Vision for visual
   phenological stage detection (optional, requires API key).
3. **Calendar-based** — fallback using month-to-stage mapping.

The public API is unchanged: ``estimate_stage_for_date()`` remains the primary
entry point. New functions ``estimate_stage_by_gdd()`` and
``detect_stage_from_camera()`` are available for higher-confidence estimates.
"""

from __future__ import annotations

import logging
from dataclasses import dataclass, field
from datetime import date, datetime
from pathlib import Path
from typing import List, Optional, Tuple

import numpy as np
import pandas as pd

from config.settings import PHENOLOGY_GDD_THRESHOLDS

logger = logging.getLogger(__name__)

# Base temperature for GDD accumulation (Semillon at Sde Boker)
GDD_BASE_TEMP_C = 10.0

# Camera URL for live vineyard feed
CAMERA_URL = "https://app.solarwine.ai/images/yerucham/last_view.jpg"


# ---------------------------------------------------------------------------
# Data containers
# ---------------------------------------------------------------------------

@dataclass(frozen=True)
class PhenologyStage:
    """Simple container for phenological stage metadata."""

    id: str
    label: str
    description: str


@dataclass
class GDDState:
    """Cumulative GDD tracking state."""

    cumulative_gdd: float
    stage_id: str
    stage_label: str
    days_computed: int
    season_start: str  # ISO date
    last_date: str  # ISO date
    daily_gdd: List[float] = field(default_factory=list)


@dataclass
class CameraDetection:
    """Result of camera-based phenological stage detection."""

    detected_stage_id: str
    confidence: str  # "high", "medium", "low"
    observations: str  # what Gemini saw
    matches_gdd: Optional[bool] = None  # whether it agrees with GDD estimate


# ---------------------------------------------------------------------------
# Stage definitions (shared across all estimation methods)
# ---------------------------------------------------------------------------

_STAGE_DEFINITIONS = {
    "winter_dormancy": PhenologyStage(
        id="winter_dormancy",
        label="Winter dormancy",
        description=(
            "No active canopy or fruit. Vines are resting and rebuilding "
            "reserves in trunk and roots. Panels track the sun at full "
            "astronomical tracking — maximum energy generation."
        ),
    ),
    "budburst_vegetative": PhenologyStage(
        id="budburst_vegetative",
        label="Budburst / early vegetative",
        description=(
            "New shoots and leaves are expanding. Vine is light-limited "
            "(RuBP regime) — every photon drives canopy growth. Panels at "
            "full tracking; energy and vine interests are fully aligned."
        ),
    ),
    "flowering_fruit_set": PhenologyStage(
        id="flowering_fruit_set",
        label="Flowering / fruit set",
        description=(
            "Yield formation is highly sensitive — cluster number and berry "
            "set are determined now. Light demand is at its peak. Panels at "
            "full tracking; energy generation and vine needs are aligned."
        ),
    ),
    "berry_growth": PhenologyStage(
        id="berry_growth",
        label="Berry growth",
        description=(
            "Canopy is substantial and berries are expanding rapidly. "
            "Water management is critical. Heat stress may begin to "
            "limit photosynthesis on the hottest afternoons (Rubisco regime)."
        ),
    ),
    "veraison_ripening": PhenologyStage(
        id="veraison_ripening",
        label="Veraison / ripening",
        description=(
            "Berry ripening and flavour development dominate. Sugar loading "
            "depends on upper-canopy photosynthesis. Fruiting zone is most "
            "vulnerable to sunburn on hot afternoons (>35C)."
        ),
    ),
    "post_harvest_reserves": PhenologyStage(
        id="post_harvest_reserves",
        label="Post-harvest reserve building",
        description=(
            "Berries mostly harvested. Canopy refills carbohydrate reserves "
            "for next season. Healthy leaves are essential for reserve "
            "accumulation. Energy generation is the primary output."
        ),
    ),
}


# ---------------------------------------------------------------------------
# 1. Calendar-based estimation (original fallback)
# ---------------------------------------------------------------------------

def _estimate_stage_by_month(month: int) -> PhenologyStage:
    """Approximate Semillon stage in the Negev using calendar month.

    Any value outside 1..12 yields a synthetic ``"unknown"`` stage rather
    than raising.
    """
    if month in (1, 2, 11, 12):
        return _STAGE_DEFINITIONS["winter_dormancy"]
    if month in (3, 4):
        return _STAGE_DEFINITIONS["budburst_vegetative"]
    if month == 5:
        return _STAGE_DEFINITIONS["flowering_fruit_set"]
    if month in (6, 7):
        return _STAGE_DEFINITIONS["berry_growth"]
    if month == 8:
        return _STAGE_DEFINITIONS["veraison_ripening"]
    if month in (9, 10):
        return _STAGE_DEFINITIONS["post_harvest_reserves"]
    return PhenologyStage(
        id="unknown",
        label="Unknown",
        description="Phenological stage could not be determined.",
    )


# ---------------------------------------------------------------------------
# 2. GDD-based estimation
# ---------------------------------------------------------------------------

def _empty_gdd_state(
    stage_id: str,
    stage_label: str,
    season_start: pd.Timestamp,
    last_seen: pd.Timestamp,
) -> GDDState:
    """Build a zero-GDD state for the cases where nothing could be accumulated."""
    return GDDState(
        cumulative_gdd=0.0,
        stage_id=stage_id,
        stage_label=stage_label,
        days_computed=0,
        season_start=str(season_start.date()),
        last_date=str(last_seen.date()),
    )


def compute_gdd_from_ims(
    ims_df: pd.DataFrame,
    season_start_month: int = 3,
    base_temp: float = GDD_BASE_TEMP_C,
) -> GDDState:
    """Accumulate Growing Degree Days from IMS 15-min temperature data.

    The season is anchored on the *latest timestamp in the data*: it starts on
    the 1st of ``season_start_month`` of that timestamp's year (or the previous
    year if the latest timestamp falls before that month).

    Parameters
    ----------
    ims_df : DataFrame
        IMS data with columns: timestamp_utc, tdmax_c, tdmin_c
        (or air_temperature_c for fallback). If ``timestamp_utc`` is absent
        the frame must already be indexed by a DatetimeIndex; a tz-naive
        index is interpreted as UTC.
    season_start_month : int
        Month when GDD accumulation starts (default: March).
    base_temp : float
        Base temperature for GDD calculation (default: 10°C).

    Returns
    -------
    GDDState with cumulative GDD and estimated stage. A zero-GDD state is
    returned when there is no in-season data (stage ``winter_dormancy``) or
    when no usable temperature values exist (stage ``unknown``).

    Raises
    ------
    ValueError
        If the frame has no usable timestamps (empty, all-NaT, or neither a
        ``timestamp_utc`` column nor a DatetimeIndex).
    """
    df = ims_df.copy()
    if "timestamp_utc" in df.columns:
        df["timestamp_utc"] = pd.to_datetime(df["timestamp_utc"], utc=True)
        df = df.set_index("timestamp_utc")

    if not isinstance(df.index, pd.DatetimeIndex):
        raise ValueError(
            "ims_df must have a 'timestamp_utc' column or a DatetimeIndex"
        )
    if df.index.tz is None:
        # The season boundary below is tz-aware; a naive index would make the
        # comparison raise TypeError.
        df.index = df.index.tz_localize("UTC")

    # Determine current year's season start
    now = df.index.max()
    if pd.isna(now):
        raise ValueError("ims_df contains no valid timestamps")

    year = now.year if now.month >= season_start_month else now.year - 1
    season_start = pd.Timestamp(f"{year}-{season_start_month:02d}-01", tz="UTC")
    df_season = df[df.index >= season_start]

    if df_season.empty:
        return _empty_gdd_state(
            "winter_dormancy", "Winter dormancy", season_start, now
        )

    # Daily aggregation: use tdmax/tdmin if available, else air_temperature_c
    if "tdmax_c" in df_season.columns and "tdmin_c" in df_season.columns:
        daily = df_season.resample("D").agg({
            "tdmax_c": "max",
            "tdmin_c": "min",
        }).dropna()
        daily["tavg"] = (daily["tdmax_c"] + daily["tdmin_c"]) / 2.0
    elif "air_temperature_c" in df_season.columns:
        daily = df_season.resample("D").agg({
            "air_temperature_c": ["max", "min"],
        }).dropna()
        daily.columns = ["tmax", "tmin"]
        daily["tavg"] = (daily["tmax"] + daily["tmin"]) / 2.0
    else:
        return _empty_gdd_state("unknown", "Unknown", season_start, now)

    if daily.empty:
        # Every in-season day had missing temperatures; indexing the last row
        # below would raise IndexError.
        return _empty_gdd_state("unknown", "Unknown", season_start, now)

    # GDD per day: max(0, tavg - base_temp)
    daily["gdd"] = np.maximum(0.0, daily["tavg"] - base_temp)
    cumulative = float(daily["gdd"].cumsum().iloc[-1])
    stage = estimate_stage_by_gdd(cumulative)

    return GDDState(
        cumulative_gdd=round(cumulative, 1),
        stage_id=stage.id,
        stage_label=stage.label,
        days_computed=len(daily),
        season_start=str(season_start.date()),
        last_date=str(daily.index[-1].date()),
        daily_gdd=[round(g, 2) for g in daily["gdd"].tolist()],
    )


# Map GDD threshold names (keys of PHENOLOGY_GDD_THRESHOLDS) to stage IDs
_GDD_THRESHOLD_TO_STAGE_ID = {
    "harvest": "post_harvest_reserves",
    "veraison": "veraison_ripening",
    "fruit_set": "berry_growth",
    "flowering": "flowering_fruit_set",
    "budburst": "budburst_vegetative",
}


def estimate_stage_by_gdd(cumulative_gdd: float) -> PhenologyStage:
    """Estimate phenological stage from cumulative Growing Degree Days.

    Uses thresholds from config/settings.py PHENOLOGY_GDD_THRESHOLDS. The
    highest threshold that ``cumulative_gdd`` has reached wins; threshold
    names without an explicit mapping fall back to ``budburst_vegetative``.
    Below every threshold the vine is considered dormant.
    """
    # Sort thresholds descending to find highest matched stage
    ranked = sorted(
        PHENOLOGY_GDD_THRESHOLDS.items(),
        key=lambda item: item[1],
        reverse=True,
    )

    for threshold_name, threshold in ranked:
        if cumulative_gdd < threshold:
            continue
        stage_id = _GDD_THRESHOLD_TO_STAGE_ID.get(
            threshold_name, "budburst_vegetative"
        )
        stage = _STAGE_DEFINITIONS.get(stage_id)
        if stage is not None:
            return stage

    # Below budburst threshold
    return _STAGE_DEFINITIONS["winter_dormancy"]


# ---------------------------------------------------------------------------
# 3. Camera-based detection (Gemini Vision)
# ---------------------------------------------------------------------------

def _build_camera_prompt(gdd_stage_hint: Optional[str]) -> str:
    """Return the Gemini prompt, optionally including the GDD-based estimate."""
    hint_text = ""
    if gdd_stage_hint:
        hint_text = (
            f"\nCurrent GDD-based estimate: {gdd_stage_hint}. "
            "Does the visual evidence match this estimate?"
        )

    # NOTE(review): the prompt names "Chenin Blanc" in "Yeruham" while the
    # module header describes Semillon at Sde Boker — confirm which is
    # correct for this camera before changing the text.
    return (
        "You are a viticulture expert analyzing a live camera image from an "
        "agrivoltaic vineyard in Yeruham, Negev desert, Israel. "
        "The grape variety is Chenin Blanc trained on a VSP trellis under solar panels.\n\n"
        "Analyze the image and determine the current phenological (growth) stage. "
        "Look for:\n"
        "- Bare canes with no leaves → winter dormancy\n"
        "- Small green shoots emerging from buds → budburst\n"
        "- Tiny flower clusters (inflorescences) visible → flowering\n"
        "- Small green berries visible on clusters → fruit set / berry growth\n"
        "- Berries changing color (green to yellow/translucent) → veraison\n"
        "- Ripe colored berries, some leaf senescence → harvest / post-harvest\n"
        "- Full canopy with large green leaves but no visible fruit → vegetative growth\n"
        f"{hint_text}\n\n"
        "Respond in exactly this JSON format (no other text):\n"
        '{\n'
        ' "detected_stage": "one of: winter_dormancy, budburst_vegetative, '
        'flowering_fruit_set, berry_growth, veraison_ripening, post_harvest_reserves",\n'
        ' "confidence": "high, medium, or low",\n'
        ' "observations": "brief description of what you see in the image"\n'
        '}'
    )


def detect_stage_from_camera(
    camera_url: str = CAMERA_URL,
    gdd_stage_hint: Optional[str] = None,
    api_key: Optional[str] = None,
    timeout: float = 15.0,
) -> Optional[CameraDetection]:
    """Analyze live vineyard camera image for phenological stage using Gemini Vision.

    This is a best-effort helper: every failure (network, missing optional
    dependency, Gemini error, unparseable reply) is logged as a warning and
    reported as ``None`` instead of raising.

    Parameters
    ----------
    camera_url : str
        URL of the live camera image.
    gdd_stage_hint : str, optional
        Current GDD-based stage estimate (provides context to Gemini).
    api_key : str, optional
        Google API key. If None, resolved from environment/Streamlit secrets.
    timeout : float
        HTTP timeout for fetching the camera image.

    Returns
    -------
    CameraDetection or None if the image cannot be fetched or analyzed.
    The stage id and confidence are lower-cased and stripped so that replies
    such as ``"High"`` compare equal to the expected ``"high"``.
    """
    # Fetch camera image
    try:
        import requests  # imported lazily: only needed for the camera method

        resp = requests.get(camera_url, timeout=timeout)
        resp.raise_for_status()
        image_bytes = resp.content
    except Exception as exc:
        logger.warning("Camera image fetch failed: %s", exc)
        return None

    # Initialize Gemini client
    try:
        from src.genai.utils import get_genai_client
        from google.genai import types

        client = get_genai_client(api_key)
    except Exception as exc:
        logger.warning("Gemini client init failed: %s", exc)
        return None

    prompt_text = _build_camera_prompt(gdd_stage_hint)

    # Call Gemini with image
    try:
        image_part = types.Part.from_bytes(data=image_bytes, mime_type="image/jpeg")
        response = client.models.generate_content(
            model="gemini-2.5-flash",
            contents=[prompt_text, image_part],
        )
        text = response.text
    except Exception as exc:
        logger.warning("Gemini vision call failed: %s", exc)
        return None

    # Parse JSON response
    try:
        from src.genai.utils import extract_json_object

        result = extract_json_object(text)
    except Exception:
        # ``text`` may be None (empty model reply); never slice it blindly.
        logger.warning(
            "Could not parse Gemini vision response: %s", str(text or "")[:200]
        )
        return None

    if not isinstance(result, dict):
        logger.warning(
            "Could not parse Gemini vision response: %s", str(text or "")[:200]
        )
        return None

    detected_id = str(result.get("detected_stage") or "unknown").strip().lower()
    confidence = str(result.get("confidence") or "low").strip().lower()
    observations = str(result.get("observations") or "")

    matches_gdd: Optional[bool] = None
    if gdd_stage_hint:
        matches_gdd = detected_id == gdd_stage_hint

    return CameraDetection(
        detected_stage_id=detected_id,
        confidence=confidence,
        observations=observations,
        matches_gdd=matches_gdd,
    )


# ---------------------------------------------------------------------------
# Public API (backward-compatible)
# ---------------------------------------------------------------------------

def estimate_stage_for_date(
    d: date,
    ims_df: Optional[pd.DataFrame] = None,
) -> PhenologyStage:
    """Estimate phenological stage for a given calendar date.

    Uses GDD-based estimation if IMS data is provided, otherwise falls back
    to calendar-based estimation. The calendar is also used when the GDD
    computation fails or accumulates nothing.

    Note that the GDD path accumulates up to the latest timestamp present in
    ``ims_df``; ``d`` itself is only used for the calendar fallback.

    Parameters
    ----------
    d : datetime.date
        Local calendar date at the vineyard.
    ims_df : DataFrame, optional
        IMS temperature data for GDD computation. If None, uses calendar.
    """
    if ims_df is not None and not ims_df.empty:
        try:
            gdd_state = compute_gdd_from_ims(ims_df)
            if gdd_state.cumulative_gdd > 0:
                return estimate_stage_by_gdd(gdd_state.cumulative_gdd)
        except Exception as exc:
            logger.warning("GDD estimation failed, falling back to calendar: %s", exc)

    return _estimate_stage_by_month(d.month)


def estimate_stage_for_timestamp(ts: datetime) -> PhenologyStage:
    """Estimate phenological stage for a datetime (local or UTC).

    The calendar date component is used; timezone is ignored for stage.
    """
    return estimate_stage_for_date(ts.date())


def stage_id_and_description_for_date(d: date) -> Tuple[str, str]:
    """Convenience wrapper returning (id, description) for Streamlit/UI use."""
    stage = estimate_stage_for_date(d)
    return stage.id, stage.description


# Calendar blocks as (first_month, last_month, stage_id, stage_label).
# Winter dormancy appears twice because it spans the year boundary.
_STAGE_MONTH_RANGES = [
    (1, 2, "winter_dormancy", "Winter dormancy"),
    (3, 4, "budburst_vegetative", "Budburst / early vegetative"),
    (5, 5, "flowering_fruit_set", "Flowering / fruit set"),
    (6, 7, "berry_growth", "Berry growth"),
    (8, 8, "veraison_ripening", "Veraison / ripening"),
    (9, 10, "post_harvest_reserves", "Post-harvest reserve building"),
    (11, 12, "winter_dormancy", "Winter dormancy"),
]


def next_stage_for_date(d: date) -> Tuple[str, str, int]:
    """Return (next_stage_label, next_stage_id, days_until) for a given date.

    Walks the calendar blocks forward from the one containing ``d`` and
    returns the first block whose stage differs from the current one,
    together with the number of days until the 1st of that block's first
    month. In November/December this skips the January/February block
    (still winter dormancy) and reports budburst in March of the next year.

    Returns ``("Unknown", "unknown", 0)`` if no block matches.
    """
    block_count = len(_STAGE_MONTH_RANGES)

    for i, (m_start, m_end, current_id, _label) in enumerate(_STAGE_MONTH_RANGES):
        if not m_start <= d.month <= m_end:
            continue

        # Look at following blocks (wrapping around the year) until the
        # stage actually changes.
        for offset in range(1, block_count):
            nxt_start, _nxt_end, nxt_id, nxt_label = _STAGE_MONTH_RANGES[
                (i + offset) % block_count
            ]
            if nxt_id == current_id:
                continue
            # A start month at or before the current month lies in next year.
            target_year = d.year if nxt_start > d.month else d.year + 1
            next_date = date(target_year, nxt_start, 1)
            return nxt_label, nxt_id, (next_date - d).days
        break

    # Fallback
    return "Unknown", "unknown", 0


# ---------------------------------------------------------------------------
# Combined estimation (all three methods)
# ---------------------------------------------------------------------------

def estimate_stage_combined(
    d: date,
    ims_df: Optional[pd.DataFrame] = None,
    use_camera: bool = False,
    api_key: Optional[str] = None,
) -> Tuple[PhenologyStage, dict]:
    """Estimate phenological stage using all available methods.

    Returns (stage, metadata) where metadata contains:
    - method: "gdd", "camera", or "calendar"
    - gdd_state: GDDState if computed
    - camera_detection: CameraDetection if a detection was obtained
    - calendar_stage: always present (fallback)

    Priority: camera (high confidence) > GDD > calendar.
    """
    metadata: dict = {"method": "calendar"}

    # Calendar (always computed as baseline)
    calendar_stage = _estimate_stage_by_month(d.month)
    metadata["calendar_stage"] = calendar_stage.id
    best_stage = calendar_stage

    # GDD (if IMS data available)
    gdd_state: Optional[GDDState] = None
    if ims_df is not None and not ims_df.empty:
        try:
            gdd_state = compute_gdd_from_ims(ims_df)
            metadata["gdd_state"] = gdd_state
            if gdd_state.cumulative_gdd > 0:
                best_stage = estimate_stage_by_gdd(gdd_state.cumulative_gdd)
                metadata["method"] = "gdd"
        except Exception as exc:
            logger.warning("GDD estimation failed: %s", exc)

    # Camera (if requested and Gemini available)
    if use_camera:
        gdd_hint = gdd_state.stage_id if gdd_state else None
        detection = detect_stage_from_camera(
            gdd_stage_hint=gdd_hint,
            api_key=api_key,
        )
        if detection:
            metadata["camera_detection"] = detection
            # Camera overrides GDD/calendar only if confidence is high
            if (
                detection.confidence == "high"
                and detection.detected_stage_id in _STAGE_DEFINITIONS
            ):
                best_stage = _STAGE_DEFINITIONS[detection.detected_stage_id]
                metadata["method"] = "camera"

    return best_stage, metadata