File size: 8,245 Bytes
938949f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
"""
BaselinePredictor: hybrid FvCB + ML photosynthesis baseline for day-ahead planning.

Provides a single ``predict_day()`` method that:
  1. Runs FvCB (Farquhar–Greer–Weedon) for each slot using forecast weather
  2. Optionally runs a trained ML model for the same slots
  3. Uses the RoutingAgent's rule-based logic to pick the better prediction per slot
  4. Returns a 96-slot profile of predicted photosynthesis rate A (µmol CO₂ m⁻² s⁻¹)

This feeds into the DayAheadPlanner to estimate crop value for each slot,
replacing the current temperature-only heuristic with an actual photosynthesis
prediction that captures the Rubisco transition more accurately.
"""

from __future__ import annotations

import logging
import math
from datetime import date
from typing import List, Optional

import numpy as np

from config.settings import SEMILLON_TRANSITION_TEMP_C

logger = logging.getLogger(__name__)


class BaselinePredictor:
    """Hybrid FvCB + ML photosynthesis prediction for day-ahead planning.

    Parameters
    ----------
    fvcb_model : FarquharModel, optional
        Mechanistic model. Lazily imported and constructed on first use if
        not provided.
    ml_predictor : PhotosynthesisPredictor, optional
        Trained ML model container. This class reads its ``results``
        mapping (model name -> metrics dict with an ``"mae"`` entry) and its
        ``models`` mapping (model name -> fitted model exposing
        ``predict``). If None, FvCB-only mode is used.
    routing_agent : RoutingAgent, optional
        Stored on the instance. Per-slot routing in this class always goes
        through ``RoutingAgent._rule_based_route`` (rule-based, no API
        calls), so the stored agent is not consulted by ``predict_day``.
    """

    # Number of 15-minute slots in a day-ahead profile.
    _SLOTS_PER_DAY = 96
    # Slots with GHI (W/m²) below this are treated as night / negligible light.
    _MIN_DAYLIGHT_GHI = 50
    # Rough GHI -> PAR conversion factor used for the FvCB input.
    _PAR_PER_GHI = 2.0
    # Leaf temperature proxy: air temperature plus this offset (°C) under sun.
    _LEAF_OFFSET_C = 2.0
    # Blend used when the router has no clear preference (transition zone).
    _FVCB_BLEND_WEIGHT = 0.6
    _ML_BLEND_WEIGHT = 0.4

    def __init__(
        self,
        fvcb_model=None,
        ml_predictor=None,
        routing_agent=None,
    ):
        self._fvcb = fvcb_model
        self._ml = ml_predictor
        self._router = routing_agent

    @property
    def fvcb(self):
        """Return the FvCB model, importing and building it on first access."""
        if self._fvcb is None:
            # Imported lazily so the module can be loaded without the model.
            from src.models.farquhar_model import FarquharModel
            self._fvcb = FarquharModel()
        return self._fvcb

    # ------------------------------------------------------------------
    # Main API
    # ------------------------------------------------------------------

    def predict_day(
        self,
        forecast_temps: List[float],
        forecast_ghi: List[float],
        co2_ppm: float = 400.0,
        rh_pct: float = 40.0,
    ) -> List[float]:
        """Predict photosynthesis rate A for each 15-min slot.

        Parameters
        ----------
        forecast_temps : list of 96 floats
            Forecast air temperature (°C) per slot.
        forecast_ghi : list of 96 floats
            Forecast GHI (W/m²) per slot.
        co2_ppm : float
            Atmospheric CO₂ concentration (default 400 ppm).
        rh_pct : float
            Relative humidity (%) for VPD estimation (default 40%).

        Returns
        -------
        list of 96 floats
            Predicted net photosynthesis A (µmol CO₂ m⁻² s⁻¹) per slot.
            FvCB yields 0.0 for slots below the daylight GHI threshold.

        Raises
        ------
        ValueError
            If either forecast does not contain exactly 96 values.
        """
        expected = self._SLOTS_PER_DAY
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if len(forecast_temps) != expected or len(forecast_ghi) != expected:
            raise ValueError(
                f"forecast_temps and forecast_ghi must each contain {expected} "
                f"values (got {len(forecast_temps)} and {len(forecast_ghi)})"
            )

        # FvCB predictions for all slots
        fvcb_predictions = self._predict_fvcb(
            forecast_temps, forecast_ghi, co2_ppm, rh_pct,
        )

        # If no ML model, return FvCB-only
        if self._ml is None:
            return fvcb_predictions

        # ML predictions for all slots, then pick/blend per slot
        ml_predictions = self._predict_ml(forecast_temps, forecast_ghi)
        return self._route_predictions(
            forecast_temps, forecast_ghi,
            fvcb_predictions, ml_predictions,
        )

    # ------------------------------------------------------------------
    # FvCB predictions
    # ------------------------------------------------------------------

    def _predict_fvcb(
        self,
        temps: List[float],
        ghis: List[float],
        co2_ppm: float,
        rh_pct: float,
    ) -> List[float]:
        """Run FvCB for each slot and return one A value per slot.

        Slots below the daylight GHI threshold get 0.0 without calling the
        model. A slot whose model call fails also gets 0.0; each failure is
        logged at DEBUG and a single WARNING summarises them so a broken
        model does not silently produce an all-zero profile.
        """
        predictions: List[float] = []
        failures = 0
        last_error = None

        for slot, (temp, ghi) in enumerate(zip(temps, ghis)):
            # Nighttime or negligible light
            if ghi < self._MIN_DAYLIGHT_GHI:
                predictions.append(0.0)
                continue

            # Estimate PAR from GHI (roughly 2× conversion for photosynthetically active)
            par = ghi * self._PAR_PER_GHI
            # Estimate Tleaf from Tair (proxy: +2°C under sun)
            tleaf = temp + self._LEAF_OFFSET_C
            # Estimate VPD from temperature and RH
            vpd = self._estimate_vpd(temp, rh_pct)

            try:
                result = self.fvcb.calc_photosynthesis_semillon(
                    PAR=par,
                    Tleaf=tleaf,
                    CO2=co2_ppm,
                    VPD=vpd,
                    Tair=temp,
                )
                # Returns (A, limiting_state, shading_helps); tolerate a bare A too.
                a_net = result[0] if isinstance(result, tuple) else result
                value = max(0.0, float(a_net))
            except Exception as exc:  # best-effort: one bad slot must not kill the day
                failures += 1
                last_error = exc
                logger.debug("FvCB failed at slot %d: %s", slot, exc)
                value = 0.0
            predictions.append(value)

        if failures:
            logger.warning(
                "FvCB failed for %d of %d slots (last error: %s); "
                "those slots were set to 0.0",
                failures, len(predictions), last_error,
            )

        return predictions

    @staticmethod
    def _estimate_vpd(tair_c: float, rh_pct: float) -> float:
        """Estimate VPD (kPa) from air temperature (°C) and relative humidity (%).

        The result is clamped at 0.0, so humidity above 100% cannot yield a
        negative deficit.
        """
        # Tetens formula for saturated vapor pressure
        es = 0.6108 * math.exp(17.27 * tair_c / (tair_c + 237.3))
        ea = es * rh_pct / 100.0
        return max(0.0, es - ea)

    # ------------------------------------------------------------------
    # ML predictions
    # ------------------------------------------------------------------

    def _predict_ml(
        self,
        temps: List[float],
        ghis: List[float],
    ) -> List[float]:
        """Run the lowest-MAE ML model for each slot and return one A per slot.

        Predictions are clamped at 0.0. If no ML predictor is configured, no
        usable model is found, the model needs features that cannot be
        derived from the forecast, or prediction raises, a list of zeros is
        returned (and the reason is logged).

        NOTE(review): on failure the zeros still flow into routing, so slots
        routed to ML end up as 0.0 — confirm this is the intended fallback.
        """
        n_slots = len(temps)
        zeros = [0.0] * n_slots
        if self._ml is None:
            return zeros

        try:
            import pandas as pd

            # Build feature DataFrame matching ML model's expected features.
            # Slot k starts at hour k // 4, minute (k % 4) * 15.
            frame = pd.DataFrame({
                "air_temperature_c": temps,
                "ghi_w_m2": ghis,
                "hour": [slot // 4 for slot in range(n_slots)],
                "minute": [(slot % 4) * 15 for slot in range(n_slots)],
            })

            # Pick the model with the lowest recorded MAE.
            best_name = None
            best_mae = float("inf")
            for name, result in self._ml.results.items():
                mae = result.get("mae", float("inf"))
                if mae < best_mae:
                    best_name, best_mae = name, mae

            if best_name is None or best_name not in self._ml.models:
                logger.warning(
                    "ML prediction skipped: no trained model with a recorded MAE"
                )
                return zeros

            model = self._ml.models[best_name]
            trained = getattr(model, "feature_names_in_", None)
            if trained is None:
                # Model does not advertise its features: pass everything we have.
                feature_cols = list(frame.columns)
            else:
                # Select columns in the order the model was trained on;
                # estimators that validate feature names reject a reordering.
                feature_cols = list(trained)
                missing = [col for col in feature_cols if col not in frame.columns]
                if missing:
                    logger.warning(
                        "ML prediction skipped: model %r needs features not "
                        "derivable from the forecast: %s",
                        best_name, missing,
                    )
                    return zeros

            if feature_cols:
                preds = model.predict(frame[feature_cols])
                return [max(0.0, float(p)) for p in preds]

        except Exception as exc:  # best-effort: ML is optional on top of FvCB
            logger.warning("ML prediction failed: %s", exc)

        return zeros

    # ------------------------------------------------------------------
    # Routing
    # ------------------------------------------------------------------

    def _route_predictions(
        self,
        temps: List[float],
        ghis: List[float],
        fvcb_preds: List[float],
        ml_preds: List[float],
    ) -> List[float]:
        """Pick FvCB or ML per slot using the rule-based router.

        For each slot the router receives ``temp_c``, ``ghi_w_m2`` and
        ``hour``. A result of ``"ml"`` selects the ML value, ``None`` blends
        60% FvCB with 40% ML, and anything else selects the FvCB value.
        """
        from src.chatbot.routing_agent import RoutingAgent

        # Use rule-based routing only (no API calls for batch prediction)
        route = RoutingAgent._rule_based_route

        predictions: List[float] = []
        slots = zip(temps, ghis, fvcb_preds, ml_preds)
        for slot, (temp, ghi, fvcb_a, ml_a) in enumerate(slots):
            choice = route({
                "temp_c": temp,
                "ghi_w_m2": ghi,
                "hour": slot // 4,
            })
            if choice is None:
                # Transition zone: weight FvCB 60% / ML 40% as compromise
                a = self._FVCB_BLEND_WEIGHT * fvcb_a + self._ML_BLEND_WEIGHT * ml_a
            elif choice == "ml":
                a = ml_a
            else:
                a = fvcb_a
            predictions.append(a)

        return predictions