Spaces:
Running
Running
Sync from GitHub (tests passed)
Browse files- deep_learning/config.py +4 -2
- deep_learning/training/hyperopt.py +47 -11
deep_learning/config.py
CHANGED
|
@@ -73,8 +73,10 @@ class TFTModelConfig:
|
|
| 73 |
hidden_size: int = 32
|
| 74 |
# attention_head_size 4→2: fewer heads for a small, single-series dataset.
|
| 75 |
attention_head_size: int = 2
|
| 76 |
-
# dropout 0.1→0.
|
| 77 |
-
|
|
|
|
|
|
|
| 78 |
hidden_continuous_size: int = 16 # was 32; paired reduction with hidden_size
|
| 79 |
quantiles: tuple[float, ...] = (0.02, 0.10, 0.25, 0.50, 0.75, 0.90, 0.98)
|
| 80 |
# lr 1e-3→3e-4: smaller batches produce noisier gradients; conservative LR
|
|
|
|
| 73 |
hidden_size: int = 32
|
| 74 |
# attention_head_size 4→2: fewer heads for a small, single-series dataset.
|
| 75 |
attention_head_size: int = 2
|
| 76 |
+
# dropout 0.1→0.20 (REG-2026-001): 313 samples with dropout<0.20 caused
|
| 77 |
+
# co-adaptation and memorization. Kept ≤0.35 because higher values with
|
| 78 |
+
# small hidden_size collapse the output range.
|
| 79 |
+
dropout: float = 0.20
|
| 80 |
hidden_continuous_size: int = 16 # was 32; paired reduction with hidden_size
|
| 81 |
quantiles: tuple[float, ...] = (0.02, 0.10, 0.25, 0.50, 0.75, 0.90, 0.98)
|
| 82 |
# lr 1e-3→3e-4: smaller batches produce noisier gradients; conservative LR
|
deep_learning/training/hyperopt.py
CHANGED
|
@@ -47,10 +47,13 @@ def create_trial_config(trial, base_cfg: TFTASROConfig) -> TFTASROConfig:
|
|
| 47 |
# compressing output distribution and preventing amplitude learning.
|
| 48 |
hidden_size=trial.suggest_int("hidden_size", 32, 64, step=16),
|
| 49 |
attention_head_size=trial.suggest_int("attention_head_size", 1, 4),
|
| 50 |
-
#
|
| 51 |
-
#
|
| 52 |
-
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
| 54 |
quantiles=base_cfg.model.quantiles,
|
| 55 |
# Range [1e-4, 1e-3]: LR < 1e-4 produces near-zero pred_std (VR=0.14);
|
| 56 |
# LR > 1e-3 causes 1-epoch divergence. This band is the stable zone.
|
|
@@ -71,9 +74,9 @@ def create_trial_config(trial, base_cfg: TFTASROConfig) -> TFTASROConfig:
|
|
| 71 |
training_cfg = TrainingConfig(
|
| 72 |
max_epochs=50,
|
| 73 |
early_stopping_patience=8,
|
| 74 |
-
#
|
| 75 |
-
#
|
| 76 |
-
batch_size=trial.suggest_categorical("batch_size", [16, 32, 64]),
|
| 77 |
val_ratio=base_cfg.training.val_ratio,
|
| 78 |
test_ratio=base_cfg.training.test_ratio,
|
| 79 |
lookback_days=base_cfg.training.lookback_days,
|
|
@@ -202,13 +205,46 @@ def _objective(trial, base_cfg: TFTASROConfig, master_data: tuple) -> float:
|
|
| 202 |
|
| 203 |
trial.set_user_attr("variance_ratio", round(vr, 4))
|
| 204 |
trial.set_user_attr("pred_std", round(pred_std, 6))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 205 |
except Exception as exc:
|
| 206 |
-
logger.debug("Trial %d variance check failed: %s", trial.number, exc)
|
|
|
|
| 207 |
|
| 208 |
-
score = float(val_loss) + variance_penalty
|
| 209 |
logger.info(
|
| 210 |
-
"Trial %d: val_loss=%.4f vr_penalty=%.4f β score=%.4f",
|
| 211 |
-
trial.number, float(val_loss), variance_penalty, score,
|
| 212 |
)
|
| 213 |
return score
|
| 214 |
|
|
|
|
| 47 |
# compressing output distribution and preventing amplitude learning.
|
| 48 |
hidden_size=trial.suggest_int("hidden_size", 32, 64, step=16),
|
| 49 |
attention_head_size=trial.suggest_int("attention_head_size", 1, 4),
|
| 50 |
+
# Floor at 0.20: 313 samples with dropout<0.20 causes co-adaptation
|
| 51 |
+
# and memorization (REG-2026-001). Cap at 0.35: dropout>0.35 with
|
| 52 |
+
# small hidden_size collapses the output range.
|
| 53 |
+
dropout=trial.suggest_float("dropout", 0.20, 0.35, step=0.05),
|
| 54 |
+
# Cap at 24: hidden_cont=32 doubled the VSN parameter surface and
|
| 55 |
+
# contributed to overfitting in the 31-Mar regression.
|
| 56 |
+
hidden_continuous_size=trial.suggest_int("hidden_continuous_size", 8, 24, step=8),
|
| 57 |
quantiles=base_cfg.model.quantiles,
|
| 58 |
# Range [1e-4, 1e-3]: LR < 1e-4 produces near-zero pred_std (VR=0.14);
|
| 59 |
# LR > 1e-3 causes 1-epoch divergence. This band is the stable zone.
|
|
|
|
| 74 |
training_cfg = TrainingConfig(
|
| 75 |
max_epochs=50,
|
| 76 |
early_stopping_patience=8,
|
| 77 |
+
# 16 gives 19 batches/epoch, 32 gives ~10. 64 produced only 4
|
| 78 |
+
# batches/epoch with noisy gradients → removed after REG-2026-001.
|
| 79 |
+
batch_size=trial.suggest_categorical("batch_size", [16, 32]),
|
| 80 |
val_ratio=base_cfg.training.val_ratio,
|
| 81 |
test_ratio=base_cfg.training.test_ratio,
|
| 82 |
lookback_days=base_cfg.training.lookback_days,
|
|
|
|
| 205 |
|
| 206 |
trial.set_user_attr("variance_ratio", round(vr, 4))
|
| 207 |
trial.set_user_attr("pred_std", round(pred_std, 6))
|
| 208 |
+
|
| 209 |
+
# --- Directional accuracy & Sharpe guard (REG-2026-001) ---
|
| 210 |
+
# Prevents Optuna from selecting configs that overfit training data
|
| 211 |
+
# but fail to generalise directionally.
|
| 212 |
+
pred_sign = np.sign(y_pred[:n])
|
| 213 |
+
actual_sign = np.sign(y_actual[:n])
|
| 214 |
+
da = float(np.mean(pred_sign == actual_sign))
|
| 215 |
+
trial.set_user_attr("directional_accuracy", round(da, 4))
|
| 216 |
+
|
| 217 |
+
# Strategy Sharpe on validation set
|
| 218 |
+
strategy_returns = np.sign(y_pred[:n]) * y_actual[:n]
|
| 219 |
+
sr_mean = float(strategy_returns.mean())
|
| 220 |
+
sr_std = float(strategy_returns.std()) + 1e-9
|
| 221 |
+
val_sharpe = sr_mean / sr_std
|
| 222 |
+
trial.set_user_attr("val_sharpe", round(val_sharpe, 4))
|
| 223 |
+
|
| 224 |
+
# Hard prune: negative Sharpe = systematically wrong direction
|
| 225 |
+
if val_sharpe < 0.0:
|
| 226 |
+
logger.warning(
|
| 227 |
+
"Trial %d PRUNED: negative val_sharpe=%.4f (DA=%.2f%%)",
|
| 228 |
+
trial.number, val_sharpe, da * 100,
|
| 229 |
+
)
|
| 230 |
+
import optuna
|
| 231 |
+
raise optuna.exceptions.TrialPruned()
|
| 232 |
+
|
| 233 |
+
# Soft penalty: DA below coin-flip adds heavy cost
|
| 234 |
+
da_penalty = 0.0
|
| 235 |
+
if da < 0.50:
|
| 236 |
+
da_penalty = 2.0 * (0.50 - da)
|
| 237 |
+
logger.info("Trial %d: DA=%.2f%% < 50%% β da_penalty=%.4f",
|
| 238 |
+
trial.number, da * 100, da_penalty)
|
| 239 |
+
|
| 240 |
except Exception as exc:
|
| 241 |
+
logger.debug("Trial %d variance/DA check failed: %s", trial.number, exc)
|
| 242 |
+
da_penalty = 0.0
|
| 243 |
|
| 244 |
+
score = float(val_loss) + variance_penalty + da_penalty
|
| 245 |
logger.info(
|
| 246 |
+
"Trial %d: val_loss=%.4f vr_penalty=%.4f da_penalty=%.4f β score=%.4f",
|
| 247 |
+
trial.number, float(val_loss), variance_penalty, da_penalty, score,
|
| 248 |
)
|
| 249 |
return score
|
| 250 |
|