#!/usr/bin/env python3
"""
Train the BitNet AI Fusion model.

Uses ternary weights (-1, 0, 1) and 8-bit activations.
Now includes real-time PnL backtesting and Confusion Matrix logging.
"""

import sys
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split
from tqdm.auto import tqdm
import logging
from safetensors.torch import save_file, load_file
from huggingface_hub import HfApi, create_repo, hf_hub_download
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix

# Add project root to path (must run before the project-local imports below)
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

from trading_cli.strategy.ai.model import create_model
from scripts.generate_ai_dataset import build_dataset, SEQ_LEN, LOOKAHEAD

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Hyperparameters
EPOCHS = 100
# NOTE: not referenced in this script; train() probes the batch size at
# runtime through get_max_batch_size() instead.
BATCH_SIZE = 4096
LR = 0.0003
HIDDEN_DIM = 512
LAYERS = 8

# HF Configuration
HF_REPO_ID = os.getenv("HF_REPO_ID")  # e.g. "username/BitFin"
HF_DATASET_ID = "luohoa97/BitFin"  # User's dataset repo
HF_TOKEN = os.getenv("HF_TOKEN")

# Local artefact locations
DATASET_PATH = "data/trading_dataset.pt"
CHECKPOINT_PATH = "models/ai_fusion_bitnet.safetensors"

# Validation-time trade filter: BUY/SELL predictions whose softmax confidence
# is below this value are turned into HOLD (class 0).
CONFIDENCE_THRESHOLD = 0.6

# Largest batch size the hardware probe will try.
_PROBE_CEILING = 16384


def _probe_batch_fits(model, batch_size, seq_len, input_dim, device):
    """Return True if one forward/backward pass at *batch_size* fits in memory.

    Only out-of-memory ``RuntimeError``s are translated into ``False``; any
    other error propagates unchanged.
    """
    try:
        mock_X = torch.randn(batch_size, seq_len, input_dim, device=device)
        mock_y = torch.randint(0, 3, (batch_size,), device=device)
        loss = nn.CrossEntropyLoss()(model(mock_X), mock_y)
        loss.backward()
    except RuntimeError as e:
        if "out of memory" not in str(e).lower():
            raise
        return False
    finally:
        # Drop gradients even when the backward pass died half-way, so a
        # failed probe does not keep memory occupied.
        model.zero_grad()
    return True


def get_max_batch_size(model: nn.Module, input_dim: int, seq_len: int,
                       device: torch.device, start_batch: int = 128) -> int:
    """Automatically find the largest batch size that fits in VRAM.

    The batch size is doubled from ``start_batch`` until a forward/backward
    pass runs out of memory or the probe ceiling (16384) is exceeded. If even
    the starting size does not fit, it is halved until one does.

    Args:
        model: Model already moved to ``device``.
        input_dim: Size of the last dimension of the probe input.
        seq_len: Sequence length of the probe input.
        device: Device to probe; on CPU no probing happens.
        start_batch: First batch size to try (clamped to ``1..16384``).

    Returns:
        The largest probed batch size that succeeded, or 64 on CPU.

    Raises:
        RuntimeError: If not even a batch of one sample fits, or if the model
            raises a non-OOM ``RuntimeError`` during the probe.
    """
    if device.type == 'cpu':
        return 64

    def fits(candidate):
        ok = _probe_batch_fits(model, candidate, seq_len, input_dim, device)
        # Free the cache only after the probe frame (and any OOM traceback
        # that referenced its tensors) is gone; doing it inside the except
        # block cannot release that memory.
        torch.cuda.empty_cache()
        return ok

    tqdm.write("🔍 Searching for optimal batch size for your GPU...")

    batch_size = max(1, min(start_batch, _PROBE_CEILING))
    last_success = 0
    hit_limit_at = None

    with tqdm(total=_PROBE_CEILING, desc="Hardware Probe", unit="batch") as pbar:
        # Growth phase: keep doubling while the probe succeeds.
        while batch_size <= _PROBE_CEILING:
            if not fits(batch_size):
                hit_limit_at = batch_size
                break
            pbar.update(batch_size - last_success)
            last_success = batch_size
            batch_size *= 2

        # Back-off phase: the very first probe failed, so nothing has been
        # verified yet. Halve until something fits instead of returning an
        # untested size.
        while last_success == 0 and batch_size > 1:
            batch_size //= 2
            if fits(batch_size):
                last_success = batch_size
                pbar.update(batch_size)

    if last_success == 0:
        raise RuntimeError(
            "CUDA out of memory: the model does not fit on the GPU even with batch size 1"
        )
    if hit_limit_at is not None:
        tqdm.write(f"💡 GPU Hit limit at {hit_limit_at}. Using {last_success} as optimal batch.")
    return last_success


def _load_dataset():
    """Return ``(X, y, rets)``, fetching or generating the dataset if missing."""
    if not os.path.exists(DATASET_PATH):
        try:
            print("📦 Fetching dataset from Hugging Face...", flush=True)
            hf_hub_download(repo_id=HF_DATASET_ID, filename="trading_dataset.pt",
                            local_dir="data", repo_type="dataset")
        except Exception as e:
            # Best-effort download: fall back to local generation, but say why.
            print(f"⚠️ Dataset download failed: {e}", flush=True)
            print("🚀 Starting on-the-fly dataset generation...", flush=True)
            build_dataset()

    print("🚀 Loading dataset from data/trading_dataset.pt...", flush=True)
    # NOTE(security): torch.load unpickles the file, which may have been
    # downloaded from the Hub. Consider passing weights_only=True once it is
    # confirmed that the file only holds tensors.
    # map_location keeps the tensors on CPU so the DataLoader can pin them and
    # so returns can be converted to NumPy regardless of where they were saved.
    data = torch.load(DATASET_PATH, map_location="cpu")
    return data["X"], data["y"], data["rets"]


def _autocast(device, dtype):
    """Return the autocast context shared by training and validation."""
    return torch.amp.autocast(device_type=device.type, dtype=dtype,
                              enabled=(device.type == 'cuda'))


def _evaluate(model, loader, criterion, device, dtype):
    """Run validation and return ``(avg_loss, preds, labels, rets)`` as NumPy arrays.

    Predictions are thresholded: any sample whose top softmax probability is
    below ``CONFIDENCE_THRESHOLD`` is forced to HOLD (0), which reduces noisy
    trades and targets high-conviction signals.
    """
    model.eval()
    loss_sum = 0.0
    pred_chunks, label_chunks, ret_chunks = [], [], []

    with torch.no_grad():
        for batch_X, batch_y, batch_r in loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            with _autocast(device, dtype):
                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)
            loss_sum += loss.item()

            probs = torch.softmax(outputs, dim=-1)
            conf, preds = torch.max(probs, dim=-1)
            # HOLD predictions are already 0, so masking every low-confidence
            # sample is the same as masking only low-confidence BUY/SELL.
            final_preds = preds.masked_fill(conf < CONFIDENCE_THRESHOLD, 0)

            # Collect whole arrays per batch; extending a list element by
            # element is a Python-level loop over every sample.
            pred_chunks.append(final_preds.cpu().numpy())
            label_chunks.append(batch_y.cpu().numpy())
            ret_chunks.append(batch_r.numpy())

    avg_loss = loss_sum / len(loader)
    return (avg_loss, np.concatenate(pred_chunks),
            np.concatenate(label_chunks), np.concatenate(ret_chunks))


def _report_epoch(epoch, avg_train_loss, avg_val_loss, all_preds, all_true, all_rets):
    """Print loss, PnL, win-rate and confusion-matrix statistics for one epoch."""
    is_buy = all_preds == 1
    is_sell = all_preds == 2
    buys = int(is_buy.sum())
    sells = int(is_sell.sum())

    buy_pnl = float(np.sum(all_rets[is_buy]))
    sell_pnl = float(-np.sum(all_rets[is_sell]))  # Future return is inverse for SELL
    total_pnl = buy_pnl + sell_pnl

    # The small epsilon avoids a division by zero when no trade was taken.
    buy_win_rate = float(np.sum(is_buy & (all_true == 1)) / (buys + 1e-6))
    sell_win_rate = float(np.sum(is_sell & (all_true == 2)) / (sells + 1e-6))

    tqdm.write(f"\n--- Epoch {epoch+1} Statistics ---")
    tqdm.write(f"Train Loss: {avg_train_loss:.4f}")
    tqdm.write(f"Val Loss: {avg_val_loss:.4f} | Total PnL: {total_pnl:+.4f}")
    tqdm.write(f"BUYs: {buys} | PnL: {buy_pnl:+.4f} | Win Rate: {buy_win_rate:.1%}")
    tqdm.write(f"SELLs: {sells} | PnL: {sell_pnl:+.4f} | Win Rate: {sell_win_rate:.1%}")
    tqdm.write(f"Activity: {(buys+sells)/len(all_preds):.1%}")

    if buys + sells > 0:
        cm = confusion_matrix(all_true, all_preds, labels=[0, 1, 2])
        tqdm.write(f"Confusion Matrix (HOLD/BUY/SELL):\n{cm}")


def _upload_checkpoint():
    """Upload the saved checkpoint to the configured Hugging Face model repo."""
    try:
        print(f"📤 Uploading to HF: {HF_REPO_ID}...", flush=True)
        api = HfApi()
        create_repo(repo_id=HF_REPO_ID, token=HF_TOKEN, exist_ok=True, repo_type="model")
        api.upload_file(
            path_or_fileobj=CHECKPOINT_PATH,
            path_in_repo="ai_fusion_bitnet.safetensors",
            repo_id=HF_REPO_ID,
            token=HF_TOKEN,
        )
        print("✅ Upload successful!", flush=True)
    except Exception as e:
        # Upload is best-effort: the local checkpoint is still valid.
        print(f"⚠️ Upload failed: {e}", flush=True)


def train():
    """Train the model, backtest it on the validation split and save the best checkpoint.

    The checkpoint with the lowest validation loss is written to
    ``models/ai_fusion_bitnet.safetensors`` and, when ``HF_REPO_ID`` and
    ``HF_TOKEN`` are set, uploaded to the Hugging Face Hub.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    on_cuda = device.type == 'cuda'
    print(f"Using device: {device}", flush=True)

    # 1. Load or Generate Dataset
    X, y, rets = _load_dataset()

    # 2. Split Data (fixed seed so the split is reproducible across runs)
    dataset = TensorDataset(X, y, rets)
    train_size = int(0.8 * len(dataset))
    val_size = len(dataset) - train_size
    train_ds, val_ds = random_split(dataset, [train_size, val_size],
                                    generator=torch.Generator().manual_seed(42))

    # 3. Create Model
    input_dim = X.shape[2]
    model = create_model(input_dim=input_dim, hidden_dim=HIDDEN_DIM, layers=LAYERS, seq_len=SEQ_LEN)
    model.to(device)

    # 4. Dynamic Batch Sizing
    batch_size = get_max_batch_size(model, input_dim, SEQ_LEN, device)
    # Pinned memory only helps host-to-GPU copies; on CPU it just warns.
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True,
                              pin_memory=on_cuda, num_workers=0)
    val_loader = DataLoader(val_ds, batch_size=batch_size, pin_memory=on_cuda, num_workers=0)

    optimizer = optim.AdamW(model.parameters(), lr=LR)

    # 5. Class Weights (HOLD: 2.0, BUY: 1.0, SELL: 3.0)
    class_weights = torch.tensor([2.0, 1.0, 3.0]).to(device)
    criterion = nn.CrossEntropyLoss(weight=class_weights)

    # Mixed Precision Setup: bf16 on compute capability >= 8, otherwise fp16
    # with loss scaling. Autocast is disabled entirely on CPU.
    if on_cuda and torch.cuda.get_device_capability()[0] >= 8:
        dtype = torch.bfloat16
    else:
        dtype = torch.float16
    use_bf16 = (dtype == torch.bfloat16)
    scaler = torch.amp.GradScaler(device.type, enabled=(not use_bf16 and on_cuda))

    tqdm.write(f"🚀 Starting training (Batch Size: {batch_size}, Precision: {dtype})...")

    best_val_loss = float('inf')
    checkpoint_saved = False

    for epoch in range(EPOCHS):
        model.train()
        train_loss = 0.0
        correct = 0
        total = 0

        pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}")
        for batch_X, batch_y, _ in pbar:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            optimizer.zero_grad()

            with _autocast(device, dtype):
                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)

            if not use_bf16:
                scaler.scale(loss).backward()
                scaler.step(optimizer)
                scaler.update()
            else:
                loss.backward()
                optimizer.step()

            train_loss += loss.item()
            _, predicted = outputs.max(1)
            total += batch_y.size(0)
            correct += predicted.eq(batch_y).sum().item()
            pbar.set_postfix({"loss": f"{loss.item():.4f}", "acc": f"{100.*correct/total:.1f}%"})

        # Validation & Backtest
        avg_val_loss, all_preds, all_true, all_rets = _evaluate(
            model, val_loader, criterion, device, dtype
        )
        _report_epoch(epoch, train_loss / len(train_loader), avg_val_loss,
                      all_preds, all_true, all_rets)

        # Keep only the checkpoint with the lowest validation loss.
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            os.makedirs("models", exist_ok=True)
            save_file(model.state_dict(), CHECKPOINT_PATH)
            checkpoint_saved = True

    if not checkpoint_saved:
        # Happens when no epoch ran or the validation loss was never finite;
        # do not claim success or upload a stale file from an earlier run.
        print("⚠️ Training finished without an improving validation loss; no checkpoint was saved.",
              flush=True)
        return

    print("✅ Training complete. Final model saved.")

    # Upload to HF
    if HF_REPO_ID and HF_TOKEN:
        _upload_checkpoint()


if __name__ == "__main__":
    train()