Spaces:

luohoa97
/

BitFinTrainer

Runtime error

App Files Files Community

BitFinTrainer / scripts /train_ai_model.py

luohoa97

Deploy BitNet-Transformer Trainer

3a263ff verified 10 days ago

raw

history blame contribute delete

9.36 kB

	#!/usr/bin/env python3
	"""
	Train the BitNet AI Fusion model.
	Uses ternary weights (-1, 0, 1) and 8-bit activations.
	Now includes real-time PnL backtesting and Confusion Matrix logging.
	"""

	import sys
	import os
	import torch
	import torch.nn as nn
	import torch.optim as optim
	from torch.utils.data import DataLoader, TensorDataset, random_split
	from tqdm.auto import tqdm
	import logging
	from safetensors.torch import save_file, load_file
	from huggingface_hub import HfApi, create_repo, hf_hub_download
	import numpy as np
	from sklearn.metrics import classification_report, confusion_matrix

	# Add project root to path
	sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

	from trading_cli.strategy.ai.model import create_model
	from scripts.generate_ai_dataset import build_dataset, SEQ_LEN, LOOKAHEAD

	# --- Logging -----------------------------------------------------------
	# Root logger at INFO; this module gets its own named logger.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# --- Hugging Face configuration ----------------------------------------
	# Dataset repo is fixed; the model repo id and the token come from the
	# environment and are None when the variables are unset.
	HF_DATASET_ID = "luohoa97/BitFin"
	HF_REPO_ID = os.environ.get("HF_REPO_ID")  # e.g. "username/BitFin"
	HF_TOKEN = os.environ.get("HF_TOKEN")

	# --- Training hyperparameters ------------------------------------------
	EPOCHS = 100        # number of passes over the training split
	# NOTE(review): labelled as the starting point for the dynamic batch
	# search, but the visible train() calls get_max_batch_size() without
	# passing it, so the probe starts from that function's own default.
	BATCH_SIZE = 2 ** 12
	LR = 3e-4           # AdamW learning rate
	HIDDEN_DIM = 512    # forwarded to create_model(hidden_dim=...)
	LAYERS = 8          # forwarded to create_model(layers=...)

	def _probe_fits(model, batch_size, input_dim, seq_len, device):
	    """Return True if one forward/backward pass at ``batch_size`` runs without OOM.

	    Any RuntimeError that is not an out-of-memory error is re-raised.
	    """
	    mock_X = mock_y = loss = None
	    try:
	        # Allocate the mock batch directly on the target device.
	        mock_X = torch.randn(batch_size, seq_len, input_dim, device=device)
	        mock_y = torch.randint(0, 3, (batch_size,), device=device)
	        loss = nn.CrossEntropyLoss()(model(mock_X), mock_y)
	        loss.backward()
	        fits = True
	    except RuntimeError as e:
	        if "out of memory" not in str(e).lower():
	            raise
	        fits = False

	    # Clean up *outside* the except block: while the exception is alive its
	    # traceback keeps the failed pass's tensors referenced, so emptying the
	    # cache from inside the handler cannot release them.
	    del mock_X, mock_y, loss
	    model.zero_grad()
	    torch.cuda.empty_cache()
	    return fits


	def get_max_batch_size(model, input_dim, seq_len, device, start_batch=128, max_batch=16384):
	    """Automatically find the largest batch size that fits in VRAM.

	    Runs a mock forward/backward pass at ``start_batch`` and keeps doubling
	    until a pass runs out of memory or the size would exceed ``max_batch``.
	    If even ``start_batch`` does not fit, the size is halved until a pass
	    succeeds, so the returned value has always been probed successfully
	    (except for the last-resort fallback of 1).

	    NOTE(review): the probe runs in full precision and without optimizer
	    state, so the result is an estimate of what the real training step can
	    hold, not a guarantee.

	    Args:
	        model: Module already moved to ``device``; called as ``model(x)``
	            with ``x`` of shape ``(batch, seq_len, input_dim)``.
	        input_dim: Size of the last dimension of the mock input.
	        seq_len: Size of the middle dimension of the mock input.
	        device: ``torch.device`` to probe. CPU short-circuits to 64.
	        start_batch: First batch size to try.
	        max_batch: Largest batch size that will be tried.

	    Returns:
	        The largest probed batch size that fit, or 64 on CPU.

	    Raises:
	        RuntimeError: If a probe fails for a reason other than OOM.
	    """
	    if device.type == 'cpu':
	        return 64

	    tqdm.write("🔍 Searching for optimal batch size for your GPU...")
	    batch_size = max(1, min(start_batch, max_batch))
	    last_success = 0
	    failed_at = None

	    with tqdm(total=max_batch, desc="Hardware Probe", unit="batch") as pbar:
	        # Phase 1: double until a probe fails or the ceiling is passed.
	        while batch_size <= max_batch:
	            if not _probe_fits(model, batch_size, input_dim, seq_len, device):
	                failed_at = batch_size
	                break
	            last_success = batch_size
	            # Set the position absolutely so the bar never overshoots total.
	            pbar.n = last_success
	            pbar.refresh()
	            batch_size *= 2

	        # Phase 2: the starting size itself failed, so halve until one fits.
	        if last_success == 0 and failed_at is not None:
	            batch_size = failed_at // 2
	            while batch_size >= 1:
	                if _probe_fits(model, batch_size, input_dim, seq_len, device):
	                    last_success = batch_size
	                    pbar.n = last_success
	                    pbar.refresh()
	                    break
	                batch_size //= 2

	    if last_success == 0:
	        tqdm.write("⚠️ No probed batch size fit in GPU memory; falling back to 1.")
	        last_success = 1

	    if failed_at is not None:
	        tqdm.write(f"💡 GPU Hit limit at {failed_at}. Using {last_success} as optimal batch.")

	    return last_success

	def _load_dataset():
	    """Return the dict stored in data/trading_dataset.pt, fetching or building it first if missing."""
	    if not os.path.exists("data/trading_dataset.pt"):
	        try:
	            print("📦 Fetching dataset from Hugging Face...", flush=True)
	            hf_hub_download(repo_id=HF_DATASET_ID, filename="trading_dataset.pt", local_dir="data", repo_type="dataset")
	        except Exception as e:
	            # Best-effort download: say why it failed, then build locally.
	            print(f"⚠️ Dataset download failed: {e}", flush=True)
	            print("🚀 Starting on-the-fly dataset generation...", flush=True)
	            build_dataset()

	    print("🚀 Loading dataset from data/trading_dataset.pt...", flush=True)
	    # map_location="cpu": the validation loop converts returns with
	    # .numpy(), which only works for CPU tensors.
	    # NOTE(review): torch.load unpickles the file, which may have been
	    # downloaded; consider weights_only=True if the file holds only tensors.
	    return torch.load("data/trading_dataset.pt", map_location="cpu")


	def _log_backtest(epoch, avg_val_loss, all_preds, all_true, all_rets):
	    """Print validation loss, PnL, win rates and the confusion matrix for one epoch.

	    Labels follow the file's convention: 0 = HOLD, 1 = BUY, 2 = SELL.
	    """
	    buy_mask = all_preds == 1
	    sell_mask = all_preds == 2
	    buys = int(buy_mask.sum())
	    sells = int(sell_mask.sum())

	    buy_pnl = float(np.sum(all_rets[buy_mask]))
	    sell_pnl = float(-np.sum(all_rets[sell_mask]))  # Future return is inverse for SELL
	    total_pnl = buy_pnl + sell_pnl

	    # The epsilon keeps the ratio defined when no trades of that kind were made.
	    buy_win_rate = float(np.sum(buy_mask & (all_true == 1)) / (buys + 1e-6))
	    sell_win_rate = float(np.sum(sell_mask & (all_true == 2)) / (sells + 1e-6))

	    tqdm.write(f"\n--- Epoch {epoch+1} Statistics ---")
	    tqdm.write(f"Val Loss: {avg_val_loss:.4f} | Total PnL: {total_pnl:+.4f}")
	    tqdm.write(f"BUYs: {buys} | PnL: {buy_pnl:+.4f} | Win Rate: {buy_win_rate:.1%}")
	    tqdm.write(f"SELLs: {sells} | PnL: {sell_pnl:+.4f} | Win Rate: {sell_win_rate:.1%}")
	    tqdm.write(f"Activity: {(buys+sells)/len(all_preds):.1%}")

	    if buys + sells > 0:
	        cm = confusion_matrix(all_true, all_preds, labels=[0, 1, 2])
	        tqdm.write(f"Confusion Matrix (HOLD/BUY/SELL):\n{cm}")


	def _upload_checkpoint(checkpoint_path):
	    """Upload the checkpoint to the HF model repo HF_REPO_ID; failures are printed, not raised."""
	    try:
	        print(f"📤 Uploading to HF: {HF_REPO_ID}...", flush=True)
	        api = HfApi()
	        create_repo(repo_id=HF_REPO_ID, token=HF_TOKEN, exist_ok=True, repo_type="model")
	        api.upload_file(
	            path_or_fileobj=checkpoint_path,
	            path_in_repo=os.path.basename(checkpoint_path),
	            repo_id=HF_REPO_ID,
	            token=HF_TOKEN
	        )
	        print("✅ Upload successful!", flush=True)
	    except Exception as e:
	        print(f"⚠️ Upload failed: {e}", flush=True)


	def train():
	    """Train the model, report a per-epoch backtest, and save/upload the best checkpoint.

	    Steps: load (or fetch/generate) the dataset, make a seeded 80/20 random
	    split, build the model, probe for a batch size, then train for EPOCHS
	    epochs with mixed precision on CUDA. After each epoch the validation
	    split is evaluated, low-confidence BUY/SELL predictions are turned into
	    HOLD, and PnL statistics are printed. The model with the lowest
	    validation loss is written to models/ai_fusion_bitnet.safetensors and,
	    when HF_REPO_ID and HF_TOKEN are set, uploaded to the Hub.

	    Raises:
	        ValueError: If the dataset is too small to produce a training split.
	    """
	    checkpoint_path = "models/ai_fusion_bitnet.safetensors"
	    threshold = 0.6  # minimum softmax confidence to keep a BUY/SELL call

	    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	    on_cuda = device.type == "cuda"
	    print(f"Using device: {device}", flush=True)

	    # 1. Load or Generate Dataset
	    data = _load_dataset()
	    X, y, rets = data["X"], data["y"], data["rets"]

	    # 2. Split Data
	    # NOTE(review): a random split can leak information between train and
	    # validation if samples are overlapping windows of one series; confirm
	    # against how build_dataset() creates them.
	    dataset = TensorDataset(X, y, rets)
	    train_size = int(0.8 * len(dataset))
	    val_size = len(dataset) - train_size
	    if train_size == 0:
	        raise ValueError(
	            f"Dataset has {len(dataset)} samples; at least 2 are needed for an 80/20 split."
	        )
	    train_ds, val_ds = random_split(dataset, [train_size, val_size], generator=torch.Generator().manual_seed(42))

	    # 3. Create Model
	    input_dim = X.shape[2]
	    model = create_model(input_dim=input_dim, hidden_dim=HIDDEN_DIM, layers=LAYERS, seq_len=SEQ_LEN)
	    model.to(device)

	    # 4. Dynamic Batch Sizing
	    batch_size = get_max_batch_size(model, input_dim, SEQ_LEN, device)

	    # Pinned host memory only helps host-to-GPU copies; skip it on CPU.
	    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, pin_memory=on_cuda, num_workers=0)
	    val_loader = DataLoader(val_ds, batch_size=batch_size, pin_memory=on_cuda, num_workers=0)

	    optimizer = optim.AdamW(model.parameters(), lr=LR)

	    # 5. Class Weights (HOLD: 2.0, BUY: 1.0, SELL: 3.0)
	    class_weights = torch.tensor([2.0, 1.0, 3.0]).to(device)
	    criterion = nn.CrossEntropyLoss(weight=class_weights)

	    # Mixed Precision Setup: bf16 on compute capability >= 8, otherwise fp16.
	    dtype = torch.bfloat16 if on_cuda and torch.cuda.get_device_capability()[0] >= 8 else torch.float16
	    device_type = device.type
	    # Loss scaling is only needed for fp16 on CUDA. A disabled GradScaler
	    # passes the loss through and step() just calls optimizer.step(), so one
	    # code path below serves bf16, fp16 and CPU alike.
	    scaler = torch.amp.GradScaler(device_type, enabled=(on_cuda and dtype == torch.float16))

	    tqdm.write(f"🚀 Starting training (Batch Size: {batch_size}, Precision: {dtype})...")
	    best_val_loss = float('inf')
	    checkpoint_saved = False

	    for epoch in range(EPOCHS):
	        model.train()
	        correct = 0
	        total = 0

	        pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}")
	        for batch_X, batch_y, _ in pbar:
	            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
	            optimizer.zero_grad()

	            with torch.amp.autocast(device_type=device_type, dtype=dtype, enabled=on_cuda):
	                outputs = model(batch_X)
	                loss = criterion(outputs, batch_y)

	            scaler.scale(loss).backward()
	            scaler.step(optimizer)
	            scaler.update()

	            _, predicted = outputs.max(1)
	            total += batch_y.size(0)
	            correct += predicted.eq(batch_y).sum().item()

	            pbar.set_postfix({"loss": f"{loss.item():.4f}", "acc": f"{100.*correct/total:.1f}%"})

	        # Validation & Backtest
	        model.eval()
	        val_loss = 0
	        pred_chunks, true_chunks, ret_chunks = [], [], []

	        with torch.no_grad():
	            for batch_X, batch_y, batch_r in val_loader:
	                batch_X, batch_y = batch_X.to(device), batch_y.to(device)
	                with torch.amp.autocast(device_type=device_type, dtype=dtype, enabled=on_cuda):
	                    outputs = model(batch_X)
	                    loss = criterion(outputs, batch_y)

	                val_loss += loss.item()

	                # Softmax in float32 so the 0.6 cut-off is not decided by
	                # half-precision rounding.
	                probs = torch.softmax(outputs.float(), dim=-1)
	                conf, preds = torch.max(probs, dim=-1)

	                # If confidence < threshold, force HOLD (0).
	                # This reduces noisy trades and targets high-conviction signals.
	                final_preds = preds.clone()
	                final_preds[(conf < threshold) & (preds != 0)] = 0

	                # Keep one array per batch and join once after the loop.
	                pred_chunks.append(final_preds.cpu().numpy())
	                true_chunks.append(batch_y.cpu().numpy())
	                ret_chunks.append(batch_r.numpy())

	        avg_val_loss = val_loss / len(val_loader)

	        _log_backtest(
	            epoch,
	            avg_val_loss,
	            np.concatenate(pred_chunks),
	            np.concatenate(true_chunks),
	            np.concatenate(ret_chunks),
	        )

	        if avg_val_loss < best_val_loss:
	            best_val_loss = avg_val_loss
	            os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
	            save_file(model.state_dict(), checkpoint_path)
	            checkpoint_saved = True

	    if not checkpoint_saved:
	        # E.g. the validation loss was NaN every epoch. Do not claim success
	        # or upload a stale file left over from an earlier run.
	        print("⚠️ Training finished but validation loss never improved; no model was saved.", flush=True)
	        return

	    print("✅ Training complete. Final model saved.")

	    # Upload to HF
	    if HF_REPO_ID and HF_TOKEN:
	        _upload_checkpoint(checkpoint_path)

	# Run the training pipeline only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    train()