# ml_trainer_env/server/models_nn.py
# (Hugging Face upload metadata: uploaded via huggingface_hub, commit 8f24287 verified)
"""
Neural network architectures for the ML Training Optimizer environment.
Provides three model architectures of increasing complexity:
- SimpleMLP: For MNIST (Easy task)
- SmallCNN: For FashionMNIST (Medium task)
- DeeperCNN: For CIFAR-10 (Hard task)
All models accept a configurable dropout rate set by the agent.
"""
import torch.nn as nn
class SimpleMLP(nn.Module):
    """
    Two-hidden-layer MLP for MNIST digit classification.

    ~100k parameters; roughly 0.5-1s per epoch on CPU with 4k samples.
    Layer sizes: 784 -> 128 -> 64 -> 10, with ReLU + Dropout after each
    hidden layer. The dropout probability is chosen by the agent.
    """

    def __init__(self, dropout: float = 0.0):
        super().__init__()
        self.flatten = nn.Flatten()
        # Build the stack as a flat list, then unpack into a Sequential so
        # the submodule indices (and thus state_dict keys) stay stable.
        stack = [
            nn.Linear(784, 128), nn.ReLU(), nn.Dropout(dropout),
            nn.Linear(128, 64), nn.ReLU(), nn.Dropout(dropout),
            nn.Linear(64, 10),
        ]
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        # Flatten (N, 1, 28, 28) images to (N, 784) before the MLP.
        return self.layers(self.flatten(x))
class SmallCNN(nn.Module):
    """
    Compact CNN for FashionMNIST classification.

    ~200k parameters; roughly 1-2s per epoch on CPU with 6.5k samples.
    Two Conv->ReLU->MaxPool stages (28x28 -> 14x14 -> 7x7) feed a
    128-unit fully connected head with agent-configurable dropout.
    """

    def __init__(self, dropout: float = 0.0):
        super().__init__()
        # Assemble both conv stages from a channel plan; unpacking into a
        # single Sequential keeps submodule indices (state_dict keys) identical.
        stages = []
        for in_ch, out_ch in ((1, 32), (32, 64)):
            stages += [
                nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(2),
            ]
        self.features = nn.Sequential(*stages)
        head = [
            nn.Flatten(),
            nn.Linear(64 * 7 * 7, 128),  # 7x7 spatial map after two 2x pools
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(128, 10),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        return self.classifier(self.features(x))
class DeeperCNN(nn.Module):
    """
    Three-stage CNN for CIFAR-10 classification.

    ~500k parameters; roughly 2-3s per epoch on CPU with 8k samples.
    Each stage is Conv->BatchNorm->ReLU->MaxPool, halving the spatial
    size (32 -> 16 -> 8 -> 4) while widening channels (3 -> 32 -> 64 -> 128).
    A 256-unit FC head with agent-configurable dropout produces the logits.
    """

    def __init__(self, dropout: float = 0.0):
        super().__init__()
        # Channel plan for the three conv stages; building one flat list and
        # unpacking keeps Sequential indices (state_dict keys) unchanged.
        plan = ((3, 32), (32, 64), (64, 128))
        stages = []
        for in_ch, out_ch in plan:
            stages += [
                nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2),
            ]
        self.features = nn.Sequential(*stages)
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 4 * 4, 256),  # 4x4 spatial map after three 2x pools
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(256, 10),
        )

    def forward(self, x):
        return self.classifier(self.features(x))
def create_model(model_type: str, dropout: float = 0.0) -> nn.Module:
    """
    Factory: instantiate a model architecture by registry name.

    Args:
        model_type: one of "simple_mlp", "small_cnn", "deeper_cnn".
        dropout: dropout probability forwarded to the model constructor.

    Raises:
        ValueError: if `model_type` is not a known registry key.
    """
    registry = {
        "simple_mlp": SimpleMLP,
        "small_cnn": SmallCNN,
        "deeper_cnn": DeeperCNN,
    }
    try:
        model_cls = registry[model_type]
    except KeyError:
        # `from None` keeps the traceback clean; the message already names
        # the bad key and the valid choices.
        raise ValueError(
            f"Unknown model type: {model_type}. Choose from {list(registry.keys())}"
        ) from None
    return model_cls(dropout=dropout)