"""
BTLM_Extensions: Extensions Package for BitTransformerLM
=========================================================

This package provides advanced optimizers and compression techniques
as extensions for BitTransformerLM, allowing easy experimentation with
different training configurations.

Available Extensions:

Optimizers:
- Muon: Orthogonal momentum optimizer with Newton-Schulz iterations
- Lion: EvoLved Sign Momentum optimizer for memory efficiency
- Adafactor: Memory-efficient factorized optimizer

Compression:
- RLE: Advanced Run-Length Encoding with multiple schemes

Usage:
    from BTLM_Extensions import configure_muon_optimizer, RLEEncoder

    # Use Muon optimizer
    optimizer, scheduler = configure_muon_optimizer(model, lr=1e-3)

    # Use RLE compression
    encoder = RLEEncoder(scheme="adaptive")
    compressed, metadata = encoder.encode(data)
"""

__version__ = "1.0.0"
__author__ = "BitTransformerLM Extensions"
__email__ = "extensions@bittransformerlm.ai"

from .muon_optimizer import (
    Muon,
    configure_muon_optimizer,
    create_muon_training_config,
)

from .lion_optimizer import (
    Lion,
    AdaptiveLion,
    configure_lion_optimizer,
    configure_adaptive_lion_optimizer,
    create_lion_training_config,
)

from .adafactor_optimizer import (
    Adafactor,
    AdafactorScheduler,
    configure_adafactor_optimizer,
    configure_adafactor_with_scheduler,
    create_adafactor_training_config,
    analyze_memory_usage,
)

from .rle_compression import (
    RLEEncoder,
    CompressedBitDataset,
    create_compression_aware_loss,
    integrate_rle_with_training,
    benchmark_compression_schemes,
    create_rle_training_config,
)

def get_optimizer_config(optimizer_type: str, **kwargs):
    """
    Get configuration for specified optimizer type.

    Args:
        optimizer_type: Type of optimizer ('muon', 'lion', 'adafactor')
        **kwargs: Optimizer-specific parameters

    Returns:
        Dictionary with optimizer configuration
    """
    if optimizer_type.lower() == "muon":
        return create_muon_training_config(**kwargs)
    elif optimizer_type.lower() == "lion":
        return create_lion_training_config(**kwargs)
    elif optimizer_type.lower() == "adafactor":
        return create_adafactor_training_config(**kwargs)
    else:
        raise ValueError(f"Unknown optimizer type: {optimizer_type}")

def configure_optimizer(optimizer_type: str, model, **kwargs):
    """
    Configure optimizer based on type string.

    Args:
        optimizer_type: Type of optimizer ('muon', 'lion', 'adafactor')
        model: PyTorch model to optimize
        **kwargs: Optimizer-specific parameters

    Returns:
        Tuple of (optimizer, scheduler)
    """
    if optimizer_type.lower() == "muon":
        return configure_muon_optimizer(model, **kwargs)
    elif optimizer_type.lower() == "lion":
        return configure_lion_optimizer(model, **kwargs)
    elif optimizer_type.lower() == "adafactor":
        return configure_adafactor_optimizer(model, **kwargs)
    else:
        raise ValueError(f"Unknown optimizer type: {optimizer_type}")

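# Illustrative use of the two dispatch helpers above, as a commented sketch
# (not executed at import time). It assumes `model` is an instantiated
# BitTransformerLM and that each backend accepts an `lr` keyword, as the
# module docstring suggests:
#
#     config = get_optimizer_config("lion", lr=1e-4)
#     optimizer, scheduler = configure_optimizer("lion", model, lr=1e-4)
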
class ExtensionManager:
    """
    Manager class for easy integration with BitTransformerLM.

    Provides a unified interface for switching between optimizers
    and compression schemes.
    """

    SUPPORTED_OPTIMIZERS = ["muon", "lion", "adafactor"]
    SUPPORTED_COMPRESSION = ["rle"]

    def __init__(self):
        self.current_optimizer = None
        self.current_compression = None

    def setup_optimizer(self, optimizer_type: str, model, **kwargs):
        """Set up an optimizer for training."""
        optimizer_type = optimizer_type.lower()
        if optimizer_type not in self.SUPPORTED_OPTIMIZERS:
            raise ValueError(f"Unsupported optimizer: {optimizer_type}")

        optimizer, scheduler = configure_optimizer(optimizer_type, model, **kwargs)
        self.current_optimizer = optimizer_type
        return optimizer, scheduler

    def setup_compression(self, compression_type: str, **kwargs):
        """Set up a compression scheme."""
        compression_type = compression_type.lower()
        if compression_type not in self.SUPPORTED_COMPRESSION:
            raise ValueError(f"Unsupported compression: {compression_type}")

        if compression_type == "rle":
            encoder = RLEEncoder(**kwargs)
            self.current_compression = compression_type
            return encoder

    def create_training_config(self, optimizer_type: str = "muon", compression_type: str = "rle",
                               optimizer_kwargs=None, compression_kwargs=None):
        """Create a comprehensive training configuration.

        Optimizer and compression options are passed as separate dicts so
        that optimizer-specific parameters are not forwarded to the
        compression config builder (and vice versa).
        """
        config = {
            "optimizer": get_optimizer_config(optimizer_type, **(optimizer_kwargs or {})),
            "compression": (create_rle_training_config(**(compression_kwargs or {}))
                            if compression_type == "rle" else None),
            "extensions": {
                "optimizer_type": optimizer_type,
                "compression_type": compression_type,
                "version": __version__,
            },
        }
        return config

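    # Illustrative shape of the returned config, as a sketch; the exact keys
    # under "optimizer" and "compression" come from the respective
    # create_*_training_config helpers:
    #
    #     {
    #         "optimizer": {...},            # from get_optimizer_config
    #         "compression": {...} or None,  # from create_rle_training_config
    #         "extensions": {
    #             "optimizer_type": "muon",
    #             "compression_type": "rle",
    #             "version": "1.0.0",
    #         },
    #     }
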
    def benchmark_optimizers(self, model, test_data, epochs: int = 5):
        """Benchmark all supported optimizers on the given test data."""
        import copy
        import time

        import torch.nn.functional as F

        results = {}

        for opt_type in self.SUPPORTED_OPTIMIZERS:
            print(f"Benchmarking {opt_type} optimizer...")

            try:
                # Deep-copy the model so every optimizer starts from
                # identical weights.
                model_copy = copy.deepcopy(model)

                optimizer, scheduler = self.setup_optimizer(opt_type, model_copy, lr=1e-3)

                start_time = time.time()
                losses = []

                for epoch in range(epochs):
                    optimizer.zero_grad()

                    # Next-bit prediction: logits at step t predict the
                    # bit at step t + 1, so shift both by one position.
                    logits, _ = model_copy(test_data)
                    pred = logits[:, :-1, :].reshape(-1, logits.size(-1))
                    target = test_data[:, 1:].reshape(-1)
                    loss = F.cross_entropy(pred, target)

                    loss.backward()
                    optimizer.step()
                    if scheduler is not None:
                        scheduler.step()

                    losses.append(loss.item())

                end_time = time.time()

                results[opt_type] = {
                    "final_loss": losses[-1],
                    "avg_loss": sum(losses) / len(losses),
                    "training_time": end_time - start_time,
                    "convergence": losses[0] - losses[-1],
                    "success": True,
                }

            except Exception as e:
                results[opt_type] = {
                    "final_loss": float("inf"),
                    "avg_loss": float("inf"),
                    "training_time": 0,
                    "convergence": 0,
                    "success": False,
                    "error": str(e),
                }

        return results

extension_manager = ExtensionManager()

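# Example use of the shared manager instance, as a commented sketch. It
# assumes `model` is a BitTransformerLM and `test_data` is a (batch, seq_len)
# tensor of bits; the "adaptive" RLE scheme is taken from the module
# docstring above:
#
#     optimizer, scheduler = extension_manager.setup_optimizer("muon", model, lr=1e-3)
#     encoder = extension_manager.setup_compression("rle", scheme="adaptive")
#     results = extension_manager.benchmark_optimizers(model, test_data, epochs=5)
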
__all__ = [
    "Muon",
    "Lion",
    "AdaptiveLion",
    "Adafactor",
    "AdafactorScheduler",

    "configure_muon_optimizer",
    "configure_lion_optimizer",
    "configure_adaptive_lion_optimizer",
    "configure_adafactor_optimizer",
    "configure_adafactor_with_scheduler",

    "create_muon_training_config",
    "create_lion_training_config",
    "create_adafactor_training_config",

    "RLEEncoder",
    "CompressedBitDataset",
    "create_compression_aware_loss",
    "integrate_rle_with_training",
    "benchmark_compression_schemes",
    "create_rle_training_config",

    "get_optimizer_config",
    "configure_optimizer",
    "ExtensionManager",
    "extension_manager",
    "analyze_memory_usage",
    "get_version",
    "list_optimizers",
    "list_compression_schemes",
    "get_package_info",
]

def get_version():
    """Get package version."""
    return __version__


def list_optimizers():
    """List all available optimizers."""
    return ExtensionManager.SUPPORTED_OPTIMIZERS.copy()


def list_compression_schemes():
    """List all available compression schemes."""
    return ExtensionManager.SUPPORTED_COMPRESSION.copy()


def get_package_info():
    """Get package information."""
    return {
        "name": "BTLM_Extensions",
        "version": __version__,
        "author": __author__,
        "email": __email__,
        "optimizers": list_optimizers(),
        "compression": list_compression_schemes(),
        "description": "Advanced optimizers and compression for BitTransformerLM",
    }

def _welcome_message():
    """Print a welcome message listing the available extensions.

    Not invoked automatically on import; call it manually if desired.
    """
    print(f"BTLM_Extensions v{__version__} loaded!")
    print(f"Available optimizers: {', '.join(list_optimizers())}")
    print(f"Available compression: {', '.join(list_compression_schemes())}")
    print("Use help(BTLM_Extensions) for detailed documentation")

def demo_usage():
    """
    Demonstration of BTLM_Extensions usage:

        # Quick optimizer swap
        from BTLM_Extensions import configure_optimizer

        # Try different optimizers
        muon_opt, muon_sched = configure_optimizer("muon", model, lr=1e-3)
        lion_opt, lion_sched = configure_optimizer("lion", model, lr=1e-4)
        adafactor_opt, adafactor_sched = configure_optimizer("adafactor", model)

        # Use with BitTransformerLM training
        from bit_transformer.training import train_loop

        train_loop(model, data, optimizer=muon_opt, scheduler=muon_sched)

        # Advanced compression
        from BTLM_Extensions import RLEEncoder, integrate_rle_with_training

        # Set up compression-aware training
        dataset, loss_fn = integrate_rle_with_training(model, data)

        # Benchmark optimizers
        from BTLM_Extensions import extension_manager

        results = extension_manager.benchmark_optimizers(model, test_data)
        print("Benchmark results:", results)
    """
    pass