| | |
| | |
| | |
| | |
| | |
| |
|
import random
import typing as tp

import numpy as np
import torch
from audiocraft.models.multibanddiffusion import MultiBandDiffusion, DiffusionProcess
from audiocraft.models import EncodecModel, DiffusionUnet
from audiocraft.modules import SEANetEncoder, SEANetDecoder
from audiocraft.modules.diffusion_schedule import NoiseSchedule
from audiocraft.quantization import DummyQuantizer
| |
|
| |
|
class TestMBD:
    """Smoke test for ``MultiBandDiffusion`` assembled from small, freshly built parts."""

    def _create_mbd(self,
                    sample_rate: int,
                    channels: int,
                    n_filters: int = 3,
                    n_residual_layers: int = 1,
                    ratios: tp.Optional[tp.List[int]] = None,
                    num_steps: int = 1000,
                    codec_dim: int = 128,
                    **kwargs):
        """Build a single-band ``MultiBandDiffusion`` around a tiny Encodec model.

        No checkpoint is loaded here: every component is constructed directly
        from the given hyper-parameters.

        Args:
            sample_rate: Sample rate forwarded to ``EncodecModel``.
            channels: Number of audio channels, used for the encoder, the
                decoder, the codec model and as ``chin`` of the diffusion UNet.
            n_filters: ``n_filters`` of the SEANet encoder and decoder.
            n_residual_layers: ``n_residual_layers`` of the SEANet encoder and decoder.
            ratios: ``ratios`` of the SEANet encoder and decoder. Defaults to
                ``[5, 4, 3, 2]`` when omitted.
            num_steps: Number of diffusion steps, shared by the UNet and the
                noise schedule.
            codec_dim: Latent dimension shared by the codec and the UNet.
            **kwargs: Extra keyword arguments forwarded to ``EncodecModel``.

        Returns:
            A ``MultiBandDiffusion`` holding one ``DiffusionProcess`` and the codec model.
        """
        # Use a None sentinel rather than a list literal in the signature, so
        # that no list object is shared between calls.
        if ratios is None:
            ratios = [5, 4, 3, 2]
        # NOTE(review): the product of the ratios is what gets passed as
        # `frame_rate`; presumably a placeholder value for this shape-only
        # test -- confirm before relying on it.
        frame_rate = np.prod(ratios)
        encoder = SEANetEncoder(channels=channels, dimension=codec_dim, n_filters=n_filters,
                                n_residual_layers=n_residual_layers, ratios=ratios)
        decoder = SEANetDecoder(channels=channels, dimension=codec_dim, n_filters=n_filters,
                                n_residual_layers=n_residual_layers, ratios=ratios)
        quantizer = DummyQuantizer()
        compression_model = EncodecModel(encoder, decoder, quantizer, frame_rate=frame_rate,
                                         sample_rate=sample_rate, channels=channels, **kwargs)
        diffusion_model = DiffusionUnet(chin=channels, num_steps=num_steps, codec_dim=codec_dim)
        schedule = NoiseSchedule(device='cpu', num_steps=num_steps)
        DP = DiffusionProcess(model=diffusion_model, noise_schedule=schedule)
        mbd = MultiBandDiffusion(DPs=[DP], codec_model=compression_model)
        return mbd

    def test_model(self):
        """Check that ``regenerate`` returns a tensor with the same shape as its input.

        Runs ten rounds on mono batches of size 2 whose length is drawn
        uniformly from ``[1, 10_000)``.
        """
        # Seed both generators in use: `random` picks the lengths and torch
        # draws the input tensors.
        random.seed(1234)
        torch.manual_seed(1234)
        sample_rate = 24_000
        channels = 1
        codec_dim = 128
        mbd = self._create_mbd(sample_rate=sample_rate, channels=channels, codec_dim=codec_dim)
        for _ in range(10):
            length = random.randrange(1, 10_000)
            x = torch.randn(2, channels, length)
            res = mbd.regenerate(x, sample_rate)
            assert res.shape == x.shape
| |
|