"""Round-trip an audio file through a pretrained XCodec2 model.

Reads ``test.flac``, encodes it to codes with ``XCodec2Model.encode_code``,
prints the codes, decodes them back to a waveform with ``decode_code``, and
writes the result to ``reconstructed.wav`` using the input file's sample rate.
"""

import torch
import soundfile as sf
from transformers import AutoConfig

from modeling_xcodec2 import XCodec2Model

model_path = "/data/zheny/xcodec2"

model = XCodec2Model.from_pretrained(model_path)
# Switch to inference behaviour and move the model to the CUDA device.
model.eval().cuda()

# sf.read returns the samples as a NumPy array plus the file's sample rate.
wav, sr = sf.read("test.flac")
# Cast to float32 and prepend one dimension.
# NOTE(review): assumes test.flac is mono, so wav is 1-D and wav_tensor is
# (1, T); a multi-channel file would yield (1, T, C) — confirm what
# encode_code expects.
wav_tensor = torch.from_numpy(wav).float().unsqueeze(0)

# Inference only: no autograd graph is needed for encode/decode.
with torch.no_grad():
    vq_code = model.encode_code(input_waveform=wav_tensor)
    print(vq_code)
    # Bring the reconstruction back to the CPU so it can be converted to NumPy.
    recon_wav = model.decode_code(vq_code).cpu()

# recon_wav is indexed as a 3-D tensor; [0, 0, :] selects a single 1-D signal
# (presumably first batch item, first channel — TODO confirm against the model).
# The output is written at the input file's sample rate.
sf.write("reconstructed.wav", recon_wav[0, 0, :].numpy(), sr)