# tok = TokenCollater()
# ds = LhotseTTSDataset(tok)
Audio TTS Datasets
TTS datasets
LibriTTS
Lhotse-based Base Class
https://github.com/Lightning-AI/lightning/issues/10358 https://colab.research.google.com/drive/1HKSYPsWx_HoCdrnLpaPdYj5zwlPsM3NH
TTS Base Class
LibriTTS DataModule
# Quick look at the raw LibriTTS dataset. Each item is a tuple of:
# (Waveform, Sample_rate, Original_text, Normalized_text, Speaker_ID, Chapter_ID, Utterance_ID)
ds = LIBRITTS("../data/en", 'test-clean')

# Dump the first item, then plot its waveform (item[0]) at its sample rate (item[1]).
print(ds[0])
plot_waveform(ds[0][0], ds[0][1])

# Usage
# num_jobs=0 turns parallel computing off within jupyter notebook. Else it fails.
# NOTE(review): the call below passes num_jobs=1, not 0 -- confirm which value
# is intended and update either the comment or the argument.
dm = LibriTTSDataModule(
    target_dir="../data/en",
    dataset_parts="test-clean",
    output_dir="../data/en/LibriTTS/test-clean",
    num_jobs=1,
)

# skip download and use local data folder
# dm.prepare_data()

# libri = prepare_libritts("../data/en/LibriTTS", dataset_parts="test-clean")

# Set up the 'test' stage and build the dataloader used by the inspection
# code that follows.
dm.setup(stage='test')
test_dl = dm.test_dataloader()
# Pull a single batch from the test dataloader and list the keys it provides.
batch = next(iter(test_dl))
print(batch.keys())

# Inspect the padded features: overall shape, then display entry 3 as an image
# with its first two axes swapped (presumably to put time on the x-axis -- confirm).
print(batch['feats_pad'].shape)
plt.imshow(batch['feats_pad'][3].transpose(0,1))
print(batch['feats_lens'])

# Show the padded tokens and the corresponding length for the same entry (index 3).
print(batch['tokens_pad'][3], batch['tokens_lens'][3])

# Run the tokenizer's inverse over the whole batch; presumably this recovers
# the original text for each entry -- verify against the tokenizer implementation.
original_sentences = dm.tokenizer.inverse(batch['tokens_pad'], batch['tokens_lens'])
print(original_sentences)