# tok = TokenCollater()
# ds = LhotseTTSDataset(tok)
Audio TTS Datasets
TTS datasets
LibriTTS
Lhotse-based Base Class
https://github.com/Lightning-AI/lightning/issues/10358 https://colab.research.google.com/drive/1HKSYPsWx_HoCdrnLpaPdYj5zwlPsM3NH
TTS Base Class
LibriTTS DataModule
# Quick look at the raw LibriTTS 'test-clean' split stored under ../data/en.
# Each item is a tuple laid out as:
# (Waveform, Sample_rate, Original_text, Normalized_text, Speaker_ID, Chapter_ID, Utterance_ID)
ds = LIBRITTS("../data/en", 'test-clean')
print(ds[0])
# Plot the first utterance: element 0 is the waveform, element 1 its sample rate.
plot_waveform(ds[0][0], ds[0][1])
Usage
# num_jobs=0 turns parallel computing off within jupyter notebook. Else it fails.
# NOTE(review): the call below passes num_jobs=1, not 0 -- confirm which value
# is actually intended.
dm = LibriTTSDataModule(
    target_dir="../data/en",
    dataset_parts="test-clean",
    output_dir="../data/en/LibriTTS/test-clean",
    num_jobs=1,
)

# skip download and use local data folder
# dm.prepare_data()
# libri = prepare_libritts("../data/en/LibriTTS", dataset_parts="test-clean")

# Set up the data module for the test stage and pull a single batch.
dm.setup(stage='test')
test_dl = dm.test_dataloader()
batch = next(iter(test_dl))

# Inspect the batch: available keys and the shape of the padded features.
print(batch.keys())
print(batch['feats_pad'].shape)

# Display the padded features of the item at index 3.
# NOTE(review): presumably a torch tensor, where transpose(0, 1) swaps the two
# axes before plotting -- confirm.
plt.imshow(batch['feats_pad'][3].transpose(0, 1))
print(batch['feats_lens'])

# Token ids and token length for the same item.
print(batch['tokens_pad'][3], batch['tokens_lens'][3])

# Map the padded token ids of the whole batch back through the tokenizer.
original_sentences = dm.tokenizer.inverse(batch['tokens_pad'], batch['tokens_lens'])
print(original_sentences)