# Normalizer handed to the aligner: the `english_cleaners` attribute of a
# TTSTextNormalizer instance.
text_normalizer = TTSTextNormalizer().english_cleaners
# Device is pinned to CPU for CI.
aligner = AlignerWAV2VEC2(text_normalizer, device="cpu")

# One LibriTTS test-clean utterance: the audio file and its matching
# ".original.txt" transcript.
wav_path = "../data/en/LibriTTS/test-clean/1089/134686/1089_134686_000015_000001.wav"
txt_path = "../data/en/LibriTTS/test-clean/1089/134686/1089_134686_000015_000001.original.txt"

# torchaudio.load returns two values; the second is kept as `sr`.
wav, sr = torchaudio.load(wav_path)

# Decode the transcript explicitly as UTF-8 so the result does not depend on
# the platform's default locale encoding.
with open(txt_path, encoding="utf-8") as f:
    txt = f.read()
alignments = aligner.get_alignments(wav, txt)

Aligners
Collection of Aligner models