本文整理 FastPitch / FastSpeech2 预处理代码(prepare_align.py、preprocess.yaml、libritts.py)的学习笔记。
1. prepare_align.py
import argparse
import yaml
from preprocessor import ljspeech, aishell3, libritts
# config为配置文件中的内容,dataset为一个配置项,用以识别需要训练的数据集
def main(config):
    """Dispatch alignment preparation to every dataset named in the config.

    ``config["dataset"]`` may mention more than one corpus; each matching
    preprocessor runs (independent ``if``s, deliberately not ``elif``).
    """
    dataset = config["dataset"]
    if "LJSpeech" in dataset:
        ljspeech.prepare_align(config)
    if "AISHELL3" in dataset:
        aishell3.prepare_align(config)
    if "LibriTTS" in dataset:
        libritts.prepare_align(config)
if __name__ == "__main__":
    # Single positional CLI argument: the path to preprocess.yaml.
    parser = argparse.ArgumentParser()
    parser.add_argument("config", type=str, help="path to preprocess.yaml")
    args = parser.parse_args()
    # Parse the whole YAML config. FullLoader resolves the standard YAML tag
    # set but refuses arbitrary Python object construction, so it is safe to
    # use on a local config file. The `with` block guarantees the file handle
    # is closed (the original `yaml.load(open(...))` leaked it).
    with open(args.config, "r") as config_file:
        config = yaml.load(config_file, Loader=yaml.FullLoader)
    main(config)
2. preprocess.yaml
dataset: "LibriTTS"

path:
  corpus_path: "/home/ming/Data/LibriTTS/train-clean-360"
  lexicon_path: "lexicon/librispeech-lexicon.txt"
  raw_path: "./raw_data/LibriTTS"
  preprocessed_path: "./preprocessed_data/LibriTTS"

preprocessing:
  # Number of utterances held out for validation.
  val_size: 512
  text:
    text_cleaners: ["english_cleaners"]
    language: "en"
  audio:
    sampling_rate: 22050
    # Peak value used when converting float waveforms to int16 PCM.
    max_wav_value: 32768.0
  stft:
    filter_length: 1024
    hop_length: 256
    win_length: 1024
  mel:
    n_mel_channels: 80
    mel_fmin: 0
    mel_fmax: 8000 # please set to 8000 for HiFi-GAN vocoder, set to null for MelGAN vocoder
  pitch:
    feature: "phoneme_level" # support 'phoneme_level' or 'frame_level'
    normalization: True
  energy:
    feature: "phoneme_level" # support 'phoneme_level' or 'frame_level'
    normalization: True
3. preprocessor/libritts.py
import os
import librosa
import numpy as np
from scipy.io import wavfile
from tqdm import tqdm
from text import _clean_text
def prepare_align(config):
    """Convert a LibriTTS corpus into the raw_data layout used for alignment.

    For every ``*.wav`` under ``corpus_path/<speaker>/<chapter>/`` this writes,
    into ``raw_path/<speaker>/``:
      * a peak-normalized 16-bit PCM copy of the wav, resampled to
        ``sampling_rate``, and
      * a ``<base>.lab`` file holding the cleaned transcript taken from the
        matching ``<base>.normalized.txt`` file.
    """
    in_dir = config["path"]["corpus_path"]
    out_dir = config["path"]["raw_path"]
    sampling_rate = config["preprocessing"]["audio"]["sampling_rate"]
    max_wav_value = config["preprocessing"]["audio"]["max_wav_value"]
    cleaners = config["preprocessing"]["text"]["text_cleaners"]
    # Corpus layout: in_dir/<speaker>/<chapter>/<utt>.{wav,normalized.txt,original.txt}
    for speaker in tqdm(os.listdir(in_dir)):
        for chapter in os.listdir(os.path.join(in_dir, speaker)):
            for file_name in os.listdir(os.path.join(in_dir, speaker, chapter)):
                # Only the .wav files drive the loop; transcripts are looked up by name.
                if not file_name.endswith(".wav"):
                    continue
                base_name = file_name[:-4]
                text_path = os.path.join(
                    in_dir, speaker, chapter, "{}.normalized.txt".format(base_name)
                )
                wav_path = os.path.join(
                    in_dir, speaker, chapter, "{}.wav".format(base_name)
                )
                # The .normalized.txt file holds a single transcript sentence.
                with open(text_path, encoding="utf-8") as f:
                    text = f.readline().strip("\n")
                # Normalize the transcript (lowercasing, number/abbreviation
                # expansion, whitespace collapse — see the configured cleaners).
                text = _clean_text(text, cleaners)
                os.makedirs(os.path.join(out_dir, speaker), exist_ok=True)
                # sr must be passed by keyword: librosa >= 0.10 made every
                # argument after the path keyword-only.
                wav, _ = librosa.load(wav_path, sr=sampling_rate)
                # Peak-normalize to the full int16 range.
                # NOTE(review): an all-zero (silent) file would divide by zero
                # here — confirm the corpus contains none before reusing this.
                wav = wav / np.abs(wav).max() * max_wav_value
                wavfile.write(
                    os.path.join(out_dir, speaker, "{}.wav".format(base_name)),
                    sampling_rate,
                    # int16 matches the +/-32768 range targeted above.
                    wav.astype(np.int16),
                )
                # Write the cleaned transcript next to the wav as <base>.lab.
                with open(
                    os.path.join(out_dir, speaker, "{}.lab".format(base_name)),
                    "w",
                ) as f1:
                    f1.write(text)
3.1 _clean_text():调用文本处理
代码100分 def _clean_text(text, cleaner_names):
for name in cleaner_names:
# getattr() 返回cleaners的name属性
cleaner = getattr(cleaners, name)
if not cleaner:
raise Exception("Unknown cleaner: %s" % name)
# cleaner = english_cleaners
# 调用 def english_cleaners(text):
text = cleaner(text)
return text
3.2 function english_cleaners(text) 文本处理
def english_cleaners(text):
    '''Pipeline for English text, including number and abbreviation expansion.'''
    # Apply each normalization pass in order: fold to ASCII, lowercase,
    # spell out numbers, expand abbreviations, then collapse whitespace.
    pipeline = (
        convert_to_ascii,
        lowercase,
        expand_numbers,
        expand_abbreviations,
        collapse_whitespace,
    )
    for step in pipeline:
        text = step(text)
    return text
3.3 function expand_numbers(text) 文本中的数字处理
""" from https://github.com/keithito/tacotron """
import inflect
import re
# Shared inflect engine used by the number-expansion helpers below.
_inflect = inflect.engine()
# An integer containing thousands separators, e.g. "1,234" (digits at both ends).
_comma_number_re = re.compile(r"([0-9][0-9\,]+[0-9])")
# A decimal number such as "3.14".
_decimal_number_re = re.compile(r"([0-9]+\.[0-9]+)")
# A pound-sterling amount: "£" followed by digits (commas allowed).
_pounds_re = re.compile(r"£([0-9\,]*[0-9]+)")
# A dollar amount: "$" followed by digits (commas and a decimal point allowed).
_dollars_re = re.compile(r"\$([0-9\.\,]*[0-9]+)")
# An ordinal such as "1st", "2nd", "17th".
_ordinal_re = re.compile(r"[0-9]+(st|nd|rd|th)")
# Any remaining run of digits.
_number_re = re.compile(r"[0-9]+")
def _remove_commas(m):
return m.group(1).replace(",", "")
def _expand_decimal_point(m):
return m.group(1).replace(".", " point ")
def _expand_dollars(m):
match = m.group(1)
parts = match.split(".")
if len(parts) > 2:
return match + " dollars" # Unexpected format
dollars = int(parts[0]) if parts[0] else 0
cents = int(parts[1]) if len(parts) > 1 and parts[1] else 0
if dollars and cents:
dollar_unit = "dollar" if dollars == 1 else "dollars"
cent_unit = "cent" if cents == 1 else "cents"
return "%s %s, %s %s" % (dollars, dollar_unit, cents, cent_unit)
elif dollars:
dollar_unit = "dollar" if dollars == 1 else "dollars"
return "%s %s" % (dollars, dollar_unit)
elif cents:
cent_unit = "cent" if cents == 1 else "cents"
return "%s %s" % (cents, cent_unit)
else:
return "zero dollars"
def _expand_ordinal(m):
    """Spell out a matched ordinal, e.g. "17th" -> "seventeenth" (via inflect)."""
    ordinal_text = m.group(0)
    return _inflect.number_to_words(ordinal_text)
def _expand_number(m):
    """Convert a matched integer to English words, reading 1001-2999 as years."""
    num = int(m.group(0))
    # Outside the year-like range, spell the number plainly (no "and").
    if num <= 1000 or num >= 3000:
        return _inflect.number_to_words(num, andword="")
    if num == 2000:
        return "two thousand"
    if 2000 < num < 2010:
        return "two thousand " + _inflect.number_to_words(num % 100)
    if num % 100 == 0:
        return _inflect.number_to_words(num // 100) + " hundred"
    # Year style: digits grouped in pairs, zero spoken as "oh"
    # (e.g. 1906 -> "nineteen oh six").
    return _inflect.number_to_words(
        num, andword="", zero="oh", group=2
    ).replace(", ", " ")
def normalize_numbers(text):
    """Rewrite numeric tokens in `text` as English words / normalized forms.

    Order matters: commas are stripped first so the later currency and plain
    number passes see "1000" rather than "1,000"; currency and decimals are
    handled before the catch-all digit pass consumes them.
    """
    substitutions = (
        (_comma_number_re, _remove_commas),           # "3,333" -> "3333"
        (_pounds_re, r"\1 pounds"),                   # "£3333" -> "3333 pounds"
        (_dollars_re, _expand_dollars),               # "$333.03" -> "333 dollars, 3 cents"
        (_decimal_number_re, _expand_decimal_point),  # "333.3" -> "333 point 3"
        (_ordinal_re, _expand_ordinal),               # "17th" -> "seventeenth"
        (_number_re, _expand_number),                 # remaining digits -> words
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return text
版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 举报,一经查实,本站将立刻删除。
转载请注明出处: https://daima100.com/4187.html