Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 85 additions & 22 deletions GPT_SoVITS/text/english.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,13 @@

from text import symbols

import unicodedata
from builtins import str as unicode
from g2p_en.expand import normalize_numbers
from nltk.tokenize import TweetTokenizer
word_tokenize = TweetTokenizer().tokenize
from nltk import pos_tag

current_file_path = os.path.dirname(__file__)
CMU_DICT_PATH = os.path.join(current_file_path, "cmudict.rep")
CMU_DICT_FAST_PATH = os.path.join(current_file_path, "cmudict-fast.rep")
Expand Down Expand Up @@ -188,9 +195,6 @@ def get_dict():
return g2p_dict


eng_dict = get_dict()


def text_normalize(text):
# todo: eng text normalize
# 适配中文及 g2p_en 标点
Expand All @@ -204,6 +208,16 @@ def text_normalize(text):
for p, r in rep_map.items():
text = re.sub(p, r, text)

# 来自 g2p_en 文本格式化处理
# 增加大写兼容
text = unicode(text)
text = normalize_numbers(text)
text = ''.join(char for char in unicodedata.normalize('NFD', text)
if unicodedata.category(char) != 'Mn') # Strip accents
text = re.sub("[^ A-Za-z'.,?!\-]", "", text)
text = re.sub(r"(?i)i\.e\.", "that is", text)
text = re.sub(r"(?i)e\.g\.", "for example", text)

return text


Expand All @@ -220,30 +234,85 @@ def __init__(self):
for word in ["AE", "AI", "AR", "IOS", "HUD", "OS"]:
del self.cmu[word.lower()]

# "A" 落单不读 "AH0" 读 "EY1"
self.cmu['a'] = [['EY1']]


def predict(self, word):
# 小写 oov 长度小于等于 3 直接读字母
def __call__(self, text):
# tokenization
words = word_tokenize(text)
tokens = pos_tag(words) # tuples of (word, tag)

# steps
prons = []
for o_word, pos in tokens:
# 还原 g2p_en 小写操作逻辑
word = o_word.lower()

if re.search("[a-z]", word) is None:
pron = [word]
# 先把单字母推出去
elif len(word) == 1:
# 单读 A 发音修正, 这里需要原格式 o_word 判断大写
if o_word == "A":
pron = ['EY1']
else:
pron = self.cmu[word][0]
# g2p_en 原版多音字处理
elif word in self.homograph2features: # Check homograph
pron1, pron2, pos1 = self.homograph2features[word]
if pos.startswith(pos1):
pron = pron1
else:
pron = pron2
else:
# 递归查找预测
pron = self.qryword(word)

prons.extend(pron)
prons.extend([" "])

return prons[:-1]


def qryword(self, word):
# 查字典, 单字母除外
if len(word) > 1 and word in self.cmu: # lookup CMU dict
return self.cmu[word][0]

# oov 长度小于等于 3 直接读字母
if (len(word) <= 3):
return [phone for w in word for phone in self(w)]
phones = []
for w in word:
# 单读 A 发音修正, 此处不存在大写的情况
if w == "a":
phones.extend(['EY1'])
else:
phones.extend(self.cmu[w][0])
return phones

# 尝试分离所有格
if re.match(r"^([a-z]+)('s)$", word):
phone = self(word[:-2])
phone.extend(['Z'])
return phone
phones = self.qryword(word[:-2])
# P T K F TH HH 无声辅音结尾 's 发 ['S']
if phones[-1] in ['P', 'T', 'K', 'F', 'TH', 'HH']:
phones.extend(['S'])
# S Z SH ZH CH JH 擦声结尾 's 发 ['IH1', 'Z'] 或 ['AH0', 'Z']
elif phones[-1] in ['S', 'Z', 'SH', 'ZH', 'CH', 'JH']:
phones.extend(['AH0', 'Z'])
# B D G DH V M N NG L R W Y 有声辅音结尾 's 发 ['Z']
# AH0 AH1 AH2 EY0 EY1 EY2 AE0 AE1 AE2 EH0 EH1 EH2 OW0 OW1 OW2 UH0 UH1 UH2 IY0 IY1 IY2 AA0 AA1 AA2 AO0 AO1 AO2
# ER ER0 ER1 ER2 UW0 UW1 UW2 AY0 AY1 AY2 AW0 AW1 AW2 OY0 OY1 OY2 IH IH0 IH1 IH2 元音结尾 's 发 ['Z']
else:
phones.extend(['Z'])
return phones

# 尝试进行分词,应对复合词
comps = wordsegment.segment(word.lower())

# 无法分词的送回去预测
if len(comps)==1:
return super().predict(word)
return self.predict(word)

# 可以分词的递归处理
return [phone for comp in comps for phone in self(comp)]
return [phone for comp in comps for phone in self.qryword(comp)]


_g2p = en_G2p()
Expand All @@ -258,12 +327,6 @@ def g2p(text):


if __name__ == "__main__":
# print(get_dict())
print(g2p("hello"))
print(g2p("In this; paper, we propose 1 DSPGAN, a GAN-based universal vocoder."))
# all_phones = set()
# for k, syllables in eng_dict.items():
# for group in syllables:
# for ph in group:
# all_phones.add(ph)
# print(all_phones)
print(g2p(text_normalize("e.g. I used openai's AI tool to draw a picture.")))
print(g2p(text_normalize("In this; paper, we propose 1 DSPGAN, a GAN-based universal vocoder.")))