8000 为中文tts添加了人民币与美元播报 by EllaZhangCA · Pull Request #2613 · RVC-Boss/GPT-SoVITS · GitHub
[go: up one dir, main page]

Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions GPT_SoVITS/text/cleaner.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@
from text import symbols as symbols_v1
from text import symbols2 as symbols_v2


# Symbols that get dedicated handling, as (symbol, language, target symbol) triples.
# clean_text routes the text to clean_special when one of these symbols is present
# and the language matches.
special = [
    # ("%", "zh", "SP"),
    # ("¥", "zh", "SP2"),  # retired: currency amounts are verbalized now, so the yuan sign is no longer SP2
    ("^", "zh", "SP3"),
    # ('@', 'zh', "SP4")  # dropped the "guichu" remix handling to stay consistent with the second version
]


def clean_text(text, language, version=None):
if version is None:
version = os.environ.get("version", "v2")
Expand All @@ -31,6 +31,14 @@ def clean_text(text, language, version=None):
if language not in language_module_map:
language = "en"
text = " "
if language in ("zh"): #处理货币似乎最佳方案是这里截胡,不然可能被吞...
from text.zh_normalization.num import (
RE_CNY_PREFIX, RE_CNY_SUFFIX, replace_cny_prefix, replace_cny_suffix,
RE_USD_SYMBOL, RE_USD_SUFFIX, replace_usd_symbol, replace_usd_suffix,)
text = RE_CNY_PREFIX.sub(replace_cny_prefix, text)
text = RE_CNY_SUFFIX.sub(replace_cny_suffix, text)
text = RE_USD_SYMBOL.sub(replace_usd_symbol, text)
text = RE_USD_SUFFIX.sub(replace_usd_suffix, text)
for special_s, special_l, target_symbol in special:
if special_s in text and language == special_l:
return clean_special(text, language, special_s, target_symbol, version)
Expand Down
113 changes: 113 additions & 0 deletions GPT_SoVITS/text/zh_normalization/num.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,3 +337,116 @@ def num2str(value_string: str) -> str:
result = result if result else "零"
result += "点" + verbalize_digit(decimal)
return result

# Yuan amounts written with a leading sign ("¥12.5") or a trailing unit ("12.5元").
# The sign is accepted in both half-width (U+00A5) and full-width (U+FFE5) form; the
# two code points are spelled as escapes so they cannot collapse into one character
# when the file passes through Unicode normalization.
# Amount syntax: optional minus, digits with optional single commas *between* digits,
# optional decimal part. A comma that follows the last digit is punctuation and is
# deliberately left in the text.
RE_CNY_PREFIX = re.compile(r"[\u00a5\uffe5]\s*(-?\d(?:,?\d)*(?:\.\d+)?)")
RE_CNY_SUFFIX = re.compile(
    r"(-?\d(?:,?\d)*(?:\.\d+)?)\s*(?:人民币|元|CNY|cny|[\u00a5\uffe5])"
)

def _strip_commas(s: str) -> str:
return s.replace(",", "")

def _split_amount(amount: str):
    """Split an amount string into its sign, integer digits and two fraction digits.

    Returns a tuple ``(neg, integer, frac, had_frac)``:

    * ``neg`` -- True when *amount* starts with "-".
    * ``integer`` -- digits before the decimal point with commas removed
      ("0" when that part is empty).
    * ``frac`` -- exactly two characters: the decimal digits padded with
      zeros on the right, or truncated (not rounded) to two places.
    * ``had_frac`` -- True when *amount* contained a decimal point.
    """
    is_negative = amount.startswith("-")
    unsigned = amount[1:] if is_negative else amount
    unsigned = _strip_commas(unsigned) or "0"

    # partition() yields an empty separator when there is no decimal point.
    whole, dot, decimals = unsigned.partition(".")
    had_frac = dot == "."

    whole = whole or "0"
    two_places = decimals.ljust(2, "0")[:2]
    return is_negative, whole, two_places, had_frac

# CNY and USD amounts are substituted from the cleaner side so that they do not get swallowed.
def replace_cny_amount(amount: str, num2str) -> str:
    """Verbalize a yuan amount as 元 / 角 / 分 text.

    Args:
        amount: the numeric part of the match (optional "-", commas, decimals).
        num2str: callable that turns a digit string into its spoken form.

    Returns:
        The integer part followed by "元", then the first decimal digit as 角
        and the second as 分 when they are non-zero. When a decimal part was
        written but both digits are zero, "整" is appended instead. Negative
        amounts are prefixed with "负".
    """
    neg, integer, frac, had_frac = _split_amount(amount)

    # A zero integer part is always spoken as "零" without consulting num2str.
    yuan = "零" if integer == "0" else num2str(integer)
    spoken = yuan + "元"

    jiao_digit, fen_digit = frac[0], frac[1]
    if jiao_digit != "0":
        spoken += num2str(jiao_digit) + "角"
    if fen_digit != "0":
        spoken += num2str(fen_digit) + "分"
    if had_frac and jiao_digit == "0" and fen_digit == "0":
        spoken += "整"

    # Skip the prefix if the verbalized text already begins with the negative marker.
    if neg and not spoken.startswith("负"):
        spoken = "负" + spoken
    return spoken

def replace_cny_prefix(m, num2str=num2str):
    """Substitution callback for RE_CNY_PREFIX: verbalize the amount captured in group 1."""
    captured_amount = m.group(1)
    return replace_cny_amount(captured_amount, num2str)

def replace_cny_suffix(m, num2str=num2str):
    """Substitution callback for RE_CNY_SUFFIX: verbalize the amount captured in group 1."""
    captured_amount = m.group(1)
    return replace_cny_amount(captured_amount, num2str)

# The dollar sign could also mean Canadian dollars and so on, but it is always read
# as US dollars here.
# Both the half-width "$" and the full-width dollar sign (U+FF04) are accepted. They
# live inside a character class because a bare "$" in a pattern is the end-of-string
# anchor, which would make the suffix pattern treat any number at the end of the text
# as a dollar amount.
# Amount syntax: optional minus, digits with optional single commas *between* digits,
# optional decimal part; a comma after the last digit is left in the text.
RE_USD_SYMBOL = re.compile(r"[$\uff04]\s*(-?\d(?:,?\d)*(?:\.\d+)?)")
RE_USD_SUFFIX = re.compile(
    r"(-?\d(?:,?\d)*(?:\.\d+)?)\s*(?:美元|USD|usd|[$\uff04])"
)

def _strip_commas(s: str) -> str:
return s.replace(",", "")

def _split_amount(amount: str):
neg = amount.startswith("-")
if neg:
amount = amount[1:]
amount = _strip_commas(amount) or "0"

if "." in amount:
integer, frac = amount.split(".", 1)
had_frac = True
else:
integer, frac, had_frac = amount, "", False

integer = integer or "0"
# 只保留两位小数用来读美分
frac = (frac + "00")[:2]
return neg, integer, frac, had_frac

def replace_usd_amount(amount: str, num2str) -> str:
neg, integer, frac, had_frac = _split_amount(amount)

integer_cn = num2str(integer) if integer != "0" else "零"

jiao, fen = frac[0], frac[1]
parts = []
if integer != "0":
parts.append(integer_cn + "美元")

if jiao != "0" or fen != "0":
cents = ""
if jiao != "0":
cents += num2str(jiao) + "十"
if fen != "0":
cents += num2str(fen)
cents = cents.replace("一十", "十")
parts.append(cents + "美分")
elif had_frac:
parts.append("整")
elif integer == "0":
parts = ["零美元"]

res = "".join(parts)
if neg and res and res[0] != "负":
res = "负" + res
return res

def replace_usd_symbol(m, num2str=num2str):
    """Substitution callback for RE_USD_SYMBOL: verbalize the amount captured in group 1."""
    captured_amount = m.group(1)
    return replace_usd_amount(captured_amount, num2str)

def replace_usd_suffix(m, num2str=num2str):
    """Substitution callback for RE_USD_SUFFIX: verbalize the amount captured in group 1."""
    captured_amount = m.group(1)
    return replace_usd_amount(captured_amount, num2str)
0