| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | """ |
| | Rules to verbalize numbers into Chinese characters. |
| | https://zh.wikipedia.org/wiki/中文数字#現代中文 |
| | """ |
| | import re |
| | from collections import OrderedDict |
| | from typing import List |
| |
|
# Maps each ASCII digit character ("0".."9") to its Chinese numeral.
DIGITS = dict(zip(map(str, range(10)), "零一二三四五六七八九"))

# Maps a power of ten (number of trailing digits) to its Chinese unit
# character. Order matters: lookups walk the keys from largest to smallest.
UNITS = OrderedDict(
    [
        (1, "十"),
        (2, "百"),
        (3, "千"),
        (4, "万"),
        (8, "亿"),
    ]
)
| |
|
# Regex alternation (one outer capturing group) of the measure words and
# units that may follow a number. The literal is split across adjacent
# string chunks purely for readability; the concatenated pattern is a single
# string and the order of alternatives is significant, so do not reorder.
COM_QUANTIFIERS = (
    "(封|艘|把|目|套|段|人|所|朵|匹|张|座|回|场|尾|条|个|首|阙|阵|网|炮|顶|丘|棵|只|支|袭|辆|挑|担|颗|壳|窠|曲|墙|群|腔|砣|座|客|贯|扎|捆|刀|令|打|手|罗|坡|山|岭|江|溪|钟|队|单|双|对|出|口|头|脚|板|跳|枝|件|贴|针|线|管|名|位|身|堂|课|本|页|家|户|层|"
    "丝|毫|厘|分|钱|两|斤|担|铢|石|钧|锱|忽|(千|毫|微)克|"
    "毫|厘|(公)分|分|寸|尺|丈|里|寻|常|铺|程|(千|分|厘|毫|微)米|米|"
    "撮|勺|合|升|斗|石|盘|碗|碟|叠|桶|笼|盆|盒|杯|钟|斛|锅|簋|篮|盘|桶|罐|瓶|壶|卮|盏|箩|箱|煲|啖|袋|钵|"
    "年|月|日|季|刻|时|周|天|秒|分|小时|旬|纪|岁|世|更|夜|春|夏|秋|冬|代|伏|辈|"
    "丸|泡|粒|颗|幢|堆|条|根|支|道|面|片|张|颗|块|"
    "元|(亿|千万|百万|万|千|百)|(亿|千万|百万|万|千|百|美|)元|(亿|千万|百万|万|千|百|十|)吨|(亿|千万|百万|万|千|百|)块|角|毛|分)"
)
| |
|
| | |
# Fraction: group 1 = optional minus sign, group 2 = numerator digits,
# group 3 = denominator digits.
RE_FRAC = re.compile(
    r"(-?)"
    r"(\d+)/(\d+)"
)
| |
|
| |
|
def replace_frac(match) -> str:
    """Verbalize a fraction match as Chinese text.

    Args:
        match (re.Match): match whose groups 1, 2 and 3 hold the optional
            minus sign, the numerator digits and the denominator digits.

    Returns:
        str: the value returned by ``cn2an.an2cn`` when that call succeeds;
        otherwise ``"<denominator>分之<numerator>"`` built with ``num2str``,
        prefixed with ``"负"`` when a sign was captured.
    """
    sign = match.group(1)
    nominator = match.group(2)
    denominator = match.group(3)
    try:
        # Imported inside the try so a missing cn2an also takes the fallback.
        import cn2an

        return cn2an.an2cn(f"{sign}{nominator}/{denominator}", "low")
    except Exception:
        # Best-effort fallback for any cn2an failure. ``Exception`` (rather
        # than a bare ``except``) lets KeyboardInterrupt/SystemExit propagate.
        prefix = "负" if sign else ""
        nominator_text = num2str(nominator)
        denominator_text = num2str(denominator)
        return f"{prefix}{denominator_text}分之{nominator_text}"
| |
|
| |
|
| | |
# Percentage: group 1 = optional minus sign, group 2 = the number (with
# optional decimal part in group 3), group 4 = the percent sign.
# The percent sign may be ASCII "%" or the full-width form U+FF05; the latter
# is written as an escape so it cannot be silently normalized to ASCII
# (which would leave two identical alternatives).
RE_PERCENTAGE = re.compile(r"(-?)(\d+(\.\d+)?)(%|\uFF05)")
| |
|
| |
|
def replace_percentage(match) -> str:
    """Verbalize a percentage match as Chinese text.

    Args:
        match (re.Match): match whose group 1 is the optional minus sign and
            group 2 the numeric part (without the percent sign).

    Returns:
        str: the value returned by ``cn2an.an2cn`` when that call succeeds;
        otherwise ``"百分之<number>"`` built with ``num2str``, prefixed with
        ``"负"`` when a sign was captured.
    """
    sign = match.group(1)
    percent = match.group(2)
    try:
        # Imported inside the try so a missing cn2an also takes the fallback.
        import cn2an

        return cn2an.an2cn(f"{sign}{percent}%", "low")
    except Exception:
        # Best-effort fallback for any cn2an failure. ``Exception`` (rather
        # than a bare ``except``) lets KeyboardInterrupt/SystemExit propagate.
        prefix = "负" if sign else ""
        percent_text = num2str(percent)
        return f"{prefix}百分之{percent_text}"
| |
|
| |
|
| | |
| | |
# Negative integer only: group 1 = the mandatory minus sign,
# group 2 = the digits that follow it.
RE_INTEGER = re.compile(r"(-)(\d+)")
| |
|
| |
|
def replace_negative_num(match) -> str:
    """Verbalize a signed integer match as Chinese text.

    Args:
        match (re.Match): match whose group 1 is the sign and group 2 the
            digits.

    Returns:
        str: the value returned by ``cn2an.an2cn`` when that call succeeds;
        otherwise the ``num2str`` reading of the digits, prefixed with
        ``"负"`` when a sign was captured.
    """
    sign = match.group(1)
    number = match.group(2)
    try:
        # Imported inside the try so a missing cn2an also takes the fallback.
        import cn2an

        return cn2an.an2cn(f"{sign}{number}", "low")
    except Exception:
        # Best-effort fallback for any cn2an failure. ``Exception`` (rather
        # than a bare ``except``) lets KeyboardInterrupt/SystemExit propagate.
        prefix = "负" if sign else ""
        number_text = num2str(number)
        return f"{prefix}{number_text}"
| |
|
| |
|
| | |
| | |
# Any run of three or more consecutive digits (three required, then any
# number of additional digits).
RE_DEFAULT_NUM = re.compile(
    r"\d{3}\d*"
)
| |
|
| |
|
def replace_default_num(match) -> str:
    """Verbalize a bare digit run as Chinese text.

    Args:
        match (re.Match): match whose whole text (group 0) is the digit run.

    Returns:
        str: the value returned by ``cn2an.an2cn`` when that call succeeds;
        otherwise the digit-by-digit reading from ``verbalize_digit`` with
        ``alt_one=True``.
    """
    number = match.group(0)
    try:
        # Imported inside the try so a missing cn2an also takes the fallback.
        import cn2an

        return cn2an.an2cn(number, "low")
    except Exception:
        # Best-effort fallback for any cn2an failure. ``Exception`` (rather
        # than a bare ``except``) lets KeyboardInterrupt/SystemExit propagate.
        return verbalize_digit(number, alt_one=True)
| |
|
| |
|
| | |
| | |
# Decimal number. Two alternatives:
#   1. optional sign (group 1) + digits "." digits (group 2; integer part in
#      group 3, fractional part including the dot in group 4);
#   2. a bare fraction such as ".5" (group 5; its digits in group 6).
RE_DECIMAL_NUM = re.compile(
    r"(-?)((\d+)(\.\d+))"
    r"|(\.(\d+))"
)
| | |
# Unsigned integer followed by a quantifier: group 1 = digits, group 2 = an
# optional approximation marker (one of 多 余 几 +), group 3 = the quantifier
# matched by COM_QUANTIFIERS.
RE_POSITIVE_QUANTIFIERS = re.compile(
    r"(\d+)"
    r"([多余几\+])?"
    + COM_QUANTIFIERS
)
# General number. Two alternatives:
#   1. optional sign (group 1) + digits with an optional fractional part
#      (group 2; integer part in group 3, ".digits" in group 4);
#   2. a bare fraction such as ".5" (group 5; its digits in group 6).
RE_NUMBER = re.compile(
    r"(-?)((\d+)(\.\d+)?)"
    r"|(\.(\d+))"
)
| |
|
| |
|
def replace_positive_quantifier(match) -> str:
    """Verbalize "<number>[marker]<quantifier>" as Chinese text.

    Args:
        match (re.Match): match whose group 1 is the digits, group 2 an
            optional marker character and group 3 the quantifier.

    Returns:
        str: the ``num2str`` reading of the digits, followed by the marker
        (``"+"`` is rewritten to ``"多"``; a missing marker becomes empty)
        and the quantifier unchanged.
    """
    digits, marker, quantifier = match.group(1, 2, 3)
    if marker == "+":
        marker = "多"
    elif not marker:
        # Optional group did not participate in the match.
        marker = ""
    spoken = num2str(digits)
    return f"{spoken}{marker}{quantifier}"
| |
|
| |
|
def replace_number(match) -> str:
    """Verbalize a general number match as Chinese text.

    Args:
        match (re.Match): match whose group 1 is the optional minus sign,
            group 2 the number, and group 5 a bare fraction such as ``.5``
            (set instead of groups 1-2 when the number has no integer part).

    Returns:
        str: the value returned by ``cn2an.an2cn`` when that call succeeds;
        otherwise the ``num2str`` reading, prefixed with ``"负"`` when a
        sign was captured.
    """
    sign = match.group(1)
    number = match.group(2)
    pure_decimal = match.group(5)

    text = pure_decimal if pure_decimal else f"{sign}{number}"
    try:
        # Imported inside the try so that a missing cn2an degrades to the
        # num2str fallback, like the other replace_* helpers in this module,
        # instead of raising ImportError.
        import cn2an

        return cn2an.an2cn(text, "low")
    except (ImportError, ValueError):
        if pure_decimal:
            return num2str(pure_decimal)
        prefix = "负" if sign else ""
        number_text = num2str(number)
        return f"{prefix}{number_text}"
| |
|
| |
|
| | |
| | |
| |
|
# Numeric range "<number>-<number>" or "<number>~<number>".
# Group 1 is the whole first endpoint and group 8 the whole second endpoint;
# each endpoint uses the same sub-pattern as a general number (optional sign,
# digits with optional fraction, or a bare ".digits").
RE_RANGE = re.compile(
    r"((-?)((\d+)(\.\d+)?)|(\.(\d+)))"
    r"[-~]"
    r"((-?)((\d+)(\.\d+)?)|(\.(\d+)))"
)
| |
|
| |
|
def replace_range(match) -> str:
    """Verbalize a numeric range as "<first>到<second>".

    Args:
        match (re.Match): match whose group 1 is the first endpoint and
            group 8 the second endpoint.

    Returns:
        str: both endpoints rewritten through ``RE_NUMBER`` /
        ``replace_number`` and joined with ``"到"``.
    """
    endpoints = (match.group(1), match.group(8))
    first, second = (
        RE_NUMBER.sub(replace_number, endpoint) for endpoint in endpoints
    )
    return f"{first}到{second}"
| |
|
| |
|
def _get_value(value_string: str, use_zero: bool = True) -> List[str]:
    """Recursively split a digit string into Chinese numeral symbols.

    Args:
        value_string: digit characters, possibly with leading zeros.
        use_zero: when true, a single significant digit that had leading
            zeros is emitted with a preceding zero symbol. Recursive calls
            always use the default.

    Returns:
        List[str]: digit and unit symbols in reading order; empty when the
        string contains only zeros (or is empty).
    """
    significant = value_string.lstrip("0")
    if not significant:
        return []

    if len(significant) == 1:
        digit_symbol = DIGITS[significant]
        had_leading_zero = len(significant) < len(value_string)
        if use_zero and had_leading_zero:
            return [DIGITS["0"], digit_symbol]
        return [digit_symbol]

    # Largest unit whose power is strictly below the significant length.
    split_at = next(
        power for power in reversed(UNITS) if power < len(significant)
    )
    # Slice the *unstripped* string so leading zeros of the lower part are
    # still visible to the recursive call.
    head = value_string[:-split_at]
    tail = value_string[-split_at:]
    return _get_value(head) + [UNITS[split_at]] + _get_value(tail)
| |
|
| |
|
def verbalize_cardinal(value_string: str) -> str:
    """Read a digit string as a Chinese cardinal number.

    Args:
        value_string: digit characters; leading zeros are ignored.

    Returns:
        str: ``""`` for an empty input, the zero symbol for an all-zero
        input, otherwise the joined symbols from ``_get_value`` with a
        leading "一" dropped when it is immediately followed by "十".
    """
    if not value_string:
        return ""

    digits = value_string.lstrip("0")
    if not digits:
        return DIGITS["0"]

    symbols = _get_value(digits)
    # A leading "一十" is shortened to "十".
    if symbols[:2] == [DIGITS["1"], UNITS[1]]:
        symbols = symbols[1:]
    return "".join(symbols)
| |
|
| |
|
def verbalize_digit(value_string: str, alt_one=False) -> str:
    """Read a digit string one digit at a time.

    Args:
        value_string: digit characters; each must be a key of ``DIGITS``.
        alt_one: when true, every "一" in the result is replaced by "幺".

    Returns:
        str: the concatenated per-digit symbols.
    """
    spoken = "".join(map(DIGITS.__getitem__, value_string))
    if not alt_one:
        return spoken
    return spoken.replace("一", "幺")
| |
|
| |
|
def num2str(value_string: str) -> str:
    """Read an unsigned decimal string as Chinese text.

    Args:
        value_string: digits with at most one ``"."``; the integer part may
            be empty (e.g. ``".5"``).

    Returns:
        str: the ``verbalize_cardinal`` reading of the integer part; when the
        fractional part is non-empty after trailing zeros are removed, it is
        appended as ``"点"`` plus its digit-by-digit reading.

    Raises:
        ValueError: if ``value_string`` contains more than one ``"."``.
    """
    parts = value_string.split(".")
    if len(parts) > 2:
        # The message previously contained a stray "$" before the value.
        raise ValueError(
            f"The value string: '{value_string}' has more than one point in it."
        )
    integer = parts[0]
    decimal = parts[1] if len(parts) == 2 else ""

    result = verbalize_cardinal(integer)

    # Trailing zeros in the fraction are not read out.
    decimal = decimal.rstrip("0")
    if decimal:
        # An empty integer part (e.g. ".5") is read as zero.
        result = result if result else "零"
        result += "点" + verbalize_digit(decimal)
    return result
| |
|