666 lines
22 KiB
Python
666 lines
22 KiB
Python
|
|
"""Unicode Properties."""
|
||
|
|
from __future__ import annotations
|
||
|
|
from .unidata import alias
|
||
|
|
|
||
|
|
# Range expression spanning every code point from U+0000 to U+10FFFF.
UNICODE_RANGE = '\u0000-\U0010ffff'
# Range expression spanning the single-byte values 0x00 to 0xFF.
ASCII_RANGE = '\x00-\xff'

# Lookup modes accepted by the property getters in this module: only
# `MODE_UNICODE` selects the `unicode_*` tables, and only `MODE_ASCII`
# triggers the byte-string formatting done by `fmt_string`.
MODE_NORMAL, MODE_ASCII, MODE_UNICODE = 0, 1, 2
|
||
|
|
|
||
|
|
|
||
|
|
def fmt_string(value: str, is_bytes: bool) -> str:
    """Clamp a range string for byte patterns.

    When `is_bytes` is true and `value` ends with U+10FFFF, that final
    character is replaced with `\\xff`. In every other case `value` is
    returned unchanged.
    """

    if not is_bytes:
        return value

    # Only the trailing upper bound is rewritten; earlier characters are kept.
    if value.endswith('\U0010ffff'):
        return value[:-1] + '\xff'
    return value
|
||
|
|
|
||
|
|
|
||
|
|
def get_gc_property(value: str, mode: int = MODE_UNICODE) -> str:
    """Look up a `GC` (general category) value.

    `value` is a one- or two-character category code, or an alias of one,
    optionally prefixed with `^` for negation. A one-character code without
    negation yields the concatenation of every non-negated entry of that
    major category.

    Raises `ValueError('Invalid property')` when the resolved code is not one
    or two characters long or is missing from the selected table.
    """

    from .unidata import generalcategory as prop_table

    if mode == MODE_UNICODE:
        table = prop_table.unicode_properties
    else:
        table = prop_table.ascii_properties

    inverted = value.startswith('^')
    if inverted:
        value = value[1:]

    code = alias.unicode_alias['generalcategory'].get(value, value)
    as_bytes = mode == MODE_ASCII

    # A code is valid when its major letter, and its minor letter if present, exist.
    size = len(code)
    if size == 1:
        valid = code in table
    elif size == 2:
        valid = code[0] in table and code[1] in table[code[0]]
    else:
        valid = False
    if not valid:
        raise ValueError('Invalid property')

    major = code[0]
    minor = code[1] if size > 1 else None
    group = table.get(major, {})

    if inverted:
        # A bare major category is negated through the group's '^' entry.
        negated_key = '^' if minor is None else '^' + minor
        return fmt_string(group.get(negated_key, ''), as_bytes)

    if minor is None:
        pieces = [fmt_string(chars, as_bytes) for key, chars in group.items() if not key.startswith('^')]
        return ''.join(pieces)

    return fmt_string(group.get(minor, ''), as_bytes)
|
||
|
|
|
||
|
|
|
||
|
|
def get_binary_property(value: str, mode: int = MODE_UNICODE) -> str:
    """Look up the `BINARY` property table entry for `value`.

    `value` may be a property name or an alias, optionally prefixed with `^`
    for the negated entry. A name missing from the table propagates the
    lookup error.
    """

    from .unidata import binary as prop_table

    if mode == MODE_UNICODE:
        table = prop_table.unicode_binary
    else:
        table = prop_table.ascii_binary

    # Strip the optional negation marker so only the bare name is alias-resolved.
    caret = '^' if value.startswith('^') else ''
    bare = value[len(caret):]
    key = caret + alias.unicode_alias['binary'].get(bare, bare)

    return fmt_string(table[key], mode == MODE_ASCII)
|
||
|
|
|
||
|
|
|
||
|
|
def get_canonical_combining_class_property(value: str, mode: int = MODE_UNICODE) -> str:
    """Look up the `CANONICAL COMBINING CLASS` table entry for `value`.

    `value` may be a canonical name or an alias, optionally prefixed with `^`
    for the negated entry. A name missing from the table propagates the
    lookup error.
    """

    from .unidata import canonicalcombiningclass as prop_table

    unicode_mode = mode == MODE_UNICODE
    table = (
        prop_table.unicode_canonical_combining_class
        if unicode_mode
        else prop_table.ascii_canonical_combining_class
    )

    negated = value.startswith('^')
    name = value[1:] if negated else value
    name = alias.unicode_alias['canonicalcombiningclass'].get(name, name)
    key = '^' + name if negated else name

    return fmt_string(table[key], mode == MODE_ASCII)
|
||
|
|
|
||
|
|
|
||
|
|
def get_east_asian_width_property(value: str, mode: int = MODE_UNICODE) -> str:
    """Look up the `EAST ASIAN WIDTH` table entry for `value`.

    `value` may be a canonical name or an alias, optionally prefixed with `^`
    for the negated entry. A name missing from the table propagates the
    lookup error.
    """

    from .unidata import eastasianwidth as prop_table

    if mode == MODE_UNICODE:
        table = prop_table.unicode_east_asian_width
    else:
        table = prop_table.ascii_east_asian_width

    # Strip the optional negation marker so only the bare name is alias-resolved.
    caret = '^' if value.startswith('^') else ''
    bare = value[len(caret):]
    key = caret + alias.unicode_alias['eastasianwidth'].get(bare, bare)

    return fmt_string(table[key], mode == MODE_ASCII)
|
||
|
|
|
||
|
|
|
||
|
|
def get_grapheme_cluster_break_property(value: str, mode: int = MODE_UNICODE) -> str:
    """Look up the `GRAPHEME CLUSTER BREAK` table entry for `value`.

    `value` may be a canonical name or an alias, optionally prefixed with `^`
    for the negated entry. A name missing from the table propagates the
    lookup error.
    """

    from .unidata import graphemeclusterbreak as prop_table

    if mode == MODE_UNICODE:
        table = prop_table.unicode_grapheme_cluster_break
    else:
        table = prop_table.ascii_grapheme_cluster_break

    negated = value.startswith('^')
    name = value[1:] if negated else value
    name = alias.unicode_alias['graphemeclusterbreak'].get(name, name)
    key = '^' + name if negated else name

    return fmt_string(table[key], mode == MODE_ASCII)
|
||
|
|
|
||
|
|
|
||
|
|
def get_line_break_property(value: str, mode: int = MODE_UNICODE) -> str:
    """Look up the `LINE BREAK` table entry for `value`.

    `value` may be a canonical name or an alias, optionally prefixed with `^`
    for the negated entry. A name missing from the table propagates the
    lookup error.
    """

    from .unidata import linebreak as prop_table

    if mode == MODE_UNICODE:
        table = prop_table.unicode_line_break
    else:
        table = prop_table.ascii_line_break

    # Strip the optional negation marker so only the bare name is alias-resolved.
    caret = '^' if value.startswith('^') else ''
    bare = value[len(caret):]
    key = caret + alias.unicode_alias['linebreak'].get(bare, bare)

    return fmt_string(table[key], mode == MODE_ASCII)
|
||
|
|
|
||
|
|
|
||
|
|
def get_sentence_break_property(value: str, mode: int = MODE_UNICODE) -> str:
    """Look up the `SENTENCE BREAK` table entry for `value`.

    `value` may be a canonical name or an alias, optionally prefixed with `^`
    for the negated entry. A name missing from the table propagates the
    lookup error.
    """

    from .unidata import sentencebreak as prop_table

    if mode == MODE_UNICODE:
        table = prop_table.unicode_sentence_break
    else:
        table = prop_table.ascii_sentence_break

    negated = value.startswith('^')
    name = value[1:] if negated else value
    name = alias.unicode_alias['sentencebreak'].get(name, name)
    key = '^' + name if negated else name

    return fmt_string(table[key], mode == MODE_ASCII)
|
||
|
|
|
||
|
|
|
||
|
|
def get_word_break_property(value: str, mode: int = MODE_UNICODE) -> str:
    """Look up the `WORD BREAK` table entry for `value`.

    `value` may be a canonical name or an alias, optionally prefixed with `^`
    for the negated entry. A name missing from the table propagates the
    lookup error.
    """

    from .unidata import wordbreak as prop_table

    if mode == MODE_UNICODE:
        table = prop_table.unicode_word_break
    else:
        table = prop_table.ascii_word_break

    # Strip the optional negation marker so only the bare name is alias-resolved.
    caret = '^' if value.startswith('^') else ''
    bare = value[len(caret):]
    key = caret + alias.unicode_alias['wordbreak'].get(bare, bare)

    return fmt_string(table[key], mode == MODE_ASCII)
|
||
|
|
|
||
|
|
|
||
|
|
def get_hangul_syllable_type_property(value: str, mode: int = MODE_UNICODE) -> str:
    """Look up the `HANGUL SYLLABLE TYPE` table entry for `value`.

    `value` may be a canonical name or an alias, optionally prefixed with `^`
    for the negated entry. A name missing from the table propagates the
    lookup error.
    """

    from .unidata import hangulsyllabletype as prop_table

    if mode == MODE_UNICODE:
        table = prop_table.unicode_hangul_syllable_type
    else:
        table = prop_table.ascii_hangul_syllable_type

    negated = value.startswith('^')
    name = value[1:] if negated else value
    name = alias.unicode_alias['hangulsyllabletype'].get(name, name)
    key = '^' + name if negated else name

    return fmt_string(table[key], mode == MODE_ASCII)
|
||
|
|
|
||
|
|
|
||
|
|
def get_indic_positional_category_property(value: str, mode: int = MODE_UNICODE) -> str:
    """Look up the `INDIC POSITIONAL/MATRA CATEGORY` table entry for `value`.

    `value` may be a canonical name or an alias, optionally prefixed with `^`
    for the negated entry. A name missing from the table propagates the
    lookup error.
    """

    from .unidata import indicpositionalcategory as prop_table

    unicode_mode = mode == MODE_UNICODE
    table = (
        prop_table.unicode_indic_positional_category
        if unicode_mode
        else prop_table.ascii_indic_positional_category
    )

    # Strip the optional negation marker so only the bare name is alias-resolved.
    caret = '^' if value.startswith('^') else ''
    bare = value[len(caret):]
    key = caret + alias.unicode_alias['indicpositionalcategory'].get(bare, bare)

    return fmt_string(table[key], mode == MODE_ASCII)
|
||
|
|
|
||
|
|
|
||
|
|
def get_indic_syllabic_category_property(value: str, mode: int = MODE_UNICODE) -> str:
    """Look up the `INDIC SYLLABIC CATEGORY` table entry for `value`.

    `value` may be a canonical name or an alias, optionally prefixed with `^`
    for the negated entry. A name missing from the table propagates the
    lookup error.
    """

    from .unidata import indicsyllabiccategory as prop_table

    unicode_mode = mode == MODE_UNICODE
    table = (
        prop_table.unicode_indic_syllabic_category
        if unicode_mode
        else prop_table.ascii_indic_syllabic_category
    )

    negated = value.startswith('^')
    name = value[1:] if negated else value
    name = alias.unicode_alias['indicsyllabiccategory'].get(name, name)
    key = '^' + name if negated else name

    return fmt_string(table[key], mode == MODE_ASCII)
|
||
|
|
|
||
|
|
|
||
|
|
def get_decomposition_type_property(value: str, mode: int = MODE_UNICODE) -> str:
    """Look up the `DECOMPOSITION TYPE` table entry for `value`.

    `value` may be a canonical name or an alias, optionally prefixed with `^`
    for the negated entry. A name missing from the table propagates the
    lookup error.
    """

    from .unidata import decompositiontype as prop_table

    if mode == MODE_UNICODE:
        table = prop_table.unicode_decomposition_type
    else:
        table = prop_table.ascii_decomposition_type

    # Strip the optional negation marker so only the bare name is alias-resolved.
    caret = '^' if value.startswith('^') else ''
    bare = value[len(caret):]
    key = caret + alias.unicode_alias['decompositiontype'].get(bare, bare)

    return fmt_string(table[key], mode == MODE_ASCII)
|
||
|
|
|
||
|
|
|
||
|
|
def get_nfc_quick_check_property(value: str, mode: int = MODE_UNICODE) -> str:
    """Look up the `NFC QUICK CHECK` table entry for `value`.

    `value` may be a canonical name or an alias, optionally prefixed with `^`
    for the negated entry. A name missing from the table propagates the
    lookup error.
    """

    from .unidata import quickcheck as prop_table

    if mode == MODE_UNICODE:
        table = prop_table.unicode_nfc_quick_check
    else:
        table = prop_table.ascii_nfc_quick_check

    negated = value.startswith('^')
    name = value[1:] if negated else value
    name = alias.unicode_alias['nfcquickcheck'].get(name, name)
    key = '^' + name if negated else name

    return fmt_string(table[key], mode == MODE_ASCII)
|
||
|
|
|
||
|
|
|
||
|
|
def get_nfd_quick_check_property(value: str, mode: int = MODE_UNICODE) -> str:
    """Look up the `NFD QUICK CHECK` table entry for `value`.

    `value` may be a canonical name or an alias, optionally prefixed with `^`
    for the negated entry. A name missing from the table propagates the
    lookup error.
    """

    from .unidata import quickcheck as prop_table

    if mode == MODE_UNICODE:
        table = prop_table.unicode_nfd_quick_check
    else:
        table = prop_table.ascii_nfd_quick_check

    # Strip the optional negation marker so only the bare name is alias-resolved.
    caret = '^' if value.startswith('^') else ''
    bare = value[len(caret):]
    key = caret + alias.unicode_alias['nfdquickcheck'].get(bare, bare)

    return fmt_string(table[key], mode == MODE_ASCII)
|
||
|
|
|
||
|
|
|
||
|
|
def get_nfkc_quick_check_property(value: str, mode: int = MODE_UNICODE) -> str:
    """Look up the `NFKC QUICK CHECK` table entry for `value`.

    `value` may be a canonical name or an alias, optionally prefixed with `^`
    for the negated entry. A name missing from the table propagates the
    lookup error.
    """

    from .unidata import quickcheck as prop_table

    if mode == MODE_UNICODE:
        table = prop_table.unicode_nfkc_quick_check
    else:
        table = prop_table.ascii_nfkc_quick_check

    negated = value.startswith('^')
    name = value[1:] if negated else value
    name = alias.unicode_alias['nfkcquickcheck'].get(name, name)
    key = '^' + name if negated else name

    return fmt_string(table[key], mode == MODE_ASCII)
|
||
|
|
|
||
|
|
|
||
|
|
def get_nfkd_quick_check_property(value: str, mode: int = MODE_UNICODE) -> str:
    """Look up the `NFKD QUICK CHECK` table entry for `value`.

    `value` may be a canonical name or an alias, optionally prefixed with `^`
    for the negated entry. A name missing from the table propagates the
    lookup error.
    """

    from .unidata import quickcheck as prop_table

    if mode == MODE_UNICODE:
        table = prop_table.unicode_nfkd_quick_check
    else:
        table = prop_table.ascii_nfkd_quick_check

    # Strip the optional negation marker so only the bare name is alias-resolved.
    caret = '^' if value.startswith('^') else ''
    bare = value[len(caret):]
    key = caret + alias.unicode_alias['nfkdquickcheck'].get(bare, bare)

    return fmt_string(table[key], mode == MODE_ASCII)
|
||
|
|
|
||
|
|
|
||
|
|
def get_numeric_type_property(value: str, mode: int = MODE_UNICODE) -> str:
    """Look up the `NUMERIC TYPE` table entry for `value`.

    `value` may be a canonical name or an alias, optionally prefixed with `^`
    for the negated entry. A name missing from the table propagates the
    lookup error.
    """

    from .unidata import numerictype as prop_table

    if mode == MODE_UNICODE:
        table = prop_table.unicode_numeric_type
    else:
        table = prop_table.ascii_numeric_type

    negated = value.startswith('^')
    name = value[1:] if negated else value
    name = alias.unicode_alias['numerictype'].get(name, name)
    key = '^' + name if negated else name

    return fmt_string(table[key], mode == MODE_ASCII)
|
||
|
|
|
||
|
|
|
||
|
|
def get_numeric_value_property(value: str, mode: int = MODE_UNICODE) -> str:
    """Look up the `NUMERIC VALUE` table entry for `value`.

    `value` may be a canonical name or an alias, optionally prefixed with `^`
    for the negated entry. A name missing from the table propagates the
    lookup error.
    """

    from .unidata import numericvalue as prop_table

    if mode == MODE_UNICODE:
        table = prop_table.unicode_numeric_values
    else:
        table = prop_table.ascii_numeric_values

    # Strip the optional negation marker so only the bare name is alias-resolved.
    caret = '^' if value.startswith('^') else ''
    bare = value[len(caret):]
    key = caret + alias.unicode_alias['numericvalue'].get(bare, bare)

    return fmt_string(table[key], mode == MODE_ASCII)
|
||
|
|
|
||
|
|
|
||
|
|
def get_age_property(value: str, mode: int = MODE_UNICODE) -> str:
    """Look up the `AGE` table entry for `value`.

    `value` may be a canonical name or an alias, optionally prefixed with `^`
    for the negated entry. A name missing from the table propagates the
    lookup error.
    """

    from .unidata import age as prop_table

    if mode == MODE_UNICODE:
        table = prop_table.unicode_age
    else:
        table = prop_table.ascii_age

    negated = value.startswith('^')
    name = value[1:] if negated else value
    name = alias.unicode_alias['age'].get(name, name)
    key = '^' + name if negated else name

    return fmt_string(table[key], mode == MODE_ASCII)
|
||
|
|
|
||
|
|
|
||
|
|
def get_joining_type_property(value: str, mode: int = MODE_UNICODE) -> str:
    """Look up the `JOINING TYPE` table entry for `value`.

    `value` may be a canonical name or an alias, optionally prefixed with `^`
    for the negated entry. A name missing from the table propagates the
    lookup error.
    """

    from .unidata import joiningtype as prop_table

    if mode == MODE_UNICODE:
        table = prop_table.unicode_joining_type
    else:
        table = prop_table.ascii_joining_type

    # Strip the optional negation marker so only the bare name is alias-resolved.
    caret = '^' if value.startswith('^') else ''
    bare = value[len(caret):]
    key = caret + alias.unicode_alias['joiningtype'].get(bare, bare)

    return fmt_string(table[key], mode == MODE_ASCII)
|
||
|
|
|
||
|
|
|
||
|
|
def get_joining_group_property(value: str, mode: int = MODE_UNICODE) -> str:
    """Look up the `JOINING GROUP` table entry for `value`.

    `value` may be a canonical name or an alias, optionally prefixed with `^`
    for the negated entry. A name missing from the table propagates the
    lookup error.
    """

    from .unidata import joininggroup as prop_table

    if mode == MODE_UNICODE:
        table = prop_table.unicode_joining_group
    else:
        table = prop_table.ascii_joining_group

    negated = value.startswith('^')
    name = value[1:] if negated else value
    name = alias.unicode_alias['joininggroup'].get(name, name)
    key = '^' + name if negated else name

    return fmt_string(table[key], mode == MODE_ASCII)
|
||
|
|
|
||
|
|
|
||
|
|
def get_script_property(value: str, mode: int = MODE_UNICODE) -> str:
    """Look up the `SC` (script) table entry for `value`.

    `value` may be a canonical name or an alias, optionally prefixed with `^`
    for the negated entry. A name missing from the table propagates the
    lookup error.
    """

    from .unidata import script as prop_table

    if mode == MODE_UNICODE:
        table = prop_table.unicode_scripts
    else:
        table = prop_table.ascii_scripts

    # Strip the optional negation marker so only the bare name is alias-resolved.
    caret = '^' if value.startswith('^') else ''
    bare = value[len(caret):]
    key = caret + alias.unicode_alias['script'].get(bare, bare)

    return fmt_string(table[key], mode == MODE_ASCII)
|
||
|
|
|
||
|
|
|
||
|
|
def get_script_extension_property(value: str, mode: int = MODE_UNICODE) -> str:
    """Look up the `SCX` (script extensions) table entry for `value`.

    `value` may be a canonical name or an alias, optionally prefixed with `^`
    for the negated entry. Names are resolved through the `script` alias
    table. A name missing from the table propagates the lookup error.
    """

    from .unidata import scriptextensions as prop_table

    if mode == MODE_UNICODE:
        table = prop_table.unicode_script_extensions
    else:
        table = prop_table.ascii_script_extensions

    negated = value.startswith('^')
    name = value[1:] if negated else value
    name = alias.unicode_alias['script'].get(name, name)
    key = '^' + name if negated else name

    return fmt_string(table[key], mode == MODE_ASCII)
|
||
|
|
|
||
|
|
|
||
|
|
def get_block_property(value: str, mode: int = MODE_UNICODE) -> str:
    """Look up the `BLK` (block) table entry for `value`.

    `value` may be a canonical name or an alias, optionally prefixed with `^`
    for the negated entry. A name missing from the table propagates the
    lookup error.
    """

    from .unidata import block as prop_table

    if mode == MODE_UNICODE:
        table = prop_table.unicode_blocks
    else:
        table = prop_table.ascii_blocks

    # Strip the optional negation marker so only the bare name is alias-resolved.
    caret = '^' if value.startswith('^') else ''
    bare = value[len(caret):]
    key = caret + alias.unicode_alias['block'].get(bare, bare)

    return fmt_string(table[key], mode == MODE_ASCII)
|
||
|
|
|
||
|
|
|
||
|
|
def get_bidi_property(value: str, mode: int = MODE_UNICODE) -> str:
    """Look up the `BC` (bidi class) table entry for `value`.

    `value` may be a canonical name or an alias, optionally prefixed with `^`
    for the negated entry. A name missing from the table propagates the
    lookup error.
    """

    from .unidata import bidiclass as prop_table

    if mode == MODE_UNICODE:
        table = prop_table.unicode_bidi_classes
    else:
        table = prop_table.ascii_bidi_classes

    negated = value.startswith('^')
    name = value[1:] if negated else value
    name = alias.unicode_alias['bidiclass'].get(name, name)
    key = '^' + name if negated else name

    return fmt_string(table[key], mode == MODE_ASCII)
|
||
|
|
|
||
|
|
|
||
|
|
def get_bidi_paired_bracket_type_property(value: str, mode: int = MODE_UNICODE) -> str:
    """Look up the `BPT` (bidi paired bracket type) table entry for `value`.

    `value` may be a canonical name or an alias, optionally prefixed with `^`
    for the negated entry. A name missing from the table propagates the
    lookup error.
    """

    from .unidata import bidipairedbrackettype as prop_table

    unicode_mode = mode == MODE_UNICODE
    table = (
        prop_table.unicode_bidi_paired_bracket_type
        if unicode_mode
        else prop_table.ascii_bidi_paired_bracket_type
    )

    # Strip the optional negation marker so only the bare name is alias-resolved.
    caret = '^' if value.startswith('^') else ''
    bare = value[len(caret):]
    key = caret + alias.unicode_alias['bidipairedbrackettype'].get(bare, bare)

    return fmt_string(table[key], mode == MODE_ASCII)
|
||
|
|
|
||
|
|
|
||
|
|
def get_vertical_orientation_property(value: str, mode: int = MODE_UNICODE) -> str:
    """Look up the `VO` (vertical orientation) table entry for `value`.

    `value` may be a canonical name or an alias, optionally prefixed with `^`
    for the negated entry. A name missing from the table propagates the
    lookup error.
    """

    from .unidata import verticalorientation as prop_table

    unicode_mode = mode == MODE_UNICODE
    table = (
        prop_table.unicode_vertical_orientation
        if unicode_mode
        else prop_table.ascii_vertical_orientation
    )

    negated = value.startswith('^')
    name = value[1:] if negated else value
    name = alias.unicode_alias['verticalorientation'].get(name, name)
    key = '^' + name if negated else name

    return fmt_string(table[key], mode == MODE_ASCII)
|
||
|
|
|
||
|
|
|
||
|
|
def get_is_property(value: str, mode: int = MODE_UNICODE) -> str:
    """Resolve an `is`-prefixed shortcut such as `isname` or `^isname`.

    The text after `is` is first tried against the script extensions table
    (via the `script` aliases). If it is not found there, it is looked up in
    the binary property table (via the `binary` aliases).

    Raises `ValueError` when `value` does not start with `is` (after an
    optional `^`). A name found in neither table propagates the lookup error.
    """

    from .unidata import scriptextensions as scx
    from .unidata import binary

    caret = '^' if value.startswith('^') else ''
    body = value[len(caret):]
    if body[:2] != 'is':
        raise ValueError("Does not start with 'is'!")
    name = body[2:]

    use_unicode = mode == MODE_UNICODE
    scripts = scx.unicode_script_extensions if use_unicode else scx.ascii_script_extensions
    binaries = binary.unicode_binary if use_unicode else binary.ascii_binary

    # Script extensions take precedence over binary properties.
    key = caret + alias.unicode_alias['script'].get(name, name)
    if key in scripts:
        return fmt_string(scripts[key], mode == MODE_ASCII)

    key = caret + alias.unicode_alias['binary'].get(name, name)
    return fmt_string(binaries[key], mode == MODE_ASCII)
|
||
|
|
|
||
|
|
|
||
|
|
def get_in_property(value: str, mode: int = MODE_UNICODE) -> str:
    """Resolve an `in`-prefixed shortcut for the `Block` property.

    Accepts `inname` or `^inname`; the text after `in` is resolved through
    the `block` aliases and looked up in the block table.

    Raises `ValueError` when `value` does not start with `in` (after an
    optional `^`). A name missing from the table propagates the lookup error.
    """

    from .unidata import block as prop_table

    caret = '^' if value.startswith('^') else ''
    body = value[len(caret):]
    if body[:2] != 'in':
        raise ValueError("Does not start with 'in'!")

    name = body[2:]
    key = caret + alias.unicode_alias['block'].get(name, name)

    if mode == MODE_UNICODE:
        blocks = prop_table.unicode_blocks
    else:
        blocks = prop_table.ascii_blocks

    return fmt_string(blocks[key], mode == MODE_ASCII)
|
||
|
|
|
||
|
|
|
||
|
|
def _is_binary(name: str) -> bool:
    """Return whether `name` is a binary property name or a binary alias.

    The check is a plain membership test against the Unicode binary table
    and the `binary` alias table; `name` is used exactly as given.
    """

    from .unidata import binary as prop_table

    if name in prop_table.unicode_binary:
        return True
    return name in alias.unicode_alias['binary']
|
||
|
|
|
||
|
|
|
||
|
|
# Canonical enum property name -> getter that resolves a value of that property.
_ENUM_PROPERTY_GETTERS = {
    'generalcategory': get_gc_property,
    'script': get_script_property,
    'scriptextensions': get_script_extension_property,
    'block': get_block_property,
    'bidiclass': get_bidi_property,
    'bidipairedbrackettype': get_bidi_paired_bracket_type_property,
    'age': get_age_property,
    'eastasianwidth': get_east_asian_width_property,
    'indicpositionalcategory': get_indic_positional_category_property,
    'indicsyllabiccategory': get_indic_syllabic_category_property,
    'hangulsyllabletype': get_hangul_syllable_type_property,
    'decompositiontype': get_decomposition_type_property,
    'canonicalcombiningclass': get_canonical_combining_class_property,
    'numerictype': get_numeric_type_property,
    'numericvalue': get_numeric_value_property,
    'joiningtype': get_joining_type_property,
    'joininggroup': get_joining_group_property,
    'graphemeclusterbreak': get_grapheme_cluster_break_property,
    'linebreak': get_line_break_property,
    'sentencebreak': get_sentence_break_property,
    'wordbreak': get_word_break_property,
    'nfcquickcheck': get_nfc_quick_check_property,
    'nfdquickcheck': get_nfd_quick_check_property,
    'nfkcquickcheck': get_nfkc_quick_check_property,
    'nfkdquickcheck': get_nfkd_quick_check_property,
    'verticalorientation': get_vertical_orientation_property,
}

# Getters tried, in this order, when a property is given without an explicit value.
_SHORTHAND_GETTERS = (
    get_gc_property,
    get_script_extension_property,
    get_binary_property,
    get_block_property,
    get_is_property,
    get_in_property,
)


def get_unicode_property(prop: str, value: str | None = None, mode: int = MODE_UNICODE) -> str:
    """Retrieve the Unicode category from the table.

    Parameters:
        prop: Property name, optionally prefixed with `^` to negate. When
            `value` is `None`, this is a bare shorthand (a general category,
            script extension, binary property, block, `is...` or `in...`
            form).
        value: Property value, or `None` for the shorthand form. For binary
            properties it must be one of `y`/`yes`/`t`/`true` or
            `n`/`no`/`f`/`false`; a false value flips the negation.
        mode: One of the `MODE_*` constants; forwarded to the getter.

    Returns:
        The string produced by the matching property getter.

    Raises:
        ValueError: If the property or value cannot be resolved, or if a
            binary property is given a value that is not a recognized
            true/false spelling.
    """

    if value is None:
        # Shorthand form: the first getter that recognizes `prop` wins. Any
        # failure just means "not this kind of property", so it is ignored
        # and the next getter is tried.
        for getter in _SHORTHAND_GETTERS:
            try:
                return getter(prop, mode)
            except Exception:
                continue
        raise ValueError(f"'{prop}' does not appear to be a valid property")

    negate = prop.startswith('^')
    name = prop[1:] if negate else prop

    # Normalize binary true/false input so we can handle it properly.
    # The check uses the caret-stripped name, so a negated alias such as
    # `^alias` is detected the same way as its non-negated form.
    if _is_binary(name):
        if value in ('n', 'no', 'f', 'false'):
            negate = not negate
        elif value not in ('y', 'yes', 't', 'true'):
            raise ValueError(f"'{value}' is not a valid value for the binary property '{prop}'")

        return get_binary_property('^' + name if negate else name, mode)

    # Enum property: negation is carried on the value handed to the getter.
    if negate:
        value = '^' + value

    name = alias.unicode_alias['_'].get(name, name)
    try:
        getter = _ENUM_PROPERTY_GETTERS.get(name)
        if getter is None:
            raise ValueError(f"'{prop}={value}' does not have a valid property name")
        return getter(value, mode)
    except Exception as e:
        # Every failure, including the unknown-name error above, is reported
        # uniformly with the original error attached as the cause.
        raise ValueError(f"'{prop}={value}' does not appear to be a valid property") from e
|