1548 lines
52 KiB
Python
1548 lines
52 KiB
Python
"""
|
|
Backrefs Re parser.
|
|
|
|
Licensed under MIT
|
|
Copyright (c) 2011 - 2020 Isaac Muse <isaacmuse@gmail.com>
|
|
"""
|
|
from __future__ import annotations
|
|
import re as _re
|
|
import sys
|
|
import copyreg as _copyreg
|
|
from . import util as _util
|
|
import unicodedata as _unicodedata
|
|
from . import uniprops as _uniprops
|
|
from typing import Generic, AnyStr, Match, Any, Pattern, cast
|
|
|
|
if sys.version_info >= (3, 11):
|
|
import re._parser as _parser # type: ignore[import]
|
|
else:
|
|
import sre_parse as _parser
|
|
|
|
__all__ = ("ReplaceTemplate",)
|
|
|
|
# Character classes consulted while scanning search patterns and replace templates.
_ASCII_LETTERS = frozenset('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')
_DIGIT = frozenset('0123456789')
_OCTAL = frozenset('01234567')
# Lowercase digits only: the hex readers in this file lowercase a character before testing it.
_HEX = frozenset('abcdef0123456789')
_LETTERS_UNDERSCORE = _ASCII_LETTERS | frozenset('_')
_WORD = _LETTERS_UNDERSCORE | _DIGIT
_STANDARD_ESCAPES = frozenset('abfnrtv')
_CURLY_BRACKETS = frozenset('{}')
# Characters that are legal inside a Unicode property name but are dropped from it.
_PROPERTY_STRIP = frozenset(' -_')
_PROPERTY = _WORD | _DIGIT | _PROPERTY_STRIP
_GLOBAL_FLAGS = frozenset('aiLmsux')
_SCOPED_FLAGS = frozenset('imsx')
_SCOPED_END = frozenset(':)')

# Code points of `{` and `}`.
_CURLY_BRACKETS_ORD = frozenset((ord('{'), ord('}')))

# Property names that get a `posix` prefix when written in the `[:name:]` form.
_COMPATIBILITY_PROPERTIES = frozenset(
    'alpha lower upper punct digit xdigit alnum space blank cntrl graph print word'.split()
)

# Case upper or lower
_UPPER = 1
_LOWER = 2

# Format Constants
_BACK_SLASH_TRANSLATION = {
    r"\a": '\a',
    r"\b": '\b',
    r"\f": '\f',
    r"\r": '\r',
    r"\t": '\t',
    r"\n": '\n',
    r"\v": '\v',
    r"\\": '\\'
}

_FMT_CONV_TYPE = ('a', 'r', 's')
|
|
|
|
|
|
class LoopException(Exception):
    """Raised when a global flag has to be swapped a second time while re-parsing a pattern."""
|
|
|
|
|
|
class GlobalRetryException(Exception):
    """Raised when a global flag changes mid-pattern and the whole pattern must be parsed again."""
|
|
|
|
|
|
class _SearchParser(Generic[AnyStr]):
|
|
"""Search Template."""
|
|
|
|
_new_refs = ("c", "C", "e", "E", "h", "l", "L", "m", "M", "N", "p", "P", "Q", "R", "X")
|
|
_re_start_wb = r"\b(?=\w)"
|
|
_re_end_wb = r"\b(?<=\w)"
|
|
_line_break = r'(?:\r\n|(?!\r\n)[\n\v\f\r\x85\u2028\u2029])'
|
|
_bytes_line_break = r'(?>\r\n|[\n\v\f\r\x85])' if _util.PY311 else r'(?:\r\n|(?!\r\n)[\n\v\f\r\x85])'
|
|
_grapheme_cluster = r'(?:{}{}*(?!{}))'
|
|
|
|
verbose: bool
|
|
unicode: bool
|
|
global_flag_swap: dict[str, bool]
|
|
temp_global_flag_swap: dict[str, bool]
|
|
ascii: bool # noqa: A003
|
|
is_bytes: bool
|
|
search: AnyStr
|
|
|
|
def __init__(self, search: AnyStr, re_verbose: bool = False, re_unicode: bool | None = None) -> None:
    """
    Store the search pattern and the initial flag state.

    `re_verbose` and `re_unicode` are only recorded here; they are turned into the
    working `verbose`/`unicode` state when parsing starts.
    """

    self.is_bytes = isinstance(search, bytes)
    # Byte patterns use the line break expression without the Unicode-only separators.
    self._re_line_break = self._bytes_line_break if self.is_bytes else self._line_break
    self.search = search
    self.re_verbose = re_verbose
    self.re_unicode = re_unicode
|
|
|
|
def process_quotes(self, text: str) -> str:
    """
    Expand quoted (`Q` ... `E`) sections of the pattern.

    Text between a backslash-`Q` and the next backslash-`E` (or the end of the pattern)
    is replaced by its `re.escape`d form. Everything else is copied through, except that
    a backslash-`E` found outside of a quoted section is dropped.
    """

    output = []
    literal = []  # type: list[str]
    pending_escape = False
    quoting = False

    for char in _util.StringIter(text):
        if pending_escape:
            pending_escape = False
            if char == "E":
                if quoting:
                    output.append(_re.escape("".join(literal)))
                    literal = []
                    quoting = False
            elif char == "Q" and not quoting:
                quoting = True
            else:
                # Any other escape is kept verbatim in whichever buffer is active.
                (literal if quoting else output).extend(["\\", char])
            continue

        if char == "\\":
            pending_escape = True
        elif quoting:
            literal.extend(char)
        else:
            output.append(char)

    # A trailing lone backslash is preserved.
    if pending_escape:
        (literal if quoting else output).append("\\")

    # Flush a quoted section that was never closed.
    if literal:
        output.append(_re.escape("".join(literal)))

    return "".join(output)
|
|
|
|
def verbose_comment(self, t: str, i: _util.StringIter) -> list[str]:
    """
    Consume a verbose-mode comment up to and including the end of line.

    Characters are copied as they are, except that an escaped character which is one
    of the references in `_new_refs` gets an extra backslash in front of it.
    """

    pieces = []
    after_backslash = False

    try:
        while t != "\n":
            if after_backslash:
                after_backslash = False
                if t in self._new_refs:
                    pieces.append("\\")
            elif t == "\\":
                after_backslash = True
            pieces.append(t)
            t = next(i)
    except StopIteration:
        # The comment ran to the end of the pattern.
        pass

    if t == "\n":
        pieces.append(t)
    return pieces
|
|
|
|
def flags(self, text: str, scoped: bool = False) -> None:
    """
    Apply inline flags to the parser state.

    `text` holds the flag letters, optionally followed by `-` and the letters being
    disabled. When a non-scoped flag changes the Unicode or verbose state, the change
    is recorded in `temp_global_flag_swap` and `GlobalRetryException` is raised.
    """

    enable, *remainder = text.split('-')
    disable = remainder[0] if remainder else ''
    retry = False

    if ('a' in enable or 'L' in enable) and self.unicode:
        self.unicode = False
        unicode_switched = True
    elif 'u' in enable and not (self.unicode or self.is_bytes):
        self.unicode = True
        unicode_switched = True
    else:
        unicode_switched = False

    if unicode_switched and not scoped:
        self.temp_global_flag_swap["unicode"] = True
        retry = True

    if 'x' in disable and self.verbose:
        self.verbose = False
    elif 'x' in enable and not self.verbose:
        self.verbose = True
        if not scoped:
            self.temp_global_flag_swap["verbose"] = True
            retry = True

    if retry:
        raise GlobalRetryException('Global Retry')
|
|
|
|
def get_unicode_property(self, i: _util.StringIter, brackets: bool = False) -> tuple[str, str]:
    """
    Read a Unicode property reference and return it as `(name, value)`.

    Without `brackets`, the reference is either a single ASCII letter or a braced
    `{name}`, `{name=value}` or `{name:value}` form. With `brackets`, only the POSIX
    style `:name:]`, `:name=value:]` or `:name:value:]` form is accepted (the caller
    has already consumed the opening `[`).

    A leading `^` is kept as part of the name. Spaces, `-` and `_` are dropped, and both
    parts are lowercased. In the POSIX form, compatibility property names are given a
    `posix` prefix. The value is an empty string when none was given.

    Raises `SyntaxError` for a malformed reference or when the input ends early.
    """

    index = i.index
    prop = []
    value = []
    try:
        c = next(i)
        # The one-letter shortcut only exists for the escape form. In POSIX mode a letter
        # right after `[` is just an ordinary set member (`[a[l]]`), so it must be rejected
        # here to let the caller fall back to a literal `[`.
        if not brackets and c.upper() in _ASCII_LETTERS:
            prop.append(c)
        elif (not brackets and c != '{') or (brackets and c != ':'):
            raise SyntaxError(f"Unicode property missing '{{' at {i.index - 1}!")
        else:
            c = next(i)
            if c == '^':
                prop.append(c)
                c = next(i)

            while c not in (':', '=', '}'):
                if c not in _PROPERTY:
                    raise SyntaxError(f'Invalid Unicode property character at {i.index - 1}!')
                if c not in _PROPERTY_STRIP:
                    prop.append(c)
                c = next(i)

            if c in (':', '='):
                skip = False
                if brackets:
                    is_colon = c == ':'
                    c = next(i)
                    if is_colon and c == ']':
                        # That's the end of the property
                        skip = True
                    end = ':'
                else:
                    c = next(i)
                    end = '}'

                # Get the property value
                if not skip:
                    while c != end:
                        if c not in _PROPERTY:
                            raise SyntaxError(f'Invalid Unicode property character at {i.index - 1}!')
                        if c not in _PROPERTY_STRIP:
                            value.append(c)
                        c = next(i)
                    if brackets and c == ':':
                        # The POSIX form must close with `:]`.
                        c = next(i)
                        if c != ']':
                            raise SyntaxError(f'Invalid Unicode property character at {i.index - 1}!')
                    if not value:
                        raise SyntaxError('Invalid Unicode property!')

    except StopIteration as e:
        if brackets:
            raise SyntaxError(f"Missing or unmatched ':]' at {index}!") from e
        else:
            raise SyntaxError(f"Missing or unmatched '{{' at {index}!") from e

    p = ''.join(prop).lower()
    v = ''.join(value).lower()

    # Ensure when using POSIX form, that any property considered a compatibility property uses the POSIX form.
    # POSIX form is not guaranteed to be different from standard form and sometimes is just an alias for standard.
    if brackets and p in _COMPATIBILITY_PROPERTIES:
        p = 'posix' + p

    return p, v
|
|
|
|
def get_named_unicode(self, i: _util.StringIter) -> str:
    """
    Read the `{NAME}` part of a named Unicode reference and return `NAME`.

    Raises `SyntaxError` when the opening brace is missing or the closing brace is
    never found.
    """

    start = i.index
    chars = []
    try:
        if next(i) != '{':
            raise ValueError(f"Named Unicode missing '{{' {i.index - 1}!")
        while True:
            c = next(i)
            if c == '}':
                break
            chars.append(c)
    except Exception as e:
        # Both a missing brace and running out of input are reported the same way.
        raise SyntaxError(f"Unmatched '{{' at {start}!") from e

    return ''.join(chars)
|
|
|
|
def reference(self, t: str, i: _util.StringIter, in_group: bool = False) -> list[str]:
    """
    Translate the escaped character `t` into pattern text.

    `m`, `M`, `R` and `X` expand to whole sub-patterns and are only recognised outside
    of a character group. `p`, `P` and `N` read their argument from `i` and, inside a
    group, record that a property or named character was found. Any other character
    is returned unchanged with its backslash.
    """

    if not in_group:
        if t == "m":
            return [self._re_start_wb]
        if t == "M":
            return [self._re_end_wb]
        if t == "R":
            return [self._re_line_break]
        if t == "X":
            no_mark = self.unicode_props("^m", None, in_group=False)[0]
            mark = self.unicode_props("m", None, in_group=False)[0]
            # Returned as individual characters, as extending a list with a string does.
            return list(self._grapheme_cluster.format(no_mark, mark, mark))

    if t in ('p', 'P'):
        name, value = self.get_unicode_property(i)
        if t == 'P':
            expanded = self.unicode_props(name, value, in_group=in_group, negate=True)
        else:
            expanded = self.unicode_props(name, value, in_group=in_group)
        if in_group:
            self.found_property = True
        return list(expanded)

    if t == "N":
        text = self.get_named_unicode(i)
        expanded = self.unicode_name(text, in_group)
        if in_group:
            self.found_named_unicode = True
        return list(expanded)

    return ["\\", t]
|
|
|
|
def get_comments(self, i: _util.StringIter) -> str | None:
    """
    Read an inline `(?#...)` comment whose `(` has already been consumed.

    Returns the full comment text including both parentheses, or `None` (with the
    iterator rewound) when what follows is not a comment. A `)` preceded by a
    backslash does not end the comment. Raises `SyntaxError` if the comment is
    never closed.
    """

    start = i.index
    try:
        if next(i) != '?':
            i.rewind(1)
            return None
        if next(i) != '#':
            i.rewind(2)
            return None

        parts = ['(', '?', '#']
        skip_next = False
        while True:
            c = next(i)
            parts.append(c)
            if skip_next:
                skip_next = False
            elif c == '\\':
                skip_next = True
            elif c == ')':
                break
    except StopIteration as e:
        raise SyntaxError(f"Unmatched '(' at {start - 1}!") from e

    return ''.join(parts)
|
|
|
|
def get_flags(self, i: _util.StringIter) -> tuple[str | None, bool]:
    """
    Read an inline flag group whose `(` has already been consumed.

    Returns `(text, scoped)`. `text` is the flag group from `(` through the closing
    `)` or `:`, or `None` when what follows is not a valid flag group (the iterator is
    rewound in that case). `scoped` is true once a `-` or a terminating `:` was seen.

    Letters before `-` must be global flags; letters after it must be scoped flags.
    A group containing `-` must end with `:`.
    """

    start = i.index
    pieces = ['(']
    negating = False
    scoped = False
    try:
        c = next(i)
        if c != '?':
            i.rewind(1)
            return None, False
        pieces.append(c)

        c = next(i)
        while c not in _SCOPED_END:
            if negating:
                if c not in _SCOPED_FLAGS:
                    raise ValueError('Bad scope')
            elif c == '-':
                scoped = negating = True
            elif c not in _GLOBAL_FLAGS:
                raise ValueError("Bad flag")
            pieces.append(c)
            c = next(i)

        if c == ':':
            scoped = True
        elif scoped:
            raise ValueError("Bad flag")
        pieces.append(c)
    except Exception:
        # Not a flag group after all: undo everything that was consumed.
        i.rewind(i.index - start)
        return None, scoped

    return ''.join(pieces), scoped
|
|
|
|
def subgroup(self, t: str, i: _util.StringIter) -> list[str]:
    """
    Handle a parenthesised construct starting at `t`.

    Inline comments and non-scoped flag groups are returned as a single item.
    Otherwise the group content is parsed until the matching `)`, and the verbose
    and Unicode state in effect before the group is restored afterwards.
    """

    # (?#comment)
    comment = self.get_comments(i)
    if comment:
        return [comment]

    saved_verbose, saved_unicode = self.verbose, self.unicode

    # (?flags:pattern) or (?flags)
    flag_text, is_scoped = self.get_flags(i)
    if flag_text:  # pragma: no cover
        t = flag_text
        self.flags(flag_text[2:-1], scoped=is_scoped)
        if not is_scoped:
            return [flag_text]

    group = []  # type: list[str]
    try:
        while t != ')':
            if group:
                group.extend(self.normal(t, i))
            else:
                # The opening token is copied verbatim.
                group.append(t)
            t = next(i)
    except StopIteration:
        pass

    # Restore flags after group
    self.verbose, self.unicode = saved_verbose, saved_unicode

    if t == ")":
        group.append(t)
    return group
|
|
|
|
def char_groups(self, t: str, i: _util.StringIter) -> list[str]:
    """
    Handle character groups.

    Parses a `[...]` set starting at `t` (the opening `[`) and returns the pieces of
    the rewritten set. Escapes inside the set are expanded through `reference`, and a
    nested `[` is tried as a POSIX style property before being treated literally.
    A `-` adjacent to an expanded property is escaped so it cannot form a range.
    """

    current = []
    # `pos` tracks the position of `t`; `first` marks the opening `[` (or the `^` after it)
    # so that a `]` or `^` directly following it can be recognised.
    pos = i.index - 1
    found = False
    escaped = False
    first = 0
    # Local flag: any property was expanded in this set. The attributes below are set by
    # `reference` while it handles escapes inside the group.
    found_property = False
    self.found_property = False
    self.found_named_unicode = False

    try:
        while True:
            # Prevent POSIX/Unicode class from being part of a range.
            if self.found_property and t == '-':
                current.append(_re.escape(t))
                pos += 1
                t = next(i)
                self.found_property = False
                continue
            else:
                self.found_property = False

            if not escaped and t == "\\":
                escaped = True
            elif escaped:
                escaped = False
                # Index of the last piece before the expansion is appended.
                idx = len(current) - 1
                current.extend(self.reference(t, i, True))
                if self.found_property:
                    # Prevent Unicode class from being part of a range.
                    if idx >= 0 and current[idx] == '-':
                        current[idx] = _re.escape('-')
                    found_property = True
            elif t == "[" and not found:
                found = True
                first = pos
                current.append(t)
            elif t == "[":
                index = i.index
                try:
                    prop = self.get_unicode_property(i, True)  # type: tuple[str, str] | None
                except Exception:
                    # Not a POSIX style property: put the input back and keep `[` literally.
                    prop = None
                    i.rewind(i.index - index)
                if prop is not None:
                    value = self.unicode_props(prop[0], prop[1], in_group=True)
                    if current[-1] == '-':
                        current[-1] = _re.escape('-')
                    current.extend(value)
                    found_property = True
                    pos = i.index - 2
                else:
                    current.append(t)
            elif t == "^" and found and (pos == first + 1):
                # Negation marker directly after the opening `[`.
                first = pos
                current.append(t)
            elif t == "]" and found and (pos != first + 1):
                # A `]` that is not the very first member closes the set.
                found = False
                current.append(t)
                break
            else:
                current.append(t)
            pos += 1
            t = next(i)
    except StopIteration:
        pass

    # Input ended right after a backslash.
    if escaped:
        current.append(t)

    # Handle properties that return an empty string.
    # This will occur when a property's values exceed
    # either the Unicode char limit on a narrow system,
    # or the ASCII limit in a byte string pattern.
    if found_property or self.found_named_unicode:
        temp = "".join(current)
        if temp == '[]':
            # We specified some properties, but they are all
            # out of reach. Therefore we can match nothing.
            current = [f'[^{_uniprops.ASCII_RANGE if self.is_bytes else _uniprops.UNICODE_RANGE}]']
        elif temp == '[^]':
            current = [f'[{_uniprops.ASCII_RANGE if self.is_bytes else _uniprops.UNICODE_RANGE}]']
        else:
            current = [temp]

    return current
|
|
|
|
def normal(self, t: str, i: _util.StringIter) -> list[str]:
    """
    Dispatch a single pattern character found outside of a character group.

    Backslash escapes, groups, verbose comments (only while verbose mode is on) and
    character groups are handed to their handlers; anything else is returned as is.
    """

    if t == "\\":
        pieces = []
        try:
            t = next(i)
            pieces.extend(self.reference(t, i))
        except StopIteration:
            # Nothing follows the backslash: keep it.
            pieces.append(t)
        return pieces

    if t == "(":
        return list(self.subgroup(t, i))

    if self.verbose and t == "#":
        return list(self.verbose_comment(t, i))

    if t == "[":
        return list(self.char_groups(t, i))

    return [t]
|
|
|
|
def unicode_name(self, name: str, in_group: bool = False) -> list[str]:
    """
    Return the pattern text for the character called `name`.

    Code points up to 0xFF are written as a three digit octal escape, others as the
    character itself. In a byte pattern a code point above 0xFF cannot be matched:
    inside a group it contributes an empty string, elsewhere a set matching nothing.
    """

    codepoint = ord(_unicodedata.lookup(name))

    if codepoint <= 0xFF:
        return [f'\\{codepoint:03o}']

    if not self.is_bytes:
        return [chr(codepoint)]

    if in_group:
        return ['']

    full_range = _uniprops.ASCII_RANGE if self.is_bytes else _uniprops.UNICODE_RANGE
    return [f'[^{full_range}]']
|
|
|
|
def unicode_props(
    self,
    props: str,
    prop_value: str | None,
    in_group: bool = False,
    negate: bool = False
) -> list[str]:
    """
    Expand a Unicode property into character set text.

    `negate` toggles a leading `^` on the property name. An empty `prop_value` is
    treated as no value. Outside of a group the result is wrapped in `[...]`, and a
    property that expands to nothing becomes a set that matches nothing.
    """

    if negate:
        # Negating an already negated property cancels out.
        props = props[1:] if props.startswith("^") else '^' + props

    if not prop_value:
        prop_value = None

    if self.is_bytes:
        mode = _uniprops.MODE_ASCII
    elif self.unicode:
        mode = _uniprops.MODE_UNICODE
    else:
        mode = _uniprops.MODE_NORMAL

    expansion = _uniprops.get_unicode_property(props, prop_value, mode)
    if in_group:
        return [expansion]

    if not expansion:
        expansion = f'^{_uniprops.ASCII_RANGE if self.is_bytes else _uniprops.UNICODE_RANGE}'
    return [f"[{expansion}]"]
|
|
|
|
def main_group(self, i: _util.StringIter) -> list[str]:
    """Parse the whole pattern (group 0) and return its rewritten pieces."""

    pieces = []  # type: list[str]
    try:
        while True:
            pieces += self.normal(next(i), i)
    except StopIteration:
        # End of the pattern.
        pass
    return pieces
|
|
|
|
def _parse(self, search: str) -> str:
    """
    Parse `search` and return the rewritten pattern text.

    When a global flag changes the parsing state mid-pattern, the pattern is parsed
    again from the start with the new state. Each global flag may trigger this once;
    a second request for the same flag raises `LoopException`.
    """

    self.verbose = bool(self.re_verbose)
    self.unicode = bool(self.re_unicode)
    self.global_flag_swap = {
        "unicode": False,
        "verbose": False
    }
    self.temp_global_flag_swap = {
        "unicode": False,
        "verbose": False
    }
    # ASCII mode only when Unicode was explicitly turned off; otherwise default to Unicode.
    self.ascii = self.re_unicode is not None and not self.re_unicode
    if not (self.unicode or self.ascii):
        self.unicode = True

    i = _util.StringIter(self.process_quotes(search))
    loop_messages = (
        ("unicode", 'Global unicode flag recursion.'),
        ("verbose", 'Global verbose flag recursion.')
    )

    while True:
        try:
            pieces = self.main_group(i)
        except GlobalRetryException as e:
            # Prevent a loop of retry over and over for a pattern like ((?u)(?a))
            # or (?-x:(?x))
            for name, message in loop_messages:
                if self.temp_global_flag_swap[name]:
                    if self.global_flag_swap[name]:
                        raise LoopException(message) from e
                    self.global_flag_swap[name] = True
            self.temp_global_flag_swap = {
                "unicode": False,
                "verbose": False
            }
            # Start over from the beginning of the pattern.
            i.rewind(i.index)
        else:
            return "".join(pieces)
|
|
|
|
def parse(self) -> AnyStr:
    """
    Apply the search template and return the rewritten pattern.

    Byte patterns are decoded as Latin-1 for parsing and encoded back afterwards.
    """

    source = self.search
    if not isinstance(source, bytes):
        return self._parse(source)
    return self._parse(source.decode('latin-1')).encode('latin-1')
|
|
|
|
|
|
class _ReplaceParser(Generic[AnyStr]):
|
|
"""Pre-replace template."""
|
|
|
|
def __init__(self, pattern: Pattern[AnyStr], template: AnyStr, use_format: bool = False) -> None:
|
|
"""Initialize."""
|
|
|
|
self.pattern = pattern # type: Pattern[AnyStr]
|
|
self._original = template # type: AnyStr
|
|
self._template = template # type: AnyStr
|
|
self.use_format = use_format
|
|
self.end_found = False
|
|
self.group_slots = [] # type: list[tuple[int, tuple[int | None, int | None, Any]]]
|
|
self.literal_slots = [] # type: list[str]
|
|
self.result = [] # type: list[str]
|
|
self.span_stack = [] # type: list[int]
|
|
self.single_stack = [] # type: list[int]
|
|
self.literals = [] # type: list[AnyStr | None]
|
|
self.groups = [] # type: list[tuple[int, int]]
|
|
self.slot = 0
|
|
self.manual = False
|
|
self.auto = False
|
|
self.auto_index = 0
|
|
self.is_bytes = isinstance(self._original, bytes)
|
|
|
|
def parse_format_index(self, text: str) -> int | str:
    """
    Convert the text found between `[` and `]` of a format field into an index.

    Returns an `int` when `text` is an integer, optionally signed and optionally
    written with a lowercase `0b`, `0o` or `0x` prefix. Anything else, including an
    empty string, is returned unchanged so it can be used as a string key.
    """

    base = 10
    # Look past an optional sign. Slicing instead of indexing keeps an empty index
    # (as in `{0[]}`) from raising IndexError.
    prefix = text[1:3] if text[:1] == "-" else text[:2]
    if prefix[:1] == "0":
        base = {"b": 2, "o": 8, "x": 16}.get(prefix[-1], 10)

    try:
        return int(text, base)
    except ValueError:
        # Not a number: treat it as a key.
        return text
|
|
|
|
def get_format(self, c: str, i: _util.StringIter) -> tuple[str, list[tuple[int, Any]]]:
    """
    Get format group.

    Parses one replacement field, starting at `c` (the first character after the
    opening `{`) and reading through the closing `}`. Returns the field name (an empty
    string for `{}`) and a list of `(kind, data)` entries describing the field, its
    indexes and attributes, an optional conversion and an optional format spec.

    Raises `SyntaxError` for an unterminated field or index, an invalid conversion
    type or an invalid format spec character, and `ValueError` for a zero-padded
    width without an alignment.
    """

    index = i.index
    field = ''
    value = []  # type: list[tuple[int, Any]]

    try:
        if c == '}':
            # Empty field: `{}`.
            value.append((_util.FMT_FIELD, ''))
            value.append((_util.FMT_INDEX, -1))
        else:
            # Field
            temp = []  # type: list[str]
            if c in _LETTERS_UNDERSCORE:
                # Handle name
                temp.append(c)
                c = self.format_next(i)
                while c in _WORD:
                    temp.append(c)
                    c = self.format_next(i)
            elif c in _DIGIT:
                # Handle group number
                temp.append(c)
                c = self.format_next(i)
                while c in _DIGIT:
                    temp.append(c)
                    c = self.format_next(i)

            # Try and covert to integer index
            field = ''.join(temp).strip()
            try:
                # Round-tripping through `int` normalises the number (e.g. leading zeros).
                value = [(_util.FMT_FIELD, str(int(field, 10)))]
            except ValueError:
                value = [(_util.FMT_FIELD, field)]
                pass

            # No explicit index directly after the field.
            if c != '[':
                value.append((_util.FMT_INDEX, None))

            # Attributes and indexes
            while c in ('[', '.'):
                if c == '[':
                    findex = []
                    sindex = i.index - 1
                    c = self.format_next(i)
                    try:
                        while c != ']':
                            findex.append(c)
                            c = self.format_next(i)
                    except StopIteration as e:
                        raise SyntaxError(f"Unmatched '[' at {sindex - 1}") from e
                    idx = self.parse_format_index(''.join(findex))
                    value.append((_util.FMT_INDEX, idx))
                    c = self.format_next(i)
                else:
                    findex = []
                    c = self.format_next(i)
                    while c in _WORD:
                        findex.append(c)
                        c = self.format_next(i)
                    value.append((_util.FMT_ATTR, ''.join(findex)))

            # Conversion
            if c == '!':
                c = self.format_next(i)
                if c not in _FMT_CONV_TYPE:
                    raise SyntaxError(f"Invalid conversion type at {i.index - 1}!")
                value.append((_util.FMT_CONV, c))
                c = self.format_next(i)

            # Format spec
            if c == ':':
                fill = None  # type: str | None
                width = []
                align = None
                convert = None
                c = self.format_next(i)

                if c in ('<', '>', '^'):
                    # Get fill and alignment
                    align = c
                    c = self.format_next(i)
                    if c in ('<', '>', '^'):
                        # Two alignment characters in a row: the first one was the fill.
                        fill = align
                        align = c
                        c = self.format_next(i)
                elif c in _DIGIT:
                    # Get Width
                    fill = c
                    c = self.format_next(i)
                    if c in ('<', '>', '^'):
                        align = c
                        c = self.format_next(i)
                    else:
                        # The digit was the start of the width, not a fill character.
                        width.append(fill)
                        fill = None
                else:
                    fill = c
                    c = self.format_next(i)
                    if fill == 's' and c == '}':
                        # A lone `s` is the type, not a fill character.
                        convert = fill
                        fill = None
                    if fill is not None:
                        if c not in ('<', '>', '^'):
                            raise SyntaxError(f'Invalid format spec char at {i.index - 1}!')
                        align = c
                        c = self.format_next(i)

                while c in _DIGIT:
                    width.append(c)
                    c = self.format_next(i)

                if not align and len(width) and width[0] == '0':
                    raise ValueError("'=' alignment is not supported!")
                if align and not fill and len(width) and width[0] == '0':
                    fill = '0'

                if c == 's':
                    convert = c
                    c = self.format_next(i)

                if not fill:
                    fill = ' '

                value.append(
                    (
                        _util.FMT_SPEC,
                        (
                            fill.encode('latin-1') if self.is_bytes else fill,
                            align,
                            (int(''.join(width)) if width else 0),
                            convert
                        )
                    )
                )

            if c != '}':
                raise SyntaxError(f"Unmatched '{{' at {index - 1}")
    except StopIteration as e:
        raise SyntaxError(f"Unmatched '{{' at {index - 1}!") from e

    return field, value
|
|
|
|
def handle_format(self, t: str, i: _util.StringIter) -> None:
    """
    Handle a curly bracket found in a format template.

    A doubled bracket (`{{` or `}}`) produces a single literal bracket. A single `{`
    starts a replacement field. A single `}` raises `SyntaxError`.
    """

    opening = t == '{'
    following = self.format_next(i)

    if following == ('{' if opening else '}'):
        # Escaped bracket.
        self.get_single_stack()
        self.result.append(following)
    elif opening:
        field, text = self.get_format(following, i)
        self.handle_format_group(field, text)
    else:
        raise SyntaxError(f"Unmatched '}}' at {i.index - 2}!")
|
|
|
|
def get_octal(self, c: str, i: _util.StringIter) -> str | None:
    """
    Get octal.

    Collects up to three octal digits, starting with `c`, and returns them as a
    string. Returns `None`, with the iterator put back to where it was on entry, when
    the digits do not form an accepted octal escape. Digits are accepted when there
    are exactly three of them, when they start with `0`, or, in format mode, whenever
    at least one digit was read.
    """

    index = i.index
    value = []
    zero_count = 0
    try:
        if c == '0':
            # Leading zeros, at most three.
            for _ in range(3):
                if c != '0':
                    break
                value.append(c)
                c = next(i)
        zero_count = len(value)
        if zero_count < 3:
            # Fill up to three digits in total.
            for _ in range(3 - zero_count):
                if c not in _OCTAL:
                    break
                value.append(c)
                c = next(i)
        # Step back once (reached only when the input did not run out above).
        i.rewind(1)
    except StopIteration:
        pass

    octal_count = len(value)
    if not (self.use_format and octal_count) and not (zero_count and octal_count < 3) and octal_count != 3:
        # Not an octal escape: restore the entry position.
        i.rewind(i.index - index)
        value = []

    return ''.join(value) if value else None
|
|
|
|
def parse_octal(self, text: str, i: _util.StringIter) -> None:
    """
    Emit the character given by the octal digits in `text`.

    Any active span case and pending single-character case are applied first. In
    format mode a resulting curly bracket is handled as format syntax. Raises
    `ValueError` for a value above 0xFF in a byte template.
    """

    codepoint = int(text, 8)
    if codepoint > 0xFF and self.is_bytes:
        # Re fails on octal greater than `0o377` or `0xFF`
        raise ValueError("octal escape value outside of range 0-0o377!")

    pending = self.get_single_stack()
    if self.span_stack:
        cased = self.convert_case(chr(codepoint), self.span_stack[-1])
        if pending is not None:
            cased = self.convert_case(cased, pending)
        codepoint = ord(cased)
    elif pending:
        codepoint = ord(self.convert_case(chr(codepoint), pending))

    if self.use_format and codepoint in _CURLY_BRACKETS_ORD:
        self.handle_format(chr(codepoint), i)
    elif codepoint <= 0xFF:
        self.result.append(f'\\{codepoint:03o}')
    else:
        self.result.append(chr(codepoint))
|
|
|
|
def get_named_unicode(self, i: _util.StringIter) -> str:
    """
    Read the `{NAME}` part of a named Unicode escape and return `NAME`.

    Raises `SyntaxError` when the opening brace is missing, or when the input ends
    before the closing brace is found.
    """

    index = i.index
    value = []
    try:
        if next(i) != '{':
            raise SyntaxError(f"Named Unicode missing '{{' at {i.index - 1}!")
        c = next(i)
        while c != '}':
            value.append(c)
            c = next(i)
    except StopIteration as e:
        # Input ran out before the closing brace, so it is the `{` at `index` that is unmatched.
        raise SyntaxError(f"Unmatched '{{' at {index}!") from e

    return ''.join(value)
|
|
|
|
def parse_named_unicode(self, i: _util.StringIter) -> None:
    """
    Emit the character referenced by a named Unicode escape.

    Any active span case and pending single-character case are applied first. In
    format mode a resulting curly bracket is handled as format syntax.
    """

    codepoint = ord(_unicodedata.lookup(self.get_named_unicode(i)))

    pending = self.get_single_stack()
    if self.span_stack:
        cased = self.convert_case(chr(codepoint), self.span_stack[-1])
        if pending is not None:
            cased = self.convert_case(cased, pending)
        codepoint = ord(cased)
    elif pending:
        codepoint = ord(self.convert_case(chr(codepoint), pending))

    if self.use_format and codepoint in _CURLY_BRACKETS_ORD:
        self.handle_format(chr(codepoint), i)
    elif codepoint <= 0xFF:
        self.result.append(f'\\{codepoint:03o}')
    else:
        self.result.append(chr(codepoint))
|
|
|
|
def get_wide_unicode(self, i: _util.StringIter) -> str:
    """
    Read the eight hex digits of a wide Unicode escape.

    The first three digits must be `0`, the fourth `0` or `1`, and the last four any
    hex digit. Raises `SyntaxError` on the first character that does not fit.
    """

    digits = []
    for position in range(8):
        c = next(i)
        if position < 3:
            valid = c == '0'
        elif position == 3:
            valid = c in ('0', '1')
        else:
            valid = c.lower() in _HEX
        if not valid:  # pragma: no cover
            raise SyntaxError(f'Invalid wide Unicode character at {i.index - 1}!')
        digits.append(c)
    return ''.join(digits)
|
|
|
|
def get_narrow_unicode(self, i: _util.StringIter) -> str:
    """Read the four hex digits of a narrow Unicode escape, raising `SyntaxError` on a non-hex character."""

    digits = []
    for _ in range(4):
        c = next(i)
        if c.lower() not in _HEX:  # pragma: no cover
            raise SyntaxError(f'Invalid Unicode character at {i.index - 1}!')
        digits.append(c)
    return ''.join(digits)
|
|
|
|
def parse_unicode(self, i: _util.StringIter, wide: bool = False) -> None:
    """
    Emit the character given by a Unicode hex escape.

    `wide` selects the eight digit form instead of the four digit one. Any active span
    case and pending single-character case are applied first. In format mode a
    resulting curly bracket is handled as format syntax.
    """

    if wide:
        hex_digits = self.get_wide_unicode(i)
    else:
        hex_digits = self.get_narrow_unicode(i)
    codepoint = int(hex_digits, 16)

    pending = self.get_single_stack()
    if self.span_stack:
        cased = self.convert_case(chr(codepoint), self.span_stack[-1])
        if pending is not None:
            cased = self.convert_case(cased, pending)
        codepoint = ord(cased)
    elif pending:
        codepoint = ord(self.convert_case(chr(codepoint), pending))

    if self.use_format and codepoint in _CURLY_BRACKETS_ORD:
        self.handle_format(chr(codepoint), i)
    elif codepoint <= 0xFF:
        self.result.append(f'\\{codepoint:03o}')
    else:
        self.result.append(chr(codepoint))
|
|
|
|
def get_byte(self, i: _util.StringIter) -> str:
    """Read the two hex digits of a byte escape, raising `SyntaxError` on a non-hex character."""

    digits = []
    for _ in range(2):
        c = next(i)
        if c.lower() not in _HEX:  # pragma: no cover
            raise SyntaxError(f'Invalid byte character at {i.index - 1}!')
        digits.append(c)
    return ''.join(digits)
|
|
|
|
def parse_bytes(self, i: _util.StringIter) -> None:
    """
    Emit the character given by a two digit hex escape.

    Any active span case and pending single-character case are applied first. In
    format mode a resulting curly bracket is handled as format syntax. Otherwise the
    character is written as a three digit octal escape when it is at most 0xFF, and
    as the character itself when case conversion moved it above 0xFF.
    """

    value = int(self.get_byte(i), 16)
    single = self.get_single_stack()
    if self.span_stack:
        text = self.convert_case(chr(value), self.span_stack[-1])
        value = ord(self.convert_case(text, single)) if single is not None else ord(text)
    elif single:
        value = ord(self.convert_case(chr(value), single))
    if self.use_format and value in _CURLY_BRACKETS_ORD:
        self.handle_format(chr(value), i)
    elif value <= 0xFF:
        self.result.append(f'\\{value:03o}')
    else:
        # Case conversion can leave the Latin-1 range (for example U+00FF uppercases
        # to U+0178). An octal escape for such a value is rejected by `re`, so emit the
        # character itself, as the other escape handlers in this class do.
        self.result.append(chr(value))
|
|
|
|
def get_named_group(self, t: str, i: _util.StringIter) -> str:
    """
    Read a `<name>` or `<number>` group reference that follows `t`.

    Returns `t` followed by the bracketed reference. A reference starting with a digit
    keeps only digits; one starting with a letter or underscore keeps only word
    characters. Other characters before the closing `>` are skipped. Raises
    `SyntaxError` for a missing `<`, an invalid first character or a missing `>`.
    """

    start = i.index
    parts = [t]
    try:
        c = next(i)
        if c != "<":
            raise SyntaxError(f"Group missing '<' at {i.index - 1}!")
        parts.append(c)

        c = next(i)
        if c in _DIGIT:
            allowed = _DIGIT
        elif c in _LETTERS_UNDERSCORE:
            allowed = _WORD
        else:
            raise SyntaxError(f"Invalid group character at {i.index - 1}!")

        while c != '>':
            if c in allowed:
                parts.append(c)
            c = next(i)
        parts.append(c)
    except StopIteration as e:
        raise SyntaxError(f"Unmatched '<' at {start}!") from e

    return ''.join(parts)
|
|
|
|
def get_group(self, t: str, i: _util.StringIter) -> str | None:
    """
    Read a one or two digit numeric group reference starting at `t`.

    Returns the digits, or `None` when `t` is not a digit from 1 to 9. A following
    character that is not a digit is put back.
    """

    if t not in _DIGIT or t == '0':
        return None

    digits = t
    try:
        following = next(i)
        if following in _DIGIT:
            digits += following
        else:
            i.rewind(1)
    except StopIteration:
        # The single digit was the last character.
        pass
    return digits
|
|
|
|
def format_next(self, i: _util.StringIter) -> str:
    """Return the next character of a format field, resolving a backslash escape first."""

    char = next(i)
    if char != '\\':
        return char
    return self.format_references(next(i), i)
|
|
|
|
def format_references(self, t: str, i: _util.StringIter) -> str:
    """
    Resolve the escape that starts with `t` inside a format field.

    Octal, standard, hex and (for text templates only) Unicode escapes are turned
    into the character they denote. For anything else the iterator is moved back one
    step and a literal backslash is returned. Raises `ValueError` for an octal value
    above 0xFF in a byte template.
    """

    octal = self.get_octal(t, i)
    if octal:
        codepoint = int(octal, 8)
        if codepoint > 0xFF and self.is_bytes:
            # Re fails on octal greater than `0o377` or `0xFF`
            raise ValueError("octal escape value outside of range 0-0o377!")
        return chr(codepoint)

    if t in _STANDARD_ESCAPES or t == '\\':
        return _BACK_SLASH_TRANSLATION['\\' + t]

    if not self.is_bytes:
        if t == "U":
            return chr(int(self.get_wide_unicode(i), 16))
        if t == "u":
            return chr(int(self.get_narrow_unicode(i), 16))
        if t == "N":
            return _unicodedata.lookup(self.get_named_unicode(i))

    if t == "x":
        return chr(int(self.get_byte(i), 16))

    # Unknown escape: give the character back and keep the backslash itself.
    i.rewind(1)
    return '\\'
|
|
|
|
def reference(self, t: str, i: _util.StringIter) -> None:
    """
    Handle the escaped character `t` of a replace template.

    Checks are made in this order: octal escapes, group references (not in format
    mode), standard escapes, case controls (`l`, `L`, `c`, `C`, `E`), Unicode escapes
    (text templates only), hex escapes, and, in format mode, escaped curly brackets
    and `g`. Any other escape is kept as written, with the active span case applied.
    """

    octal = self.get_octal(t, i)
    if t in _OCTAL and octal:
        self.parse_octal(octal, i)
        return

    if (t in _DIGIT or t == 'g') and not self.use_format:
        group = self.get_group(t, i)
        if not group:
            group = self.get_named_group(t, i)
        self.handle_group('\\' + group)
        return

    if t in _STANDARD_ESCAPES:
        self.get_single_stack()
        self.result.append('\\' + t)
        return

    # Case controls
    if t == "l":
        self.single_case(i, _LOWER)
        return
    if t == "L":
        self.span_case(i, _LOWER)
        return
    if t == "c":
        self.single_case(i, _UPPER)
        return
    if t == "C":
        self.span_case(i, _UPPER)
        return
    if t == "E":
        self.end_found = True
        return

    # Unicode escapes are not available in byte templates.
    if not self.is_bytes:
        if t == "U":
            self.parse_unicode(i, True)
            return
        if t == "u":
            self.parse_unicode(i)
            return
        if t == "N":
            self.parse_named_unicode(i)
            return

    if t == "x":
        self.parse_bytes(i)
        return

    if self.use_format:
        if t in _CURLY_BRACKETS:
            self.result.append('\\\\')
            self.handle_format(t, i)
            return
        if t == 'g':
            self.result.append('\\\\')
            self.result.append(t)
            return

    escape = '\\' + t
    self.get_single_stack()
    if self.span_stack:
        escape = self.convert_case(escape, self.span_stack[-1])
    self.result.append(escape)
|
|
|
|
def _parse_template(self, template: str) -> str:
    """
    Walk the template text and build the intermediate template string.

    Plain characters are collected in `self.result`; backslash sequences go
    through `reference` and, in format mode, curly brackets go through
    `handle_format`. The return value is the concatenation of `self.literal_slots`.
    """

    self.result = [""]
    chars = _util.StringIter(template)

    try:
        while True:
            ch = next(chars)
            if self.use_format and ch in _CURLY_BRACKETS:
                self.handle_format(ch, chars)
                continue
            if ch != '\\':
                self.result.append(ch)
                continue
            try:
                ch = next(chars)
                self.reference(ch, chars)
            except StopIteration:
                # Input ran out mid-escape: keep the character currently held, then stop.
                self.result.append(ch)
                raise
    except StopIteration:
        # Exhausting the iterator is the normal way out of the loop.
        pass

    # Flush literal text that was collected after the last group.
    if len(self.result) > 1:
        self.literal_slots.append("".join(self.result))
        self.result[:] = [""]
        self.slot += 1

    return "".join(self.literal_slots)
|
|
|
|
def parse_template(self) -> None:
    """
    Build `self._template` and fill `self.groups` / `self.literals` from it.

    Bytes templates are decoded as latin-1 for parsing and encoded back
    afterwards. The result is then passed to the `re` template parser.
    """

    raw = self._original
    if isinstance(raw, bytes):
        self._template = self._parse_template(raw.decode('latin-1')).encode('latin-1')
    else:
        self._template = self._parse_template(raw)

    if not _util.PY312:
        # Here the parser's return value is unpacked directly into the two attributes.
        self.groups, self.literals = _parser.parse_template(self._template, self.pattern)
        return

    # Under `_util.PY312` the parser yields a flat sequence: integers are group
    # numbers, everything else is literal text. Rebuild the (groups, literals) pair.
    position = 0
    for piece in _parser.parse_template(self._template, self.pattern):
        if isinstance(piece, int):
            self.groups.append((position, piece))
            self.literals.append(None)
        elif not piece:
            # Empty literals take no position.
            continue
        else:
            self.literals.append(cast(AnyStr, piece))
        position += 1
|
|
|
|
def span_case(self, i: _util.StringIter, case: int) -> None:
    """
    Apply `case` to the following characters until an end marker is seen.

    Consumes input from `i` until `self.end_found` is set, the span stack
    shrinks below the depth recorded here, or the iterator is exhausted.
    """

    # A new \L, \C or \E replaces whatever span is currently on top, and also
    # drops one pending single-character case.
    if self.span_stack:
        self.span_stack.pop()
    if self.single_stack:
        self.single_stack.pop()
    self.span_stack.append(case)
    depth = len(self.span_stack)
    self.end_found = False

    try:
        while not self.end_found:
            ch = next(i)
            if self.use_format and ch in _CURLY_BRACKETS:
                self.handle_format(ch, i)
            elif ch == '\\':
                try:
                    ch = next(i)
                    self.reference(ch, i)
                except StopIteration:
                    # Input ran out mid-escape: keep the character currently held.
                    self.result.append(ch)
                    raise
            else:
                self.result.append(self.convert_case(ch, case))

            # Stop on an end marker, or when the stack is shallower than when this span began.
            if self.end_found or len(self.span_stack) < depth:
                self.end_found = False
                break
    except StopIteration:
        pass

    # Remove this span's own entry if the stack is still at the recorded depth.
    if len(self.span_stack) == depth:
        self.span_stack.pop()
|
|
|
|
def convert_case(self, value: str, case: int) -> str:
    """
    Return `value` lowercased when `case` is `_LOWER`, uppercased otherwise.

    For bytes templates only characters in `_ASCII_LETTERS` are converted;
    every other character is copied through unchanged.
    """

    to_lower = case == _LOWER

    if not self.is_bytes:
        return value.lower() if to_lower else value.upper()

    return "".join(
        (ch.lower() if to_lower else ch.upper()) if ch in _ASCII_LETTERS else ch
        for ch in value
    )
|
|
|
|
def single_case(self, i: _util.StringIter, case: int) -> None:
    """
    Apply `case` to the next character only.

    The request is pushed on `self.single_stack`. A plain character is
    converted here; a format field or escape is delegated to its handler.
    """

    # Consecutive single-case requests: the newest one replaces the previous one.
    if self.single_stack:
        self.single_stack.pop()
    self.single_stack.append(case)

    try:
        ch = next(i)
        if self.use_format and ch in _CURLY_BRACKETS:
            self.handle_format(ch, i)
        elif ch == '\\':
            try:
                ch = next(i)
                self.reference(ch, i)
            except StopIteration:
                # Input ran out mid-escape: keep the character currently held.
                self.result.append(ch)
                raise
        elif self.single_stack:
            pending = self.get_single_stack()
            if pending is not None:
                self.result.append(self.convert_case(ch, pending))
    except StopIteration:
        # Nothing left to apply the case to.
        pass
|
|
|
|
def get_single_stack(self) -> int | None:
    """
    Empty `self.single_stack` and return the entry that sat at its bottom.

    Returns `None` when the stack was already empty.
    """

    stack = self.single_stack
    if not stack:
        return None

    # Popping until empty leaves the first-pushed entry as the last one seen.
    bottom = stack[0]
    stack.clear()
    return bottom
|
|
|
|
def handle_format_group(self, field: str, text: list[tuple[int, Any]]) -> None:
    """
    Record a format-style group, enforcing auto versus manual field numbering.

    An empty `field` requests automatic numbering and is replaced (also in
    `text[0]`) by the next auto index. A non-empty `field` is manual numbering.
    Mixing the two styles in one template raises `ValueError`.
    """

    if field == '':
        if not self.auto:
            if self.manual:
                raise ValueError("Cannot switch to auto format during manual format!")
            # First field seen: lock the template into auto numbering.
            self.auto = True
        field = str(self.auto_index)
        text[0] = (_util.FMT_FIELD, field)
        self.auto_index += 1
    elif not self.manual:
        if self.auto:
            raise ValueError("Cannot switch to manual format during auto format!")
        # First field seen: lock the template into manual numbering.
        self.manual = True

    self.handle_group(field, tuple(text), True)
|
|
|
|
def handle_group(
    self,
    text: str,
    capture: tuple[tuple[int, Any], ...] | None = None,
    is_format: bool = False
) -> None:
    """
    Emit a group reference and record its case/capture attributes.

    Literal text gathered so far is flushed to `self.literal_slots` first.
    `text` is then added, wrapped as a named-group reference when `is_format`
    is true. An entry for the group is appended to `self.group_slots`.
    """

    pending_literal = len(self.result) > 1

    # Flush collected literal text so it precedes the group.
    if pending_literal:
        self.literal_slots.append("".join(self.result))

    if is_format:
        self.literal_slots.extend(["\\g<", text, ">"])
    else:
        self.literal_slots.append(text)

    # The flushed literal took a slot of its own.
    if pending_literal:
        self.result[:] = [""]
        self.slot += 1

    slot = self.slot
    span = self.span_stack[-1] if self.span_stack else None
    single = self.get_single_stack()
    if capture is None:
        captured = () if self.is_bytes else ''
    else:
        captured = capture

    self.group_slots.append((slot, (span, single, captured)))
    self.slot += 1
|
|
|
|
def get_base_template(self) -> AnyStr:
    """Give back the template exactly as stored in `_original`, with no parsing applied."""

    original = self._original
    return original
|
|
|
|
def parse(self) -> ReplaceTemplate[AnyStr]:
    """
    Parse the stored template and wrap the outcome in a `ReplaceTemplate`.

    Raises `TypeError` when the pattern's source string and the replacement
    template are not the same string type.
    """

    pattern_source = self.pattern.pattern
    if not isinstance(pattern_source, type(self._original)):
        raise TypeError('Pattern string type must match replace template string type!')

    self.parse_template()

    groups = tuple(self.groups)
    group_slots = tuple(self.group_slots)
    literals = tuple(self.literals)
    # Only the hash of the pattern is passed on, not the pattern itself.
    return ReplaceTemplate(
        groups,
        group_slots,
        literals,
        hash(self.pattern),
        self.use_format,
        self.is_bytes
    )
|
|
|
|
|
|
class ReplaceTemplate(_util.Immutable, Generic[AnyStr]):
    """
    Replacement template expander.

    Holds a parsed replacement template and expands it against a match
    object, either via `expand(m)` or by calling the instance.

    Attributes:
        groups: pairs of (index into `literals`, group number to read from the match).
        group_slots: pairs of (index into `literals`, (span case, single case, capture)).
        literals: literal text pieces, with `None` wherever a group is substituted.
        pattern_hash: hash of the pattern the template was parsed for.
        use_format: whether groups are rendered through `_util.format_captures`.

    """

    __slots__ = ("groups", "group_slots", "literals", "pattern_hash", "use_format", "_hash", "_bytes")

    groups: tuple[tuple[int, int], ...]
    group_slots: tuple[tuple[int, tuple[int | None, int | None, Any]], ...]
    literals: tuple[AnyStr | None, ...]
    pattern_hash: int
    use_format: bool
    _hash: int
    _bytes: bool

    def __init__(
        self,
        groups: tuple[tuple[int, int], ...],
        group_slots: tuple[tuple[int, tuple[int | None, int | None, Any]], ...],
        literals: tuple[AnyStr | None, ...],
        pattern_hash: int,
        use_format: bool,
        is_bytes: bool
    ) -> None:
        """Initialize, precomputing the hash from the type and all constructor arguments."""

        super().__init__(
            use_format=use_format,
            groups=groups,
            group_slots=group_slots,
            literals=literals,
            pattern_hash=pattern_hash,
            _bytes=is_bytes,
            _hash=hash(
                (
                    type(self),
                    groups, group_slots, literals,
                    pattern_hash, use_format, is_bytes
                )
            )
        )

    def __call__(self, m: Match[AnyStr] | None) -> AnyStr:
        """Call: shorthand for `expand(m)`."""

        return self.expand(m)

    def __hash__(self) -> int:
        """Hash: return the value computed at construction."""

        return self._hash

    def __eq__(self, other: Any) -> bool:
        """Equal: same type family and identical template data, including bytes mode."""

        return (
            isinstance(other, ReplaceTemplate) and
            self.groups == other.groups and
            self.group_slots == other.group_slots and
            self.literals == other.literals and
            self.pattern_hash == other.pattern_hash and
            self.use_format == other.use_format and
            self._bytes == other._bytes
        )

    def __ne__(self, other: Any) -> bool:
        """Not equal: the exact negation of `__eq__`."""

        # Derived from `__eq__` so the two can never disagree. The previous
        # hand-written version compared `self._bytes` with itself.
        return not self.__eq__(other)

    def __repr__(self) -> str:  # pragma: no cover
        """Representation."""

        return "{}.{}({!r}, {!r}, {!r}, {!r}, {!r})".format(
            self.__module__, self.__class__.__name__,
            self.groups, self.group_slots, self.literals,
            self.pattern_hash, self.use_format
        )

    def _get_group_index(self, index: int) -> int:
        """Return the group number registered for literal position `index` (0 if none)."""

        for position, group_number in self.groups:
            if position == index:
                return group_number
        return 0

    def _get_group_attributes(self, index: int) -> tuple[int | None, int | None, Any]:
        """
        Return (span case, single case, capture) registered for position `index`.

        Falls back to `(None, None, -1)` when no entry matches.
        """

        for position, attributes in self.group_slots:
            if position == index:
                return attributes
        return (None, None, -1)

    def expand(self, m: Match[AnyStr] | None) -> AnyStr:
        """
        Using the template, expand the string.

        Raises `ValueError` when `m` is `None`, and `TypeError` when the match's
        string type (bytes or str) differs from the template's.
        """

        if m is None:
            raise ValueError("Match is None!")

        # An empty slice of the subject is an empty value of the right string type.
        sep = m.string[:0]
        if not isinstance(sep, bytes if self._bytes else str):
            raise TypeError('Match string type does not match expander string type!')

        text = []
        # Expand string
        for index, piece in enumerate(self.literals):
            if piece is None:
                # `None` marks a group position: fetch and post-process the group.
                g_index = self._get_group_index(index)
                span_case, single_case, capture = self._get_group_attributes(index)
                if not self.use_format:
                    # Non format replace
                    try:
                        piece = m.group(g_index)
                        if piece is None:
                            # A group that did not take part in the match expands to empty.
                            piece = sep
                    except IndexError as e:  # pragma: no cover
                        raise IndexError(f"'{g_index}' is out of range!") from e
                else:
                    # String format replace
                    try:
                        obj = m.group(g_index)
                    except IndexError as e:  # pragma: no cover
                        raise IndexError(f"'{g_index}' is out of range!") from e
                    piece = _util.format_captures(
                        [] if obj is None else [obj],
                        capture,
                        _util._to_bstr if isinstance(sep, bytes) else _util._to_str,
                        sep
                    )
                # The span case covers the whole value; the single case then
                # overrides just the first character.
                if span_case is not None:
                    piece = piece.lower() if span_case == _LOWER else piece.upper()
                if single_case is not None:
                    head, tail = piece[0:1], piece[1:]
                    piece = (head.lower() if single_case == _LOWER else head.upper()) + tail
            text.append(piece)

        return sep.join(text)
|
|
|
|
|
|
def _pickle(r):  # type: ignore[no-untyped-def]
    """Reduce a `ReplaceTemplate` to its constructor plus the positional arguments to rebuild it."""

    constructor_args = (r.groups, r.group_slots, r.literals, r.pattern_hash, r.use_format, r._bytes)
    return ReplaceTemplate, constructor_args
|
|
|
|
|
|
# Register `_pickle` as the reduction function `copyreg` uses for `ReplaceTemplate` objects.
_copyreg.pickle(ReplaceTemplate, _pickle)
|