"""
This module provides common classes for the gitignore patterns.
"""

import re
from typing import (
    Literal)

from pathspec.pattern import (
    RegexPattern)
from pathspec._typing import (
    AnyStr,  # Removed in 3.18.
    assert_unreachable)

_BYTES_ENCODING = 'latin1'
"""
The encoding to use when parsing a byte string pattern.
"""


class _GitIgnoreBasePattern(RegexPattern):
    """
    .. warning:: This class is not part of the public API. It is subject to
        change.

    The :class:`_GitIgnoreBasePattern` class is the base implementation for a
    compiled gitignore pattern. It supplies the escaping and glob-translation
    helpers defined below.
    """

    # Keep the dict-less class hierarchy.
    __slots__ = ()

    @staticmethod
    def escape(s: AnyStr) -> AnyStr:
        """
        Escape the characters that are special in a gitignore pattern.

        *s* (:class:`str` or :class:`bytes`) is a filename or other string to
        escape, usually before adding it to a ".gitignore".

        Returns the escaped string, of the same type as *s* (:class:`str` or
        :class:`bytes`).

        Raises :class:`TypeError` if *s* is neither :class:`str` nor
        :class:`bytes`.
        """
        if isinstance(s, str):
            text, wants_bytes = s, False
        elif isinstance(s, bytes):
            # Work on text internally; the result is re-encoded below.
            text, wants_bytes = s.decode(_BYTES_ENCODING), True
        else:
            raise TypeError(f"s:{s!r} is not a unicode or byte string.")

        # Reference: https://git-scm.com/docs/gitignore#_pattern_format
        # Each special character is prefixed with a single backslash.
        table = str.maketrans({char: f"\\{char}" for char in '\\[]!*#?'})
        escaped = text.translate(table)

        if wants_bytes:
            return escaped.encode(_BYTES_ENCODING)  # type: ignore[return-value]
        return escaped  # type: ignore[return-value]

    @staticmethod
    def _translate_segment_glob(
        pattern: str,
        range_error: Literal['literal', 'raise'],
    ) -> str:
        """
        Translate a path segment glob pattern to a regular expression.

        This is used in the constructor to translate a path segment glob
        pattern to its corresponding regular expression.

        *pattern* (:class:`str`) is the glob pattern.

        *range_error* (:class:`str`) is how to handle invalid range notation
        (an opening bracket with no closing bracket) in the pattern:

        - :data:`"literal"`: Invalid notation will be treated as a literal
          string.

        - :data:`"raise"`: Invalid notation will cause a :class:`_RangeError`
          to be raised.

        Returns the regular expression (:class:`str`).

        Raises :class:`ValueError` if the pattern ends with an escape
        character that has nothing after it to escape.
        """
        # NOTE: This is derived from `fnmatch.translate()` and is similar to
        # the POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set.

        pieces = []
        pending_escape = False
        pos, length = 0, len(pattern)

        while pos < length:
            # Consume the next character.
            char = pattern[pos]
            pos += 1

            if pending_escape:
                # Previous character was the escape character: take this one
                # literally.
                pending_escape = False
                pieces.append(re.escape(char))
                continue

            if char == '\\':
                # Escape character: the next character is taken literally.
                pending_escape = True
                continue

            if char == '*':
                # Multi-character wildcard. Match any string (except slashes),
                # including an empty string.
                pieces.append('[^/]*')
                continue

            if char == '?':
                # Single-character wildcard. Match any single character
                # (except a slash).
                pieces.append('[^/]')
                continue

            if char != '[':
                # Regular character, escape it for regex.
                pieces.append(re.escape(char))
                continue

            # Bracket expression (range notation) wildcard. Apart from a
            # leading exclamation mark, the whole bracket expression can be
            # used directly as regex, but its end has to be located first.
            # - "[][!]" matches ']', '[' and '!'.
            # - "[]-]" matches ']' and '-'.
            # - "[!]a-]" matches any character except ']', 'a' and '-'.

            # A leading '!' or '^' negates the expression. POSIX leaves
            # "[^...]" undefined in a glob and Python's `fnmatch.translate()`
            # escapes the caret as a literal, but Git supports using a caret
            # for negation, so the caret is honoured here for consistency
            # with Git.
            negated = pos < length and pattern[pos] in ('!', '^')
            body_start = pos + 1 if negated else pos

            # A closing bracket in the first position of the expression is a
            # member of the set, not its terminator, so skip over it before
            # searching.
            scan = body_start
            if scan < length and pattern[scan] == ']':
                scan += 1

            close = pattern.find(']', scan)
            if close != -1:
                # Found the end of the bracket expression; `stop` is one past
                # the closing bracket:
                #
                #   [...]
                #    ^   ^
                #   pos  stop
                #
                stop = close + 1

                # Build the regex bracket expression. Backslashes are doubled
                # so regex treats them as literal backslashes, as defined by
                # POSIX.
                body = pattern[body_start:stop].replace('\\', '\\\\')
                pieces.append('[' + ('^' if negated else '') + body)

                # Resume just past the closing bracket.
                pos = stop
                continue

            # Failed to find the closing bracket.
            if range_error == 'literal':
                # Treat the opening bracket as a bracket literal instead of as
                # an expression; the rest is processed as ordinary characters.
                pieces.append('\\[')
            elif range_error == 'raise':
                # Treat invalid range notation as an error.
                raise _RangeError((
                    f"Invalid range notation={pattern[pos:length]!r} found in pattern="
                    f"{pattern!r}."
                ))
            else:
                assert_unreachable(f"{range_error=!r} is invalid.")

        if pending_escape:
            raise ValueError((
                f"Escape character found with no next character to escape: {pattern!r}"
            ))  # ValueError

        return ''.join(pieces)


class GitIgnorePatternError(ValueError):
    """
    The :class:`GitIgnorePatternError` class indicates an invalid gitignore
    pattern.
    """


class _RangeError(GitIgnorePatternError):
    """
    The :class:`_RangeError` class indicates an invalid range notation was
    found in a gitignore pattern.
    """