#
# The internetarchive module is a Python/CLI interface to Archive.org.
#
# Copyright (C) 2012-2024 Internet Archive
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
internetarchive.utils
~~~~~~~~~~~~~~~~~~~~~
This module provides utility functions for the internetarchive library.
:copyright: (C) 2012-2024 by Internet Archive.
:license: AGPL 3, see LICENSE for more details.
"""
from __future__ import annotations
import hashlib
import os
import re
import sys
from collections.abc import Iterable, Mapping
from xml.dom.minidom import parseString
# Make preferred JSON package available via `from internetarchive.utils import json`
try:
import ujson as json
# ujson lacks a JSONDecodeError: https://github.com/ultrajson/ultrajson/issues/497
JSONDecodeError = ValueError
except ImportError:
import json # type: ignore
JSONDecodeError = json.JSONDecodeError # type: ignore
def deep_update(d: dict, u: Mapping) -> dict:
    """Recursively update dict `d` with mapping `u`, merging nested
    mappings instead of overwriting them. `d` is modified in place
    and also returned.
    """
for k, v in u.items():
if isinstance(v, Mapping):
r = deep_update(d.get(k, {}), v)
d[k] = r
else:
d[k] = u[k]
return d
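
# Illustrative usage: nested mappings merge recursively rather than
# replacing each other wholesale.
#
#   >>> deep_update({'a': {'b': 1}}, {'a': {'c': 2}, 'x': 3})
#   {'a': {'b': 1, 'c': 2}, 'x': 3}
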
class InvalidIdentifierException(Exception):
pass
def validate_s3_identifier(string: str) -> bool:
legal_chars = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789._-'
# periods, underscores, and dashes are legal, but may not be the first
# character!
    if string.startswith(('.', '_', '-')):
        raise InvalidIdentifierException('Identifier cannot begin with periods ".", underscores '
                                         '"_", or dashes "-".')
    if len(string) > 100 or len(string) < 3:
        raise InvalidIdentifierException('Identifier should be between 3 and 100 characters in '
                                         'length.')
# Support for uploading to user items, e.g. first character can be `@`.
if string.startswith('@'):
string = string[1:]
if any(c not in legal_chars for c in string):
raise InvalidIdentifierException('Identifier can only contain alphanumeric characters, '
'periods ".", underscores "_", or dashes "-". However, '
'identifier cannot begin with periods, underscores, or '
'dashes.')
return True
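
# Illustrative usage: a conforming identifier validates; a leading dash raises.
#
#   >>> validate_s3_identifier('my-item_2024')
#   True
#   >>> validate_s3_identifier('-bad-start')  # raises InvalidIdentifierException
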
def needs_quote(s: str) -> bool:
try:
s.encode('ascii')
except (UnicodeDecodeError, UnicodeEncodeError):
return True
return re.search(r'\s', s) is not None
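
# Illustrative usage: quoting is needed for whitespace or non-ASCII names.
#
#   >>> needs_quote('plain.txt'), needs_quote('with space.txt'), needs_quote('naïve.txt')
#   (False, True, True)
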
def norm_filepath(fp: bytes | str) -> str:
if isinstance(fp, bytes):
fp = fp.decode('utf-8')
fp = fp.replace(os.path.sep, '/')
if not fp.startswith('/'):
fp = f'/{fp}'
return fp
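
# Illustrative usage (on POSIX, where os.path.sep is '/'):
#
#   >>> norm_filepath(b'item/file.txt')
#   '/item/file.txt'
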
def get_md5(file_object) -> str:
    """Read `file_object` in 8 KiB chunks and return its MD5 hex digest.
    The file position is rewound to the start afterwards.
    """
m = hashlib.md5()
while True:
data = file_object.read(8192)
if not data:
break
m.update(data)
file_object.seek(0, os.SEEK_SET)
return m.hexdigest()
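
# Illustrative usage:
#
#   >>> import io
#   >>> get_md5(io.BytesIO(b'hello'))
#   '5d41402abc4b2a3076816bde31eb5d26'
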
def chunk_generator(fp, chunk_size: int):
while True:
chunk = fp.read(chunk_size)
if not chunk:
break
yield chunk
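
# Illustrative usage:
#
#   >>> import io
#   >>> list(chunk_generator(io.BytesIO(b'abcdef'), chunk_size=4))
#   [b'abcd', b'ef']
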
def suppress_keyboard_interrupt_message() -> None:
"""Register a new excepthook to suppress KeyboardInterrupt
exception messages, and exit with status code 130.
"""
old_excepthook = sys.excepthook
def new_hook(type, value, traceback):
if type is KeyboardInterrupt:
sys.exit(130)
old_excepthook(type, value, traceback)
sys.excepthook = new_hook
class IterableToFileAdapter:
def __init__(self, iterable, size: int, pre_encode: bool = False):
self.iterator = iter(iterable)
self.length = size
        # pre_encode is needed because http.client does not know that it
        # has to encode a TextIO object when it is wrapped in the Iterator
        # from tqdm, so this adapter provides pre-encoded output instead.
        self.pre_encode = pre_encode
def read(self, size: int = -1): # TBD: add buffer for `len(data) > size` case
        if self.pre_encode:
            # This adapter is intended to emulate the encoding that is
            # usually done by the http lib. As of 2022, iso-8859-1 encoding
            # is used to meet the HTTP standard; see Lib/http/client.py in
            # the cpython repo (https://github.com/python/cpython), or grep
            # for 'iso-8859-1'.
            return next(self.iterator, '').encode("iso-8859-1")
return next(self.iterator, b'')
def __len__(self) -> int:
return self.length
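
# Illustrative usage: adapts a chunk iterator to the file-like interface
# (``read`` plus ``__len__``) expected for streaming HTTP request bodies.
#
#   >>> adapter = IterableToFileAdapter(iter([b'ab', b'cd']), size=4)
#   >>> adapter.read(), adapter.read(), adapter.read()
#   (b'ab', b'cd', b'')
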
class IdentifierListAsItems:
"""This class is a lazily-loaded list of Items, accessible by index or identifier.
"""
def __init__(self, id_list_or_single_id, session):
self.ids = (id_list_or_single_id
if isinstance(id_list_or_single_id, list)
else [id_list_or_single_id])
self._items = [None] * len(self.ids)
self.session = session
def __len__(self) -> int:
return len(self.ids)
def __getitem__(self, idx):
for i in (range(*idx.indices(len(self))) if isinstance(idx, slice) else [idx]):
if self._items[i] is None:
self._items[i] = self.session.get_item(self.ids[i])
return self._items[idx]
def __getattr__(self, name):
try:
return self[self.ids.index(name)]
        except ValueError:
            raise AttributeError(name)
def __repr__(self) -> str:
return f'{self.__class__.__name__}({self.ids!r})'
def get_s3_xml_text(xml_str: str) -> str:
def _get_tag_text(tag_name, xml_obj):
text = ''
elements = xml_obj.getElementsByTagName(tag_name)
for e in elements:
for node in e.childNodes:
if node.nodeType == node.TEXT_NODE:
text += node.data
return text
tag_names = ['Message', 'Resource']
try:
p = parseString(xml_str)
_msg = _get_tag_text('Message', p)
_resource = _get_tag_text('Resource', p)
# Avoid weird Resource text that contains PUT method.
if _resource and "'PUT" not in _resource:
return f'{_msg} - {_resource.strip()}'
else:
return _msg
except Exception:
return str(xml_str)
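
# Illustrative usage, with an S3-style error document of the shape this
# helper expects:
#
#   >>> err = ('<Error><Message>Access denied.</Message>'
#   ...        '<Resource>some-item</Resource></Error>')
#   >>> get_s3_xml_text(err)
#   'Access denied. - some-item'
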
def get_file_size(file_obj) -> int | None:
if is_filelike_obj(file_obj):
try:
file_obj.seek(0, os.SEEK_END)
size = file_obj.tell()
# Avoid OverflowError.
if size > sys.maxsize:
size = None
file_obj.seek(0, os.SEEK_SET)
except OSError:
size = None
else:
st = os.stat(file_obj)
size = st.st_size
return size
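
# Illustrative usage: works on both file-like objects and paths.
#
#   >>> import io
#   >>> get_file_size(io.BytesIO(b'12345'))
#   5
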
def iter_directory(directory: str):
"""Given a directory, yield all files recursively as a two-tuple (filepath, s3key)"""
for path, _dir, files in os.walk(directory):
for f in files:
filepath = os.path.join(path, f)
key = os.path.relpath(filepath, directory)
yield (filepath, key)
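
# Illustrative example (hypothetical tree): given /data/a.txt and
# /data/sub/b.txt, iter_directory('/data') yields
# ('/data/a.txt', 'a.txt') and ('/data/sub/b.txt', 'sub/b.txt').
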
def recursive_file_count_and_size(files, item=None, checksum=False):
    """Given a filepath or list of filepaths, return the total number and size of files.
    If `checksum` is `True`, `item` must be given, and files whose MD5 hash
    matches any file in the `item` are skipped.
    """
    if not isinstance(files, (list, set, dict)):
        files = [files]
total_files = 0
total_size = 0
if checksum is True:
md5s = [f.get('md5') for f in item.files]
else:
md5s = []
    if isinstance(files, dict):
        # A dict maps remote keys to local filenames; count the local files.
        _files = files.values()
    else:
        # Peek at the first element without indexing so that sets work too.
        if isinstance(next(iter(files), None), tuple):
            _files = dict(files).values()
        else:
            _files = files
for f in _files:
try:
is_dir = os.path.isdir(f)
except TypeError:
try:
f = f[0]
is_dir = os.path.isdir(f)
except (AttributeError, TypeError):
is_dir = False
if is_dir:
it = iter_directory(f)
else:
it = [(f, None)]
for x, _ in it:
if checksum is True:
try:
with open(x, 'rb') as fh:
lmd5 = get_md5(fh)
except TypeError:
# Support file-like objects.
lmd5 = get_md5(x)
if lmd5 in md5s:
continue
total_size += get_file_size(x)
total_files += 1
return total_files, total_size
def recursive_file_count(*args, **kwargs):
"""Like `recursive_file_count_and_size`, but returns only the file count."""
total_files, _ = recursive_file_count_and_size(*args, **kwargs)
return total_files
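
# Illustrative usage (hypothetical paths): single paths, lists of paths, and
# directories all work; directories are walked recursively.
#
#   n_files, n_bytes = recursive_file_count_and_size(['/data/a.txt', '/data/photos'])
#   n_files = recursive_file_count('/data/photos')
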
def is_dir(obj) -> bool:
"""Special is_dir function to handle file-like object cases that
cannot be stat'd"""
try:
return os.path.isdir(obj)
    except TypeError:
        return False
def is_filelike_obj(obj) -> bool:
"""Distinguish file-like from path-like objects"""
try:
os.fspath(obj)
except TypeError:
return True
else:
return False
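
# Illustrative usage: anything os.fspath() rejects is treated as file-like.
#
#   >>> import io, pathlib
#   >>> is_filelike_obj(io.BytesIO(b''))
#   True
#   >>> is_filelike_obj('foo.txt'), is_filelike_obj(pathlib.Path('foo.txt'))
#   (False, False)
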
def reraise_modify(
caught_exc: Exception,
append_msg: str,
prepend: bool = False,
) -> None:
"""Append message to exception while preserving attributes.
Preserves exception class, and exception traceback.
    Note:
        This function needs to be called inside an ``except`` block, because
        an exception must be active in the current scope for the bare
        ``raise`` to work.
Args:
caught_exc(Exception): The caught exception object
append_msg(str): The message to append to the caught exception
prepend(bool): If True prepend the message to args instead of appending
Returns:
None
Side Effects:
Re-raises the exception with the preserved data / trace but
modified message
"""
if not caught_exc.args:
# If no args, create our own tuple
arg_list = [append_msg]
else:
        # If the last arg is a string, append (or prepend) the message to it;
        # otherwise keep the last arg as-is and add the message as a separate
        # arg (not as pretty).
arg_list = list(caught_exc.args[:-1])
last_arg = caught_exc.args[-1]
if isinstance(last_arg, str):
if prepend:
arg_list.append(append_msg + last_arg)
else:
arg_list.append(last_arg + append_msg)
else:
arg_list += [last_arg, append_msg]
caught_exc.args = tuple(arg_list)
raise # noqa: PLE0704
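
# Illustrative usage (``upload_part`` and the annotation text are hypothetical):
#
#   try:
#       upload_part()
#   except OSError as exc:
#       reraise_modify(exc, ' (while uploading part 3)')
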
def remove_none(obj):
    """Recursively remove `None` values (and, within sequences, any other
    falsy values) from `obj`.
    """
    if isinstance(obj, (list, tuple, set)):
        # Note: `if x` drops every falsy element (0, '', False), not just None.
        lst = type(obj)(remove_none(x) for x in obj if x)
        try:
            # Deduplicate a sequence of dicts; non-dict elements raise and
            # fall through to returning `lst` unchanged.
            return [dict(t) for t in {tuple(sorted(d.items())) for d in lst}]
        except (AttributeError, TypeError):
            return lst
elif isinstance(obj, dict):
return type(obj)((remove_none(k), remove_none(v))
for k, v in obj.items() if k is not None and v is not None)
else:
return obj
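
# Illustrative usage: note that falsy sequence elements are dropped as well.
#
#   >>> remove_none({'a': 1, 'b': None})
#   {'a': 1}
#   >>> remove_none([1, None, 0, 2])
#   [1, 2]
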
def delete_items_from_dict(d: dict | list, to_delete):
"""Recursively deletes items from a dict,
if the item's value(s) is in ``to_delete``.
"""
if not isinstance(to_delete, list):
to_delete = [to_delete]
if isinstance(d, dict):
for single_to_delete in set(to_delete):
if single_to_delete in d.values():
for k, v in d.copy().items():
if v == single_to_delete:
del d[k]
for v in d.values():
delete_items_from_dict(v, to_delete)
elif isinstance(d, list):
for i in d:
delete_items_from_dict(i, to_delete)
return remove_none(d)
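
# Illustrative usage:
#
#   >>> delete_items_from_dict({'a': 'x', 'b': {'c': 'x', 'd': 1}}, 'x')
#   {'b': {'d': 1}}
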
def is_valid_metadata_key(name: str) -> bool:
# According to the documentation a metadata key
# has to be a valid XML tag name.
#
    # The actual allowed tag names (at least as tested with the metadata API)
    # are far more restrictive and only allow the characters ".-0-9A-Za-z_",
    # possibly followed by an index in square brackets, e.g. [0].
    # On the other hand, the Archive allows tags starting with the string "xml".
return bool(re.fullmatch(r'[A-Za-z][.\-0-9A-Za-z_]+(?:\[[0-9]+\])?', name))
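
# Illustrative usage:
#
#   >>> is_valid_metadata_key('title'), is_valid_metadata_key('subject[0]')
#   (True, True)
#   >>> is_valid_metadata_key('0title'), is_valid_metadata_key('a b')
#   (False, False)
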
def merge_dictionaries(
dict0: dict | None,
dict1: dict | None,
keys_to_drop: Iterable | None = None,
) -> dict:
"""Merge two dictionaries.
Items in `dict0` can optionally be dropped before the merge.
    If a key exists in both dictionaries,
    the entry from `dict1` overwrites the entry in `dict0`.
:param dict0: A base dictionary with the bulk of the items.
:param dict1: Additional items which overwrite the items in `dict0`.
:param keys_to_drop: An iterable of keys to drop from `dict0` before the merge.
:returns: A merged dictionary.
"""
if dict0 is not None:
new_dict = dict0.copy()
else:
new_dict = {}
if keys_to_drop is not None:
for key in keys_to_drop:
new_dict.pop(key, None)
# Items from `dict1` take precedence over items from `dict0`.
if dict1 is not None:
new_dict.update(dict1)
return new_dict
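
# Illustrative usage:
#
#   >>> merge_dictionaries({'a': 1, 'b': 2}, {'b': 3}, keys_to_drop=['a'])
#   {'b': 3}
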
def parse_dict_cookies(value: str) -> dict[str, str | None]:
    """Parse a ``Set-Cookie``-style string into a dict, defaulting
    ``domain`` to '.archive.org' and ``path`` to '/' when absent.
    """
result: dict[str, str | None] = {}
for item in value.split(';'):
item = item.strip()
if not item:
continue
if '=' not in item:
result[item] = None
continue
name, value = item.split('=', 1)
result[name] = value
if 'domain' not in result:
result['domain'] = '.archive.org'
if 'path' not in result:
result['path'] = '/'
return result
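
# Illustrative usage (hypothetical cookie string):
#
#   >>> parse_dict_cookies('logged-in-user=foo%40example.com; HttpOnly')
#   {'logged-in-user': 'foo%40example.com', 'HttpOnly': None, 'domain': '.archive.org', 'path': '/'}
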
def is_valid_email(email: str) -> bool:
# Regular expression pattern for a valid email address
# Ensures the TLD has at least 2 characters
pattern = r'^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z]{2,}$'
return re.match(pattern, email) is not None
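
# Illustrative usage:
#
#   >>> is_valid_email('info@archive.org')
#   True
#   >>> is_valid_email('info@archive.x')  # TLD shorter than 2 characters
#   False
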