Post-Local sync at 2025-06-23T22:46:07Z
This commit is contained in:
parent
9d33b42020
commit
9f97801b0d
1387 changed files with 250216 additions and 117 deletions
|
|
@ -0,0 +1,58 @@
|
|||
#
|
||||
# The internetarchive module is a Python/CLI interface to Archive.org.
|
||||
#
|
||||
# Copyright (C) 2012-2024 Internet Archive
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
"""
|
||||
internetarchive.cli
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
:copyright: (C) 2012-2024 by Internet Archive.
|
||||
:license: AGPL 3, see LICENSE for more details.
|
||||
"""
|
||||
from internetarchive.cli import (
|
||||
cli_utils,
|
||||
ia,
|
||||
ia_account,
|
||||
ia_configure,
|
||||
ia_copy,
|
||||
ia_delete,
|
||||
ia_download,
|
||||
ia_list,
|
||||
ia_metadata,
|
||||
ia_move,
|
||||
ia_reviews,
|
||||
ia_search,
|
||||
ia_tasks,
|
||||
ia_upload,
|
||||
)
|
||||
|
||||
# Public API of the internetarchive.cli package: the 'ia' entry-point
# module, one ia_* module per subcommand, and the shared argparse
# helpers in cli_utils.
__all__ = [
    "cli_utils",
    "ia",
    "ia_account",
    "ia_configure",
    "ia_copy",
    "ia_delete",
    "ia_download",
    "ia_list",
    "ia_metadata",
    "ia_move",
    "ia_reviews",
    "ia_search",
    "ia_tasks",
    "ia_upload",
]
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,210 @@
|
|||
"""
|
||||
internetarchive.cli.cli_utils
|
||||
|
||||
"""
|
||||
|
||||
# Copyright (C) 2012-2024 Internet Archive
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import signal
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from collections.abc import Iterable
|
||||
from typing import Mapping
|
||||
from urllib.parse import parse_qsl
|
||||
|
||||
from internetarchive.utils import InvalidIdentifierException, validate_s3_identifier
|
||||
|
||||
|
||||
def get_args_dict(args: list[str],
                  query_string: bool = False,
                  header: bool = False) -> dict:
    """Parse CLI "KEY:VALUE" strings into a metadata dict.

    Args:
        args: Raw argument strings ("KEY:VALUE", or query-string syntax
            such as "k=v&k2=v2" when ``query_string`` is True). A single
            non-list value is wrapped into a one-element list.
        query_string: Parse each argument as a URL query string.
        header: Unused; kept for interface compatibility.

    Returns:
        dict mapping each key to its string value, or to a list of
        strings when a key appears with several distinct values.
    """
    items = args or []
    if not isinstance(items, list):
        items = [items]
    parsed: dict[str, list | str] = defaultdict(list)
    for raw in items:
        if query_string:
            # Accept "k:v;k2:v2" style by rewriting it to "k=v&k2=v2".
            if (":" in raw) and ("=" not in raw):
                raw = raw.replace(":", "=").replace(";", "&")
            for key, value in parse_qsl(raw):
                assert value
                parsed[key] = value
        else:
            key, value = raw.split(":", 1)
            assert value
            # Skip duplicate values for the same key.
            if value not in parsed[key]:
                parsed[key].append(value)  # type: ignore

    # Collapse single-item lists down to their lone value.
    for key in parsed:  # noqa: PLC0206
        if len(parsed[key]) <= 1:
            parsed[key] = parsed[key][0]

    return parsed
|
||||
|
||||
|
||||
def convert_str_list_to_unicode(str_list: list[bytes]):
    """Decode each byte string in *str_list* to ``str``.

    Uses the filesystem encoding, matching how the OS encodes
    command-line arguments.
    """
    fs_encoding = sys.getfilesystemencoding()
    return [str(raw, fs_encoding) for raw in str_list]
|
||||
|
||||
|
||||
def validate_identifier(identifier):
    """argparse ``type=`` validator for archive.org item identifiers.

    Args:
        identifier: Candidate item identifier from the command line.

    Returns:
        The identifier, unchanged, when it is valid.

    Raises:
        argparse.ArgumentTypeError: If the identifier fails S3-name
            validation.
    """
    try:
        validate_s3_identifier(identifier)
    except InvalidIdentifierException as e:
        # Chain the original exception so tracebacks keep their context
        # (the original raise dropped it).
        raise argparse.ArgumentTypeError(str(e)) from e
    return identifier
|
||||
|
||||
|
||||
def flatten_list(lst):
    """Recursively flatten nested iterables into a single flat list.

    Strings and bytes are treated as atomic values rather than
    iterables, so they are appended whole. (Bug fix: the original only
    excluded ``str``, so a ``bytes`` value was exploded into ints.)
    """
    result = []
    for item in lst:
        if isinstance(item, Iterable) and not isinstance(item, (str, bytes)):
            # Recursively flatten nested iterables.
            result.extend(flatten_list(item))
        else:
            # Atomic value: append as-is.
            result.append(item)
    return result
|
||||
|
||||
|
||||
class FlattenListAction(argparse.Action):
    """argparse action that accumulates values, flattening nested lists."""

    def __call__(self, parser, namespace, values, option_string=None):
        # Lazily create the accumulator on first use.
        accumulated = getattr(namespace, self.dest, None)
        if accumulated is None:
            accumulated = []
            setattr(namespace, self.dest, accumulated)
        # Flatten any nesting before extending the accumulator.
        accumulated.extend(flatten_list(values))
|
||||
|
||||
|
||||
class PostDataAction(argparse.Action):
    """argparse action that parses JSON arguments and accumulates them.

    A single JSON argument is stored as-is; repeated uses (or several
    arguments at once) accumulate into a list of parsed objects.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        existing = getattr(namespace, self.dest, None)

        # Parse each raw argument as JSON, aborting on the first bad one.
        parsed_objs = []
        for raw in values:
            try:
                parsed_objs.append(json.loads(raw))
            except json.JSONDecodeError:
                parser.error(f"Invalid JSON format for post data: {raw}")

        if existing is None:
            # First occurrence: keep a lone object unwrapped.
            combined = parsed_objs[0] if len(parsed_objs) == 1 else parsed_objs
        elif isinstance(existing, list):
            # Already a list: extend it with the new objects.
            combined = existing + parsed_objs
        else:
            # Promote the earlier single object to a list, then append.
            combined = [existing] + parsed_objs

        setattr(namespace, self.dest, combined)
|
||||
|
||||
|
||||
class QueryStringAction(argparse.Action):
    """argparse action parsing "key=value" pairs into a dict.

    Values accumulate per key: a key seen once maps to its string value,
    a key seen several times maps to a list of values. "key:value"
    syntax is accepted and rewritten to "key=value".
    """

    def __call__(self, parser, namespace, values, option_string=None):
        # Initialize the destination as an empty dictionary if needed.
        if getattr(namespace, self.dest, None) is None:
            setattr(namespace, self.dest, {})
        result = getattr(namespace, self.dest)

        for raw in values:
            if "=" not in raw and ":" in raw:
                raw = raw.replace(":", "=", 1)
            key_value_pairs = parse_qsl(raw)

            # Non-empty input that yields no pairs is malformed.
            if raw and not key_value_pairs:
                parser.error(f"{option_string} must be formatted as 'key=value' "
                             "or 'key:value'")

            for key, value in key_value_pairs:
                if key in result:
                    # Bug fix: a key flattened to a scalar by a previous
                    # invocation must be re-promoted to a list; the old
                    # code called .append() on a plain string here and
                    # crashed with AttributeError.
                    if not isinstance(result[key], list):
                        result[key] = [result[key]]
                    result[key].append(value)
                else:
                    result[key] = [value]

        # Collapse single-item lists to their lone value.
        for key, value in result.items():
            if isinstance(value, list) and len(value) == 1:
                result[key] = value[0]
|
||||
|
||||
|
||||
class MetadataAction(argparse.Action):
    """argparse action collecting "KEY:VALUE" pairs into a dict.

    Repeated keys accumulate their values into a list; "KEY=VALUE"
    syntax is accepted and treated as "KEY:VALUE".
    """

    def __call__(self, parser, namespace, values, option_string=None):
        # Initialize the destination dict on first use.
        if getattr(namespace, self.dest, None) is None:
            setattr(namespace, self.dest, {})

        for raw in values:
            # Accept "KEY=VALUE" by rewriting the first '=' to ':'.
            if ":" not in raw and "=" in raw:
                raw = raw.replace("=", ":", 1)
            try:
                key, value = raw.split(":", 1)
            except ValueError:
                parser.error(f"{option_string} must be formatted as 'KEY:VALUE'")

            metadata = getattr(namespace, self.dest)
            if key not in metadata:
                metadata[key] = value
            else:
                # Promote an existing scalar to a list before appending.
                if not isinstance(metadata[key], list):
                    metadata[key] = [metadata[key]]
                metadata[key].append(value)
|
||||
|
||||
|
||||
def validate_dir_path(path):
    """
    Check if the given path is a directory that exists.

    Args:
        path (str): The path to check.

    Returns:
        str: The validated directory path.

    Raises:
        argparse.ArgumentTypeError: If the path is not a valid directory.
    """
    # Guard clause: reject anything that is not an existing directory.
    if not os.path.isdir(path):
        raise argparse.ArgumentTypeError(f"'{path}' is not a valid directory")
    return path
|
||||
|
||||
|
||||
def exit_on_signal(sig, frame):
    """
    Exit the program cleanly upon receiving a specified signal.

    Installed as a signal handler (e.g. for SIGINT). Exits with status
    128 + signal number — the conventional encoding that distinguishes
    signal-driven exits from ordinary exit codes.
    """
    sys.exit(128 + sig)
|
||||
154
.venv/lib/python3.12/site-packages/internetarchive/cli/ia.py
Normal file
154
.venv/lib/python3.12/site-packages/internetarchive/cli/ia.py
Normal file
|
|
@ -0,0 +1,154 @@
|
|||
#!/usr/bin/env python
|
||||
"""
|
||||
ia.py
|
||||
|
||||
The internetarchive module is a Python/CLI interface to Archive.org.
|
||||
"""
|
||||
|
||||
# Copyright (C) 2012-2024 Internet Archive
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import argparse
|
||||
import signal
|
||||
import sys
|
||||
|
||||
from internetarchive import __version__, get_session
|
||||
from internetarchive.cli import (
|
||||
ia_account,
|
||||
ia_configure,
|
||||
ia_copy,
|
||||
ia_delete,
|
||||
ia_download,
|
||||
ia_flag,
|
||||
ia_list,
|
||||
ia_metadata,
|
||||
ia_move,
|
||||
ia_reviews,
|
||||
ia_search,
|
||||
ia_simplelists,
|
||||
ia_tasks,
|
||||
ia_upload,
|
||||
)
|
||||
from internetarchive.cli.cli_utils import exit_on_signal
|
||||
|
||||
# Handle broken pipe: restore the default SIGPIPE disposition so that
# piping 'ia' output into e.g. `head` terminates quietly instead of
# raising BrokenPipeError.
try:
    signal.signal(signal.SIGPIPE, signal.SIG_DFL)
except AttributeError:
    # Non-unix support: signal.SIGPIPE does not exist on e.g. Windows.
    pass

# Handle <Ctrl-C>: exit_on_signal exits with status 128 + signum.
signal.signal(signal.SIGINT, exit_on_signal)
|
||||
|
||||
|
||||
def validate_config_path(path):
    """
    Validate the path to the configuration file.

    The readability check is skipped for 'ia configure' invocations,
    which may be creating the file for the first time.

    Returns:
        str: Validated path to the configuration file.

    Raises:
        argparse.ArgumentTypeError: If the file cannot be opened for
            reading (raised by argparse.FileType).
    """
    if "configure" not in sys.argv:  # Support for adding config to specific file
        file_check = argparse.FileType("r")
        probe = file_check(path)
        # Bug fix: close the probe handle (the original leaked it), but
        # leave stdin alone when path is "-" (FileType returns sys.stdin).
        if probe is not sys.stdin:
            probe.close()
    return path
|
||||
|
||||
|
||||
def main():
    """
    Main entry point for the CLI.

    Builds the argument parser, registers every subcommand, creates the
    archive.org session from the global options, then dispatches to the
    selected subcommand's handler.
    """
    parser = argparse.ArgumentParser(
        description="A command line interface to Archive.org.",
        epilog=("Documentation for 'ia' is available at:\n\n\t"
                "https://archive.org/developers/internetarchive/cli.html\n\n"
                "See 'ia {command} --help' for help on a specific command."),
        formatter_class=argparse.RawTextHelpFormatter)  # support for \n in epilog

    parser.add_argument("-v", "--version",
                        action="version",
                        version=__version__)
    parser.add_argument("-c", "--config-file",
                        action="store",
                        type=validate_config_path,
                        metavar="FILE",
                        help="path to configuration file")
    parser.add_argument("-l", "--log",
                        action="store_true",
                        default=False,
                        help="enable logging")
    parser.add_argument("-d", "--debug",
                        action="store_true",
                        help="enable debugging")
    parser.add_argument("-i", "--insecure",
                        action="store_true",
                        help="allow insecure connections")
    parser.add_argument("-H", "--host",
                        action="store",
                        help=("host to connect to "
                              "(doesn't work for requests made to s3.us.archive.org)"))

    subparsers = parser.add_subparsers(title="commands",
                                       dest="command",
                                       metavar="{command}")

    # Add subcommand parsers
    ia_account.setup(subparsers)
    ia_configure.setup(subparsers)
    ia_copy.setup(subparsers)
    ia_delete.setup(subparsers)
    ia_download.setup(subparsers)
    ia_flag.setup(subparsers)
    ia_list.setup(subparsers)
    ia_metadata.setup(subparsers)
    ia_move.setup(subparsers)
    ia_reviews.setup(subparsers)
    ia_search.setup(subparsers)
    ia_simplelists.setup(subparsers)
    ia_tasks.setup(subparsers)
    ia_upload.setup(subparsers)

    args = parser.parse_args()

    # Build the session config from the global flags; --log takes
    # precedence over --debug for the logging level.
    config: dict[str, dict] = {}
    if args.log:
        config["logging"] = {"level": "INFO"}
    elif args.debug:
        config["logging"] = {"level": "DEBUG"}

    if args.insecure:
        config["general"] = {"secure": False}
    if args.host:
        config.setdefault("general", {})["host"] = args.host

    args.session = get_session(config_file=args.config_file,
                               config=config,
                               debug=args.debug)

    # No subcommand selected: print help and exit non-zero. Bug fix: the
    # original only checked `len(sys.argv) == 1`, so invocations with
    # global flags but no command (e.g. `ia -d`) crashed with
    # AttributeError on args.func.
    if not hasattr(args, "func"):
        parser.print_help(sys.stderr)
        sys.exit(1)

    args.func(args)
|
||||
|
|
@ -0,0 +1,110 @@
|
|||
"""
|
||||
ia_account.py
|
||||
|
||||
'ia' subcommand for configuring 'ia' with your archive.org credentials.
|
||||
"""
|
||||
|
||||
# Copyright (C) 2012-2025 Internet Archive
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
|
||||
from internetarchive import configure
|
||||
from internetarchive.account import Account
|
||||
from internetarchive.exceptions import AccountAPIError
|
||||
from internetarchive.utils import is_valid_email
|
||||
|
||||
|
||||
def setup(subparsers):
    """
    Set up the parser for the 'ia account' subcommand.

    Args:
        subparsers: subparser object passed from ia.py
    """
    account_parser = subparsers.add_parser(
        "account",
        aliases=["ac"],
        description=("Manage an archive.org account.\n\n"
                     "Note: This command requires administrative "
                     "privileges. "),
        help=("Manage an archive.org account. "
              "Note: requires admin privileges"))

    # Lookup/lock flags are mutually exclusive; only one action per run.
    action_group = account_parser.add_mutually_exclusive_group()
    account_parser.add_argument(
        "user",
        help="Email address, screenname, or itemname "
             "for an archive.org account")
    action_group.add_argument(
        "-g", "--get-email",
        action="store_true",
        help="Print the email address associated with the user and exit")
    action_group.add_argument(
        "-s", "--get-screenname",
        action="store_true",
        help="Print the screenname associated with the user and exit")
    action_group.add_argument(
        "-i", "--get-itemname",
        action="store_true",
        help="Print the itemname associated with the user and exit")
    action_group.add_argument(
        "-l", "--is-locked",
        action="store_true",
        help="Check if an account is locked")
    action_group.add_argument(
        "-L", "--lock",
        action="store_true",
        help="Lock an account")
    action_group.add_argument(
        "-u", "--unlock",
        action="store_true",
        help="Unlock an account")

    account_parser.add_argument(
        "-c", "--comment",
        type=str,
        help="Comment to include with lock/unlock action")

    account_parser.set_defaults(func=main)
|
||||
|
||||
|
||||
def main(args: argparse.Namespace) -> None:
    """
    Main entrypoint for 'ia account'.
    """
    # Pick the lookup field from the shape of the user argument:
    # '@item' names, email addresses, or plain screennames.
    if args.user.startswith('@'):
        lookup_field = 'itemname'
    elif is_valid_email(args.user):
        lookup_field = 'email'
    else:
        lookup_field = 'screenname'

    try:
        account = Account.from_account_lookup(lookup_field, args.user)
    except AccountAPIError as exc:
        # Surface the API's error payload as JSON and fail.
        print(json.dumps(exc.error_data))
        sys.exit(1)

    # Dispatch on the (mutually exclusive) action flags.
    if args.get_email:
        print(account.canonical_email)
    elif args.get_screenname:
        print(account.screenname)
    elif args.get_itemname:
        print(account.itemname)
    elif args.is_locked:
        print(account.locked)
    elif args.lock:
        response = account.lock(args.comment, session=args.session)
        print(response.text)
    elif args.unlock:
        response = account.unlock(args.comment, session=args.session)
        print(response.text)
    else:
        # Default: dump the whole account record as JSON.
        print(json.dumps(account.to_dict()))
|
||||
|
|
@ -0,0 +1,179 @@
|
|||
"""
|
||||
ia_configure.py
|
||||
|
||||
'ia' subcommand for configuring 'ia' with your archive.org credentials.
|
||||
"""
|
||||
|
||||
# Copyright (C) 2012-2024 Internet Archive
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import netrc
|
||||
import sys
|
||||
|
||||
from internetarchive import configure
|
||||
from internetarchive.exceptions import AuthenticationError
|
||||
|
||||
|
||||
def setup(subparsers):
    """
    Set up the parser for the 'ia configure' subcommand.

    Args:
        subparsers: subparser object passed from ia.py
    """
    configure_parser = subparsers.add_parser(
        "configure",
        aliases=["co"],
        help=("configure 'ia' with your "
              "archive.org credentials"))
    # Read-only query flags are mutually exclusive with each other.
    query_group = configure_parser.add_mutually_exclusive_group()

    configure_parser.add_argument(
        "--username", "-u",
        help=("provide username as an option rather than "
              "providing it interactively"))
    configure_parser.add_argument(
        "--password", "-p",
        help=("provide password as an option rather than "
              "providing it interactively"))
    configure_parser.add_argument(
        "--netrc", "-n",
        action="store_true",
        help="use netrc file for login")
    query_group.add_argument(
        "--show", "-s",
        action="store_true",
        help=("print the current configuration in JSON format, "
              "redacting secrets and cookies"))
    query_group.add_argument(
        "--check", "-C",
        action="store_true",
        help="validate IA-S3 keys (exits 0 if valid, 1 otherwise)")
    query_group.add_argument(
        "--whoami", "-w",
        action="store_true",
        help=("uses your IA-S3 keys to retrieve account "
              "information from archive.org "
              "about the associated account"))
    configure_parser.add_argument(
        "--print-cookies", "-c",
        action="store_true",
        help="print archive.org logged-in-* cookies")
    configure_parser.add_argument(
        "--print-auth-header", "-a",
        action="store_true",
        help="print an Authorization header with your IA-S3 keys")

    configure_parser.set_defaults(func=main)
||||
|
||||
|
||||
def main(args: argparse.Namespace) -> None:
    """
    Main entrypoint for 'ia configure'.

    Handles the read-only query flags first (--print-auth-header,
    --print-cookies, --show, --whoami, --check) — each prints and exits —
    then falls through to (re)configuring credentials, either from a
    .netrc file or interactively.
    """
    if args.print_auth_header:
        secret = args.session.config.get("s3", {}).get("secret")
        access = args.session.config.get("s3", {}).get("access")
        if not secret or not access:
            # Bug fix: removed a stray leftover debug print('hi') that
            # polluted stdout on this error path.
            if not access:
                print("error: 'access' key not found in config file, try reconfiguring.",
                      file=sys.stderr)
            elif not secret:
                print("error: 'secret' key not found in config file, try reconfiguring.",
                      file=sys.stderr)
            sys.exit(1)
        print(f"Authorization: LOW {access}:{secret}")
        sys.exit()

    if args.print_cookies:
        user = args.session.config.get("cookies", {}).get("logged-in-user")
        sig = args.session.config.get("cookies", {}).get("logged-in-sig")
        if not user or not sig:
            if not user and not sig:
                print("error: 'logged-in-user' and 'logged-in-sig' cookies "
                      "not found in config file, try reconfiguring.", file=sys.stderr)
            elif not user:
                print("error: 'logged-in-user' cookie not found in config file, "
                      "try reconfiguring.", file=sys.stderr)
            elif not sig:
                print("error: 'logged-in-sig' cookie not found in config file, "
                      "try reconfiguring.", file=sys.stderr)
            sys.exit(1)
        print(f"logged-in-user={user}; logged-in-sig={sig}")
        sys.exit()

    if args.show:
        # Work on copies so the live session config is never mutated.
        config = args.session.config.copy()
        # Redact S3 secret
        if 's3' in config:
            s3_config = config['s3'].copy()
            if 'secret' in s3_config:
                s3_config['secret'] = 'REDACTED'
            config['s3'] = s3_config
        # Redact logged-in-sig cookie
        if 'cookies' in config:
            cookies = config['cookies'].copy()
            if 'logged-in-sig' in cookies:
                cookies['logged-in-sig'] = 'REDACTED'
            config['cookies'] = cookies
        print(json.dumps(config))
        sys.exit()

    if args.whoami:
        whoami_info = args.session.whoami()
        print(json.dumps(whoami_info))
        sys.exit()

    if args.check:
        whoami_info = args.session.whoami()
        if whoami_info.get('success') is True:
            user = whoami_info['value']['username']
            print(f'The credentials for "{user}" are valid')
            sys.exit(0)
        else:
            print('Your credentials are invalid, check your configuration and try again')
            sys.exit(1)

    try:
        # Netrc
        if args.netrc:
            print("Configuring 'ia' with netrc file...", file=sys.stderr)
            try:
                n = netrc.netrc()
            except netrc.NetrcParseError:
                print("error: netrc.netrc() cannot parse your .netrc file.",
                      file=sys.stderr)
                sys.exit(1)
            except FileNotFoundError:
                print("error: .netrc file not found.", file=sys.stderr)
                sys.exit(1)
            # NOTE(review): raises KeyError if .netrc has no archive.org
            # entry — confirm whether that should be a friendly error.
            username, _, password = n.hosts["archive.org"]
            config_file_path = configure(username,
                                         password or "",
                                         config_file=args.session.config_file,
                                         host=args.session.host)
            print(f"Config saved to: {config_file_path}", file=sys.stderr)
        # Interactive input.
        else:
            if not (args.username and args.password):
                print("Enter your Archive.org credentials below to configure 'ia'.\n")
            config_file_path = configure(args.username,
                                         args.password,
                                         config_file=args.session.config_file,
                                         host=args.session.host)
            saved_msg = f"Config saved to: {config_file_path}"
            # Add a leading newline after interactive prompts.
            if not all([args.username, args.password]):
                saved_msg = f"\n{saved_msg}"
            print(saved_msg)

    except AuthenticationError as exc:
        print(f"\nerror: {exc}", file=sys.stderr)
        sys.exit(1)
|
||||
|
|
@ -0,0 +1,164 @@
|
|||
"""
|
||||
ia_copy.py
|
||||
|
||||
'ia' subcommand for copying files on archive.org
|
||||
"""
|
||||
|
||||
# Copyright (C) 2012-2024 Internet Archive
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from typing import Optional
|
||||
from urllib.parse import quote
|
||||
|
||||
from requests import Response
|
||||
|
||||
import internetarchive as ia
|
||||
from internetarchive.cli.cli_utils import MetadataAction, QueryStringAction
|
||||
from internetarchive.utils import get_s3_xml_text, merge_dictionaries
|
||||
|
||||
|
||||
def setup(subparsers):
    """
    Set up the parser for the 'ia copy' subcommand.

    Args:
        subparsers: subparser object passed from ia.py
    """
    copy_parser = subparsers.add_parser(
        "copy",
        aliases=["cp"],
        help="Copy files from archive.org items")

    # Positional arguments
    copy_parser.add_argument(
        "source",
        metavar="SOURCE",
        help="Source file formatted as: identifier/file")
    copy_parser.add_argument(
        "destination",
        metavar="DESTINATION",
        help="Destination file formatted as: identifier/file")

    # Options
    copy_parser.add_argument(
        "-m", "--metadata",
        metavar="KEY:VALUE",
        nargs="+",
        default={},
        action=MetadataAction,
        help=("Metadata to add to your new item, if you are moving the "
              "file to a new item"))
    copy_parser.add_argument(
        "--replace-metadata",
        action="store_true",
        help=("Only use metadata specified as argument, do not copy any "
              "from the source item"))
    copy_parser.add_argument(
        "-H", "--header",
        metavar="KEY:VALUE",
        nargs="+",
        default={},
        action=QueryStringAction,
        help="S3 HTTP headers to send with your request")
    copy_parser.add_argument(
        "--ignore-file-metadata",
        action="store_true",
        help="Do not copy file metadata")
    copy_parser.add_argument(
        "-n", "--no-derive",
        action="store_true",
        help="Do not derive uploaded files")
    copy_parser.add_argument(
        "--no-backup",
        action="store_true",
        help=("Turn off archive.org backups, "
              "clobbered files will not be saved to "
              "'history/files/$key.~N~'"))

    # The lambda closes over copy_parser so main() can report errors.
    copy_parser.set_defaults(func=lambda args: main(args, "copy", copy_parser))
||||
|
||||
|
||||
def assert_src_file_exists(src_location: str) -> bool:
    """
    Assert that the source file exists on archive.org.

    Reads the module-global SRC_ITEM (set by main()) and stores the
    located file in the module-global SRC_FILE as a side effect.

    Args:
        src_location: Source path formatted as "identifier/filename".

    Returns:
        True when both the item and the file exist.

    Raises:
        AssertionError: If the item or file does not exist.
            NOTE(review): assertions are stripped under ``python -O``,
            and the caller relies on catching AssertionError — this
            check silently passes in optimized mode; confirm intent.
    """
    assert SRC_ITEM.exists  # type: ignore
    global SRC_FILE
    # Everything after the first "/" is the file path within the item.
    src_filename = src_location.split("/", 1)[-1]
    SRC_FILE = SRC_ITEM.get_file(src_filename)  # type: ignore
    assert SRC_FILE.exists  # type: ignore
    return True
|
||||
|
||||
|
||||
def main(args: argparse.Namespace,
         cmd: str,
         parser: argparse.ArgumentParser) -> tuple[Response, ia.files.File | None]:
    """
    Main entry point for 'ia copy'.

    Builds and sends an IA-S3 PUT request with an x-amz-copy-source
    header to copy args.source to args.destination. Also called by the
    'move' command (cmd distinguishes the two for messaging).

    Args:
        args: Parsed namespace; must provide source, destination,
            metadata, header, and the session created in ia.py.
        cmd: Command name used in success/error messages ("copy"
            when invoked from this module).
        parser: The subcommand parser, used to report fatal errors.

    Returns:
        Tuple of the HTTP response and the source File object (or None).
    """
    SRC_FILE = None

    if args.source == args.destination:
        parser.error("error: The source and destination files cannot be the same!")

    # Module-global so assert_src_file_exists() can see the item.
    global SRC_ITEM
    SRC_ITEM = args.session.get_item(args.source.split("/")[0])  # type: ignore
    # Local lookup of the source file; assert_src_file_exists() below
    # also sets the module-global SRC_FILE as a side effect.
    SRC_FILE = SRC_ITEM.get_file(args.source.split("/",1)[-1])  # type: ignore

    try:
        assert_src_file_exists(args.source)
    except AssertionError:
        parser.error(f"error: https://{args.session.host}/download/{args.source} "
                     "does not exist. Please check the "
                     "identifier and filepath and retry.")

    args.header["x-amz-copy-source"] = f"/{quote(args.source)}"
    # Copy the old metadata verbatim if no additional metadata is supplied,
    # else combine the old and the new metadata in a sensible manner.
    if args.metadata or args.replace_metadata:
        args.header["x-amz-metadata-directive"] = "REPLACE"
    else:
        args.header["x-amz-metadata-directive"] = "COPY"

    # New metadata takes precedence over old metadata.
    if not args.replace_metadata:
        args.metadata = merge_dictionaries(SRC_ITEM.metadata,  # type: ignore
                                           args.metadata)

    # File metadata is copied by default but can be dropped.
    file_metadata = None if args.ignore_file_metadata else SRC_FILE.metadata  # type: ignore

    # Add keep-old-version by default.
    if not args.header.get("x-archive-keep-old-version") and not args.no_backup:
        args.header["x-archive-keep-old-version"] = "1"

    url = f"{args.session.protocol}//s3.us.archive.org/{quote(args.destination)}"
    queue_derive = not args.no_derive
    req = ia.iarequest.S3Request(url=url,
                                 method="PUT",
                                 metadata=args.metadata,
                                 file_metadata=file_metadata,
                                 headers=args.header,
                                 queue_derive=queue_derive,
                                 access_key=args.session.access_key,
                                 secret_key=args.session.secret_key)
    p = req.prepare()
    r = args.session.send(p)
    if r.status_code != 200:
        try:
            # Prefer the human-readable message from the S3 XML error body.
            msg = get_s3_xml_text(r.text)
        except Exception as e:
            # Fall back to the raw body if the XML cannot be parsed.
            msg = r.text
        print(f"error: failed to {cmd} '{args.source}' to '{args.destination}' - {msg}",
              file=sys.stderr)
        sys.exit(1)
    elif cmd == "copy":
        print(f"success: copied '{args.source}' to '{args.destination}'.",
              file=sys.stderr)
    return (r, SRC_FILE)
||||
|
|
@ -0,0 +1,182 @@
|
|||
"""
|
||||
ia_delete.py
|
||||
|
||||
'ia' subcommand for deleting files from archive.org items.
|
||||
"""
|
||||
|
||||
# Copyright (C) 2012-2024 Internet Archive
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
import requests.exceptions
|
||||
|
||||
from internetarchive.cli.cli_utils import (
|
||||
FlattenListAction,
|
||||
MetadataAction,
|
||||
QueryStringAction,
|
||||
validate_identifier,
|
||||
)
|
||||
from internetarchive.utils import get_s3_xml_text
|
||||
|
||||
|
||||
def setup(subparsers):
    """
    Setup args for delete command.

    Args:
        subparsers: subparser object passed from ia.py
    """
    rm_parser = subparsers.add_parser("delete",
                                      aliases=["rm"],
                                      help="Delete files from archive.org items")

    # Positional arguments
    rm_parser.add_argument("identifier", type=validate_identifier,
                           help="Identifier for the item from which files are to be deleted.")
    rm_parser.add_argument("file", type=str, nargs="*",
                           help="Specific file(s) to delete.")

    # Optional arguments
    rm_parser.add_argument("-q", "--quiet", action="store_true",
                           help="Print status to stdout.")
    rm_parser.add_argument("-c", "--cascade", action="store_true",
                           help="Delete all associated files including derivatives and the original.")
    rm_parser.add_argument("-H", "--header", nargs="+", action=QueryStringAction,
                           default={}, metavar="KEY:VALUE",
                           help="S3 HTTP headers to send with your request.")
    rm_parser.add_argument("-a", "--all", action="store_true",
                           help="Delete all files in the given item. Some files cannot be deleted.")
    rm_parser.add_argument("-d", "--dry-run", action="store_true",
                           help=("Output files to be deleted to stdout, "
                                 "but don't actually delete them."))
    rm_parser.add_argument("-g", "--glob", type=str,
                           help="Only delete files matching the given pattern.")
    rm_parser.add_argument("-f", "--format", type=str, nargs="+",
                           action=FlattenListAction,
                           help="Only delete files matching the specified formats.")
    rm_parser.add_argument("-R", "--retries", type=int, default=2,
                           help="Number of retries on S3 503 SlowDown error.")
    rm_parser.add_argument("--no-backup", action="store_true",
                           help="Turn off archive.org backups. Clobbered files will not be saved.")

    rm_parser.set_defaults(func=lambda args: main(args, rm_parser))
|
||||
|
||||
|
||||
def get_files_to_delete(args: argparse.Namespace, item) -> list:
    """Resolve which of *item*'s files should be deleted.

    Precedence mirrors the CLI flags: --all wins, then --glob, then
    --format; otherwise the positional file names are used (reading from
    stdin when the single name "-" is given).
    """
    if args.all:
        # Deleting everything implies cascading to associated files too.
        args.cascade = True
        return list(item.get_files())
    if args.glob:
        return item.get_files(glob_pattern=args.glob)
    if args.format:
        return item.get_files(formats=args.format)
    name_source = sys.stdin if args.file == ["-"] else args.file
    names = [name.strip() for name in name_source]
    return list(item.get_files(names))
|
||||
|
||||
|
||||
def delete_files(files, args, item, verbose):
    """
    Delete the given files from *item*.

    Args:
        files (list): A list of files to delete.
        args (argparse.Namespace): Parsed command-line arguments.
        item: The item from which files are being deleted.
        verbose (bool): If True, verbose output is enabled.

    Returns:
        bool: True if errors occurred during deletion, False otherwise.
    """
    # Metadata files that cannot be deleted via S3.
    protected_suffixes = ("_meta.xml", "_files.xml", "_meta.sqlite")
    had_errors = False

    for candidate in files:
        if not candidate:
            # NOTE(review): assumes a falsy file object still exposes .name — confirm
            if verbose:
                print(f" error: '{candidate.name}' does not exist", file=sys.stderr)
            had_errors = True
            continue
        if candidate.name.endswith(protected_suffixes):
            continue
        if args.dry_run:
            print(f" will delete: {item.identifier}/{candidate.name}", file=sys.stderr)
            continue
        try:
            resp = candidate.delete(verbose=verbose,
                                    cascade_delete=args.cascade,
                                    headers=args.header,
                                    retries=args.retries)
        except requests.exceptions.RetryError:
            print(f" error: max retries exceeded for {candidate.name}", file=sys.stderr)
            had_errors = True
            continue

        if resp.status_code != 204:
            had_errors = True
            msg = get_s3_xml_text(resp.content)
            print(f" error: {msg} ({resp.status_code})", file=sys.stderr)
    return had_errors
|
||||
|
||||
|
||||
def main(args: argparse.Namespace, parser: argparse.ArgumentParser):
    """
    Main entry point for 'ia delete'.

    Exits non-zero when no files match or when any deletion fails.
    """
    verbose = not args.quiet
    target_item = args.session.get_item(args.identifier)
    if not target_item.exists:
        print(f"{target_item.identifier}: skipping, item doesn't exist.", file=sys.stderr)
        return

    # Keep clobbered files unless the user explicitly opted out of backups.
    backups_wanted = not args.no_backup
    if backups_wanted and "x-archive-keep-old-version" not in args.header:
        args.header["x-archive-keep-old-version"] = "1"

    if verbose:
        print(f"Deleting files from {target_item.identifier}", file=sys.stderr)

    candidates = get_files_to_delete(args, target_item)
    if not candidates:
        print(" warning: no files found, nothing deleted.", file=sys.stderr)
        sys.exit(1)

    if delete_files(candidates, args, target_item, verbose):
        sys.exit(1)
|
||||
|
|
@ -0,0 +1,238 @@
|
|||
"""
|
||||
ia_download.py
|
||||
|
||||
'ia' subcommand for downloading files from archive.org.
|
||||
"""
|
||||
|
||||
# Copyright (C) 2012-2024 Internet Archive
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from typing import TextIO
|
||||
|
||||
from internetarchive.cli.cli_utils import (
|
||||
QueryStringAction,
|
||||
validate_dir_path,
|
||||
validate_identifier,
|
||||
)
|
||||
from internetarchive.files import File
|
||||
from internetarchive.search import Search
|
||||
|
||||
|
||||
def setup(subparsers):
    """
    Setup args for download command.

    Args:
        subparsers: subparser object passed from ia.py
    """
    # Bug fix: the subcommand help previously read "Retrieve and modify
    # archive.org item metadata", copy-pasted from the 'metadata' command.
    parser = subparsers.add_parser("download",
                                   aliases=["do"],
                                   help="Download files from archive.org")

    # Main options
    parser.add_argument("identifier",
                        nargs="?",
                        type=validate_identifier,
                        # Bug fix: previously said "Identifier for the upload".
                        help="Identifier for the item to download")
    parser.add_argument("file",
                        nargs="*",
                        help="Files to download")

    # Additional options
    parser.add_argument("-q", "--quiet",
                        action="store_true",
                        help="Turn off ia's output [default: False]")
    parser.add_argument("-d", "--dry-run",
                        action="store_true",
                        help="Print URLs to stdout and exit")
    parser.add_argument("-i", "--ignore-existing",
                        action="store_true",
                        help="Clobber files already downloaded")
    parser.add_argument("-C", "--checksum",
                        action="store_true",
                        help="Skip files based on checksum [default: False]")
    parser.add_argument("--checksum-archive",
                        action="store_true",
                        help="Skip files based on _checksum_archive.txt file")
    parser.add_argument("-R", "--retries",
                        type=int,
                        default=5,
                        help="Set number of retries to <retries> [default: 5]")
    parser.add_argument("-I", "--itemlist",
                        type=argparse.FileType("r"),
                        help=("Download items from a specified file. "
                              "Itemlists should be a plain text file with one "
                              "identifier per line"))
    parser.add_argument("-S", "--search",
                        help="Download items returned from a specified search query")
    parser.add_argument("-P", "--search-parameters",
                        nargs="+",
                        action=QueryStringAction,
                        metavar="KEY:VALUE",
                        help="Parameters to send with your --search query")
    parser.add_argument("-g", "--glob",
                        help=("Only download files whose filename matches "
                              "the given glob pattern"))
    parser.add_argument("-e", "--exclude",
                        help=("Exclude files whose filename matches "
                              "the given glob pattern"))
    parser.add_argument("-f", "--format",
                        nargs="+",
                        help=("Only download files of the specified format. "
                              "Use this option multiple times to download "
                              "multiple formats. You can use the following command to "
                              "retrieve a list of file formats contained within a "
                              "given item: ia metadata --formats <identifier>"))
    parser.add_argument("--on-the-fly",
                        action="store_true",
                        help=("Download on-the-fly files, as well as other "
                              "matching files. on-the-fly files include derivative "
                              "EPUB, MOBI and DAISY files [default: False]"))
    parser.add_argument("--no-directories",
                        action="store_true",
                        help=("Download files into working directory. "
                              "Do not create item directories"))
    parser.add_argument("--destdir",
                        type=validate_dir_path,
                        help=("The destination directory to download files "
                              "and item directories to"))
    parser.add_argument("-s", "--stdout",
                        action="store_true",
                        help="Write file contents to stdout")
    parser.add_argument("--no-change-timestamp",
                        action="store_true",
                        help=("Don't change the timestamp of downloaded files to reflect "
                              "the source material"))
    parser.add_argument("-p", "--parameters",
                        nargs="+",
                        action=QueryStringAction,
                        metavar="KEY:VALUE",
                        help="Parameters to send with your download request (e.g. `cnt=0`)")
    parser.add_argument("-a", "--download-history",
                        action="store_true",
                        help="Also download files from the history directory")
    parser.add_argument("--source",
                        nargs="+",
                        help=("Filter files based on their source value in files.xml "
                              "(i.e. `original`, `derivative`, `metadata`)"))
    parser.add_argument("--exclude-source",
                        nargs="+",
                        help=("Filter files based on their source value in files.xml "
                              "(i.e. `original`, `derivative`, `metadata`)"))
    parser.add_argument("-t", "--timeout",
                        type=float,
                        help=("Set a timeout for download requests. "
                              "This sets both connect and read timeout"))

    parser.set_defaults(func=lambda args: main(args, parser))
|
||||
|
||||
|
||||
def main(args: argparse.Namespace, parser: argparse.ArgumentParser) -> None:
    """
    Main entry point for 'ia download'.

    Builds the iterable of identifiers to process (from a positional
    identifier, "-" for stdin, --itemlist, or --search), downloads each
    item, and exits 1 if any download reported errors, 0 otherwise.
    """
    ids: list[File | str] | Search | TextIO

    if args.itemlist:
        ids = [x.strip() for x in args.itemlist]
        total_ids = len(ids)
    elif args.search:
        try:
            _search = args.session.search_items(args.search,
                                                params=args.search_parameters)
            total_ids = _search.num_found
            if total_ids == 0:
                print(f"error: the query '{args.search}' returned no results", file=sys.stderr)
                sys.exit(1)
            ids = _search
        except ValueError as e:
            print(f"error: {e}", file=sys.stderr)
            sys.exit(1)

    # Download specific files.
    if args.identifier and args.identifier != "-":
        if "/" in args.identifier:
            # "identifier/path/to/file" addresses a single file within an item.
            identifier = args.identifier.split("/")[0]
            files = ["/".join(args.identifier.split("/")[1:])]
        else:
            identifier = args.identifier
            files = args.file
        total_ids = 1
        ids = [identifier]
    elif args.identifier == "-":
        # Identifiers are read from stdin, one per line.
        total_ids = 1
        ids = sys.stdin
        files = None
    else:
        files = None
        if not args.itemlist and not args.search:
            # Bug fix: without this guard, `ids`/`total_ids` were left unbound
            # and the loop below crashed with a NameError instead of a clear
            # usage error.
            parser.error("an identifier, --itemlist, or --search is required")

    errors = []
    for i, identifier in enumerate(ids):
        try:
            identifier = identifier.strip()
        except AttributeError:
            # Search results are dicts rather than strings.
            identifier = identifier.get("identifier")
        if total_ids > 1:
            item_index = f"{i + 1}/{total_ids}"
        else:
            item_index = None

        try:
            item = args.session.get_item(identifier)
        except Exception as exc:
            # Bug fix: the original message printed the literal word "errors"
            # instead of interpolating the exception.
            print(f"{identifier}: failed to retrieve item metadata - {exc}", file=sys.stderr)
            if "You are attempting to make an HTTPS" in str(exc):
                print(f"\n{exc}", file=sys.stderr)
                sys.exit(1)
            else:
                continue

        # Otherwise, download the entire item.
        # NOTE(review): passing True here when --download-history is set looks
        # inverted given the flag's help text ("Also download files from the
        # history directory") — confirm against Item.download's semantics.
        ignore_history_dir = bool(args.download_history)
        _errors = item.download(
            files=files,
            formats=args.format,
            glob_pattern=args.glob,
            exclude_pattern=args.exclude,
            dry_run=args.dry_run,
            verbose=not args.quiet,
            ignore_existing=args.ignore_existing,
            checksum=args.checksum,
            checksum_archive=args.checksum_archive,
            destdir=args.destdir,
            no_directory=args.no_directories,
            retries=args.retries,
            item_index=item_index,
            ignore_errors=True,
            on_the_fly=args.on_the_fly,
            no_change_timestamp=args.no_change_timestamp,
            params=args.parameters,
            ignore_history_dir=ignore_history_dir,
            source=args.source,
            exclude_source=args.exclude_source,
            stdout=args.stdout,
            timeout=args.timeout,
        )
        if _errors:
            errors.append(_errors)
    if errors:
        # TODO: add option for a summary/report.
        sys.exit(1)
    else:
        sys.exit(0)
|
||||
|
|
@ -0,0 +1,103 @@
|
|||
"""
|
||||
ia_flag.py
|
||||
|
||||
'ia' subcommand for managing flags on archive.org.
|
||||
"""
|
||||
|
||||
# Copyright (C) 2012-2025 Internet Archive
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
|
||||
|
||||
def setup(subparsers):
|
||||
"""Set up argument parser for the 'flag' subcommand.
|
||||
|
||||
Args:
|
||||
subparsers: argparse subparsers object from main CLI
|
||||
"""
|
||||
parser = subparsers.add_parser(
|
||||
"flag",
|
||||
aliases=["fl"],
|
||||
help="Manage flags",
|
||||
)
|
||||
parser.add_argument(
|
||||
"identifier",
|
||||
nargs="?",
|
||||
type=str,
|
||||
help="Identifier for the upload",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-u",
|
||||
"--user",
|
||||
type=str,
|
||||
help="User associated with the flag",
|
||||
)
|
||||
|
||||
group = parser.add_argument_group("Add flag operations")
|
||||
group.add_argument(
|
||||
"-a",
|
||||
"--add-flag",
|
||||
metavar="CATEGORY",
|
||||
type=str,
|
||||
help="Add a flag to the item",
|
||||
)
|
||||
|
||||
group = parser.add_argument_group("Delete flag operations")
|
||||
group.add_argument(
|
||||
"-d",
|
||||
"--delete-flag",
|
||||
metavar="CATEGORY",
|
||||
type=str,
|
||||
help="Delete a flag from the item",
|
||||
)
|
||||
|
||||
parser.set_defaults(func=lambda args: main(args, parser))
|
||||
|
||||
def main(args: argparse.Namespace, parser: argparse.ArgumentParser) -> None:
    """Handle flag subcommand execution.

    With -a/--add-flag adds a flag, with -d/--delete-flag removes one;
    with neither, prints the item's current flags.

    Args:
        args: Parsed command-line arguments
        parser: Argument parser for error handling
    """
    item = args.session.get_item(args.identifier)

    if args.user:
        flag_user = args.user
    else:
        flag_user = args.session.config.get("general", {}).get("screenname")
        if not flag_user:
            # Bug fix: previously crashed with AttributeError on None when no
            # screenname was configured and --user was not given.
            parser.error("no screenname found in your config; please use --user")
    # The flags API expects usernames in "@name" form.
    if not flag_user.startswith('@'):
        flag_user = f"@{flag_user}"

    if args.add_flag:
        r = item.add_flag(args.add_flag, flag_user)
        j = r.json()
        if j.get("status") == "success":
            print(f"success: added '{args.add_flag}' flag by {flag_user} to {args.identifier}")
        else:
            print(f"error: {item.identifier} - {r.text}")

    elif args.delete_flag:
        r = item.delete_flag(args.delete_flag, flag_user)
        j = r.json()
        if j.get("status") == "success":
            print(f"success: deleted '{args.delete_flag}' flag by {flag_user} from {args.identifier}")
        else:
            print(f"error: {item.identifier} - {r.text}")

    else:
        r = item.get_flags()
        print(r.text)
|
||||
|
|
@ -0,0 +1,151 @@
|
|||
"""
|
||||
ia_list.py
|
||||
|
||||
'ia' subcommand for listing files from archive.org items.
|
||||
"""
|
||||
|
||||
# Copyright (C) 2012-2024 Internet Archive
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import sys
|
||||
from fnmatch import fnmatch
|
||||
from itertools import chain
|
||||
|
||||
from internetarchive.cli.cli_utils import validate_identifier
|
||||
|
||||
|
||||
def setup(subparsers):
    """
    Setup args for list command.

    Args:
        subparsers: subparser object passed from ia.py
    """
    ls_parser = subparsers.add_parser("list",
                                      aliases=["ls"],
                                      help="list files from archive.org items")

    # Positional arguments
    ls_parser.add_argument("identifier",
                           type=validate_identifier,
                           help="identifier of the item")

    # Options
    ls_parser.add_argument("-v", "--verbose", action="store_true",
                           help="print column headers")
    ls_parser.add_argument("-a", "--all", action="store_true",
                           help="list all information available for files")
    ls_parser.add_argument("-l", "--location", action="store_true",
                           help="print full URL for each file")
    ls_parser.add_argument("-c", "--columns", action="append",
                           type=prepare_columns,
                           help="list specified file information")
    ls_parser.add_argument("-g", "--glob",
                           help="only return files matching the given pattern")
    ls_parser.add_argument("-f", "--format", action="append",
                           help="return files matching FORMAT")

    ls_parser.set_defaults(func=main)
|
||||
|
||||
|
||||
def prepare_columns(columns):
    """
    Normalize a ``--columns`` value into a flat list of column names.

    Bug fix (docs): the previous docstring said "Validate the path to the
    configuration file", copy-pasted from an unrelated helper.

    Args:
        columns: A comma-separated string (as received from argparse
            ``type=``), a list of such strings, or a falsy value.

    Returns:
        list | None: The flattened list of column names, or None when
        *columns* is falsy.
    """
    if not columns:
        return None
    if not isinstance(columns, list):
        columns = [columns]
    return list(chain.from_iterable(c.split(",") for c in columns))
|
||||
|
||||
|
||||
def setup_columns(args, files):
    """
    Normalize ``args.columns`` in place.

    Defaults to ["name"], flattens the append-of-lists produced by
    -c/--columns, expands to every key seen in *files* for --all, and
    always pins "name" as the first column.
    """
    if args.columns:
        # -c/--columns appends lists of names, so flatten one level.
        args.columns = list(chain.from_iterable(args.columns))
    else:
        args.columns = ["name"]

    if args.all:
        # Use every key that appears in any file's metadata dict.
        args.columns = list(set(chain.from_iterable(files)))

    # Make "name" the first column always.
    if "name" in args.columns:
        args.columns.remove("name")
        args.columns.insert(0, "name")
|
||||
|
||||
|
||||
def filter_files(args, files, item):
    """
    Return the subset of *files* selected by --glob or --format.

    --glob accepts "|"-separated patterns; --format re-queries the item
    for matching files. With neither option, *files* is returned as-is.
    """
    if args.glob:
        patterns = args.glob.split("|")
        return [
            f for f in files
            if any(fnmatch(f["name"], pattern) for pattern in patterns)
        ]
    if args.format:
        return [f.__dict__ for f in item.get_files(formats=args.format)]
    return files
|
||||
|
||||
|
||||
def generate_output(files, args, dict_writer, item):
    """
    Write one TSV row per file, restricted to the selected columns.

    Exits with status 1 when no file contributed any selected column.
    """
    rows = []
    for file_md in files:
        row = {}
        for column, value in file_md.items():
            if column not in args.columns:
                continue
            if isinstance(value, (list, tuple, set)):
                value = ";".join(value)
            if column == "name" and args.location:
                # --location expands names to full download URLs.
                value = (f"https://{args.session.host}"
                         f"/download/{item.identifier}/{value}")
            row[column] = value
        rows.append(row)

    if args.verbose:
        # Column headers go through the underlying csv writer.
        dict_writer.writer.writerow(args.columns)
    if all(row == {} for row in rows):
        sys.exit(1)
    dict_writer.writerows(rows)
|
||||
|
||||
|
||||
def main(args: argparse.Namespace) -> None:
    """
    Main entry point for 'ia list'.
    """
    item = args.session.get_item(args.identifier)
    file_records = item.files

    setup_columns(args, file_records)
    file_records = filter_files(args, file_records, item)

    writer = csv.DictWriter(sys.stdout, args.columns,
                            delimiter="\t",
                            lineterminator="\n")
    generate_output(file_records, args, writer, item)
|
||||
|
|
@ -0,0 +1,324 @@
|
|||
"""
|
||||
ia_metadata.py
|
||||
|
||||
'ia' subcommand for modifying and retrieving metadata from archive.org items.
|
||||
"""
|
||||
|
||||
# Copyright (C) 2012-2024 Internet Archive
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from copy import copy
|
||||
from typing import Mapping
|
||||
|
||||
from requests import Response
|
||||
|
||||
from internetarchive import item
|
||||
from internetarchive.cli.cli_utils import MetadataAction, QueryStringAction
|
||||
from internetarchive.exceptions import ItemLocateError
|
||||
from internetarchive.utils import json
|
||||
|
||||
|
||||
def setup(subparsers):
    """
    Setup args for metadata command.

    Args:
        subparsers: subparser object passed from ia.py
    """
    md_parser = subparsers.add_parser("metadata",
                                      aliases=["md"],
                                      help="Retrieve and modify archive.org item metadata")

    md_parser.add_argument("identifier",
                           nargs="?",
                           type=str,
                           help="Identifier for the upload")

    # Exactly one modification mode may be used per invocation.
    mode = md_parser.add_mutually_exclusive_group()
    mode.add_argument("-m", "--modify",
                      nargs="+", action=MetadataAction, metavar="KEY:VALUE",
                      help="Modify the metadata of an item")
    mode.add_argument("-r", "--remove",
                      nargs="+", action=MetadataAction, metavar="KEY:VALUE",
                      help="Remove KEY:VALUE from a metadata element")
    mode.add_argument("-a", "--append",
                      nargs="+", action=MetadataAction, metavar="KEY:VALUE",
                      help="Append a string to a metadata element")
    mode.add_argument("-A", "--append-list",
                      nargs="+", action=MetadataAction, metavar="KEY:VALUE",
                      help="Append a field to a metadata element")
    mode.add_argument("-i", "--insert",
                      nargs="+", action=MetadataAction, metavar="KEY:VALUE",
                      help=("Insert a value into a multi-value field given "
                            "an index (e.g. `--insert=collection[0]:foo`)"))

    # Additional options
    md_parser.add_argument("-E", "--expect",
                           nargs="+", action=MetadataAction, metavar="KEY:VALUE",
                           help=("Test an expectation server-side before applying patch "
                                 "to item metadata"))
    md_parser.add_argument("-H", "--header",
                           nargs="+", action=QueryStringAction, metavar="KEY:VALUE",
                           help="S3 HTTP headers to send with your request")
    md_parser.add_argument("-t", "--target",
                           metavar="target", default="metadata",
                           help="The metadata target to modify")
    md_parser.add_argument("-s", "--spreadsheet",
                           metavar="metadata.csv",
                           help="Modify metadata in bulk using a spreadsheet as input")
    md_parser.add_argument("-e", "--exists",
                           action="store_true",
                           help="Check if an item exists")
    md_parser.add_argument("-F", "--formats",
                           action="store_true",
                           help="Return the file-formats the given item contains")
    md_parser.add_argument("-p", "--priority",
                           metavar="priority",
                           help="Set the task priority")
    md_parser.add_argument("--timeout",
                           metavar="value",
                           help="Set a timeout for metadata writes")
    md_parser.add_argument("-R", "--reduced-priority",
                           action="store_true",
                           help="Submit task at a reduced priority.")

    md_parser.set_defaults(func=lambda args: main(args, md_parser))
|
||||
|
||||
|
||||
def modify_metadata(item: item.Item,
                    metadata: Mapping,
                    args: argparse.Namespace,
                    parser: argparse.ArgumentParser) -> Response:
    """
    Submit a metadata patch for *item* and report the outcome on stderr.

    Exits with status 1 when the item cannot be located; otherwise
    returns the server's Response (including on API-level failure).
    """
    try:
        response = item.modify_metadata(
            metadata,
            target=args.target,
            append=bool(args.append),
            expect=args.expect,
            priority=args.priority,
            append_list=bool(args.append_list),
            headers=args.header,
            insert=bool(args.insert),
            reduced_priority=args.reduced_priority,
            timeout=args.timeout)
        assert isinstance(response, Response)  # mypy: modify_metadata() -> Request | Response
    except ItemLocateError as exc:
        print(f"{item.identifier} - error: {exc}", file=sys.stderr)
        sys.exit(1)

    result = response.json()
    if not result["success"]:
        # "no changes" responses are warnings, not hard errors.
        etype = "warning" if "no changes" in response.text else "error"
        print(f"{item.identifier} - {etype} ({response.status_code}): {result['error']}",
              file=sys.stderr)
    else:
        print(f"{item.identifier} - success: {result['log']}", file=sys.stderr)
    return response
|
||||
|
||||
|
||||
def remove_metadata(item: item.Item,
                    metadata: Mapping,
                    args: argparse.Namespace,
                    parser: argparse.ArgumentParser) -> Response:
    """
    Remove metadata helper function.

    Builds a "keep" patch: for each key in *metadata*, the values that are
    NOT being removed survive into ``md``, which is then submitted via
    modify_metadata() (replacing the field with the surviving values).
    Collection removals and no-op cases call sys.exit() directly instead
    of returning.
    """
    md: dict[str, list | str] = defaultdict(list)
    for key in metadata:
        # Fetch the current ("source") value for this key, from a file's
        # metadata when targeting "files/<name>", else from item metadata.
        src_md = {}
        if args.target.startswith("files/"):
            for f in item.get_files():
                if f.name == "/".join(args.target.split("/")[1:]):
                    src_md = f.__dict__.get(key, {})
                    break
        else:
            src_md = copy(item.metadata.get(key, {}))
        if not src_md:
            # Nothing stored under this key, so nothing to remove.
            continue

        if key == "collection":
            _col = copy(metadata[key])
            _src_md = copy(src_md)
            if not isinstance(_col, list):
                _col = [_col]
            if not isinstance(_src_md, list):
                _src_md = [_src_md]  # type: ignore
            for c in _col:
                if c not in _src_md:
                    # The collection isn't in the item's metadata; try removing
                    # it from the "holdings" simplelist instead, then exit —
                    # success either way ("no row to delete" means it was
                    # already gone).
                    r = item.remove_from_simplelist(c, "holdings")
                    j = r.json()
                    if j.get("success"):
                        print(f"{item.identifier} - success: {item.identifier} no longer in {c}",
                              file=sys.stderr)
                        sys.exit(0)
                    elif j.get("error", "").startswith("no row to delete for"):
                        print(f"{item.identifier} - success: {item.identifier} no longer in {c}",
                              file=sys.stderr)
                        sys.exit(0)
                    else:
                        print(f"{item.identifier} - error: {j.get('error')}", file=sys.stderr)
                        sys.exit(1)

        if not isinstance(src_md, list):
            if key == "subject":
                # Multi-value subjects may be stored as one ";"-joined string.
                if isinstance(src_md, str):
                    src_md = src_md.split(";")
            elif key == "collection":
                # A scalar collection means removing it would empty the field.
                print(f"{item.identifier} - error: all collections would be removed, "
                      "not submitting task.", file=sys.stderr)
                sys.exit(1)

            if src_md == metadata[key]:
                # Exact scalar match: delete the whole field.
                md[key] = "REMOVE_TAG"
                continue

        # Keep every current value that is not listed for removal.
        for x in src_md:
            if isinstance(metadata[key], list):
                if x not in metadata[key]:
                    md[key].append(x)  # type: ignore
            else:
                if x != metadata[key]:
                    md[key].append(x)  # type: ignore

        if len(md[key]) == len(src_md):
            # Nothing was actually removed for this key; drop the no-op entry.
            del md[key]

    if md.get("collection") == []:
        print(f"{item.identifier} - error: all collections would be removed, not submitting task.",
              file=sys.stderr)
        sys.exit(1)
    elif not md:
        print(f"{item.identifier} - warning: nothing needed to be removed.", file=sys.stderr)
        sys.exit(0)

    r = modify_metadata(item, md, args, parser)
    return r
|
||||
|
||||
|
||||
def _exit_on_responses(responses) -> None:
    """Exit based on a list of metadata-write responses.

    Exits 0 when every response is HTTP 200. Otherwise exits 1 on the
    first response that is neither 200 nor a benign "no changes to xml"
    error; when all failures are "no changes" errors, returns normally
    (so the process still exits 0), mirroring the historical behavior.
    """
    if all(r.status_code == 200 for r in responses):  # type: ignore
        sys.exit(0)
    for r in responses:
        assert isinstance(r, Response)
        if r.status_code == 200:
            continue
        # We still want to exit 0 if the non-200 is a
        # "no changes to xml" error.
        elif "no changes" in r.text:
            continue
        else:
            sys.exit(1)


def main(args: argparse.Namespace, parser: argparse.ArgumentParser) -> None:
    """
    Main entry point for 'ia metadata'.

    Dispatches on the parsed arguments to one of several modes:
    item-existence check, metadata modify/append/insert/remove,
    file-format listing, bulk editing from a CSV spreadsheet, or
    (default) dumping the item's metadata as JSON to stdout.
    """
    formats = set()
    responses: list[bool | Response] = []

    item = args.session.get_item(args.identifier)

    # Check existence of item.
    if args.exists:
        if item.exists:
            responses.append(True)
            print(f"{args.identifier} exists", file=sys.stderr)
        else:
            responses.append(False)
            print(f"{args.identifier} does not exist", file=sys.stderr)
        if all(r is True for r in responses):
            sys.exit(0)
        else:
            sys.exit(1)

    # Modify metadata.
    elif (args.modify or args.append or args.append_list
          or args.remove or args.insert):
        # TODO: Find a better way to handle this.
        # Exactly one of these modes is expected; --remove takes
        # precedence when combined with another mode.
        if args.modify:
            metadata = args.modify
        elif args.append:
            metadata = args.append
        elif args.append_list:
            metadata = args.append_list
        elif args.insert:
            metadata = args.insert
        if args.remove:
            metadata = args.remove

        if args.remove:
            responses.append(remove_metadata(item, metadata, args, parser))
        else:
            responses.append(modify_metadata(item, metadata, args, parser))
        # Shared exit-status handling (previously duplicated inline here
        # and in the spreadsheet branch).
        _exit_on_responses(responses)

    # Get metadata.
    elif args.formats:
        for f in item.get_files():
            formats.add(f.format)
        print("\n".join(formats))

    # Edit metadata for items in bulk, using a spreadsheet as input.
    elif args.spreadsheet:
        if not args.priority:
            args.priority = -5
        with open(args.spreadsheet, newline="", encoding="utf-8-sig") as csvfp:
            spreadsheet = csv.DictReader(csvfp)
            responses = []
            for row in spreadsheet:
                # Rows without an identifier are skipped, not errors.
                if not row["identifier"]:
                    continue
                item = args.session.get_item(row["identifier"])
                # The "file" column is upload-specific; drop it so it is
                # not submitted as item metadata.
                if row.get("file"):
                    del row["file"]
                metadata = {k.lower(): v for k, v in row.items() if v}
                responses.append(modify_metadata(item, metadata, args, parser))

        _exit_on_responses(responses)

    # Dump JSON to stdout.
    else:
        metadata_str = json.dumps(item.item_metadata)
        print(metadata_str)
|
||||
|
|
@ -0,0 +1,97 @@
|
|||
"""
|
||||
ia_move.py
|
||||
|
||||
'ia' subcommand for moving files on archive.org
|
||||
"""
|
||||
|
||||
# Copyright (C) 2012-2024 Internet Archive
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
from internetarchive.cli import ia_copy
|
||||
from internetarchive.cli.cli_utils import MetadataAction, QueryStringAction
|
||||
|
||||
|
||||
def setup(subparsers):
    """
    Register the 'move' ('mv') subcommand and its arguments.

    Args:
        subparsers: subparser object passed from ia.py
    """
    parser = subparsers.add_parser(
        "move",
        aliases=["mv"],
        help="Move and rename files in archive.org items")

    # Positional arguments: both endpoints are "identifier/file" paths.
    parser.add_argument(
        "source",
        metavar="SOURCE",
        help="Source file formatted as: identifier/file")
    parser.add_argument(
        "destination",
        metavar="DESTINATION",
        help="Destination file formatted as: identifier/file")

    # Options
    parser.add_argument(
        "-m", "--metadata",
        metavar="KEY:VALUE",
        nargs="+",
        action=MetadataAction,
        help=("Metadata to add to your new item, "
              "if you are moving the file to a new item"))
    parser.add_argument(
        "-H", "--header",
        metavar="KEY:VALUE",
        nargs="+",
        action=QueryStringAction,
        default={},
        help="S3 HTTP headers to send with your request")
    parser.add_argument(
        "--replace-metadata",
        action="store_true",
        help=("Only use metadata specified as argument, do not copy any "
              "from the source item"))
    parser.add_argument(
        "--ignore-file-metadata",
        action="store_true",
        help="Do not copy file metadata")
    parser.add_argument(
        "-n", "--no-derive",
        action="store_true",
        help="Do not derive uploaded files")
    parser.add_argument(
        "--no-backup",
        action="store_true",
        help=("Turn off archive.org backups, "
              "clobbered files will not be saved to 'history/files/$key.~N~'"))

    parser.set_defaults(func=lambda args: main(args, parser))
|
||||
|
||||
|
||||
def main(args: argparse.Namespace, parser: argparse.ArgumentParser) -> None:
    """
    Main entry point for ia move command.

    Copies SOURCE to DESTINATION via the copy machinery, then deletes
    the source file. Exits 0 on success and 1 on any failure.
    """
    # Add keep-old-version by default.
    if not args.header.get("x-archive-keep-old-version") and not args.no_backup:
        args.header["x-archive-keep-old-version"] = "1"

    # Call ia_copy.
    _, src_file = ia_copy.main(args, cmd="move", parser=parser)
    if not src_file:
        # Fix: report the path the user asked for; src_file is falsy
        # (e.g. None) here, so interpolating it produced "None does not exist".
        print(f"error: {args.source} does not exist", file=sys.stderr)
        sys.exit(1)
    dr = src_file.delete(headers=args.header, cascade_delete=True)
    if dr.status_code == 204:
        print(f"success: moved '{args.source}' to '{args.destination}'", file=sys.stderr)
        sys.exit(0)
    print(f"error: {dr.content}", file=sys.stderr)
    # Fix: a failed source delete previously fell through and exited 0.
    sys.exit(1)
|
||||
|
|
@ -0,0 +1,130 @@
|
|||
"""
|
||||
ia_reviews.py
|
||||
|
||||
'ia' subcommand for listing, submitting, and deleting reviews for archive.org items.
|
||||
"""
|
||||
|
||||
# Copyright (C) 2012-2024 Internet Archive
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
from requests.exceptions import HTTPError
|
||||
|
||||
|
||||
def setup(subparsers):
|
||||
"""
|
||||
Setup args for list command.
|
||||
|
||||
Args:
|
||||
subparsers: subparser object passed from ia.py
|
||||
"""
|
||||
parser = subparsers.add_parser("reviews",
|
||||
aliases=["re"],
|
||||
help="submit and modify reviews for archive.org items")
|
||||
|
||||
# Positional arguments
|
||||
parser.add_argument("identifier",
|
||||
type=str,
|
||||
help="identifier of the item")
|
||||
|
||||
# Options
|
||||
parser.add_argument("-d", "--delete",
|
||||
action="store_true",
|
||||
help="delete your review")
|
||||
parser.add_argument("-t", "--title",
|
||||
type=str,
|
||||
help="the title of your review")
|
||||
parser.add_argument("-b", "--body",
|
||||
type=str,
|
||||
help="the body of your review")
|
||||
parser.add_argument("-s", "--stars",
|
||||
type=int,
|
||||
help="the number of stars for your review")
|
||||
parser.add_argument("-i", "--index",
|
||||
action="store_true",
|
||||
help="Index a review")
|
||||
parser.add_argument("-n", "--noindex",
|
||||
action="store_true",
|
||||
help="Remove a review from the index")
|
||||
|
||||
# Conditional arguments that require --delete
|
||||
delete_group = parser.add_argument_group("delete options",
|
||||
("these options are used with "
|
||||
"the --delete flag"))
|
||||
delete_group.add_argument("-u", "--username",
|
||||
type=str,
|
||||
help="delete reviews for a specific user given USERNAME")
|
||||
delete_group.add_argument("-S", "--screenname",
|
||||
type=str,
|
||||
help="delete reviews for a specific user given SCREENNAME")
|
||||
delete_group.add_argument("-I", "--itemname",
|
||||
type=str,
|
||||
help="delete reviews for a specific user given ITEMNAME")
|
||||
|
||||
parser.set_defaults(func=lambda args: main(args, parser))
|
||||
|
||||
|
||||
def main(args: argparse.Namespace, parser: argparse.ArgumentParser) -> None:
    """
    Main entry point for 'ia reviews'.

    Handles indexing/unindexing a review, deleting a review, fetching
    the caller's existing review, or submitting a new one, depending on
    the flags in ``args``. Exits with 0 on success, 1 on error.
    """
    item = args.session.get_item(args.identifier)
    if args.index:
        r = item.index_review(username=args.username,
                              screenname=args.screenname,
                              itemname=args.itemname)
        # Only exits on success; on failure control falls through to the
        # delete/fetch/submit logic below, where `r` is overwritten.
        # NOTE(review): confirm this fall-through is intentional.
        if r.json().get("success"):
            print(f"{item.identifier} - success: review indexed", file=sys.stderr)
            sys.exit(0)
    elif args.noindex:
        r = item.noindex_review(username=args.username,
                                screenname=args.screenname,
                                itemname=args.itemname)
        # Same fall-through behavior as the --index branch above.
        if r.json().get("success"):
            print(f"{item.identifier} - success: review removed from index", file=sys.stderr)
            sys.exit(0)
    if args.delete:
        # The result `r` is inspected by the shared success/error
        # handling after this if/elif/else chain.
        r = item.delete_review(username=args.username,
                               screenname=args.screenname,
                               itemname=args.itemname)
    elif not args.body and not args.title:
        # No review content supplied: fetch and print the existing review.
        try:
            r = item.get_review()
            print(r.text)
            sys.exit(0)
        except HTTPError as exc:
            # A 404 simply means no review exists yet — not an error.
            if exc.response.status_code == 404:  # type: ignore
                sys.exit(0)
            else:
                raise exc
    else:
        # Submitting a review requires both a title and a body.
        if (args.title and not args.body) or (args.body and not args.title):
            parser.error("both --title and --body must be provided")
        r = item.review(args.title, args.body, args.stars)
    j = r.json()
    # "no change detected" is treated as success (idempotent resubmission).
    if j.get("success") or "no change detected" in j.get("error", "").lower():
        task_id = j.get("value", {}).get("task_id")
        if task_id:
            # Point the user at the catalog task that applies the change.
            print((f"{item.identifier} - success: "
                   f"https://catalogd.archive.org/log/{task_id}"),
                  file=sys.stderr)
        else:
            print(f"{item.identifier} - warning: no changes detected!", file=sys.stderr)
        sys.exit(0)
    else:
        print(f"{item.identifier} - error: {j.get('error')}", file=sys.stderr)
        sys.exit(1)
|
||||
|
|
@ -0,0 +1,202 @@
|
|||
"""
|
||||
ia_search.py
|
||||
|
||||
'ia' subcommand for searching items on archive.org.
|
||||
"""
|
||||
|
||||
# Copyright (C) 2012-2024 Internet Archive
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from itertools import chain
|
||||
|
||||
from requests.exceptions import ConnectTimeout, ReadTimeout
|
||||
|
||||
from internetarchive.cli.cli_utils import FlattenListAction, QueryStringAction
|
||||
from internetarchive.exceptions import AuthenticationError
|
||||
from internetarchive.utils import json
|
||||
|
||||
|
||||
def setup(subparsers):
    """
    Register the 'search' ('se') subcommand and its arguments.

    Args:
        subparsers: subparser object passed from ia.py
    """
    p = subparsers.add_parser("search", aliases=["se"],
                              help="Search items on archive.org")

    # Positional arguments
    p.add_argument("query", type=str, help="Search query or queries.")

    # Optional arguments
    p.add_argument("-p", "--parameters", nargs="+",
                   action=QueryStringAction, metavar="KEY:VALUE",
                   help="Parameters to send with your query.")
    p.add_argument("-H", "--header", nargs="+",
                   action=QueryStringAction, metavar="KEY:VALUE",
                   help="Add custom headers to your search request.")
    p.add_argument("-s", "--sort", action="append",
                   help=("Sort search results by specified fields. "
                         "See https://archive.org/advancedsearch.php "
                         "for full list of sort values"
                         " (e.g. --sort 'date desc', --sort 'date asc', etc.)."))
    p.add_argument("-i", "--itemlist", action="store_true",
                   help="Output identifiers only.")
    p.add_argument("-f", "--field", nargs="+",
                   action=FlattenListAction,
                   help="Metadata fields to return.")
    p.add_argument("-n", "--num-found", action="store_true",
                   help="Print the number of results to stdout.")
    p.add_argument("-F", "--fts", action="store_true",
                   help="Beta support for querying the archive.org full text search API.")
    p.add_argument("-D", "--dsl-fts", action="store_true",
                   help="Submit --fts query in dsl.")
    p.add_argument("-t", "--timeout", type=float, default=300,
                   help="Set the timeout in seconds.")

    p.set_defaults(func=lambda args: main(args, p))
|
||||
|
||||
|
||||
def prepare_values(value):
    """
    Flatten a list of possibly comma-separated strings into one list.

    Returns None when *value* is falsy (None or empty), so callers can
    pass the result straight through as an "unset" argument.
    """
    if not value:
        return None
    return [part for entry in value for part in entry.split(",")]
|
||||
|
||||
|
||||
def perform_search(args, fields, sorts, r_kwargs):
    """
    Run the search described by *args* and return the search object.

    :param fields: metadata fields to request, or None
    :param sorts: sort directives, or None
    :param r_kwargs: keyword arguments forwarded to the HTTP request
    """
    query_kwargs = {
        "fields": fields,
        "sorts": sorts,
        "params": args.parameters,
        "full_text_search": args.fts,
        "dsl_fts": args.dsl_fts,
        "request_kwargs": r_kwargs,
    }
    return args.session.search_items(args.query, **query_kwargs)  # type: ignore
|
||||
|
||||
|
||||
def handle_search_results(args, search):
    """
    Print search results according to the output flags in *args*.

    With --num-found, prints the hit count and exits 0. With --itemlist,
    prints identifiers only; otherwise each result is printed as a JSON
    line, exiting 1 if a result carries an "error" key.
    """
    if args.num_found:
        print(search.num_found)
        sys.exit(0)

    identifiers_only = args.itemlist
    fulltext = args.fts or args.dsl_fts
    for result in search:
        if not identifiers_only:
            print(json.dumps(result))
            if result.get("error"):
                sys.exit(1)
        elif fulltext:
            # FTS results nest identifiers under a "fields" mapping.
            print("\n".join(result.get("fields", {}).get("identifier")))
        else:
            print(result.get("identifier", ""))
|
||||
|
||||
|
||||
def handle_value_error(exc):
    """Format a ValueError for stderr output."""
    return "error: {}".format(exc)


def handle_connect_timeout():
    """Format the message for a connection timeout."""
    return "error: Request timed out. Increase the --timeout and try again."


def handle_read_timeout():
    """Format the message for a server-side read timeout."""
    return "error: The server timed out and failed to return all search results, please try again"


def handle_authentication_error(exc):
    """Format an AuthenticationError for stderr output."""
    return "error: {}".format(exc)
|
||||
|
||||
|
||||
def main(args: argparse.Namespace, parser: argparse.ArgumentParser) -> None:
    """
    Main entry point for 'ia search'.

    Builds the search request from the parsed arguments, runs it, and
    prints the results; known failure modes are reported to stderr with
    exit status 1.
    """
    def _fail(message: str) -> None:
        # Shared failure path: report and exit nonzero.
        print(message, file=sys.stderr)
        sys.exit(1)

    try:
        # Prepare fields and sorts.
        fields = prepare_values(args.field)
        sorts = prepare_values(args.sort)

        # Prepare request kwargs.
        r_kwargs = {
            "headers": args.header,
            "timeout": args.timeout,
        }

        # Perform search and print the results.
        search = perform_search(args, fields, sorts, r_kwargs)
        handle_search_results(args, search)

    except ValueError as exc:
        _fail(handle_value_error(exc))
    except ConnectTimeout:
        _fail(handle_connect_timeout())
    except ReadTimeout:
        _fail(handle_read_timeout())
    except AuthenticationError as exc:
        _fail(handle_authentication_error(exc))
|
||||
|
|
@ -0,0 +1,146 @@
|
|||
"""
|
||||
ia_simplelists.py
|
||||
|
||||
'ia' subcommand for managing simplelists on archive.org.
|
||||
"""
|
||||
|
||||
# Copyright (C) 2012-2025 Internet Archive
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
from internetarchive.utils import json
|
||||
|
||||
|
||||
def setup(subparsers):
|
||||
"""Set up argument parser for the 'simplelists' subcommand.
|
||||
|
||||
Args:
|
||||
subparsers: argparse subparsers object from main CLI
|
||||
"""
|
||||
parser = subparsers.add_parser("simplelists",
|
||||
aliases=["sl"],
|
||||
help="Manage simplelists")
|
||||
parser.add_argument(
|
||||
"identifier",
|
||||
nargs="?",
|
||||
type=str,
|
||||
help="Identifier for the upload"
|
||||
)
|
||||
|
||||
group = parser.add_argument_group("List operations")
|
||||
group.add_argument(
|
||||
"-p", "--list-parents",
|
||||
action="store_true",
|
||||
help="List parent lists for the given identifier"
|
||||
)
|
||||
group.add_argument(
|
||||
"-c", "--list-children",
|
||||
action="store_true",
|
||||
help="List children in parent list"
|
||||
)
|
||||
group.add_argument(
|
||||
"-l", "--list-name",
|
||||
type=str,
|
||||
help="Name of the list to operate on"
|
||||
)
|
||||
|
||||
group = parser.add_argument_group("Modification operations")
|
||||
group.add_argument(
|
||||
"-s", "--set-parent",
|
||||
metavar="PARENT",
|
||||
type=str,
|
||||
help="Add identifier to specified parent list"
|
||||
)
|
||||
group.add_argument(
|
||||
"-n", "--notes",
|
||||
metavar="NOTES",
|
||||
type=str,
|
||||
help="Notes to attach to the list membership"
|
||||
)
|
||||
group.add_argument(
|
||||
"-r", "--remove-parent",
|
||||
metavar="PARENT",
|
||||
type=str,
|
||||
help="Remove identifier from specified parent list"
|
||||
)
|
||||
|
||||
parser.set_defaults(func=lambda args: main(args, parser))
|
||||
|
||||
|
||||
def submit_patch(patch, args):
    """POST a simplelists patch document to the metadata API.

    :param patch: patch-operation dict, serialized to JSON for the API
    :param args: parsed CLI args carrying the session and identifier
    :return: the response from the POST request
    """
    session = args.session
    endpoint = f"{session.protocol}//{session.host}/metadata/{args.identifier}"
    payload = {
        "-patch": json.dumps(patch),
        "-target": "simplelists",
    }
    return session.post(endpoint, data=payload)
|
||||
|
||||
|
||||
def _handle_patch_operation(args, parser, operation):
    """Validate args and submit a 'set' or 'delete' simplelists patch.

    :param operation: The patch operation type ('set' or 'delete')
    """
    # Guard clauses: both an identifier and a list name are required.
    if not args.identifier:
        parser.error("Missing required identifier argument")
    if not args.list_name:
        parser.error("Must specify list name with -l/--list-name")

    patch = {
        "op": operation,
        "parent": args.set_parent or args.remove_parent,
        "list": args.list_name,
    }
    if args.notes:
        patch["notes"] = args.notes

    response = submit_patch(patch, args)
    try:
        response.raise_for_status()
        print(f"success: {args.identifier}")
    except Exception as exc:  # CLI boundary: report any failure and exit 1
        print(f"error: {args.identifier} - {exc!s}", file=sys.stderr)
        sys.exit(1)
|
||||
|
||||
|
||||
def main(args: argparse.Namespace, parser: argparse.ArgumentParser) -> None:
    """Handle simplelists subcommand execution.

    Args:
        args: Parsed command-line arguments
        parser: Argument parser for error handling
    """
    if args.list_parents:
        parents = args.session.get_item(args.identifier).item_metadata.get("simplelists")
        if parents:
            print(json.dumps(parents))
    elif args.list_children:
        # Default to the "catchall" list when none is named.
        args.list_name = args.list_name or "catchall"
        query = f"simplelists__{args.list_name}:{args.identifier or '*'}"
        for child in args.session.search_items(query):
            print(json.dumps(child))

    elif args.set_parent:
        _handle_patch_operation(args, parser, "set")

    elif args.remove_parent:
        _handle_patch_operation(args, parser, "delete")
    else:
        # No recognized operation requested.
        parser.print_help()
        sys.exit(1)
|
||||
|
|
@ -0,0 +1,177 @@
|
|||
"""
|
||||
ia_tasks.py
|
||||
|
||||
'ia' subcommand for retrieving information about archive.org catalog tasks.
|
||||
"""
|
||||
|
||||
# Copyright (C) 2012-2024 Internet Archive
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import warnings
|
||||
|
||||
from internetarchive.cli.cli_utils import PostDataAction, QueryStringAction
|
||||
from internetarchive.utils import json
|
||||
|
||||
|
||||
def setup(subparsers):
    """
    Register the 'tasks' ('ta') subcommand and its arguments.

    Args:
        subparsers: subparser object passed from ia.py
    """
    p = subparsers.add_parser(
        "tasks",
        aliases=["ta"],
        help="Retrieve information about your archive.org catalog tasks")

    p.add_argument("-t", "--task", nargs="*",
                   help="Return information about the given task.")
    p.add_argument("-G", "--get-task-log",
                   help="Return the given tasks task log.")
    p.add_argument("-p", "--parameter", nargs="+",
                   action=QueryStringAction, default={}, metavar="KEY:VALUE",
                   help="URL parameters passed to catalog.php.")
    p.add_argument("-T", "--tab-output", action="store_true",
                   help="Output task info in tab-delimited columns.")
    p.add_argument("-c", "--cmd", type=str,
                   help="The task to submit (e.g., make_dark.php).")
    p.add_argument("-C", "--comment", type=str,
                   help="A reasonable explanation for why a task is being submitted.")
    p.add_argument("-a", "--task-args", nargs="+",
                   action=QueryStringAction, default={}, metavar="KEY:VALUE",
                   help="Args to submit to the Tasks API.")
    p.add_argument("-d", "--data", nargs="+",
                   action=PostDataAction, metavar="KEY:VALUE", default={},
                   help="Additional data to send when submitting a task.")
    p.add_argument("-r", "--reduced-priority", action="store_true",
                   help="Submit task at a reduced priority.")
    p.add_argument("-l", "--get-rate-limit", action="store_true",
                   help="Get rate limit info.")
    p.add_argument("identifier", type=str, nargs="?",
                   help="Identifier for tasks specific operations.")

    p.set_defaults(func=lambda args: main(args, p))
|
||||
|
||||
|
||||
def handle_task_submission_result(result, cmd):
    """
    Report the outcome of a task submission and exit accordingly.

    An "already queued/running" error is reported — and now also
    exited — as success, since the requested task is effectively in
    place. Any other error exits 1.

    :param result: decoded JSON body from the Tasks API response
    :param cmd: the task command that was submitted (used in messages)
    """
    already_queued = "already queued/running" in result.get("error", "")
    if result.get("success"):
        task_log_url = result.get("value", {}).get("log")
        print(f"success: {task_log_url}", file=sys.stderr)
    elif already_queued:
        print(f"success: {cmd} task already queued/running", file=sys.stderr)
    else:
        print(f"error: {result.get('error')}", file=sys.stderr)
    # Fix: the already-queued case previously printed "success" but
    # exited 1 because only result["success"] was consulted here.
    sys.exit(0 if result.get("success") or already_queued else 1)
|
||||
|
||||
|
||||
def main(args: argparse.Namespace, parser: argparse.ArgumentParser) -> None:
    """
    Main entry point for 'ia tasks'.

    With --cmd, submits a task (or, with --get-rate-limit, prints rate
    limit info for that command) via the Tasks write API. Otherwise
    queries the Tasks read API, layering in default parameters, and
    prints each task as JSON (or tab-delimited with --tab-output).
    """
    # Tasks write API.
    if args.cmd:
        if args.get_rate_limit:
            r = args.session.get_tasks_api_rate_limit(args.cmd)
            print(json.dumps(r))
            sys.exit(0)
        args.data["args"] = args.task_args
        r = args.session.submit_task(args.identifier,
                                     args.cmd,
                                     comment=args.comment,
                                     priority=int(args.data.get("priority", 0)),
                                     reduced_priority=args.reduced_priority,
                                     data=args.data)
        # Prints the outcome and exits; the trailing exit(0) is a safety net.
        handle_task_submission_result(r.json(), args.cmd)
        sys.exit(0)

    # Tasks read API.
    # NOTE(review): when both an identifier and --get-task-log are given,
    # the identifier branch wins and the task log is not fetched — confirm
    # this precedence is intended.
    if args.identifier:
        _params = {"identifier": args.identifier, "catalog": 1, "history": 1}
        _params.update(args.parameter)
        args.parameter = _params
    elif args.get_task_log:
        log = args.session.get_task_log(args.get_task_log, **args.parameter)
        # Round-trip through surrogateescape/replace so undecodable bytes
        # in the log cannot crash the print.
        print(log.encode("utf-8", errors="surrogateescape")
              .decode("utf-8", errors="replace"))
        sys.exit(0)

    # Parameters that constitute a specific query; if none are present,
    # a default "my submitted tasks" query is constructed below.
    queryable_params = [
        "identifier",
        "task_id",
        "server",
        "cmd",
        "args",
        "submitter",
        "priority",
        "wait_admin",
        "submittime",
    ]

    # Layer 1 of defaults: queued tasks only, no history, unless a
    # specific task was requested. User-supplied parameters always win
    # because they are merged on top via update().
    if not (args.identifier
            or args.parameter.get("task_id")):
        _params = {"catalog": 1, "history": 0}
        _params.update(args.parameter)
        args.parameter = _params

    # Layer 2 of defaults: no queryable filter at all means "show the
    # current user's queued tasks".
    if not any(x in args.parameter for x in queryable_params):
        _params = {"submitter": args.session.user_email, "catalog": 1, "history": 0, "summary": 0}
        _params.update(args.parameter)
        args.parameter = _params

    if args.tab_output:
        warn_msg = ("tab-delimited output will be removed in a future release. "
                    "Please switch to the default JSON output.")
        warnings.warn(warn_msg, stacklevel=2)
    for t in args.session.get_tasks(params=args.parameter):
        # Legacy support for tab-delimited output.
        # Mypy is confused by CatalogTask members being created from kwargs
        if args.tab_output:
            # A task with no color is conventionally rendered as "done".
            color = t.color if t.color else "done"
            task_args = "\t".join([f"{k}={v}" for k, v in t.args.items()])  # type: ignore
            # Falsy fields are dropped from the tab-delimited row entirely.
            output = "\t".join([str(x) for x in [
                t.identifier,
                t.task_id,
                t.server,
                t.submittime,
                t.cmd,
                color,
                t.submitter,
                task_args,
            ] if x])
            print(output, flush=True)
        else:
            print(t.json(), flush=True)
|
||||
|
|
@ -0,0 +1,376 @@
|
|||
"""
|
||||
ia_upload.py
|
||||
|
||||
'ia' subcommand for uploading files to archive.org.
|
||||
"""
|
||||
|
||||
# Copyright (C) 2012-2024 Internet Archive
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import os
|
||||
import sys
|
||||
import webbrowser
|
||||
from copy import deepcopy
|
||||
from locale import getpreferredencoding
|
||||
from tempfile import TemporaryFile
|
||||
from typing import Union
|
||||
|
||||
from requests.exceptions import HTTPError
|
||||
|
||||
from internetarchive.cli.cli_utils import (
|
||||
MetadataAction,
|
||||
QueryStringAction,
|
||||
get_args_dict,
|
||||
validate_identifier,
|
||||
)
|
||||
from internetarchive.utils import (
|
||||
InvalidIdentifierException,
|
||||
JSONDecodeError,
|
||||
is_valid_metadata_key,
|
||||
json,
|
||||
)
|
||||
|
||||
|
||||
def setup(subparsers):
    """
    Set up args for the upload command.

    Args:
        subparsers: subparser object passed from ia.py
    """
    parser = subparsers.add_parser("upload",
                                   aliases=["up"],
                                   help="Upload files to archive.org")

    # Positional arguments
    parser.add_argument("identifier",
                        type=validate_identifier,
                        nargs="?",
                        default=None,
                        help="Identifier for the upload")
    parser.add_argument("file",
                        nargs="*",
                        type=validate_file,
                        help="File(s) to upload")

    # Options
    parser.add_argument("-q", "--quiet",
                        action="store_true",
                        help="Turn off ia's output")
    parser.add_argument("-d", "--debug",
                        action="store_true",
                        help=("Print S3 request parameters to stdout and exit without "
                              "sending request"))
    parser.add_argument("-r", "--remote-name",
                        help=("When uploading data from stdin, "
                              "this option sets the remote filename"))
    parser.add_argument("-m", "--metadata",
                        nargs="+",
                        action=MetadataAction,
                        metavar="KEY:VALUE",
                        default={},
                        help="Metadata to add to your item")
    parser.add_argument("--spreadsheet",
                        type=argparse.FileType("r", encoding="utf-8-sig"),
                        help="Bulk uploading")
    parser.add_argument("--file-metadata",
                        type=argparse.FileType("r"),
                        help="Upload files with file-level metadata via a file_md.jsonl file")
    parser.add_argument("-H", "--header",
                        nargs="+",
                        action=QueryStringAction,
                        default={},
                        help="S3 HTTP headers to send with your request")
    parser.add_argument("-c", "--checksum",
                        action="store_true",
                        help="Skip based on checksum")
    parser.add_argument("-v", "--verify",
                        action="store_true",
                        help="Verify that data was not corrupted traversing the network")
    parser.add_argument("-n", "--no-derive",
                        action="store_true",
                        help="Do not derive uploaded files")
    parser.add_argument("--size-hint",
                        help="Specify a size-hint for your item")
    parser.add_argument("--delete",
                        action="store_true",
                        help="Delete files after verifying checksums")
    parser.add_argument("-R", "--retries",
                        type=int,
                        help="Number of times to retry request if S3 returns a 503 SlowDown error")
    parser.add_argument("-s", "--sleep",
                        type=int,
                        help="The amount of time to sleep between retries")
    parser.add_argument("--no-collection-check",
                        action="store_true",
                        help="Skip collection exists check")
    parser.add_argument("-o", "--open-after-upload",
                        action="store_true",
                        help="Open the details page for an item after upload")
    parser.add_argument("--no-backup",
                        action="store_true",
                        help="Turn off archive.org backups")
    parser.add_argument("--keep-directories",
                        action="store_true",
                        help="Keep directories in the supplied file paths for the remote filename")
    parser.add_argument("--no-scanner",
                        action="store_true",
                        help="Do not set the scanner field in meta.xml")
    parser.add_argument("--status-check",
                        action="store_true",
                        help="Check if S3 is accepting requests to the given item")

    # Dispatch to main() with the parser available for parser.error() calls.
    parser.set_defaults(func=lambda args: main(args, parser))
|
||||
|
||||
def _upload_files(item, files, upload_kwargs, prev_identifier=None):
|
||||
"""
|
||||
Helper function for calling :meth:`Item.upload`
|
||||
"""
|
||||
# Check if the list has any element.
|
||||
if not files:
|
||||
raise FileNotFoundError("No valid file was found. Check your paths.")
|
||||
|
||||
responses = []
|
||||
if (upload_kwargs["verbose"]) and (prev_identifier != item.identifier):
|
||||
print(f"{item.identifier}:", file=sys.stderr)
|
||||
|
||||
try:
|
||||
response = item.upload(files, **upload_kwargs)
|
||||
responses += response
|
||||
except HTTPError as exc:
|
||||
responses += [exc.response]
|
||||
except InvalidIdentifierException as exc:
|
||||
print(str(exc), file=sys.stderr)
|
||||
sys.exit(1)
|
||||
finally:
|
||||
# Debug mode.
|
||||
if upload_kwargs["debug"]:
|
||||
for i, r in enumerate(responses):
|
||||
if i != 0:
|
||||
print("---", file=sys.stderr)
|
||||
headers = "\n".join(
|
||||
[f" {k}:{v}" for (k, v) in r.headers.items()]
|
||||
)
|
||||
print(f"Endpoint:\n {r.url}\n", file=sys.stderr)
|
||||
print(f"HTTP Headers:\n{headers}", file=sys.stderr)
|
||||
|
||||
return responses
|
||||
|
||||
|
||||
def uploading_from_stdin(args):
    """
    Return True when the single "file" argument is '-', i.e. stdin input.
    """
    files = args.file
    return bool(files) and len(files) == 1 and files[0] == "-"
||||
|
||||
def check_if_file_arg_required(args, parser):
    """
    Error out via ``parser.error`` when no file was given and no alternate
    input source (--spreadsheet, --file-metadata) or mode (--status-check)
    makes the file argument optional.
    """
    alternatives = (args.spreadsheet, args.file_metadata, args.status_check)
    if not args.file and not any(alternatives):
        parser.error("You must specify a file to upload.")
||||
|
||||
def validate_file(arg):
    """
    Argparse type callback: accept an existing path or '-' (stdin marker).
    """
    if arg == "-" or os.path.exists(arg):
        return arg
    raise argparse.ArgumentTypeError(f"'{arg}' is not a valid file or directory")
|
||||
|
||||
def main(args, parser):  # noqa: PLR0912,C901
    # TODO: Refactor to deal with PLR0912 and C901
    """
    Main entry point for 'ia upload'.

    Handles three modes: a plain file upload to ``args.identifier`` (possibly
    from stdin via --remote-name), a bulk upload driven by a --spreadsheet
    CSV, and a --status-check probe of S3. Exits non-zero on any failed
    upload response.

    Args:
        args: parsed argparse namespace (must carry a ``session`` attribute).
        parser: the subcommand parser, used for parser.error().
    """
    check_if_file_arg_required(args, parser)

    if uploading_from_stdin(args) and not args.remote_name:
        parser.error("When uploading from stdin, "
                     "you must specify a remote filename with --remote-name")

    if args.status_check:  # TODO: support for checking if a specific bucket is overloaded
        if args.session.s3_is_overloaded():
            print(f"warning: {args.identifier} is over limit, and not accepting requests. "
                  "Expect 503 SlowDown errors.",
                  file=sys.stderr)
            sys.exit(1)
        else:
            print(f"success: {args.identifier} is accepting requests.", file=sys.stderr)
            sys.exit(0)
    elif args.identifier:
        item = args.session.get_item(args.identifier)

    # Prepare upload headers and kwargs
    queue_derive = not args.no_derive
    verbose = not args.quiet
    set_scanner = not args.no_scanner
    if args.size_hint:
        args.header["x-archive-size-hint"] = args.size_hint
    # Keep old versions of clobbered files unless the user opted out.
    if not args.header.get("x-archive-keep-old-version") \
            and not args.no_backup:
        args.header["x-archive-keep-old-version"] = "1"

    if args.file_metadata:
        # --file-metadata is already an open file handle (argparse.FileType),
        # so read from it directly; the previous code passed the file object
        # to open(), which raises TypeError.
        fh = args.file_metadata
        try:
            args.file_metadata = json.load(fh)
        except JSONDecodeError:
            # Not a single JSON document: treat it as JSON Lines, one
            # file-metadata dict per line, and upload those entries.
            fh.seek(0)
            args.file = []
            for line in fh:
                if line.strip():
                    args.file.append(json.loads(line.strip()))

    upload_kwargs = {
        "metadata": args.metadata,
        "headers": args.header,
        "debug": args.debug,
        "queue_derive": queue_derive,
        "set_scanner": set_scanner,
        "verbose": verbose,
        "verify": args.verify,
        "checksum": args.checksum,
        "retries": args.retries,
        "retries_sleep": args.sleep,
        "delete": args.delete,
        "validate_identifier": True,
    }

    # Upload files
    errors = False
    if not args.spreadsheet:
        if uploading_from_stdin(args):
            # Spool stdin to a temp file so the upload has a seekable source.
            local_file = TemporaryFile()
            # sys.stdin normally has the buffer attribute which returns bytes.
            # However, this might not always be the case, e.g. on mocking for test purposes.
            # Fall back to reading as str and encoding back to bytes.
            # Note that the encoding attribute might also be None. In that case, fall back to
            # locale.getpreferredencoding, the default of io.TextIOWrapper and open().
            if hasattr(sys.stdin, "buffer"):
                def read():
                    return sys.stdin.buffer.read(1048576)
            else:
                encoding = sys.stdin.encoding or getpreferredencoding(False)

                def read():
                    return sys.stdin.read(1048576).encode(encoding)
            while True:
                data = read()
                if not data:
                    break
                local_file.write(data)
            local_file.seek(0)
        else:
            local_file = args.file
            # Properly expand a period to the contents of the current working directory.
            # (The previous isinstance(local_file, str) guard made this dead
            # for the normal list-of-paths case and would have iterated over
            # the characters of a single string path.)
            if isinstance(local_file, list) and "." in local_file:
                local_file = [p for p in local_file if p != "."]
                local_file = os.listdir(".") + local_file

        # --remote-name maps a single local source to one remote filename.
        if isinstance(local_file, (list, tuple, set)) and args.remote_name:
            local_file = list(local_file)[0]
        if args.remote_name:
            files = {args.remote_name: local_file}
        elif args.keep_directories:
            files = {f: f for f in local_file}
        else:
            files = local_file

        for _r in _upload_files(item, files, upload_kwargs):
            if args.debug:
                break

            # Check if Response is empty first (i.e. --checksum)
            # TODO: Should upload return something other than an empty Response
            # object if checksum is set and the file is already in the item?
            if _r.status_code is None:
                pass
            elif not _r.ok:
                errors = True
            else:
                if args.open_after_upload:
                    url = f"{args.session.protocol}//{args.session.host}/details/{item.identifier}"
                    webbrowser.open_new_tab(url)

    # Bulk upload using spreadsheet.
    else:
        # Use the same session for each upload request.
        with args.spreadsheet as csvfp:
            spreadsheet = csv.DictReader(csvfp)
            prev_identifier = None
            for row in spreadsheet:
                for metadata_key in row:
                    if not is_valid_metadata_key(metadata_key):
                        print(f"error: '{metadata_key}' is not a valid metadata key.",
                              file=sys.stderr)
                        sys.exit(1)
                upload_kwargs_copy = deepcopy(upload_kwargs)
                if row.get("REMOTE_NAME"):
                    local_file = {row["REMOTE_NAME"]: row["file"]}
                    del row["REMOTE_NAME"]
                elif args.keep_directories:
                    local_file = {row["file"]: row["file"]}
                else:
                    local_file = row["file"]
                identifier = row.get("item", row.get("identifier"))
                if not identifier:
                    # Rows may omit the identifier to reuse the previous row's.
                    if not prev_identifier:
                        print("error: no identifier column on spreadsheet.",
                              file=sys.stderr)
                        sys.exit(1)
                    identifier = prev_identifier
                del row["file"]
                if "identifier" in row:
                    del row["identifier"]
                if "item" in row:
                    del row["item"]
                item = args.session.get_item(identifier)
                # TODO: Clean up how indexed metadata items are coerced
                # into metadata.
                md_args = [f"{k.lower()}:{v}" for (k, v) in row.items() if v]
                metadata = get_args_dict(md_args)
                upload_kwargs_copy["metadata"].update(metadata)
                r = _upload_files(item, local_file, upload_kwargs_copy, prev_identifier)
                for _r in r:
                    if args.debug:
                        break
                    if (not _r.status_code) or (not _r.ok):
                        errors = True
                    else:
                        if args.open_after_upload:
                            # The second fragment was missing its f-prefix, so
                            # the literal text "{identifier}" ended up in the URL.
                            url = (f"{args.session.protocol}//{args.session.host}"
                                   f"/details/{identifier}")
                            webbrowser.open_new_tab(url)
                prev_identifier = identifier

    if errors:
        sys.exit(1)
Loading…
Add table
Add a link
Reference in a new issue