"""Audit the Internet Archive (Wayback Machine) for captures of selected URLs.

For every entry in ``urls_to_check`` the script queries the Wayback CDX
search endpoint with prefix matching and prints the captures it returns.
"""

import http.client
import json
import ssl
import urllib.error
import urllib.parse
import urllib.request

# Default context: certificate and hostname verification stay enabled so the
# archive responses cannot be silently tampered with in transit.
ctx = ssl.create_default_context()

CDX_ENDPOINT = "https://web.archive.org/cdx/search/cdx"

# Column positions previously hard-coded by this script; used only when the
# header row returned by the API does not name the field.
_FALLBACK_INDEX = {"timestamp": 1, "original": 2, "statuscode": 4}

urls_to_check = [
    "recursivecoherencetheory.com",
    "github.com/mrhavens",
    "osf.io/7s3ta",
    "osf.io/q23zs",
    "osf.io/f53q2",
]


def build_cdx_url(url_pattern, limit=5):
    """Return the CDX search URL for *url_pattern* using prefix matching.

    Args:
        url_pattern: URL or URL prefix to look up.
        limit: Maximum number of capture rows to request.

    Returns:
        The full request URL with a properly encoded query string.
    """
    # Encode the pattern so characters such as '&', '=', '#' or spaces cannot
    # corrupt the query; '/' and ':' are left readable.
    query = urllib.parse.urlencode(
        {
            "url": url_pattern,
            "matchType": "prefix",
            "output": "json",
            "limit": limit,
        },
        safe="/:",
    )
    return f"{CDX_ENDPOINT}?{query}"


def _field(header, row, name):
    """Return the value of column *name* in *row*, or ``"?"`` if absent."""
    try:
        index = header.index(name)
    except ValueError:
        index = _FALLBACK_INDEX[name]
    return row[index] if index < len(row) else "?"


def format_capture(header, row):
    """Format one CDX capture row as a single report line.

    Args:
        header: The header row of the JSON response (list of field names).
        row: One capture row, aligned with *header*.

    Returns:
        A string of the form `` - [YYYY-MM-DD] <original url> (HTTP <status>)``.
    """
    timestamp = str(_field(header, row, "timestamp"))
    original_url = _field(header, row, "original")
    status = _field(header, row, "statuscode")
    date = f"{timestamp[:4]}-{timestamp[4:6]}-{timestamp[6:8]}"
    return f" - [{date}] {original_url} (HTTP {status})"


def check_wayback(url_pattern, *, limit=5, timeout=30.0):
    """Query the CDX endpoint for *url_pattern* and print what was found.

    Network, HTTP and decoding failures are reported on stdout instead of
    being raised, so one failing lookup does not stop the audit.

    Args:
        url_pattern: URL or URL prefix to look up.
        limit: Maximum number of captures to list.
        timeout: Seconds to wait for the HTTP request before giving up.
    """
    request = urllib.request.Request(
        build_cdx_url(url_pattern, limit),
        headers={"User-Agent": "Mozilla/5.0"},
    )
    try:
        with urllib.request.urlopen(
            request, context=ctx, timeout=timeout
        ) as response:
            body = response.read().decode("utf-8").strip()
        # An empty body means "no captures", not a malformed response.
        data = json.loads(body) if body else []
    except (OSError, ValueError, http.client.HTTPException) as e:
        # OSError covers URLError/HTTPError/timeouts; ValueError covers
        # JSON and Unicode decoding problems.
        print(f"❌ Error fetching {url_pattern}: {e}")
        return

    # The first row of the JSON output is the header, so a single row (or
    # none) means there are no captures.
    if not isinstance(data, list) or len(data) <= 1:
        print(f"❌ No archives found for {url_pattern}")
        return

    header, *captures = data
    print(f"✅ Found archives for {url_pattern}:")
    for row in captures:
        print(format_capture(header, row))


def main():
    """Run the audit for every entry in ``urls_to_check``."""
    print("Auditing The Internet Archive (Wayback Machine)...\n")
    for u in urls_to_check:
        check_wayback(u)
        print("-" * 50)


if __name__ == '__main__':
    main()