import urllib.request
import json
import ssl

# Shared TLS context for HTTPS requests made by this script.
# ssl.create_default_context() already requires a valid certificate chain and
# checks the hostname; those defaults are kept on purpose. Turning them off
# (check_hostname = False / CERT_NONE) would let anyone on the network path
# impersonate the server and feed the audit forged results.
ctx = ssl.create_default_context()

# URL prefixes to look up, written without a scheme. Each entry is sent
# verbatim as the `url` query parameter of the CDX request.
urls_to_check = [
    "recursivecoherencetheory.com",
    "github.com/mrhavens",
    "osf.io/7s3ta",
    "osf.io/q23zs",
    "osf.io/f53q2",
]

def check_wayback(url_pattern, *, timeout=30, limit=5):
    """Print the Wayback Machine captures whose URL starts with *url_pattern*.

    Sends one request to the Internet Archive CDX endpoint with
    ``matchType=prefix`` and ``output=json`` and prints one line per capture
    (date, original URL, HTTP status), or a "no archives" line when the
    response holds no capture rows. Failures are reported on stdout instead
    of being raised, so a caller looping over several patterns keeps going.

    Args:
        url_pattern: URL prefix to look up, e.g. ``"osf.io/7s3ta"``.
        timeout: Seconds to wait on the network before giving up.
        limit: Maximum number of capture rows to request.

    Returns:
        None. All results are written to stdout.
    """
    # Imported here so the function does not depend on a file-level import.
    from urllib.parse import urlencode

    # urlencode escapes characters such as "&", "#", "?" and spaces that
    # would otherwise split or truncate the query string.
    query = urlencode({
        "url": url_pattern,
        "matchType": "prefix",
        "output": "json",
        "limit": limit,
    })
    api_url = f"https://web.archive.org/cdx/search/cdx?{query}"

    try:
        req = urllib.request.Request(api_url, headers={'User-Agent': 'Mozilla/5.0'})
        # Without a timeout a stalled connection would block the run forever.
        with urllib.request.urlopen(req, timeout=timeout, context=ctx) as response:
            body = response.read().decode("utf-8")

        # Treat an empty body as "no captures" rather than letting
        # json.loads fail on it and report a misleading error.
        data = json.loads(body) if body.strip() else []

        captures = data[1:]  # first row is the column header
        if not captures:
            print(f"❌ No archives found for {url_pattern}")
            return

        print(f"✅ Found archives for {url_pattern}:")
        for row in captures:
            timestamp = row[1]
            original_url = row[2]
            status = row[4]
            # The timestamp begins with YYYYMMDD; slice it into a readable date.
            print(f"  - [{timestamp[:4]}-{timestamp[4:6]}-{timestamp[6:8]}] {original_url} (HTTP {status})")
    except Exception as e:
        # Per-URL boundary: report the failure and let the caller continue.
        print(f"❌ Error fetching {url_pattern}: {e}")

def main():
    """Check every entry of ``urls_to_check`` and print a divider after each."""
    divider = "-" * 50
    print("Auditing The Internet Archive (Wayback Machine)...\n")
    for target in urls_to_check:
        check_wayback(target)
        print(divider)

# Run the audit only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()