Files
intellecton/audit_ia.py
T

44 lines
1.4 KiB
Python
Raw Normal View History

import urllib.request
import json
import ssl
# TLS context passed to ``urlopen`` in ``check_wayback``.
# Hostname checking and certificate verification are left at the secure
# defaults of ``ssl.create_default_context()``; disabling them would let a
# man-in-the-middle forge the archive results this script reports.
ctx = ssl.create_default_context()
# URL prefixes to look up in the Wayback Machine, in the order they are
# audited by ``main``.
urls_to_check = [
    # Project site and code hosting.
    "recursivecoherencetheory.com",
    "github.com/mrhavens",
    # OSF project pages.
    "osf.io/7s3ta",
    "osf.io/q23zs",
    "osf.io/f53q2",
]
def check_wayback(url_pattern):
    """Print up to five Wayback Machine captures matching *url_pattern*.

    Queries the Internet Archive CDX API with ``matchType=prefix`` and prints
    one line per returned capture (date, original URL, HTTP status), or a
    "no archives" message when the response holds nothing beyond the header
    row.  Failures are reported on stdout instead of being raised, so one bad
    lookup does not abort a batch of checks.

    Args:
        url_pattern: URL prefix to look up, e.g. ``"osf.io/7s3ta"``.

    Returns:
        None. All output goes to stdout.
    """
    # Imported locally so the function does not depend on ``urllib.parse``
    # being imported at module level.
    from urllib.parse import urlencode

    # Percent-encode the pattern so characters such as ``&``, ``?``, ``#`` or
    # spaces cannot break or alter the query string.
    query = urlencode(
        {
            "url": url_pattern,
            "matchType": "prefix",
            "output": "json",
            "limit": 5,
        },
        safe="/:",
    )
    api_url = f"https://web.archive.org/cdx/search/cdx?{query}"
    try:
        req = urllib.request.Request(api_url, headers={'User-Agent': 'Mozilla/5.0'})
        # The timeout keeps a single unresponsive request from hanging the run.
        with urllib.request.urlopen(req, timeout=30, context=ctx) as response:
            body = response.read().decode()
        # Treat an empty body as "no captures" rather than a JSON parse error.
        data = json.loads(body) if body.strip() else []
        if len(data) > 1:  # first row is headers
            print(f"✅ Found archives for {url_pattern}:")
            for row in data[1:]:
                timestamp = row[1]
                original_url = row[2]
                status = row[4]
                # Only the leading YYYYMMDD digits of the timestamp are shown.
                print(f" - [{timestamp[:4]}-{timestamp[4:6]}-{timestamp[6:8]}] {original_url} (HTTP {status})")
        else:
            print(f"❌ No archives found for {url_pattern}")
    except Exception as e:
        # Deliberately broad: this is the per-URL boundary, and any failure
        # (network, decoding, unexpected row shape) is reported, not raised.
        print(f"❌ Error fetching {url_pattern}: {e}")
def main():
    """Check every entry of ``urls_to_check`` against the Wayback Machine.

    Prints a banner, then calls ``check_wayback`` for each pattern, printing
    a 50-character dashed separator after each one.
    """
    print("Auditing The Internet Archive (Wayback Machine)...\n")
    separator = "-" * 50
    for pattern in urls_to_check:
        check_wayback(pattern)
        print(separator)
# Run the audit only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()