44 lines
1.4 KiB
Python
44 lines
1.4 KiB
Python
|
|
import json
import ssl
import urllib.parse
import urllib.request
|
||
|
|
|
||
|
|
# Shared TLS context for outbound requests.
# Certificate and hostname verification stay enabled (the defaults set by
# create_default_context); turning them off would let any on-path attacker
# impersonate the archive endpoint and feed this audit forged results.
ctx = ssl.create_default_context()
|
||
|
|
|
||
|
|
# URL prefixes (scheme omitted) to look up in the Wayback Machine, listed in
# the order they are reported.
urls_to_check = [
    "recursivecoherencetheory.com",
    "github.com/mrhavens",
    # osf.io entries
    "osf.io/7s3ta",
    "osf.io/q23zs",
    "osf.io/f53q2",
]
|
||
|
|
|
||
|
|
def check_wayback(url_pattern, *, limit=5, timeout=30):
    """Print the Wayback Machine captures recorded under a URL prefix.

    Queries the CDX search endpoint on web.archive.org with
    ``matchType=prefix`` and prints one line per returned capture (date,
    original URL, HTTP status), or a "no archives" line when only the header
    row (or nothing) comes back.

    Args:
        url_pattern: URL prefix to search for, e.g. ``"osf.io/7s3ta"``.
        limit: Maximum number of capture rows to request.
        timeout: Seconds to wait on the network before giving up.

    Returns:
        None. Results and failures are reported on stdout; exceptions raised
        while fetching or parsing are caught and printed rather than
        propagated, so one bad lookup does not stop a batch of lookups.
    """
    # Encode the query properly so patterns containing '?', '&', '#' or
    # spaces cannot corrupt the request. matchType=prefix is used here;
    # matchType=domain is the alternative mentioned by the original author.
    query = urllib.parse.urlencode({
        "url": url_pattern,
        "matchType": "prefix",
        "output": "json",
        "limit": limit,
    })
    api_url = f"https://web.archive.org/cdx/search/cdx?{query}"
    try:
        req = urllib.request.Request(api_url, headers={'User-Agent': 'Mozilla/5.0'})
        # Bounded wait: without a timeout a stalled connection hangs the run.
        with urllib.request.urlopen(req, context=ctx, timeout=timeout) as response:
            body = response.read().decode()

        # An empty body means "no results", not a parse failure.
        data = json.loads(body) if body.strip() else []

        if len(data) > 1:  # first row is headers
            header, *rows = data
            print(f"✅ Found archives for {url_pattern}:")
            for row in rows:
                # Look columns up by the names in the header row instead of
                # relying on their position.
                capture = dict(zip(header, row))
                timestamp = capture["timestamp"]
                original_url = capture["original"]
                status = capture["statuscode"]
                # The first eight timestamp characters are sliced as YYYYMMDD.
                print(f"  - [{timestamp[:4]}-{timestamp[4:6]}-{timestamp[6:8]}] {original_url} (HTTP {status})")
        else:
            print(f"❌ No archives found for {url_pattern}")
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and let the
        # caller continue with the next pattern.
        print(f"❌ Error fetching {url_pattern}: {e}")
|
||
|
|
|
||
|
|
def main():
    """Run the Wayback Machine lookup for every entry in ``urls_to_check``.

    Prints a banner first, then each lookup's report followed by a
    50-character dashed divider.
    """
    divider = "-" * 50
    print("Auditing The Internet Archive (Wayback Machine)...\n")
    for target in urls_to_check:
        check_wayback(target)
        print(divider)
|
||
|
|
|
||
|
|
# Run the audit only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|