tar.py (1667B)
"""Incrementally build a gzip-compressed tar archive from a manifest file.

The manifest lists one file name per line.  SHA-256 digests of the manifest,
every listed file and the produced archive are cached in ``.makedb`` (a
pickle) so that a later run can return early when nothing has changed.

Running or importing this module executes ``tar()`` with its default
arguments and then rewrites ``.makedb``.
"""
import hashlib
import os
import pickle
import subprocess

from io import BufferedReader

# Persistent cache shared by the helpers below.  Keys written by this file:
#   "file:NAME:modtime" / "file:NAME:hash"  - per-file digest cache
#   "tar:file_hashes"                       - {path: sha256 hex} snapshot of
#                                             the last successful tar() run
db: dict = dict()

# Read size used while hashing; the digest does not depend on this value.
_CHUNK_SIZE = 1 << 16

# NOTE(review): pickle.load can execute arbitrary code contained in the file,
# so .makedb must only ever be a locally generated, trusted file.
try:
    with open(b".makedb", "rb") as f:
        _loaded = pickle.load(f)
except Exception:
    # Best effort: a missing, truncated or otherwise unreadable cache just
    # means starting from an empty one.  KeyboardInterrupt and SystemExit are
    # deliberately NOT swallowed here.
    pass
else:
    # Ignore a payload that is not a dict instead of failing later on .get().
    if isinstance(_loaded, dict):
        db = _loaded


def file_modtime(f):
    """Return the modification time, in nanoseconds, of the open file *f*."""
    return os.stat(f.fileno()).st_mtime_ns


def _file_hash(f: BufferedReader):
    """Return the SHA-256 hex digest of the open binary file *f*.

    The digest is cached in ``db`` under keys derived from ``f.name`` and is
    reused as long as the file's modification time equals the cached one.
    When the digest is recomputed, it is printed and the cache is updated.
    Reading starts at the file's current position.
    """
    modtime_key = f"file:{f.name}:modtime"
    hash_key = f"file:{f.name}:hash"

    modtime = file_modtime(f)
    # Require both keys so a half-written cache entry cannot raise KeyError.
    if modtime == db.get(modtime_key, None) and hash_key in db:
        return db[hash_key]

    h = hashlib.sha256()
    # Fixed-size reads: iterating a binary file splits on b"\n", which gives
    # arbitrary chunk sizes (possibly the whole file at once).
    while chunk := f.read(_CHUNK_SIZE):
        h.update(chunk)
    d = h.hexdigest()

    # Record the modtime only after the digest exists, so an interrupted read
    # can never pair a fresh modtime with a stale digest.
    db[hash_key] = d
    db[modtime_key] = modtime
    print("hash", f.name, d)
    return d


def file_hash(f: BufferedReader | bytes | str):
    """Return the SHA-256 hex digest of *f*.

    *f* is either a path (``bytes`` or ``str``), which is opened in binary
    mode for the duration of the call, or an already open binary file.
    Raises ``OSError`` if a path cannot be opened.
    """
    if isinstance(f, (bytes, str)):
        with open(f, "rb") as handle:
            return _file_hash(handle)
    return _file_hash(f)


def _is_up_to_date(manifest, output) -> bool:
    """Return True if the last recorded tar run still matches the files on disk."""
    recorded = db.get("tar:file_hashes")
    if not isinstance(recorded, dict):
        return False
    # A snapshot taken for a different manifest or output says nothing about
    # this request.
    if manifest not in recorded or output not in recorded:
        return False
    try:
        return all(file_hash(name) == digest for name, digest in recorded.items())
    except OSError:
        # A recorded file (for example the archive itself) is gone or
        # unreadable: that is a reason to rebuild, not to crash.
        return False


def tar(manifest=b"manifest", output=b"archive.tar.gz"):
    """Create *output* with ``tar cvzf`` from the files listed in *manifest*.

    Returns early, doing nothing, when the digests recorded by the previous
    successful run (manifest, listed files and archive) all still match.
    Otherwise the archive is rebuilt and the new digests are stored in
    ``db["tar:file_hashes"]``.

    Raises ``OSError`` if the manifest or a listed file cannot be read, and
    ``subprocess.CalledProcessError`` if ``tar`` exits with a non-zero status
    (in which case no digests are recorded).
    """
    # Early exit if hashes match previous run
    if _is_up_to_date(manifest, output):
        return

    with open(manifest, "rb") as f:
        # Blank lines are not file names; skip them.
        manifest_lines = [line for line in f.read().splitlines() if line]

    file_hashes = {manifest: file_hash(manifest)}
    for fname in manifest_lines:
        file_hashes[fname] = file_hash(fname)

    print("tar", "cvzf", output, *manifest_lines)
    # check=True: a failed tar must not have its (partial) output recorded as
    # a good build, otherwise later runs would wrongly exit early.
    subprocess.run([b"tar", b"cvzf", output, *manifest_lines], check=True)

    file_hashes[output] = file_hash(output)
    db["tar:file_hashes"] = file_hashes


tar()

with open(b".makedb", "wb") as f:
    pickle.dump(db, f)