commit e30ca6a154cafff84da8b0f89cc660fa8e5717c9
parent 49686f32e26929787833cdaf3d7e5e70efa997d4
Author: gracefu <81774659+gracefuu@users.noreply.github.com>
Date: Sun, 20 Apr 2025 03:26:16 +0800
Add a sketch of how a simple tar rule might be implemented by hand
Diffstat:
6 files changed, 65 insertions(+), 0 deletions(-)
diff --git a/tar-sketch/.gitignore b/tar-sketch/.gitignore
@@ -0,0 +1 @@
+archive.tar.gz
diff --git a/tar-sketch/README.md b/tar-sketch/README.md
@@ -0,0 +1,5 @@
+This directory contains a sketch of how one might write, by hand, an optimally rebuilding version of a rule with dynamic dependencies.
+
+The `tar` function opens the `manifest` file and puts files listed in it into an archive at `output`.
+
+Thus, the dependencies of `tar` are dynamic: we want to rerun `tar` if either the manifest changes (static) or one of its input files changes (dynamic).
diff --git a/tar-sketch/a.txt b/tar-sketch/a.txt
@@ -0,0 +1 @@
+edit
diff --git a/tar-sketch/b.txt b/tar-sketch/b.txt
@@ -0,0 +1 @@
+b.txt
diff --git a/tar-sketch/manifest b/tar-sketch/manifest
@@ -0,0 +1,2 @@
+a.txt
+b.txt
diff --git a/tar-sketch/tar.py b/tar-sketch/tar.py
@@ -0,0 +1,55 @@
+import hashlib
+import os
+import pickle
+import subprocess
+
+db: dict = dict()
+
+try:
+ with open(b".makedb", "rb") as f:
+ db = pickle.load(f)
+except BaseException as e:
+ pass
+
+
+def file_hash(filename):
+ with open(filename, "rb") as f:
+ res = os.stat(filename)
+ if res.st_mtime_ns == db.get(f"file:{filename}:modtime", None):
+ print("skipping hash", db[f"file:{filename}:hash"])
+ return db[f"file:{filename}:hash"]
+ db[f"file:{filename}:modtime"] = res.st_mtime_ns
+ h = hashlib.sha256()
+ for chunk in f:
+ h.update(chunk)
+ d = h.hexdigest()
+ db[f"file:{filename}:hash"] = d
+ print("hashed", db[f"file:{filename}:hash"])
+ return d
+
+
+def tar(manifest=b"manifest", output=b"archive.tar.gz"):
+ # Early exit if hashes match previous run
+ if "tar:file_hashes" in db:
+ for fname, fhash in db["tar:file_hashes"].items():
+ if file_hash(fname) != fhash:
+ break
+ else:
+ return
+
+ with open(manifest, "rb") as f:
+ manifest_lines = f.read().splitlines()
+ file_hashes = dict()
+ file_hashes[manifest] = file_hash(manifest)
+ for fname in manifest_lines:
+ file_hashes[fname] = file_hash(fname)
+ db["tar:file_hashes"] = file_hashes
+
+ print("running tar")
+ subprocess.run([b"tar", b"cvzf", output, *manifest_lines])
+
+
+tar()
+
+with open(b".makedb", "wb") as f:
+ pickle.dump(db, f)