commit 7a0b376f9f09c65da7ba683ed4cc1034dffc345e
parent e30ca6a154cafff84da8b0f89cc660fa8e5717c9
Author: gracefu <81774659+gracefuu@users.noreply.github.com>
Date: Sun, 20 Apr 2025 05:54:23 +0800
Add proof of concept of build system based on a single primitive
Diffstat:
4 files changed, 139 insertions(+), 8 deletions(-)
diff --git a/tar-sketch/README.md b/tar-sketch/README.md
@@ -1,5 +1,7 @@
-This directory contains a sketch of how one might do an optimal rebuilding version of a rule with dynamic dependencies by hand.
+This directory contains a sketch of optimal rebuilding of a rule with dynamic dependencies.
The `tar` function opens the `manifest` file and puts files listed in it into an archive at `output`.
-
Thus, the dependencies of `tar` are dynamic: we want to rerun `tar` if either the manifest changes (static) or one of its input files changes (dynamic).
+
+`tar.py` is a version where the dependency-tracking logic is written by hand.
+`tar2.py` is a version using a minimal build system with a single primitive (`rerun_if`).
diff --git a/tar-sketch/a.txt b/tar-sketch/a.txt
@@ -1 +1 @@
-edit
+Sun Apr 20 06:11:20 AM +08 2025
diff --git a/tar-sketch/tar.py b/tar-sketch/tar.py
@@ -11,20 +11,22 @@ try:
except BaseException as e:
pass
+file_modtime = lambda f: os.stat(f.fileno()).st_mtime_ns
+
def file_hash(filename):
with open(filename, "rb") as f:
- res = os.stat(filename)
- if res.st_mtime_ns == db.get(f"file:{filename}:modtime", None):
+ modtime = file_modtime(f)
+ if modtime == db.get(f"file:{filename}:modtime", None):
print("skipping hash", db[f"file:{filename}:hash"])
return db[f"file:{filename}:hash"]
- db[f"file:{filename}:modtime"] = res.st_mtime_ns
+ db[f"file:{filename}:modtime"] = modtime
h = hashlib.sha256()
for chunk in f:
h.update(chunk)
d = h.hexdigest()
db[f"file:{filename}:hash"] = d
- print("hashed", db[f"file:{filename}:hash"])
+ print("hash", filename, d)
return d
@@ -45,7 +47,7 @@ def tar(manifest=b"manifest", output=b"archive.tar.gz"):
file_hashes[fname] = file_hash(fname)
db["tar:file_hashes"] = file_hashes
- print("running tar")
+ print("tar", "cvzf", output, *manifest_lines)
subprocess.run([b"tar", b"cvzf", output, *manifest_lines])
diff --git a/tar-sketch/tar2.py b/tar-sketch/tar2.py
@@ -0,0 +1,127 @@
+import contextvars
+import functools
+import inspect
+import pickle
+
+# Persistent build database. It is filled with tuple keys of the form
+# ("track", "result" | "rerun_ifs", <function qualname>, <call keys>).
+db: dict = {}
+
+# Best-effort load of the database written by a previous run. Any failure
+# (no file yet, truncated or incompatible pickle) leaves the empty database
+# in place, which simply means everything is rebuilt.
+# NOTE(review): pickle.load can execute arbitrary code from the file, so
+# .makedb must only ever come from a trusted source.
+try:
+    with open(b".makedb", "rb") as f:
+        db = pickle.load(f)
+except Exception:
+    # Exception rather than BaseException, so that KeyboardInterrupt and
+    # SystemExit are not silently swallowed.
+    pass
+
+
+def get_or_set(d, k, v):
+    """Return ``d[k]`` if ``k`` is present, else store ``v`` there and return it."""
+    if k not in d:
+        d[k] = v
+        return v
+    return d[k]
+
+
+# Context variable holding the list that rerun_if() appends to. It is set by
+# with_rerun_context() and has no default value.
+rerun_ifs_var = contextvars.ContextVar("rerun_ifs")
+
+
+def with_rerun_context(rerun_ifs, f, /, *args, **kwargs):
+    """Make ``rerun_ifs`` the active condition list, then call ``f``.
+
+    ``rerun_ifs`` is stored in ``rerun_ifs_var`` so that ``rerun_if`` calls
+    made while ``f(*args, **kwargs)`` runs append to it. Returns whatever
+    ``f`` returns.
+    """
+    rerun_ifs_var.set(rerun_ifs)
+    outcome = f(*args, **kwargs)
+    return outcome
+
+
+def rerun_if(f_str):
+    """Record ``f_str`` as a rerun condition in the active condition list.
+
+    ``f_str`` is appended to the list currently stored in ``rerun_ifs_var``.
+    """
+    active_conditions = rerun_ifs_var.get()
+    active_conditions.append(f_str)
+
+
+def cache_conditionally(
+    keys_fn=lambda *args, **kwargs: (args, tuple(sorted(kwargs.items()))),
+    if_cached_fn=lambda cached_result, /, *_, **__: cached_result,
+):
+    """Build a decorator that caches a function's result in ``db``.
+
+    While the decorated function runs, it may call ``rerun_if(expr)`` to
+    record the source text of a Python expression. On a later call with the
+    same key, each recorded expression is evaluated with the function's
+    parameter names bound to the current arguments. The function is re-run
+    if any expression is truthy; otherwise the stored result is reused.
+
+    Args:
+        keys_fn: Called with the call's ``(*args, **kwargs)``; its return
+            value is used as part of the ``db`` key for that call.
+        if_cached_fn: Called as ``if_cached_fn(cached, *args, **kwargs)`` on
+            a cache hit; its return value is what the caller receives.
+
+    Returns:
+        The decorator to apply to the function being cached.
+    """
+
+    def decorator(fn):
+        sig = inspect.signature(fn)
+        # Same parameters, without defaults or annotations. The return
+        # annotation is dropped too: otherwise the rendered signature would
+        # end in "-> ..." and could not be used as a lambda parameter list.
+        bare_sig = sig.replace(
+            parameters=tuple(
+                inspect.Parameter(name=p.name, kind=p.kind)
+                for p in sig.parameters.values()
+            ),
+            return_annotation=inspect.Signature.empty,
+        )
+        # "(a, b)" -> "a, b"; str() works on versions without Signature.format().
+        sig_str = str(bare_sig)[1:-1]
+
+        @functools.wraps(fn)
+        def wrapped(*args, **kwargs):
+            keys = keys_fn(*args, **kwargs)
+            result_key = ("track", "result", fn.__qualname__, keys)
+            rerun_key = ("track", "rerun_ifs", fn.__qualname__, keys)
+
+            if result_key in db:
+                # Bind through the real signature so defaults are applied
+                # only to parameters the caller did not supply. Blindly
+                # merging all defaults into kwargs raises "multiple values
+                # for argument" when a defaulted parameter is passed
+                # positionally.
+                bound = sig.bind(*args, **kwargs)
+                bound.apply_defaults()
+                for cond in get_or_set(db, rerun_key, []):
+                    # NOTE(review): cond is text read back from the pickled
+                    # database and is eval'd here; the database must come
+                    # from a trusted source.
+                    res = eval(f"lambda {sig_str}: {cond}")(
+                        *bound.args, **bound.kwargs
+                    )
+                    if res:
+                        break
+                else:
+                    # No condition fired: the stored result is still valid.
+                    return if_cached_fn(db[result_key], *args, **kwargs)
+
+            # Run fn in a copy of the current context so that the condition
+            # list installed for it is not visible to the caller's context.
+            context = contextvars.copy_context()
+            rerun_ifs = []
+            result = context.run(with_rerun_context, rerun_ifs, fn, *args, **kwargs)
+            db[rerun_key] = rerun_ifs
+            db[result_key] = result
+            return result
+
+        return wrapped
+
+    return decorator
+
+
+import hashlib
+import os
+import subprocess
+
+from io import BufferedReader
+
+
+def file_modtime(f):
+    """Return the modification time, in nanoseconds, of the open file ``f``."""
+    return os.stat(f.fileno()).st_mtime_ns
+
+
+@cache_conditionally(lambda f: f.name)
+def _file_hash(f: BufferedReader):
+    """Return the SHA-256 hex digest of the contents of the open file ``f``.
+
+    The result is cached under ``f.name`` and recomputed only when the
+    file's modification time differs from the one recorded while hashing.
+    """
+    rerun_if(f"file_modtime(f) != {repr(file_modtime(f))}")
+    # Hash from the start of the file. The result is cached per file name,
+    # so it must not depend on how much of ``f`` the caller already consumed
+    # (e.g. a caller that has just read the file to its end).
+    if f.seekable():
+        f.seek(0)
+    h = hashlib.sha256()
+    # Fixed-size reads keep memory bounded for binary files that contain
+    # few or no newline bytes.
+    for chunk in iter(lambda: f.read(65536), b""):
+        h.update(chunk)
+    d = h.hexdigest()
+    print("hash", f.name, d)
+    return d
+
+
+def file_hash(f: BufferedReader | bytes | str):
+    """Return ``_file_hash`` of ``f``, opening it first when given a path.
+
+    A ``bytes`` or ``str`` argument is treated as a path and opened in
+    binary mode for the duration of the call; anything else is passed to
+    ``_file_hash`` as is.
+    """
+    if not isinstance(f, (bytes, str)):
+        return _file_hash(f)
+    with open(f, "rb") as opened:
+        return _file_hash(opened)
+
+
+@cache_conditionally()
+def tar(manifest=b"manifest", output=b"archive.tar.gz"):
+    """Archive the files listed in ``manifest`` into ``output`` using ``tar``.
+
+    Each line of ``manifest`` is taken as one file name. Rerun conditions
+    are recorded on the hash of the manifest, of every listed file and of
+    the produced archive, so a cached call is repeated when any of them
+    no longer matches.
+
+    Raises:
+        subprocess.CalledProcessError: if ``tar`` exits with a non-zero status.
+    """
+    with open(manifest, "rb") as manifest_f:
+        manifest_lines = manifest_f.read().splitlines()
+        rerun_if(f"file_hash({repr(manifest)}) != {repr(file_hash(manifest_f))}")
+    for fname in manifest_lines:
+        rerun_if(f"file_hash({repr(fname)}) != {repr(file_hash(fname))}")
+
+    print("tar", "cvzf", output, *manifest_lines)
+    # check=True: a failing tar must raise, so that the failure is not
+    # stored as a successful, cached build.
+    subprocess.run([b"tar", b"cvzf", output, *manifest_lines], check=True)
+    rerun_if(f"file_hash({repr(output)}) != {repr(file_hash(output))}")
+
+
+# Run the build. The call goes through the cache_conditionally wrapper, so
+# the body of tar() only executes when there is no stored result or one of
+# the stored rerun conditions is truthy.
+tar()
+
+# Persist the database for the next invocation.
+with open(b".makedb", "wb") as f:
+    pickle.dump(db, f)