pymake

A build system based on Build Systems à la Carte
git clone https://git.grace.moe/pymake
Log | Files | Refs | README

commit 7a0b376f9f09c65da7ba683ed4cc1034dffc345e
parent e30ca6a154cafff84da8b0f89cc660fa8e5717c9
Author: gracefu <81774659+gracefuu@users.noreply.github.com>
Date:   Sun, 20 Apr 2025 05:54:23 +0800

Add proof of concept of build system based on a single primitive

Diffstat:
M tar-sketch/README.md | 6 ++++--
M tar-sketch/a.txt | 2 +-
M tar-sketch/tar.py | 12 +++++++-----
A tar-sketch/tar2.py | 127 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 139 insertions(+), 8 deletions(-)

diff --git a/tar-sketch/README.md b/tar-sketch/README.md @@ -1,5 +1,7 @@ -This directory contains a sketch of how one might do an optimal rebuilding version of a rule with dynamic dependencies by hand. +This directory contains a sketch of optimal rebuilding of a rule with dynamic dependencies. The `tar` function opens the `manifest` file and puts files listed in it into an archive at `output`. - Thus, the dependencies of `tar` is dynamic, we want to rerun `tar` if either the manifest changes (static) or if one of its input files changed (dynamic). + +tar.py is a version where the dependency tracking logic is written by hand. +tar2.py is a version using a minimal build system with a single primitive (rerun_if). diff --git a/tar-sketch/a.txt b/tar-sketch/a.txt @@ -1 +1 @@ -edit +Sun Apr 20 06:11:20 AM +08 2025 diff --git a/tar-sketch/tar.py b/tar-sketch/tar.py @@ -11,20 +11,22 @@ try: except BaseException as e: pass +file_modtime = lambda f: os.stat(f.fileno()).st_mtime_ns + def file_hash(filename): with open(filename, "rb") as f: - res = os.stat(filename) - if res.st_mtime_ns == db.get(f"file:{filename}:modtime", None): + modtime = file_modtime(f) + if modtime == db.get(f"file:{filename}:modtime", None): print("skipping hash", db[f"file:{filename}:hash"]) return db[f"file:{filename}:hash"] - db[f"file:{filename}:modtime"] = res.st_mtime_ns + db[f"file:{filename}:modtime"] = modtime h = hashlib.sha256() for chunk in f: h.update(chunk) d = h.hexdigest() db[f"file:{filename}:hash"] = d - print("hashed", db[f"file:{filename}:hash"]) + print("hash", filename, d) return d @@ -45,7 +47,7 @@ def tar(manifest=b"manifest", output=b"archive.tar.gz"): file_hashes[fname] = file_hash(fname) db["tar:file_hashes"] = file_hashes - print("running tar") + print("tar", "cvzf", output, *manifest_lines) subprocess.run([b"tar", b"cvzf", output, *manifest_lines]) diff --git a/tar-sketch/tar2.py b/tar-sketch/tar2.py @@ -0,0 +1,127 @@ +import contextvars +import functools +import inspect 
+import pickle + +db: dict = dict() + +try: + with open(b".makedb", "rb") as f: + db = pickle.load(f) +except BaseException as e: + pass + + +def get_or_set(d, k, v): + if k in d: + return d[k] + d[k] = v + return v + + +rerun_ifs_var = contextvars.ContextVar("rerun_ifs") + + +def with_rerun_context(rerun_ifs, f, /, *args, **kwargs): + rerun_ifs_var.set(rerun_ifs) + return f(*args, **kwargs) + + +def rerun_if(f_str): + rerun_ifs_var.get().append(f_str) + + +def cache_conditionally( + keys_fn=lambda *args, **kwargs: (args, tuple(sorted(kwargs.items()))), + if_cached_fn=lambda cached_result, /, *_, **__: cached_result, +): + def decorator(fn): + sig = inspect.signature(fn) + defaults = { + p.name: p.default + for p in sig.parameters.values() + if p.default != inspect.Parameter.empty + } + sig = sig.replace( + parameters=tuple( + inspect.Parameter(name=p.name, kind=p.kind) + for p in sig.parameters.values() + ) + ) + sig_str = sig.format().lstrip("(").rstrip(")") + + @functools.wraps(fn) + def wrapped(*args, **kwargs): + keys = keys_fn(*args, **kwargs) + if ("track", "result", fn.__qualname__, keys) in db: + old_rerun_cond_list = get_or_set( + db, ("track", "rerun_ifs", fn.__qualname__, keys), [] + ) + for f in old_rerun_cond_list: + new_kwargs = defaults.copy() + new_kwargs.update(kwargs) + res = eval(f"lambda {sig_str}: {f}")(*args, **new_kwargs) + if res: + break + else: + return if_cached_fn( + db[("track", "result", fn.__qualname__, keys)], *args, **kwargs + ) + + context = contextvars.copy_context() + rerun_ifs = [] + result = context.run(with_rerun_context, rerun_ifs, fn, *args, **kwargs) + db[("track", "rerun_ifs", fn.__qualname__, keys)] = rerun_ifs + db[("track", "result", fn.__qualname__, keys)] = result + return result + + return wrapped + + return decorator + + +import hashlib +import os +import subprocess + +from io import BufferedReader + + +file_modtime = lambda f: os.stat(f.fileno()).st_mtime_ns + + +@cache_conditionally(lambda f: f.name) +def 
_file_hash(f: BufferedReader): + rerun_if(f"file_modtime(f) != {repr(file_modtime(f))}") + h = hashlib.sha256() + for chunk in f: + h.update(chunk) + d = h.hexdigest() + print("hash", f.name, d) + return d + + +def file_hash(f: BufferedReader | bytes | str): + if isinstance(f, bytes) or isinstance(f, str): + with open(f, "rb") as f: + return _file_hash(f) + return _file_hash(f) + + +@cache_conditionally() +def tar(manifest=b"manifest", output=b"archive.tar.gz"): + with open(manifest, "rb") as manifest_f: + manifest_lines = manifest_f.read().splitlines() + rerun_if(f"file_hash({repr(manifest)}) != {repr(file_hash(manifest_f))}") + for fname in manifest_lines: + rerun_if(f"file_hash({repr(fname)}) != {repr(file_hash(fname))}") + + print("tar", "cvzf", output, *manifest_lines) + subprocess.run([b"tar", b"cvzf", output, *manifest_lines]) + rerun_if(f"file_hash({repr(output)}) != {repr(file_hash(output))}") + + +tar() + +with open(b".makedb", "wb") as f: + pickle.dump(db, f)