commit 8c4e9f385ac40871964ea51fe76fa97f41c13180
parent fe506da7249c9921dee36219c25a3dccbf4b5560
Author: gracefu <81774659+gracefuu@users.noreply.github.com>
Date: Tue, 15 Apr 2025 00:56:27 +0800
First sketch of build system based on a suspending scheduler and constructive trace rebuilder
Diffstat:
| A | .gitignore | | | 1 | + |
| A | make.py | | | 293 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
2 files changed, 294 insertions(+), 0 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1 @@
+make.db
diff --git a/make.py b/make.py
@@ -0,0 +1,293 @@
+"""
+pymake
+------
+
+Design inspired by the paper `Build Systems à la Carte'
+
+- https://github.com/snowleopard/build
+- https://www.microsoft.com/en-us/research/wp-content/uploads/2018/03/build-systems.pdf
+
+As such, we will adopt mostly the same vocabulary:
+
+- The goal of any build system is to bring up to date a *store* that implements a mapping from *keys* to *values*.
+- Keys/values provided by the user are *inputs*, keys/values produced by the build system are *outputs*, and all other keys/values are *intermediate*.
+- *Persistent build information* are additional information kept across invocations, in addition to the store itself.
+- *Task descriptions* specify how the new value for a key should be computed based on the (current) values of the dependencies.
+- A *build system* takes a set of task descriptions, a *target key*, a store, and updates the store such that the target key is up to date.
+
+In our system, we make some slight adjustments
+
+- In fact, we don't distinguish tasks and keys -- we pass around the tasks themselves.
+ - For storage purposes, we treat task equality based on string equality of the task function.
+- We focus on implementing the suspending scheduler and constructive traces rebuilder.
+- As with the paper, we don't handle dependency cycles, since it's unclear which key to "seed" and with what "seed value".
+- While `fetch` in the paper is a parameter that's passed around, we just have it be a global function in our case.
+- Similarly, while `rebuilder` is a global that's fixed for the whole build system, we interpret it really as just a fancy way to say @cache, and so it really makes more sense to let each task choose its rebuild strategy.
+- This means that there is no such thing as an "input" or "output"/"intermediate" key, an input key is simply a key that hasn't been wrapped by a rebuilder.
+"""
+
+import asyncio
+import functools
+import pickle
+import inspect
+import collections
+import hashlib
+
+from typing import Awaitable, Callable, Any, Concatenate, Optional
+
+# Rules are functions that take in python primitives (bool, int, none, str) and tasks, and output a task.
+# Tasks are coroutine functions with a single argument `build`
+# All rules must be registered with the decorator @rule.
+#
+# For convenience, rules with no arguments can also be created by decorating @task on the coroutine function directly.
+
+
+def make_hash(o: Any) -> bytes:
+ h = hashlib.sha256()
+ if isinstance(o, bytes):
+ h.update(b"s")
+ h.update(o)
+ else:
+ h.update(b"r")
+ h.update(repr(o).encode("utf-8"))
+ return h.digest()
+
+
+class Task:
+ @staticmethod
+ def new(rule, *args):
+ return Task(
+ (
+ rule.rule_key,
+ *(arg.task_key if hasattr(arg, "task_key") else arg for arg in args),
+ ),
+ rule,
+ *args,
+ )
+
+ def __init__(self, task_key, rule, *args):
+ self.task_key = task_key
+ self.rule = rule
+ self.args = args
+ self.hash = hash(self.task_key)
+
+ def __call__(self, fetch: "Fetch"):
+ return self.rule.rule_fn(fetch, *self.args)
+
+ def __repr__(self):
+ return repr(self.task_key)
+
+ def __eq__(self, other):
+ return self.task_key == other.task_key
+
+ def __hash__(self):
+ return self.hash
+
+
+class Fetch:
+ fetch_fn: Callable[[Task], Awaitable[Any]]
+ task: Task
+ build: "Build"
+
+ def __init__(self, fetch_fn, task, build):
+ self.fetch_fn = fetch_fn
+ self.task = task
+ self.build = build
+
+ def __call__(self, dep: Task):
+ return self.fetch_fn(dep)
+
+
+class Rule:
+ rule_key: str
+ rule_fn: Callable[Concatenate[Fetch, ...], Awaitable[Any]]
+
+ @staticmethod
+ def new(rule_fn):
+ name = rule_fn.__name__
+ source = inspect.getsource(rule_fn)
+ h = hashlib.sha256(source.encode("utf-8")).hexdigest()[:16]
+ return Rule(f"{name}-{len(source)}-{h}", rule_fn)
+
+ def __init__(self, rule_key, rule_fn):
+ self.rule_key = rule_key
+ self.rule_fn = rule_fn
+ self.hash = hash(self.rule_key)
+
+ def __call__(self, *args):
+ return Task.new(self, *args)
+
+ def __eq__(self, other):
+ return self.rule_key == other.rule_key
+
+ def __hash__(self):
+ return self.hash
+
+
+class Rules:
+ def __init__(self):
+ self.rules = dict()
+
+ def rule(self):
+ def decorator(rule_fn):
+ rule = Rule.new(rule_fn)
+ self.rules[rule.rule_key] = rule
+ return rule
+
+ return decorator
+
+ def eval_task_key(self, task_key) -> Optional[Task]:
+ rule_key, *arg_keys = task_key
+ if rule_key not in self.rules:
+ return None
+ for arg in arg_keys:
+ if isinstance(arg, tuple) and arg[0] not in self.rules:
+ return None
+ rule = self.rules[rule_key]
+ args = (
+ self.eval_task_key(arg) if isinstance(arg, tuple) else arg
+ for arg in arg_keys
+ )
+ return rule(*args)
+
+ # Wraps a rule so it only gets rebuilt if the constructive traces don't match
+ def ctRebuilder(self):
+ def decorator(rule: Rule):
+ @functools.wraps(rule.rule_fn)
+ async def new_rule_fn(fetch, *args):
+ past_runs = fetch.build.key_info[fetch.task.task_key]
+ output_value = fetch.build.key_value[fetch.task.task_key]
+ possible_values = []
+ for past_inputs, past_value in past_runs:
+ for past_input_key, past_input_hash in past_inputs:
+ input_task = self.eval_task_key(past_input_key)
+ if not input_task:
+ break
+ current_input_value = await fetch(input_task)
+ if make_hash(current_input_value) != past_input_hash:
+ break
+ else:
+ if output_value == past_value:
+ return past_value
+ possible_values.append(past_value)
+
+ if possible_values:
+ fetch.build.key_value[fetch.task.task_key] = possible_values[0]
+ return possible_values[0]
+
+ new_inputs = []
+
+ async def track(task: Task):
+ result = await fetch(task)
+ new_inputs.append((task.task_key, make_hash(result)))
+ return result
+
+ task = Task.new(rule, *args)
+ new_value = await task(Fetch(track, task, fetch.build))
+ fetch.build.key_value[fetch.task.task_key] = new_value
+ fetch.build.key_info[fetch.task.task_key].append(
+ (new_inputs, new_value)
+ )
+ return new_value
+
+ wrapped_rule = Rule(rule.rule_key, new_rule_fn)
+ self.rules[rule.rule_key] = wrapped_rule
+ return wrapped_rule
+
+ return decorator
+
+
+rules = Rules()
+rule = rules.rule()
+ctRebuilder = rules.ctRebuilder()
+
+
+# Example rule
+@ctRebuilder
+@rule
+async def eg_six(fetch: Fetch):
+ _ = fetch
+ print(f"{6=}")
+ return 6
+
+
+# Example of a rule with a dependency
+@rule
+async def eg_thirtysix(fetch: Fetch):
+ # Rules should be called to get tasks
+ # In this case, the rule had 0 tasks
+ # task = eg_six()
+ # call fetch to mark a dependency,
+ # (and begins execution of it in parallel if possible.)
+ # `await` it get the result of the dependency
+ six1 = await fetch(eg_six())
+ six2 = await fetch(eg_six())
+ print(f"{six1 * six2=}")
+ return six1 * six2
+
+
+# Tasks can be parameterized based on other tasks or just normal values.
+@rule
+async def eg_multiply_add(fetch: Fetch, taskA: Task, taskB: Task, num: int):
+ a, b = await asyncio.gather(fetch(taskA), fetch(taskB))
+ print(f"{a * b + num=}")
+ return a * b + num
+
+
+def fNone():
+ return None
+
+
+class Build:
+ def __init__(self, filename, rules):
+ self.filename = filename
+ self.rules = rules
+
+ self.key_value = collections.defaultdict(fNone)
+ self.key_info = collections.defaultdict(list)
+
+ try:
+ with open(filename, "rb") as f:
+ self.key_value, self.key_info = pickle.load(f)
+ except:
+ pass
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ with open(self.filename, "wb") as f:
+ pickle.dump((self.key_value, self.key_info), f)
+
+
+@rule
+async def eg_file(fetch: Fetch, filename: str):
+ print("file", filename)
+ with open(filename, "r") as f:
+ return f.read()
+
+
+@ctRebuilder
+@rule
+async def eg_rec(fetch: Fetch, i: int):
+ print("rec", i)
+ j = len(await fetch(eg_file("make.py"))) % 2
+ if i > 0:
+ await fetch(eg_rec(i - 1 - j))
+ await fetch(eg_rec(i - 1 - j))
+ else:
+ print("\n".join((await fetch(eg_file("make.py"))).splitlines()[:9]))
+
+
+if __name__ == "__main__":
+ with Build("make.db", rules) as build:
+ done = dict()
+
+ async def fetch(task: Task):
+ if task.task_key in done:
+ return done[task.task_key]
+ result = await task(Fetch(fetch, task, build))
+ done[task.task_key] = result
+ return result
+
+ asyncio.run(fetch(eg_rec(10)))