pymake

A build system based on Build Systems à la Carte
git clone https://git.grace.moe/pymake

commit 8c4e9f385ac40871964ea51fe76fa97f41c13180
parent fe506da7249c9921dee36219c25a3dccbf4b5560
Author: gracefu <81774659+gracefuu@users.noreply.github.com>
Date:   Tue, 15 Apr 2025 00:56:27 +0800

First sketch of a build system based on a suspending scheduler and constructive trace rebuilder

Diffstat:
A .gitignore | 1+
A make.py | 293++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 294 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore
@@ -0,0 +1 @@
+make.db
diff --git a/make.py b/make.py
@@ -0,0 +1,293 @@
+"""
+pymake
+------
+
+Design inspired by the paper *Build Systems à la Carte*
+
+- https://github.com/snowleopard/build
+- https://www.microsoft.com/en-us/research/wp-content/uploads/2018/03/build-systems.pdf
+
+As such, we will adopt mostly the same vocabulary:
+
+- The goal of any build system is to bring up to date a *store* that implements a mapping from *keys* to *values*.
+- Keys/values provided by the user are *inputs*, keys/values produced by the build system are *outputs*, and all other keys/values are *intermediate*.
+- *Persistent build information* is additional information kept across invocations, in addition to the store itself.
+- *Task descriptions* specify how the new value for a key should be computed based on the (current) values of its dependencies.
+- A *build system* takes a set of task descriptions, a *target key*, and a store, and updates the store such that the target key is up to date.
+
+In our system, we make some slight adjustments:
+
+- In fact, we don't distinguish tasks and keys -- we pass around the tasks themselves.
+  - For storage purposes, we treat task equality based on string equality of the task function.
+- We focus on implementing the suspending scheduler and the constructive traces rebuilder.
+- As with the paper, we don't handle dependency cycles, since it's unclear which key to "seed" and with what "seed value".
+- While `fetch` in the paper is a parameter that's passed around, we just have it be a global function in our case.
+- Similarly, while `rebuilder` is a global that's fixed for the whole build system, we interpret it as just a fancy way to say @cache, so it makes more sense to let each task choose its own rebuild strategy.
+- This means that there is no such thing as an "input" or "output"/"intermediate" key; an input key is simply a key that hasn't been wrapped by a rebuilder.
+"""
+
+import asyncio
+import functools
+import pickle
+import inspect
+import collections
+import hashlib
+
+from typing import Awaitable, Callable, Any, Concatenate, Optional
+
+# Rules are functions that take in Python primitives (bool, int, None, str) and tasks, and output a task.
+# Tasks are coroutine functions with a single argument `fetch`.
+# All rules must be registered with the decorator @rule.
+#
+# For convenience, rules with no arguments can also be created by decorating @task on the coroutine function directly.
+
+
+def make_hash(o: Any) -> bytes:
+    h = hashlib.sha256()
+    if isinstance(o, bytes):
+        h.update(b"s")
+        h.update(o)
+    else:
+        h.update(b"r")
+        h.update(repr(o).encode("utf-8"))
+    return h.digest()
+
+
+class Task:
+    @staticmethod
+    def new(rule, *args):
+        return Task(
+            (
+                rule.rule_key,
+                *(arg.task_key if hasattr(arg, "task_key") else arg for arg in args),
+            ),
+            rule,
+            *args,
+        )
+
+    def __init__(self, task_key, rule, *args):
+        self.task_key = task_key
+        self.rule = rule
+        self.args = args
+        self.hash = hash(self.task_key)
+
+    def __call__(self, fetch: "Fetch"):
+        return self.rule.rule_fn(fetch, *self.args)
+
+    def __repr__(self):
+        return repr(self.task_key)
+
+    def __eq__(self, other):
+        return self.task_key == other.task_key
+
+    def __hash__(self):
+        return self.hash
+
+
+class Fetch:
+    fetch_fn: Callable[[Task], Awaitable[Any]]
+    task: Task
+    build: "Build"
+
+    def __init__(self, fetch_fn, task, build):
+        self.fetch_fn = fetch_fn
+        self.task = task
+        self.build = build
+
+    def __call__(self, dep: Task):
+        return self.fetch_fn(dep)
+
+
+class Rule:
+    rule_key: str
+    rule_fn: Callable[Concatenate[Fetch, ...], Awaitable[Any]]
+
+    @staticmethod
+    def new(rule_fn):
+        name = rule_fn.__name__
+        source = inspect.getsource(rule_fn)
+        h = hashlib.sha256(source.encode("utf-8")).hexdigest()[:16]
+        return Rule(f"{name}-{len(source)}-{h}", rule_fn)
+
+    def __init__(self, rule_key, rule_fn):
+        self.rule_key = rule_key
+        self.rule_fn = rule_fn
+        self.hash = hash(self.rule_key)
+
+    def __call__(self, *args):
+        return Task.new(self, *args)
+
+    def __eq__(self, other):
+        return self.rule_key == other.rule_key
+
+    def __hash__(self):
+        return self.hash
+
+
+class Rules:
+    def __init__(self):
+        self.rules = dict()
+
+    def rule(self):
+        def decorator(rule_fn):
+            rule = Rule.new(rule_fn)
+            self.rules[rule.rule_key] = rule
+            return rule
+
+        return decorator
+
+    def eval_task_key(self, task_key) -> Optional[Task]:
+        rule_key, *arg_keys = task_key
+        if rule_key not in self.rules:
+            return None
+        for arg in arg_keys:
+            if isinstance(arg, tuple) and arg[0] not in self.rules:
+                return None
+        rule = self.rules[rule_key]
+        args = (
+            self.eval_task_key(arg) if isinstance(arg, tuple) else arg
+            for arg in arg_keys
+        )
+        return rule(*args)
+
+    # Wraps a rule so it only gets rebuilt if the constructive traces don't match
+    def ctRebuilder(self):
+        def decorator(rule: Rule):
+            @functools.wraps(rule.rule_fn)
+            async def new_rule_fn(fetch, *args):
+                past_runs = fetch.build.key_info[fetch.task.task_key]
+                output_value = fetch.build.key_value[fetch.task.task_key]
+                possible_values = []
+                for past_inputs, past_value in past_runs:
+                    for past_input_key, past_input_hash in past_inputs:
+                        input_task = self.eval_task_key(past_input_key)
+                        if not input_task:
+                            break
+                        current_input_value = await fetch(input_task)
+                        if make_hash(current_input_value) != past_input_hash:
+                            break
+                    else:
+                        if output_value == past_value:
+                            return past_value
+                        possible_values.append(past_value)
+
+                if possible_values:
+                    fetch.build.key_value[fetch.task.task_key] = possible_values[0]
+                    return possible_values[0]
+
+                new_inputs = []
+
+                async def track(task: Task):
+                    result = await fetch(task)
+                    new_inputs.append((task.task_key, make_hash(result)))
+                    return result
+
+                task = Task.new(rule, *args)
+                new_value = await task(Fetch(track, task, fetch.build))
+                fetch.build.key_value[fetch.task.task_key] = new_value
+                fetch.build.key_info[fetch.task.task_key].append(
+                    (new_inputs, new_value)
+                )
+                return new_value
+
+            wrapped_rule = Rule(rule.rule_key, new_rule_fn)
+            self.rules[rule.rule_key] = wrapped_rule
+            return wrapped_rule
+
+        return decorator
+
+
+rules = Rules()
+rule = rules.rule()
+ctRebuilder = rules.ctRebuilder()
+
+
+# Example rule
+@ctRebuilder
+@rule
+async def eg_six(fetch: Fetch):
+    _ = fetch
+    print(f"{6=}")
+    return 6
+
+
+# Example of a rule with a dependency
+@rule
+async def eg_thirtysix(fetch: Fetch):
+    # Rules should be called to get tasks.
+    # In this case, the rule takes no arguments:
+    # task = eg_six()
+    # Call fetch to mark a dependency
+    # (which also begins executing it in parallel when possible),
+    # then `await` it to get the result of the dependency.
+    six1 = await fetch(eg_six())
+    six2 = await fetch(eg_six())
+    print(f"{six1 * six2=}")
+    return six1 * six2
+
+
+# Tasks can be parameterized based on other tasks or just normal values.
+@rule
+async def eg_multiply_add(fetch: Fetch, taskA: Task, taskB: Task, num: int):
+    a, b = await asyncio.gather(fetch(taskA), fetch(taskB))
+    print(f"{a * b + num=}")
+    return a * b + num
+
+
+def fNone():
+    return None
+
+
+class Build:
+    def __init__(self, filename, rules):
+        self.filename = filename
+        self.rules = rules
+
+        self.key_value = collections.defaultdict(fNone)
+        self.key_info = collections.defaultdict(list)
+
+        try:
+            with open(filename, "rb") as f:
+                self.key_value, self.key_info = pickle.load(f)
+        except Exception:
+            pass  # no saved build database yet; start fresh
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        with open(self.filename, "wb") as f:
+            pickle.dump((self.key_value, self.key_info), f)
+
+
+@rule
+async def eg_file(fetch: Fetch, filename: str):
+    print("file", filename)
+    with open(filename, "r") as f:
+        return f.read()
+
+
+@ctRebuilder
+@rule
+async def eg_rec(fetch: Fetch, i: int):
+    print("rec", i)
+    j = len(await fetch(eg_file("make.py"))) % 2
+    if i > 0:
+        await fetch(eg_rec(i - 1 - j))
+        await fetch(eg_rec(i - 1 - j))
+    else:
+        print("\n".join((await fetch(eg_file("make.py"))).splitlines()[:9]))
+
+
+if __name__ == "__main__":
+    with Build("make.db", rules) as build:
+        done = dict()
+
+        async def fetch(task: Task):
+            if task.task_key in done:
+                return done[task.task_key]
+            result = await task(Fetch(fetch, task, build))
+            done[task.task_key] = result
+            return result
+
+        asyncio.run(fetch(eg_rec(10)))
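
The committed make.py already ships example rules (eg_six, eg_thirtysix, eg_file, eg_rec). As an extra illustration of the rule/fetch API described in the header comments, here is a minimal usage sketch. It assumes make.py from this commit is importable as a module named make; the rule names read_file_text and line_count and the database filename example.db are made up for the example, and the driver mirrors the __main__ block above.

# Usage sketch (assumes make.py from this commit is importable as `make`;
# the rule names and "example.db" below are hypothetical).
import asyncio
import make


# An input-style rule: it is not wrapped by a rebuilder, so it re-runs on
# every fetch, mirroring eg_file in make.py.
@make.rule
async def read_file_text(fetch: make.Fetch, filename: str):
    _ = fetch  # no task dependencies
    with open(filename, "r") as f:
        return f.read()


# A derived rule wrapped with the constructive-trace rebuilder: it is only
# re-run when the hash of the fetched file contents changes.
@make.ctRebuilder
@make.rule
async def line_count(fetch: make.Fetch, file_task: make.Task):
    text = await fetch(file_task)
    return len(text.splitlines())


if __name__ == "__main__":
    # Same driver shape as the __main__ block in make.py: a memoizing
    # fetch closure over a persistent Build store.
    with make.Build("example.db", make.rules) as build:
        done = {}

        async def fetch(task: make.Task):
            if task.task_key in done:
                return done[task.task_key]
            result = await task(make.Fetch(fetch, task, build))
            done[task.task_key] = result
            return result

        print(asyncio.run(fetch(line_count(read_file_text("make.py")))))

As in the commit's eg_file/eg_rec pair, the file-reading rule is left unwrapped (it plays the role of an input), while the derived rule is wrapped with ctRebuilder so it is rebuilt only when the hash of the fetched file contents changes.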
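
For intuition about the ctRebuilder above, the following self-contained sketch (independent of make.py; the trace store, keys, and build_double are purely illustrative) shows the verify-or-rebuild decision that constructive traces enable: each past run records the hash of every dependency value plus the output, and a task is skipped when some recorded trace still verifies against the current dependencies.

# Standalone illustration of the constructive-trace check; simplified from
# make.py and not part of its API.
import hashlib


def make_hash(value) -> bytes:
    # Simplified from make.py's make_hash: hash of repr(value).
    return hashlib.sha256(repr(value).encode("utf-8")).digest()


# One constructive trace per past run of a task:
#   ([(dependency_key, hash_of_dependency_value), ...], output_value)
traces = {
    ("double", "n"): [([("n", make_hash(21))], 42)],
}

store = {"n": 21}  # current values of the dependencies


def build_double():
    key = ("double", "n")
    for past_inputs, past_output in traces.get(key, []):
        # Verify: every recorded dependency hash must still match.
        if all(make_hash(store[dep]) == h for dep, h in past_inputs):
            return past_output  # trace verified, reuse the recorded output
    result = store["n"] * 2  # otherwise rebuild...
    traces.setdefault(key, []).append(([("n", make_hash(store["n"]))], result))
    return result  # ...and record a new trace


print(build_double())  # 42, served from the recorded trace
store["n"] = 30        # dependency changed -> hash no longer matches
print(build_double())  # 60, rebuilt and a new trace recorded

make.py keeps the same shape of data in Build.key_info, keyed by task_key, and remembers multiple traces per key so previously seen outputs can be reused without re-running the task.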