pymake

A build system based on Build Systems à la Carte
git clone https://git.grace.moe/pymake
Log | Files | Refs | README

commit 81f44ad2bc289f1f77b5854c1bad4a7d20755fbd
parent 627941f7673222f72eb5c654bcffe08504a2c798
Author: gracefu <81774659+gracefuu@users.noreply.github.com>
Date:   Tue, 15 Apr 2025 22:20:37 +0800

Modify the suspending scheduler to use the Store, and edit a bunch of comments

Diffstat:
M examples.py | 8 ++++----
M make.py | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++-------------------
2 files changed, 54 insertions(+), 23 deletions(-)

diff --git a/examples.py b/examples.py @@ -82,10 +82,10 @@ async def main(): ) # Note that `build(...)` will wait for all detached jobs to complete before returning. - # You may choose to use the lower level `build.fetch(...)` function instead, which does not wait for detached jobs. - # You must then ensure `build.wait()` is called later to wait for detached jobs to complete. - detach(build.fetch(_eg_rec(2345))) - await build.fetch(_eg_rec(3456)) + # You may instead use `build.build(...)`, which does not wait for detached jobs. + # You should ensure `build.wait()` is called eventually so detached jobs can complete. + detach(build.build(_eg_rec(2345))) + await build.build(_eg_rec(3456)) await build.wait() diff --git a/make.py b/make.py @@ -3,21 +3,46 @@ make.py ------- Design inspired by the paper "Build Systems à la Carte" + - https://github.com/snowleopard/build - https://www.microsoft.com/en-us/research/wp-content/uploads/2018/03/build-systems.pdf Key concepts: + - The goal is to maintain an up-to-date *store* mapping *tasks* to *values*. - Tasks are described using rules, functions from parameters to tasks. -- Each rule can choose its own caching policy, the default being a persistent cache keyed by hashes. -- The scheduler can also be adjusted, but currently the only scheduler is a top-down suspending scheduler. +- Each rule can choose its own caching policy, the default is a persistent cache keyed by hashes. +- The current scheduler is a top-down (suspending) scheduler. make.py improves upon the paper's design in a few ways: -- Task keys (for book-keeping purposes) are automatically derived from the rule functions -- Supporting per-task cache policies rather than global rebuilders -- Using modern Python async features for concurrent execution -TODO: +- Task keys (for book-keeping purposes) are automatically derived from the rule functions. +- Tasks are executed concurrently. 
+- We split the two concepts Rebuilder and Scheduler into three concepts: + + - (Per-task) Caching policies. + - (Global) Updating strategy. + - (Global) Metadata updaters. + +# Why we re-interpret the concepts Rebuilder and Scheduler + +The paper merges the concept of "metadata updaters" in the Rebuilder and Scheduler. +This sort of makes sense as different rebuilders and schedulers require different metadata. + +However, it means that a rebuilder may need to override the `fetch` function in a call +in order to ensure the metadata required for the rebuilder is created, +and it encourages a local way to build metadata information. +Furthermore, a rebuilder may sometimes require the same metadata as a scheduler's fetch function, +for instance tracking dependency relationships is required for both the +topological sort scheduler as well as trace-based rebuilders (e.g. constructive trace rebuilder). + +So, we instead factor out the metadata updating portion of both rebuilders and schedulers +into a global metadata updater, which can be viewed as yet another wrapper around rules. +However, as this must apply on a global level to support the whole scheduling strategy, +metadata updaters are defined at a global level, unlike the per-task caching policies. + +# TODO + - Make files on the filesystem a core concept as opposed to merely something you can do. 
""" @@ -254,7 +279,6 @@ class Rules: possible_values.append(past_value) if possible_values: - store.key_value[task_key] = possible_values[0] return possible_values[0] new_inputs = [] @@ -265,7 +289,6 @@ class Rules: return result new_value = await inner_rule_fn(track_fetch, task_key, store, *args) - store.key_value[task_key] = new_value store.key_info[task_key].append((new_inputs, new_value)) return new_value @@ -324,34 +347,42 @@ class Detach: detach = Detach() -class SuspendingFetch: +class SuspendingScheduler: __slots__ = "store", "done", "waits" + store: Store + done: set[TaskKey] + waits: dict[TaskKey, asyncio.Event] def __init__(self, store: Store): self.store = store - self.done = dict() + self.done = set() self.waits = dict() async def wait(self): while detach._background_tasks: await asyncio.gather(*detach._background_tasks) - async def fetch(self, task: Task): + def build(self, task: Task): + return self.fetch_once(task) + + async def fetch_once(self, task: Task): task_key = task.task_key wait = None event = None if task_key in self.done: - return self.done[task_key] + return self.store.key_value[task_key] if task_key in self.waits: wait = self.waits[task_key] if wait: await wait.wait() - return self.done[task_key] + return self.store.key_value[task_key] event = self.waits[task_key] = asyncio.Event() - result = await task(self.fetch, self.store) - self.done[task_key] = result + self.store.key_value[task_key] = result = await task( + self.fetch_once, self.store + ) + self.done.add(task_key) event.set() return result @@ -362,17 +393,17 @@ class Build: def __init__(self, filename, rules=_rules): self._store = Store(filename, rules) - self._fetch = SuspendingFetch(self._store) + self._fetch = SuspendingScheduler(self._store) async def __call__(self, task: Task): - await self.fetch(task) + await self.build(task) await self.wait() def wait(self): return self._fetch.wait() - def fetch(self, task: Task): - return self._fetch.fetch(task) + def build(self, task: 
Task): + return self._fetch.build(task) def __enter__(self): self._store.__enter__()