commit 81f44ad2bc289f1f77b5854c1bad4a7d20755fbd
parent 627941f7673222f72eb5c654bcffe08504a2c798
Author: gracefu <81774659+gracefuu@users.noreply.github.com>
Date: Tue, 15 Apr 2025 22:20:37 +0800
Modify the suspending scheduler to use the Store, and edit a bunch of comments
Diffstat:
| M | examples.py | | | 8 | ++++---- |
| M | make.py | | | 69 | ++++++++++++++++++++++++++++++++++++++++++++++++++------------------- |
2 files changed, 54 insertions(+), 23 deletions(-)
diff --git a/examples.py b/examples.py
@@ -82,10 +82,10 @@ async def main():
)
# Note that `build(...)` will wait for all detached jobs to complete before returning.
- # You may choose to use the lower level `build.fetch(...)` function instead, which does not wait for detached jobs.
- # You must then ensure `build.wait()` is called later to wait for detached jobs to complete.
- detach(build.fetch(_eg_rec(2345)))
- await build.fetch(_eg_rec(3456))
+ # You may instead use `build.build(...)`, which does not wait for detached jobs.
+ # You should ensure `build.wait()` is called eventually so detached jobs can complete.
+ detach(build.build(_eg_rec(2345)))
+ await build.build(_eg_rec(3456))
await build.wait()
diff --git a/make.py b/make.py
@@ -3,21 +3,46 @@ make.py
-------
Design inspired by the paper "Build Systems à la Carte"
+
- https://github.com/snowleopard/build
- https://www.microsoft.com/en-us/research/wp-content/uploads/2018/03/build-systems.pdf
Key concepts:
+
- The goal is to maintain an up-to-date *store* mapping *tasks* to *values*.
- Tasks are described using rules, functions from parameters to tasks.
-- Each rule can choose its own caching policy, the default being a persistent cache keyed by hashes.
-- The scheduler can also be adjusted, but currently the only scheduler is a top-down suspending scheduler.
+- Each rule can choose its own caching policy; the default is a persistent cache keyed by hashes.
+- The current scheduler is a top-down (suspending) scheduler.
make.py improves upon the paper's design in a few ways:
-- Task keys (for book-keeping purposes) are automatically derived from the rule functions
-- Supporting per-task cache policies rather than global rebuilders
-- Using modern Python async features for concurrent execution
-TODO:
+- Task keys (for book-keeping purposes) are automatically derived from the rule functions.
+- Tasks are executed concurrently.
+- We split the two concepts Rebuilder and Scheduler into three concepts:
+
+ - (Per-task) Caching policies.
+ - (Global) Updating strategy.
+ - (Global) Metadata updaters.
+
+# Why we re-interpret the concepts Rebuilder and Scheduler
+
+The paper merges the concept of "metadata updaters" into the Rebuilder and Scheduler.
+This sort of makes sense as different rebuilders and schedulers require different metadata.
+
+However, it means that a rebuilder may need to override the `fetch` function in a call
+in order to ensure the metadata required for the rebuilder is created,
+and it encourages building metadata in a local, ad-hoc way.
+Furthermore, a rebuilder may sometimes require the same metadata as a scheduler's fetch function;
+for instance, tracking dependency relationships is required for both the
+topological sort scheduler and trace-based rebuilders (e.g. the constructive trace rebuilder).
+
+So, we instead factor out the metadata updating portion of both rebuilders and schedulers
+into a global metadata updater, which can be viewed as yet another wrapper around rules.
+However, as this must apply on a global level to support the whole scheduling strategy,
+metadata updaters are defined at a global level, unlike the per-task caching policies.
+
+# TODO
+
- Make files on the filesystem a core concept as opposed to merely something you can do.
"""
@@ -254,7 +279,6 @@ class Rules:
possible_values.append(past_value)
if possible_values:
- store.key_value[task_key] = possible_values[0]
return possible_values[0]
new_inputs = []
@@ -265,7 +289,6 @@ class Rules:
return result
new_value = await inner_rule_fn(track_fetch, task_key, store, *args)
- store.key_value[task_key] = new_value
store.key_info[task_key].append((new_inputs, new_value))
return new_value
@@ -324,34 +347,42 @@ class Detach:
detach = Detach()
-class SuspendingFetch:
+class SuspendingScheduler:
__slots__ = "store", "done", "waits"
+ store: Store
+ done: set[TaskKey]
+ waits: dict[TaskKey, asyncio.Event]
def __init__(self, store: Store):
self.store = store
- self.done = dict()
+ self.done = set()
self.waits = dict()
async def wait(self):
while detach._background_tasks:
await asyncio.gather(*detach._background_tasks)
- async def fetch(self, task: Task):
+ def build(self, task: Task):
+ return self.fetch_once(task)
+
+ async def fetch_once(self, task: Task):
task_key = task.task_key
wait = None
event = None
if task_key in self.done:
- return self.done[task_key]
+ return self.store.key_value[task_key]
if task_key in self.waits:
wait = self.waits[task_key]
if wait:
await wait.wait()
- return self.done[task_key]
+ return self.store.key_value[task_key]
event = self.waits[task_key] = asyncio.Event()
- result = await task(self.fetch, self.store)
- self.done[task_key] = result
+ self.store.key_value[task_key] = result = await task(
+ self.fetch_once, self.store
+ )
+ self.done.add(task_key)
event.set()
return result
@@ -362,17 +393,17 @@ class Build:
def __init__(self, filename, rules=_rules):
self._store = Store(filename, rules)
- self._fetch = SuspendingFetch(self._store)
+ self._fetch = SuspendingScheduler(self._store)
async def __call__(self, task: Task):
- await self.fetch(task)
+ await self.build(task)
await self.wait()
def wait(self):
return self._fetch.wait()
- def fetch(self, task: Task):
- return self._fetch.fetch(task)
+ def build(self, task: Task):
+ return self._fetch.build(task)
def __enter__(self):
self._store.__enter__()