blog.grace.moe

Source for the blog blog.grace.moe
git clone https://git.grace.moe/blog.grace.moe

commit 830574e92ea673a6f6ad774b1fee977da7f4817c
parent b1cf98aa6bf191fb68bd3e1d7c5d2a4c6f4a1d05
Author: gracefu <81774659+gracefuu@users.noreply.github.com>
Date:   Wed, 16 Apr 2025 03:09:40 +0800

Rewrite uploader Makefile with a Python-based build system

Diffstat:
M .gitignore | 2 ++
M Makefile | 161 +------------------------------------------------------------------------------
A make.py | 738 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 742 insertions(+), 159 deletions(-)

diff --git a/.gitignore b/.gitignore
@@ -1 +1,3 @@
 public/
+__pycache__/
+make.db
diff --git a/Makefile b/Makefile
@@ -1,162 +1,5 @@
-# BunnyCDN Uploader Makefile
-
-# Dependencies
-#
-# - make
-# - jq
-# - sha256sum
-# - tr, sed
-# - curl
-
-# Usage:
-#
-# 1) Enter your storage zone name and password
-# 2) If you want to purge your pullzone automatically after upload,
-#    enter your pull zone ID and API password
-# 3) Set UPLOAD to the list of files you want on the CDN
-#    - e.g. if you want https://---.b-cdn.net/hello/world.html to exist,
-#      then 'hello/world.html' should be one of the SOURCES.
-#    - Typically this should just be 'find . -type f'
-# 4) Set CLEAN to the list of directories you want clean from the CDN
-#    - e.g. if 'hello/...' no longer exists locally,
-#      and you want https://---.b-cdn.net/hello/... to be deleted,
-#      then 'hello' should be one of the CLEAN.
-#    - Typically this should just be 'find . -type d'
-# 5) Set LOCALPATH to be the path you want to upload files from.
-#    - e.g. if you have 'hello/world.html' in SOURCES,
-#      and it is stored in ./public/hello/world.html on the filesystem,
-#      then set LOCALPATH to 'public'
-
-# Notes:
-#
-# - Ensure TMPPATH does not exist. You may change this if you wish.
-# - Make sure your make parallelism isn't set too high
-#   or you may run into BunnyCDN's API limits.
-
-# Limitations:
-#
-# - Paths cannot contain spaces, because Makefile.
-#   Use %20 instead, and name the file with %20 locally.
-#   It will show up as a space in BunnyCDN.
-
-STORAGENAME=blog-grace-moe
-STORAGEPASSWORD=$(shell secret-tool lookup owner blog-grace-moe.b-cdn.net)
-
-PULLZONEID=3580820
-APIPASSWORD=$(shell secret-tool lookup owner grace@bunny.net)
-
-UPLOAD=$(shell cd '$(LOCALPATH)' && find . -type f)
-CLEAN=$(shell cd '$(LOCALPATH)' && find . -type d)
-LOCALPATH=public
-
-TMPPATH=.bunny-tmp
-
-################################################################################
-
-# Set Make stuff
-
-.PHONY: all sync purge rebuild upload clean nothing \
-
-.SECONDEXPANSION:
-.SHELLFLAGS:=-eou pipefail -c
-.ONESHELL:
-
-MAKEFLAGS=--no-print-directory -s
-
-PULLZONEID?=
-APIPASSWORD?=
-
-# Hashing of filenames
-# We use this to generate a unique filename for storing into .bunny-tmp
-hash=$(shell sha256sum <<<'$1' | cut -d' ' -f1)
-
-STORAGEURL='https://sg.storage.bunnycdn.com/$(STORAGENAME)'
-STORAGECMD=curl -H 'AccessKey: $(STORAGEPASSWORD)' -s
-PURGEURL='https://api.bunny.net/pullzone/$(PULLZONEID)/purgeCache'
-APICMD=curl -H 'AccessKey: $(APIPASSWORD)' -s
-
-all: rebuild
-	$(MAKE) sync
-
-sync: upload clean
-	$(MAKE) purge
-
-purge:
-	rm -rf $(TMPPATH)
-	if [ -n '$(PULLZONEID)' ]; then
-		$(APICMD) -XPOST '$(PURGEURL)' && echo 'Purged.'
-	fi
-
-rebuild:
-	if [ -n '$(NOREBUILD)' ]; then exit; fi
-	find public -mindepth 1 -maxdepth 1 -path 'public/git' -prune -o -exec rm -r {} +
-	zine release
-	[ -d public/git/blog.grace.moe ] || git init --bare public/git/blog.grace.moe
-	git push --mirror --force public/git/blog.grace.moe
-	git -C public/git/blog.grace.moe gc --no-detach --aggressive
-	git -C public/git/blog.grace.moe update-server-info
-	[ -d public/git/pymake ] || git init --bare public/git/pymake
-	git -C ~/Documents/src/pymake push --mirror --force $(realpath public/git/pymake)
-	git -C public/git/pymake gc --no-detach --aggressive
-	git -C public/git/pymake update-server-info
-
-upload: $(addprefix $(TMPPATH)/upload/,$(UPLOAD))
-clean: $(addprefix $(TMPPATH)/clean/,$(CLEAN))
-nothing:
-
-$(TMPPATH)/contents/%/contents: FORCE
-	# Get dir contents from bunny, and
-	# Delete any files or directories on bunny that don't exist locally
-	mkdir -p '$(@D)'
-	# echo + download '$(shell sed 's_[^/]*/__' <<<'$*')'
-	$(STORAGECMD) '$(STORAGEURL)/$(shell sed 's_^[^/]*/__' <<<'$*')/' >'$@'
-	echo - download '$(shell sed 's_[^/]*/__' <<<'$*')'
-
-$(TMPPATH)/clean/%: $(TMPPATH)/contents/$$(call hash,$$*)/$$*/contents FORCE
-	# echo + clean '$(*)'
-	$(MAKE) nothing $$(<'$<' jq -r '"$(TMPPATH)/cleanfile/$(*)/" +
-		(arrays | .[] | select(.IsDirectory == false).ObjectName)')
-	$(MAKE) nothing $$(<'$<' jq -r '"$(TMPPATH)/cleandir/$(*)/" +
-		(arrays | .[] | select(.IsDirectory == true).ObjectName)')
-	echo - clean '$(*)'
-
-$(TMPPATH)/cleanfile/%: FORCE
-	# echo + cleanfile '$(*)'
-	if ! [ -f '$(LOCALPATH)/$(*)' ]; then
-		echo Deleting file '$(*)'
-		$(STORAGECMD) -XDELETE '$(STORAGEURL)/$(*)'
-		echo
-	fi | tac | tac
-	# echo - cleanfile '$(*)'
-
-$(TMPPATH)/cleandir/%: FORCE
-	# echo + cleandir '$(*)'
-	if ! [ -d '$(LOCALPATH)/$(*)' ]; then
-		echo Deleting dir '$(*)'
-		$(STORAGECMD) -XDELETE '$(STORAGEURL)/$(*)'/
-		echo
-	fi | tac | tac
-	# echo - cleandir '$(*)'
-
-$(TMPPATH)/upload/%: $(TMPPATH)/contents/$$(call hash,$$(*D))/$$(*D)/contents FORCE
-	# Using dir contents, upload any files that have the wrong hash
-	# echo + upload '$(*)'
-	diff >/dev/null \
-		<(<'$<' jq -r 'arrays | .[] |
-			select(.ObjectName == "$(notdir $@)").Checksum'
-		) \
-		<(<'$(LOCALPATH)/$(*)' \
-			sha256sum |
-			tr '[:lower:]' '[:upper:]' |
-			sed 's/ .*//'
-		) || (
-			echo uploading '$(*)'
-			$(STORAGECMD) -T'$(LOCALPATH)/$(*)' '$(STORAGEURL)/$(*)'
-			echo
-		) | tac |tac
-	echo - upload '$(*)'
-
-FORCE:
+all:
+	python make.py
 
 
 .PHONY: install-Mathjax; install-Mathjax:
diff --git a/make.py b/make.py
@@ -0,0 +1,738 @@
+"""
+make.py
+-------
+
+Design inspired by the paper "Build Systems à la Carte"
+
+- https://github.com/snowleopard/build
+- https://www.microsoft.com/en-us/research/wp-content/uploads/2018/03/build-systems.pdf
+
+Key concepts:
+
+- The goal is to maintain an up-to-date *store* mapping *tasks* to *values*.
+- Tasks are described using rules, functions from parameters to tasks.
+- Each rule can choose its own caching policy, the default is a persistent cache keyed by hashes.
+- The current scheduler is a top-down (suspending) scheduler.
+
+make.py improves upon the paper's design in a few ways:
+
+- Task keys (for book-keeping purposes) are automatically derived from the rule functions.
+- Tasks are executed concurrently.
+- We split the two concepts Rebuilder and Scheduler into three concepts:
+
+  - (Per-task) Caching policies.
+  - (Global) Updating strategy.
+  - (Global) Metadata updaters.
+
+# Why we re-interpret the concepts Rebuilder and Scheduler
+
+The paper merges the concept of "metadata updaters" in the Rebuilder and Scheduler.
+This sort of makes sense as different rebuilders and schedulers require different metadata.
+
+However, it means that a rebuilder may need to override the `fetch` function in a call
+in order to ensure the metadata required for the rebuilder is created,
+and it encourages a local way to build metadata information.
+Furthermore, a rebuilder may sometimes require the same metadata as a scheduler's fetch function,
+for instance tracking dependency relationships is required for both the
+topological sort scheduler as well as trace-based rebuilders (e.g. constructive trace rebuilder).
+
+So, we instead factor out the metadata updating portion of both rebuilders and schedulers
+into a global metadata updater, which can be viewed as yet another wrapper around rules.
+However, as this must apply on a global level to support the whole scheduling strategy,
+metadata updaters are defined at a global level, unlike the per-task caching policies.
+
+# TODO
+
+- Make files on the filesystem a core concept as opposed to merely something you can do.
+"""
+
+import asyncio
+import collections
+import functools
+import hashlib
+import inspect
+import pickle
+import subprocess
+import sys
+import traceback
+
+from typing import (
+    Any,
+    Awaitable,
+    Callable,
+    Concatenate,
+    Optional,
+    ParamSpec,
+    Protocol,
+)
+
+
+class Fetch(Protocol):
+    """Protocol defining the fetch operation used by tasks."""
+
+    async def __call__(self, task: "Task") -> Any: ...
+
+
+RuleKey = bytes
+TaskKey = tuple
+ValueHash = bytes
+
+P = ParamSpec("P")
+RuleFn = Callable[Concatenate[Fetch, TaskKey, "Store", P], Awaitable[Any]]
+NiceRuleFn = Callable[Concatenate[Fetch, P], Awaitable[Any]]
+
+
+def make_hash(o: Any) -> bytes:
+    if isinstance(o, bytes):
+        h = hashlib.sha256(b"s")
+        h.update(o)
+    else:
+        h = hashlib.sha256(b"r")
+        h.update(repr(o).encode("utf-8"))
+    return h.digest()
+
+
+def rule_fn_to_key(fn: Callable) -> RuleKey:
+    name = fn.__name__
+    source = inspect.getsource(fn)
+    h = hashlib.sha256(source.encode("utf-8")).hexdigest()[:16]
+    key = f"{name}-{len(source)}-{h}".encode("utf-8")
+    return key
+
+
+class Task:
+    """A computation of a value."""
+
+    __slots__ = "task_key", "rule_fn", "args", "hash"
+
+    task_key: TaskKey
+    rule_fn: RuleFn
+    args: tuple
+    hash: int
+
+    def __init__(self, task_key: TaskKey, rule_fn: RuleFn, *args):
+        self.task_key = task_key
+        self.rule_fn = rule_fn
+        self.args = args
+        self.hash = hash(self.task_key)
+
+    def __call__(self, fetch: Fetch, store: "Store"):
+        return self.rule_fn(fetch, self.task_key, store, *self.args)
+
+    def __repr__(self) -> str:
+        return repr(self.task_key)
+
+    def __eq__(self, other: object) -> bool:
+        if not isinstance(other, Task):
+            return NotImplemented
+        return self.task_key == other.task_key
+
+    def __hash__(self) -> int:
+        return self.hash
+
+
+class Rule:
+    """A function that returns tasks."""
+
+    __slots__ = "rule_key", "rule_fn", "hash"
+
+    rule_key: RuleKey
+    rule_fn: RuleFn
+    hash: int
+
+    @staticmethod
+    def new(rule_fn: RuleFn):
+        return Rule(
+            rule_fn_to_key(rule_fn),
+            rule_fn,
+        )
+
+    def __init__(self, rule_key: RuleKey, rule_fn: RuleFn):
+        self.rule_key = rule_key
+        self.rule_fn = rule_fn
+        self.hash = hash(self.rule_key)
+
+    def __call__(self, *args):
+        return Task(
+            (
+                self.rule_key,
+                *(arg.task_key if isinstance(arg, Task) else arg for arg in args),
+            ),
+            self.rule_fn,
+            *args,
+        )
+
+    def __eq__(self, other):
+        if not isinstance(other, Rule):
+            return NotImplemented
+        return self.rule_key == other.rule_key
+
+    def __hash__(self):
+        return self.hash
+
+
+class Rules:
+    """The registry of all rules created."""
+
+    __slots__ = "rules"
+
+    rules: dict[RuleKey, Rule]
+
+    def __init__(self):
+        self.rules = dict()
+
+    def eval_task_key(self, task_key: TaskKey) -> Optional[Task]:
+        rule_key, *arg_keys = task_key
+        if rule_key not in self.rules:
+            return None
+        rule = self.rules[rule_key]
+        args = []
+        for arg in arg_keys:
+            if isinstance(arg, tuple) and arg[0] not in self.rules:
+                return None
+            args.append(self.eval_task_key(arg) if isinstance(arg, tuple) else arg)
+        return rule(*args)
+
+    @staticmethod
+    def nice_rule_fn_to_rule_fn(nice_rule_fn, fetch, task_key, store, *args):
+        return nice_rule_fn(fetch, *args)
+
+    def rule(self, rule_fn: NiceRuleFn) -> Rule:
+        return self.register(
+            self.hash_cache(
+                Rule.new(
+                    functools.update_wrapper(
+                        functools.partial(Rules.nice_rule_fn_to_rule_fn, rule_fn),
+                        rule_fn,
+                    )
+                )
+            )
+        )
+
+    def rule_no_cache(self, rule_fn: NiceRuleFn) -> Rule:
+        return self.register(
+            Rule.new(
+                functools.update_wrapper(
+                    functools.partial(Rules.nice_rule_fn_to_rule_fn, rule_fn),
+                    rule_fn,
+                )
+            )
+        )
+
+    def register(self, rule: Rule) -> Rule:
+        self.rules[rule.rule_key] = rule
+        return rule
+
+    def hash_cache(self, rule: Rule) -> Rule:
+        """Adds hash based caching to a rule
+
+        Attempts to replay the rule by checking if the hashes of each input
+        it would have obtained if run now matches up with a previous run.
+
+        Currently, there is no cache eviction policy (all previous runs are stored forever).
+
+        TODO: Implement some cache eviction.
+        """
+        rule.rule_fn = functools.update_wrapper(
+            functools.partial(Rules.hash_cache_fn, self, rule.rule_fn),
+            rule.rule_fn,
+        )
+        return rule
+
+    @staticmethod
+    async def track_fetch(fetch: Fetch, new_inputs: list, task: Task):
+        result = await fetch(task)
+        new_inputs.append((task.task_key, make_hash(result)))
+        return result
+
+    async def hash_cache_fn(
+        self,
+        inner_rule_fn: RuleFn,
+        fetch: Fetch,
+        task_key: TaskKey,
+        store: "Store",
+        *args,
+    ):
+        """Actual implementation of hash_cache"""
+        if task_key in store.key_info:
+            past_runs = store.key_info[task_key]
+            output_value = store.key_value[task_key]
+            possible_values = []
+            for past_inputs, past_value in past_runs:
+                for past_input_key, past_input_hash in past_inputs:
+                    input_task = self.eval_task_key(past_input_key)
+                    if not input_task:
+                        break
+                    current_input_value = await fetch(input_task)
+                    if make_hash(current_input_value) != past_input_hash:
+                        break
+                else:
+                    if output_value == past_value:
+                        return past_value
+                    possible_values.append(past_value)
+
+            if possible_values:
+                return possible_values[0]
+
+        new_inputs = []
+
+        new_value = await inner_rule_fn(
+            functools.partial(Rules.track_fetch, fetch, new_inputs),
+            task_key,
+            store,
+            *args,
+        )
+        store.key_info[task_key].append((new_inputs, new_value))
+        return new_value
+
+
+_rules = Rules()
+rule = _rules.rule
+rule_no_cache = _rules.rule_no_cache
+register = _rules.register
+hash_cache = _rules.hash_cache
+
+
+class Store:
+    """Stores a mapping from tasks to their values."""
+
+    __slots__ = "filename", "rules", "key_value", "key_info"
+
+    @staticmethod
+    def _fNone():
+        return None
+
+    def __init__(self, filename, rules):
+        self.filename = filename
+        self.rules = rules
+
+        self.key_value = collections.defaultdict(Store._fNone)
+        self.key_info = collections.defaultdict(list)
+
+        try:
+            with open(filename, "rb") as f:
+                self.key_value, self.key_info = pickle.load(f)
+        except:
+            pass
+
+    def save(self):
+        with open(self.filename, "wb") as f:
+            pickle.dump((self.key_value, self.key_info), f)
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.save()
+
+
+class Detach:
+    __slots__ = "_background_tasks"
+
+    def __init__(self):
+        self._background_tasks = set()
+
+    def __call__(self, *args, **kwargs):
+        task = asyncio.create_task(*args, **kwargs)
+        self._background_tasks.add(task)
+        task.add_done_callback(self._background_tasks.discard)
+
+
+detach = Detach()
+
+
+class SuspendingScheduler:
+    __slots__ = "store", "done", "waits"
+    store: Store
+    done: set[TaskKey]
+    waits: dict[TaskKey, asyncio.Event]
+
+    def __init__(self, store: Store):
+        self.store = store
+        self.done = set()
+        self.waits = dict()
+
+    async def wait(self):
+        while detach._background_tasks:
+            await asyncio.gather(*detach._background_tasks)
+
+    def build(self, task: Task):
+        return self.fetch_once(task)
+
+    async def fetch_once(self, task: Task):
+        task_key = task.task_key
+        wait = None
+        event = None
+        if task_key in self.done:
+            return self.store.key_value[task_key]
+        if task_key in self.waits:
+            wait = self.waits[task_key]
+
+        if wait:
+            await wait.wait()
+            return self.store.key_value[task_key]
+
+        event = self.waits[task_key] = asyncio.Event()
+        try:
+            self.store.key_value[task_key] = result = await task(
+                self.fetch_once, self.store
+            )
+        except:
+            print(traceback.format_exc())
+            event.set()
+            self.store.key_value[task_key] = None
+            return None
+
+        self.done.add(task_key)
+        event.set()
+
+        return result
+
+
+class Build:
+    __slots__ = "_store", "_scheduler"
+
+    def __init__(self, filename, rules=_rules):
+        self._store = Store(filename, rules)
+        self._scheduler = SuspendingScheduler(self._store)
+
+    async def __call__(self, task: Task):
+        result = await self.build(task)
+        await self.wait()
+        return result
+
+    def wait(self):
+        return self._scheduler.wait()
+
+    def build(self, task: Task):
+        return self._scheduler.build(task)
+
+    def __enter__(self):
+        self._store.__enter__()
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self._store.__exit__(exc_type, exc_val, exc_tb)
+
+
+class ShellResult(collections.namedtuple("ShellResult", "stdout stderr returncode")):
+    __slots__ = ()
+
+    @property
+    def utf8stdout(self):
+        return self.stdout.decode("utf-8")
+
+    @property
+    def utf8stderr(self):
+        return self.stderr.decode("utf-8")
+
+
+EchoNothing = 0
+EchoStdout = 1
+EchoStderr = 2
+EchoAll = 3
+
+
+async def shell(
+    cmd,
+    input=None,
+    echo=EchoNothing,
+) -> ShellResult:
+    proc = await asyncio.create_subprocess_shell(
+        cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE
+    )
+    stdout, stderr = await proc.communicate(input)
+    if echo & EchoStdout:
+        sys.stdout.buffer.write(stdout)
+        sys.stdout.buffer.flush()
+    if echo & EchoStderr:
+        sys.stderr.buffer.write(stderr)
+        sys.stderr.buffer.flush()
+    return ShellResult(stdout, stderr, proc.returncode)
+
+
+def run_in_executor(f, *args, executor=None):
+    return asyncio.get_running_loop().run_in_executor(executor, f, *args)
+
+
+# class AsyncWrapperSpec:
+#     __slots__ = "async_methods", "async_subobjects"
+
+#     def __init__(
+#         self,
+#         async_methods=set(),
+#         async_subobjects=dict(),
+#     ):
+#         self.async_methods = set(async_methods)
+#         self.async_subobjects = dict(async_subobjects)
+
+
+# class AsyncWrapper:
+#     __slots__ = "_obj", "_spec", "_executor"
+
+#     def __init__(self, obj, spec=AsyncWrapperSpec(), executor=None):
+#         self._obj = obj
+#         self._spec = spec
+#         self._executor = executor
+
+#     @staticmethod
+#     def wrapper(f, executor, *args):
+#         return run_in_executor(f, *args, executor=executor)
+
+#     def __getattr__(self, attr):
+#         if attr in self._spec.async_methods:
+#             return functools.partial(
+#                 self.wrapper, getattr(self._obj, attr), self._executor
+#             )
+#         if attr in self._spec.async_subobjects:
+#             return AsyncWrapper(
+#                 getattr(self._obj, attr),
+#                 spec=self._spec.async_subobjects[attr],
+#                 executor=self._executor,
+#             )
+#         return getattr(self._obj, attr)
+
+#     async def __aenter__(self):
+#         return AsyncWrapper(
+#             await run_in_executor(self._obj.__enter__, executor=self._executor),
+#             spec=self._spec,
+#         )
+
+#     async def __aexit__(self, exc_type, exc_val, exc_tb):
+#         return await run_in_executor(
+#             self._obj.__exit__, exc_type, exc_val, exc_tb, executor=self._executor
+#         )
+
+#     def __aiter__(self):
+#         return AsyncWrapper(self._obj.__iter__(), spec=self._spec)
+
+#     @staticmethod
+#     def wrapped_next(obj):
+#         try:
+#             return True, next(obj)
+#         except StopIteration:
+#             return False, None
+
+#     async def __anext__(self):
+#         ok, res = await run_in_executor(
+#             functools.partial(self.wrapped_next, self._obj), executor=self._executor
+#         )
+#         if not ok:
+#             raise StopAsyncIteration
+#         return res
+
+#     @staticmethod
+#     def wrapped_foreach(f, obj):
+#         for chunk in obj:
+#             f(chunk)
+
+#     async def foreach(self, f):
+#         await run_in_executor(
+#             functools.partial(self.wrapped_foreach, f, self._obj),
+#             executor=self._executor,
+#         )
+
+
+# class AsyncIO(Protocol):
+#     async def __aenter__(self) -> "AsyncIO": ...
+#     async def __aexit__(self, exc_type, exc_val, exc_tb) -> None: ...
+
+#     async def close(self) -> None: ...
+#     def fileno(self) -> int: ...
+#     async def flush(self) -> None: ...
+#     def isatty(self) -> bool: ...
+#     def readable(self) -> bool: ...
+#     async def readlines(self, hint: int = -1, /) -> list[bytes]: ...
+#     async def seek(self, offset: int, whence: int = 0, /) -> int: ...
+#     def seekable(self) -> bool: ...
+#     async def tell(self) -> int: ...
+#     async def truncate(self, size: int | None = None, /) -> int: ...
+#     def writable(self) -> bool: ...
+#     async def writelines(self, lines, /) -> None: ...
+#     async def readline(self, size: int | None = -1, /) -> bytes: ...
+#     @property
+#     def closed(self) -> bool: ...
+#     async def readall(self) -> bytes: ...
+#     async def readinto(self, buffer, /) -> Any: ...
+#     async def write(self, b, /) -> Any: ...
+#     async def read(self, size: int = -1, /) -> Any: ...
+#     def detach(self) -> "AsyncIO": ...
+#     async def readinto1(self, buffer, /) -> int: ...
+#     async def read1(self, size: int = -1, /) -> bytes: ...
+
+#     mode: str
+#     name: Any
+
+#     @property
+#     def closefd(self) -> bool: ...
+
+#     raw: "AsyncIO"
+
+#     async def peek(self, size: int = 0, /) -> bytes: ...
+
+#     encoding: str
+#     errors: str | None
+#     newlines: str | tuple[str, ...] | None
+
+#     def __aiter__(self) -> AsyncIterator[Any]: ...
+#     async def __anext__(self) -> Any: ...
+
+#     async def foreach(self, f) -> Any: ...
+
+
+# async def open_async(*args, executor=None) -> AsyncIO:
+#     # List of methods: https://docs.python.org/3/library/io.html
+#     async_methods = (
+#         "close",
+#         "detach",
+#         "flush",
+#         "peek",
+#         "read",
+#         "read1",
+#         "readall",
+#         "readinto",
+#         "readinto1",
+#         "readline",
+#         "readlines",
+#         "seek",
+#         "tell",
+#         "truncate",
+#         "write",
+#         "writelines",
+#     )
+#     return AsyncWrapper(
+#         await run_in_executor(open, *args, executor=executor),
+#         AsyncWrapperSpec(async_methods, {"buffer": AsyncWrapperSpec(async_methods)}),
+#     )  # type: ignore
+
+
+import json
+
+
+async def main():
+    sem = asyncio.Semaphore(50)
+
+    @rule_no_cache
+    def rebuild(fetch: Fetch):
+        return shell(
+            """
+            find public -mindepth 1 -maxdepth 1 -path 'public/git' -prune -o -exec rm -r {} +
+            zine release
+            [ -d public/git/blog.grace.moe ] || git init --bare public/git/blog.grace.moe
+            git push --mirror --force public/git/blog.grace.moe
+            git -C public/git/blog.grace.moe gc --no-detach --aggressive
+            git -C public/git/blog.grace.moe update-server-info
+            [ -d public/git/pymake ] || git init --bare public/git/pymake
+            git -C ~/Documents/src/pymake push --mirror --force $(realpath public/git/pymake)
+            git -C public/git/pymake gc --no-detach --aggressive
+            git -C public/git/pymake update-server-info
+            """,
+            echo=EchoAll,
+        )
+
+    STORAGENAME = "blog-grace-moe"
+    STORAGEPASSWORD = (
+        await shell("secret-tool lookup owner blog-grace-moe.b-cdn.net")
+    ).utf8stdout
+
+    PULLZONEID = "3580820"
+    APIPASSWORD = (await shell("secret-tool lookup owner grace@bunny.net")).utf8stdout
+    LOCALPATH = "public"
+
+    STORAGEURL = f"https://sg.storage.bunnycdn.com/{STORAGENAME}"
+    STORAGECMD = f"curl -H 'AccessKey: {STORAGEPASSWORD}' -s"
+    PURGEURL = f"https://api.bunny.net/pullzone/{PULLZONEID}/purgeCache"
+    APICMD = f"curl -H 'AccessKey: {APIPASSWORD}' -s"
+
+    @rule_no_cache
+    async def contents(fetch: Fetch, path: str):
+        async with sem:
+            print("+++ download", path)
+            path_json = await shell(f"{STORAGECMD} '{STORAGEURL}/{path}/'")
+            print("--- download", path)
+        return json.loads(path_json.utf8stdout)
+
+    @rule_no_cache
+    async def cleanfile(fetch: Fetch, path: str):
+        if 0 != (await shell(f"[ -f '{LOCALPATH}/{path}' ]")).returncode:
+            async with sem:
+                print("+++ cleanfile", path)
+                await shell(f"{STORAGECMD} -XDELETE '{STORAGEURL}/{path}'")
+                print("--- cleanfile", path)
+
+    @rule_no_cache
+    async def cleandir(fetch: Fetch, path: str):
+        if 0 != (await shell(f"[ -d '{LOCALPATH}/{path}' ]")).returncode:
+            async with sem:
+                print("+++ cleandir", path)
+                await shell(f"{STORAGECMD} -XDELETE '{STORAGEURL}/{path}/'")
+                print("--- cleandir", path)
+
+    @rule_no_cache
+    async def clean(fetch: Fetch, path: str):
+        path_contents = await fetch(contents(path))
+        await asyncio.gather(
+            *(
+                (
+                    fetch(cleandir(path + "/" + ent["ObjectName"]))
+                    if ent["IsDirectory"]
+                    else fetch(cleanfile(path + "/" + ent["ObjectName"]))
+                )
+                for ent in path_contents
+            )
+        )
+        print("- clean", path)
+
+    def hash_file(path):
+        with open(path, "rb") as f:
+            h = hashlib.sha256()
+            for chunk in f:
+                h.update(chunk)
+            return h.hexdigest().upper()
+
+    @rule_no_cache
+    async def upload(fetch: Fetch, path: str):
+        path_contents = await fetch(contents(path[: path.rfind("/")]))
+
+        bunny_checksum = None
+        if isinstance(path_contents, list):
+            try:
+                bunny_checksum = next(
+                    (
+                        ent["Checksum"]
+                        for ent in path_contents
+                        if ent["ObjectName"] == path[path.rfind("/") + 1 :]
+                    )
+                )
+            except StopIteration:
+                pass
+
+        our_checksum = await run_in_executor(hash_file, f"{LOCALPATH}/{path}")
+
+        if bunny_checksum != our_checksum:
+            async with sem:
+                print("+++ uploading", path)
+                await shell(
+                    f"{STORAGECMD} -T'{LOCALPATH}/{path}' '{STORAGEURL}/{path}'"
+                )
+                print("--- uploading", path)
+        print("- upload", path)
+
+    @rule_no_cache
+    async def purge(fetch: Fetch):
+        await shell(f"{APICMD} -XPOST '{PURGEURL}'")
+        print("Purged.")
+
+    with Build("make.db") as build:
+        await build(rebuild())
+        UPLOAD = (await shell(f"cd '{LOCALPATH}' && find . -type f")).utf8stdout
+        CLEAN = (await shell(f"cd '{LOCALPATH}' && find . -type d")).utf8stdout
+        await asyncio.gather(
+            *(build(upload(path)) for path in UPLOAD.strip().split("\n")),
+            *(build(clean(path)) for path in CLEAN.strip().split("\n")),
+        )
+        await build(purge())
+
+
+if __name__ == "__main__":
+    asyncio.run(main())