commit bcab7973f2109cacb013447226ecba899d920ae4
parent 71e96d09f2ea76e14d1759006ccfe19211a3ddce
Author: gracefu <81774659+gracefuu@users.noreply.github.com>
Date: Sun, 20 Apr 2025 07:09:34 +0800
Make the two tar sketches more similar
Diffstat:
3 files changed, 27 insertions(+), 19 deletions(-)
diff --git a/tar-sketch/a.txt b/tar-sketch/a.txt
@@ -1 +1 @@
-Sun Apr 20 07:04:58 AM +08 2025
+Sun Apr 20 07:08:59 AM +08 2025
diff --git a/tar-sketch/tar.py b/tar-sketch/tar.py
@@ -3,6 +3,8 @@ import os
import pickle
import subprocess
+from io import BufferedReader
+
db: dict = dict()
try:
@@ -14,20 +16,25 @@ except BaseException as e:
file_modtime = lambda f: os.stat(f.fileno()).st_mtime_ns
-def file_hash(filename):
- with open(filename, "rb") as f:
- modtime = file_modtime(f)
- if modtime == db.get(f"file:{filename}:modtime", None):
- print("skipping hash", db[f"file:{filename}:hash"])
- return db[f"file:{filename}:hash"]
- db[f"file:{filename}:modtime"] = modtime
- h = hashlib.sha256()
- for chunk in f:
- h.update(chunk)
- d = h.hexdigest()
- db[f"file:{filename}:hash"] = d
- print("hash", filename, d)
- return d
+def _file_hash(f: BufferedReader):
+    """Return the SHA-256 hex digest of the open binary file *f*.
+
+    Results are cached in the module-level ``db`` dict under the keys
+    ``file:<f.name>:modtime`` and ``file:<f.name>:hash``. When the cached
+    modification time equals the file's current one (as reported by
+    ``file_modtime``) and a digest is cached, that digest is returned
+    without reading the file. Otherwise the file is read from its current
+    position to the end, hashed, and both cache entries are refreshed.
+    """
+    modtime_key = f"file:{f.name}:modtime"
+    hash_key = f"file:{f.name}:hash"
+
+    modtime = file_modtime(f)
+    # Require the digest to be present as well, so a cache holding only the
+    # modtime entry falls through to a re-hash instead of raising KeyError.
+    if modtime == db.get(modtime_key) and hash_key in db:
+        return db[hash_key]
+
+    h = hashlib.sha256()
+    # Read fixed-size chunks: iterating a binary file splits on b"\n", which
+    # yields arbitrarily small or large chunks depending on the content.
+    while chunk := f.read(1 << 16):
+        h.update(chunk)
+    d = h.hexdigest()
+
+    # Update the cache only after hashing succeeded, so a failed read can
+    # never leave a fresh modtime paired with a stale digest.
+    db[hash_key] = d
+    db[modtime_key] = modtime
+    print("hash", f.name, d)
+    return d
+
+
+def file_hash(f: BufferedReader | bytes | str):
+    """Return the cached-or-computed hash for a path or an open binary file.
+
+    A ``bytes`` or ``str`` argument is treated as a filename: the file is
+    opened in binary mode, handed to ``_file_hash``, and closed again.
+    Any other argument is passed to ``_file_hash`` unchanged.
+    """
+    if not isinstance(f, (bytes, str)):
+        return _file_hash(f)
+    with open(f, "rb") as opened:
+        return _file_hash(opened)
def tar(manifest=b"manifest", output=b"archive.tar.gz"):
@@ -45,10 +52,12 @@ def tar(manifest=b"manifest", output=b"archive.tar.gz"):
file_hashes[manifest] = file_hash(manifest)
for fname in manifest_lines:
file_hashes[fname] = file_hash(fname)
- db["tar:file_hashes"] = file_hashes
- print("tar", "cvzf", output, *manifest_lines)
- subprocess.run([b"tar", b"cvzf", output, *manifest_lines])
+ print("tar", "cvzf", output, *manifest_lines)
+ subprocess.run([b"tar", b"cvzf", output, *manifest_lines])
+
+ file_hashes[output] = file_hash(output)
+ db["tar:file_hashes"] = file_hashes
tar()
diff --git a/tar-sketch/tar2.py b/tar-sketch/tar2.py
@@ -2,7 +2,6 @@ import contextvars
import functools
import inspect
import pickle
-import types
from typing import Any
db: dict = dict()