diff --git a/pyproject.toml b/pyproject.toml index 0c81748..b562401 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ deps = pytest-cov coverage commands = - pytest --cov src/nancy + pytest --cov {envsitepackagesdir}/nancy [testenv:mypy] deps = diff --git a/src/nancy/cli/__init__.py b/src/nancy/cli/__init__.py index 0673616..c83fc91 100644 --- a/src/nancy/cli/__init__.py +++ b/src/nancy/cli/__init__.py @@ -3,7 +3,6 @@ from loguru import logger from ..version import __version__ -# from .freeze import freeze, thaw from . import diff from . import record @@ -50,8 +49,6 @@ def main(log_level: str) -> None: logger.add(sys.stderr, level=log_level) -# main.add_command(freeze) -# main.add_command(thaw) main.add_command(diff.status, name="status") main.add_command(record.record_cli, name="record") main.add_command(version) diff --git a/src/nancy/cli/diff.py b/src/nancy/cli/diff.py index 6143846..651bbdd 100644 --- a/src/nancy/cli/diff.py +++ b/src/nancy/cli/diff.py @@ -38,6 +38,11 @@ def print_diff( hashcolor = Fore.MAGENTA if use_color else "" def _print_row(tag: str, entry: fs.FSEntry, level: int) -> None: + if len(entry.versions) == 0: + print(Fore.RED + "NOVERSIONS" + Style.RESET_ALL + entry.sha256) + else: + ver = entry.versions[-1] + relpath = entry.relpath # Format relpath using filetype-based colors @@ -48,25 +53,25 @@ def print_diff( dirstr = ( (filetypecolors["DIR"] + dname + "/" + reset) if dname != "" else "" ) - assert entry.filetype is not None - fname = filetypecolors.get(str(entry.filetype), "") + fname + reset + assert ver.filetype is not None + fname = filetypecolors.get(str(ver.filetype), "") + fname + reset - if entry.filetype == fs.FileType.LNK: # append symlink target - assert entry.symlink_target is not None - fname += " -> " + entry.symlink_target + if ver.filetype == fs.FileType.LNK: # append symlink target + assert ver.symlink_target is not None + fname += " -> " + ver.symlink_target relpath = dirstr + fname assert entry.sha256 is not None hashchange = ( - (hashcolor + entry.sha256.hex() + reset + " " + changetags[tag]) + (hashcolor + ver.sha256.hex() + reset + " " + changetags[tag]) if show_hashes else changetags[tag] ) print( hashchange, - entry.unfrozen_perms, + ver.perms, relpath, ) diff --git a/src/nancy/cli/freeze.py b/src/nancy/cli/freeze.py deleted file mode 100644 index 35a6949..0000000 --- a/src/nancy/cli/freeze.py +++ /dev/null @@ -1,45 +0,0 @@ -import click - -from .. import store - -import os -from typing import List - - -@click.command() -@click.argument("directory") -def freeze(directory: str) -> None: - """ - Initialize tracking in a directory or freeze a tracked directory. - - If DIRECTORY is not already part of an existing nancy store, then a new - 'nancy.db' file is created in that directory. On the other hand, if the - directory is part of an existing store, it will be re-frozen and versions - of any files changes since thawing will be incremented. - """ - if not os.path.isdir(directory): - raise ValueError(f"Cannot freeze non-existent directory {directory}") - - existing_store = store.find_store(directory) - if existing_store is None: # this is a new store - s = store.Store.init(directory) - else: # this is an existing store - s = store.Store(directory) - - # s.freeze() - - -@click.command() -@click.argument("files", nargs=-1) # , help="Files or directories to thaw.") -def thaw(files: List[str]) -> None: - """ - Enable manual alteration of files within a tracked directory. - - This command is meant to be used in conjunction with the 'freeze' - subcommand. After thawing, changes may be made in the current directory, - after which `nancy freeze` should be run changes may be made in the current - directory, after which `nancy freeze` should be run. At that point, changes - will be recorded: new files will be detected and modified files will have - their version numbers incremented. - """ - pass diff --git a/src/nancy/data.py b/src/nancy/data.py new file mode 100644 index 0000000..1ce6dc1 --- /dev/null +++ b/src/nancy/data.py @@ -0,0 +1,19 @@ +from dataclasses import dataclass +from pathlib import Path +from typing import Generic, Optional, TypeVar +import uuid + +T = TypeVar("T") + + +@dataclass(slots=True) +class Box(Generic[T]): + value: Optional[T] = None + uuid: str = "" + version: int = 0 # incremented whenever passed as a non-const argument + + def __post_init__(self) -> None: + self.uuid = str(uuid.uuid4()) + + +StrBox = Box[str] diff --git a/src/nancy/environment.py b/src/nancy/environment.py index bdfd7f1..68ead5c 100644 --- a/src/nancy/environment.py +++ b/src/nancy/environment.py @@ -85,13 +85,13 @@ class Environment: fdor = "" try: - fdor = json.dumps(platform.freedesktop_os_release()) + fdor = json.dumps(platform.freedesktop_os_release(), sort_keys=True) except AttributeError: # freedesktop_os_release only available for python >= 3.10 fdor = "" return cls( - json.dumps(dict(os.environ)), + json.dumps(dict(os.environ), sort_keys=True), platform.python_implementation(), sys.version, sys.hexversion, @@ -99,6 +99,6 @@ class Environment: timezone=time.tzname[time.daylight], release=platform.release(), freedesktop_os_release=fdor, - win32_ver=json.dumps(platform.win32_ver()), - mac_ver=json.dumps(platform.mac_ver()), + win32_ver=json.dumps(platform.win32_ver(), sort_keys=True), + mac_ver=json.dumps(platform.mac_ver(), sort_keys=True), ) diff --git a/src/nancy/fs.py b/src/nancy/fs.py index c52993d..83f8a38 100644 --- a/src/nancy/fs.py +++ b/src/nancy/fs.py @@ -2,7 +2,9 @@ from loguru import logger -from dataclasses import dataclass +from . import program + +from dataclasses import dataclass, field from datetime import datetime from enum import Enum import hashlib @@ -11,68 +13,26 @@ import os from pathlib import Path import sqlite3 import stat -from typing import Any, AnyStr, List, Optional, Tuple, TypeVar, Type, Union +from typing import ( + Any, + AnyStr, + Callable, + List, + Optional, + Tuple, + TypeVar, + Type, + Union, + TYPE_CHECKING, +) +import uuid import warnings - -PathStr = Union[str, Path, "os.PathLike[str]"] +if TYPE_CHECKING: + from .store import Store -def remove_write_perms(path: PathStr) -> Optional[str]: - """Remove write permissions for all users while preserving other perms""" - if not os.path.islink(path): - s = os.stat(path) - orig_perm_string = stat.filemode(s.st_mode) - os.chmod( - path, - s.st_mode & -(stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH), - ) - follow_symlinks = False - else: - if os.stat not in os.supports_follow_symlinks: - # can't stat this thing directly on this platform - # means we can only stat the content. - # In this case, we return None and do not lock this link - warnings.warn( - "This platform cannot stat symlinks. Will not set them read-only." - ) - return None - s = os.stat(path) - orig_perm_string = stat.filemode(s.st_mode) - if os.chmod in os.supports_follow_symlinks: - follow_symlinks = True - else: - warnings.warn( - ": Platform does not support chmod of symlinks. " - "Links will not be set read-only.", - ) - return orig_perm_string - os.chmod( - path, - s.st_mode & -(stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH), - follow_symlinks=follow_symlinks, - ) - return orig_perm_string - - -def make_readonly_recursive( - path: PathStr, - excluded: List[PathStr] = [], -) -> None: - """Recursively "freeze" a directory by setting all files and directories read-only""" - # traversing bottom-up makes it easier to freeze perms on directories - for root, dirs, files in os.walk(str(path), topdown=False): - for f in files: - p = os.path.join(root, f) - if p in excluded: - continue - remove_write_perms(os.path.join(Path(path), p)) - - for d in dirs: - p = os.path.join(root, d) - if p in excluded: - continue - remove_write_perms(os.path.join(path, p)) +PathStr = Union[str, os.PathLike[str]] class FileType(Enum): @@ -104,34 +64,59 @@ _FSEntryVersionT = TypeVar("_FSEntryVersionT", bound="FSEntryVersion") class FSEntryVersion: """A version of a file or directory.""" - id: Optional[int] filedir: "FSEntry" recorded_time: datetime # When was this version recorded? filetype: FileType deleted: bool # set True when recording a deleted file - unfrozen_perms: str # stat.filemode(os.stat(path).st_mode): '-rw-rw-r--' + perms: str # stat.filemode(os.stat(path).st_mode): '-rw-rw-r--' symlink_target: str # if this is a symlink, this is the (read but not fully # resolved) target. I.e. this is the "content" of the symlink. sha256: bytes source_task_id: Optional[int] = None + uuid: str = "" + + def __post_init__(self) -> None: + if self.uuid == "": + self.uuid = str(uuid.uuid4()) + + def persist( + self, + cur: sqlite3.Cursor, + source_task: program.Task, + ) -> None: + cur.execute( + "INSERT INTO filedir_version VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", + ( + self.uuid, + self.filedir.sha256, + datetime.now().timestamp(), + self.filetype, + False, + self.perms, + self.symlink_target, + self.sha256.hex(), + source_task.uuid, + ), + ) + assert isinstance(cur.lastrowid, int) @classmethod def from_row( cls: Type[_FSEntryVersionT], - row: Tuple[int, int, float, str, bool, str, str, str, Optional[int]], + row: Tuple[str, str, float, str, bool, str, str, str, Optional[int]], filedir: "FSEntry", ) -> _FSEntryVersionT: return cls( - row[0], # id filedir, # filedir datetime.fromtimestamp(row[2]), # recorded_time FileType(row[3]), # filetype row[4], # deleted - row[5], # unfrozen_perms + row[5], # perms row[6], # symlink_target bytes.fromhex(row[7]), # sha256 row[8], # source_task_id + uuid=row[0], # uuid ) @@ -143,40 +128,31 @@ _FSEntryT = TypeVar("_FSEntryT", bound="FSEntry") class FSEntry: """A hashed file or directory.""" - id: Optional[int] # defaults to None filename: str # with parent directory stripped. None if this is the root - relpath: str # relative to some root directory + relpath: str # relative to store root directory or / (if store is None) parent: Optional["FSEntry"] # upward link # children for dirs only: non-recursive; files/dirs at this level only children: List["FSEntry"] - filetype: Optional[ - FileType - ] # regular, symlink, special (block, char, pipe, or socket) - deleted: Optional[bool] - versions: Optional[List[FSEntryVersion]] = None - - # these will be filled from the version list automatically - unfrozen_perms: Optional[ - str - ] = None # stat.filemode(os.stat(path).st_mode): '-rw-rw-r--' - symlink_target: Optional[ - str - ] = None # if this is a symlink, this is the (read but not fully - # resolved) target. I.e. this is the "content" of the symlink. - sha256: Optional[bytes] = None - latest_version: Optional[FSEntryVersion] = None + versions: List[FSEntryVersion] = field(default_factory=list) + sha256: bytes = b"" + store: Optional["Store"] = None def __post_init__(self) -> None: - if self.versions is not None and len(self.versions) > 0: - self.latest_version = self.versions[-1] - self.unfrozen_perms = self.latest_version.unfrozen_perms - self.symlink_target = self.latest_version.symlink_target - self.sha256 = self.latest_version.sha256 + # derive hash from store, parent, and filename only (not children) + m = hashlib.sha256() + upstr: Callable[[str], None] = lambda s: m.update(bytes(s, "utf-8")) + upstr("FSEntry:") + if self.store is not None: + upstr(self.store.uuid) + if self.parent is not None: + m.update(self.parent.sha256) + upstr(self.filename) + self.sha256 = m.digest() @classmethod def from_path( cls: Type[_FSEntryT], - root: PathStr, + store: Optional["Store"] = None, relpath: Optional[str] = None, exclude: List[str] = ["nancy.db"], parent: Optional[_FSEntryT] = None, @@ -184,15 +160,11 @@ class FSEntry: ) -> _FSEntryT: """ Scan a path to instantiate (recursive). - - Arguments: - root (str or PathLike): The root directory of an existing or new store path - relpath (str or PathLike): Path of some directory under the store - path in which to find files and directories. Only these entries - and their children will be included. """ m = hashlib.sha256() + root = "/" if store is None else store.path + if relpath is None: # top-level invocation at root path = root else: @@ -236,7 +208,7 @@ class FSEntry: children = [ cls.from_path( - root=root, + store=store, relpath=rp, direntry=e, ) @@ -248,10 +220,14 @@ class FSEntry: # changes without modifying the hashes of individual files, # which remain content-based for compatibility with # other tools - if c.unfrozen_perms is not None: - m.update(bytes(c.unfrozen_perms, "utf-8")) - if c.sha256 is not None: - m.update(c.sha256) + assert ( + len(c.versions) > 0 + ) # must have a version since we derived from files + ver = c.versions[-1] + if ver.perms is not None: + m.update(bytes(ver.perms, "utf-8")) + if ver.sha256 is not None: + m.update(ver.sha256) elif stat.S_ISREG(s): filetype = FileType.REG m.update(open(path, "rb").read()) @@ -275,118 +251,117 @@ class FSEntry: sha256 = m.digest() ob = cls( - id=None, + store=store, filename="." if relpath is None else os.path.basename(relpath), relpath="." if relpath is None else relpath, parent=parent, children=children, - filetype=None, - deleted=None, versions=[], ) # Update versions after the fact to get self-reference ob.versions = [ FSEntryVersion( - id=None, filedir=ob, recorded_time=datetime.now(), filetype=filetype, deleted=False, - unfrozen_perms=stat.filemode(s), + perms=stat.filemode(s), symlink_target=str(symlink_target), sha256=sha256, source_task_id=None, ) ] - # now change children's parents to point to this object + # point versions and children back to ob for v in ob.versions: v.filedir = ob - if len(ob.versions) > 0: - last_ver = ob.versions[-1] - ob.filetype = last_ver.filetype - ob.deleted = last_ver.deleted - ob.unfrozen_perms = last_ver.unfrozen_perms - ob.symlink_target = last_ver.symlink_target - ob.sha256 = last_ver.sha256 for c in ob.children: c.parent = ob return ob - @classmethod - def empty_root(cls: Type[_FSEntryT]) -> _FSEntryT: - """Just a standardized value indicating an empty root directory""" - return cls( - id=None, - filename=".", - relpath=".", - parent=None, - children=[], - filetype=FileType.DIR, - unfrozen_perms="----------", - sha256=hashlib.sha256().digest(), - deleted=False, + def persist( + self, + cur: sqlite3.Cursor, + source_task: program.Task, + parent_key: Optional[str] = None, + ) -> None: + # Find entries with this name and parent + cur.execute( + "SELECT sha256 FROM filedir WHERE store = 1 AND name = ? AND parent = ? LIMIT 1", + (self.filename, None if self.parent is None else self.parent.sha256), ) + res = cur.fetchall() + if len(res) == 0: + # create filedir entry and get its id + cur.execute( + "INSERT INTO filedir VALUES (?, ?, ?, ?)", + ( + self.sha256, + None if self.store is None else self.store.uuid, + self.filename, + parent_key, + ), + ) + self.versions[-1].persist(cur=cur, source_task=source_task) + + # descend into children and record all of them anew as well + for c in self.children: + c.persist(cur=cur, source_task=source_task, parent_key=self.sha256.hex()) + + def persist_delete(self, cur: sqlite3.Cursor, source_task: program.Task) -> None: + # add a new version to self and every child with deleted=True + raise NotImplementedError - # @logger.catch @classmethod - def from_db_index( + def from_db_key( cls: Type[_FSEntryT], cursor: sqlite3.Cursor, - root_id: Optional[int] = None, + store: "Store", + root_key: Optional[str] = None, root_row: Optional[ - Tuple[int, str, bool] + Tuple[str, str, str] # sha256, name, store ] = None, # TODO: Type the expected sqlite rows parent: Optional[_FSEntryT] = None, ) -> _FSEntryT: - """Given id of an entry in filedir, recursively fill this object""" + """Given key of an entry in filedir, recursively fill this object""" if root_row is None: - assert root_id is not None + assert root_key is not None cursor.execute( - "SELECT id, name, frozen FROM filedir WHERE id=?", - (root_id,), + "SELECT sha256, name, store FROM filedir WHERE sha256=?", + (root_key,), ) root_row = cursor.fetchone() - root_id, filename, frozen = root_row + root_key, filename, store_key = root_row + + assert store_key == store.uuid relpath = filename if parent is None else os.path.join(parent.relpath, filename) - # instantiate class before filling children + # instantiate class before filling children and versions ob = cls( - id=root_id, filename=filename, relpath=relpath, parent=parent, children=[], - filetype=None, - unfrozen_perms=None, - sha256=None, - deleted=None, versions=[], + store=store, ) cursor.execute( - "SELECT id, name, frozen FROM filedir WHERE parent=?", - (root_id,), + "SELECT sha256, name, store FROM filedir WHERE parent=?", + (root_key,), ) rows = cursor.fetchall() - ob.children = [cls.from_db_index(cursor, root_row=r, parent=ob) for r in rows] + ob.children = [ + cls.from_db_key(cursor, root_row=r, parent=ob, store=store) for r in rows + ] # get all versions cursor.execute( "SELECT * FROM filedir_version WHERE filedir=? ORDER BY recorded_time", - (root_id,), + (root_key,), ) matches = cursor.fetchall() - versions = [FSEntryVersion.from_row(row, filedir=ob) for row in matches] - - if len(versions) > 0: - last_ver = versions[-1] - ob.filetype = last_ver.filetype - ob.deleted = last_ver.deleted - ob.unfrozen_perms = last_ver.unfrozen_perms - ob.symlink_target = last_ver.symlink_target - ob.sha256 = last_ver.sha256 - ob.latest_version = last_ver + ob.versions = [FSEntryVersion.from_row(row, filedir=ob) for row in matches] return ob @@ -409,19 +384,13 @@ class FSEntry: childsec = childsep + childsep.join(c for c in childstrs) # TODO: list versions in str() - # versions: [FSEntryVersion] = [] - return "\n".join( (" " * level) + line - for line in f"""id: {self.id} + for line in f"""sha256: {self.sha256.hex()} filename: {self.filename} relpath: {self.relpath} parent (relpath): {'None' if self.parent is None else self.parent.relpath} -filetype: {self.filetype} -deleted: {self.deleted} -unfrozen_perms: {self.unfrozen_perms} -symlink_target: {self.symlink_target} -sha256: {'None' if self.sha256 is None else self.sha256.hex()} +num versions: {len(self.versions)} children: {childsec} """.splitlines() ) @@ -448,12 +417,18 @@ class FSDiff: @staticmethod def compare(A: FSEntry, B: FSEntry) -> bool: - return ( - A.sha256 == B.sha256 - and A.unfrozen_perms == B.unfrozen_perms - and A.filetype == B.filetype - and A.deleted == B.deleted - ) + # get latest versions + Alatest = None if len(A.versions) == 0 else A.versions[-1] + Blatest = None if len(B.versions) == 0 else B.versions[-1] + if Alatest is None or Blatest is None: + return False + else: + return ( + Alatest.sha256 == Blatest.sha256 + and Alatest.perms == Blatest.perms + and Alatest.filetype == Blatest.filetype + and Alatest.deleted == Blatest.deleted + ) def filename(self) -> str: if self.A is not None: @@ -462,14 +437,6 @@ class FSDiff: assert self.B is not None return self.B.filename - def filetype(self) -> Optional[FileType]: - if self.A is not None: - return self.A.filetype - elif self.B is not None: - return self.B.filetype - else: - return None - @classmethod def compute( cls: Type[_FSDiffT], A: Optional[FSEntry], B: Optional[FSEntry] @@ -528,6 +495,24 @@ class FSDiff: return cls(A, B, modified_children) + def persist( + self, + cur: sqlite3.Cursor, + source_task: program.Task, + parent: Optional[_FSDiffT] = None, + ) -> None: + """Record this level of a diff.""" + if self.A is None: # new file + assert self.B is not None + self.B.persist(cur, source_task) + elif self.B is None: # deleted file + self.A.persist_delete(cur, source_task) + else: + # either this node modified, or children are + + for c in self.modified_children: + c.persist(cur=cur, source_task=source_task, parent=self) + def flatten_tree(self, level: int = 0) -> List[Tuple[int, "FSDiff"]]: """Return list of all entries, with level, in pairs""" pairs = [(level, self)] diff --git a/src/nancy/io.py b/src/nancy/io.py new file mode 100644 index 0000000..7f9a03d --- /dev/null +++ b/src/nancy/io.py @@ -0,0 +1,21 @@ +from .data import Box + +from dataclasses import dataclass, InitVar +from pathlib import Path + + +@dataclass +class FileDir(Box[Path]): + mode: InitVar[str] = "rw" + + def __post_init__(self, mode: str = "rw") -> None: + self.reads = "r" in mode + self.writes = "w" in mode + + +class Dir(FileDir): + pass + + +class File(FileDir): + pass diff --git a/src/nancy/machine.py b/src/nancy/machine.py index 9fcc94e..a783b28 100644 --- a/src/nancy/machine.py +++ b/src/nancy/machine.py @@ -1,6 +1,5 @@ from dataclasses import dataclass import hashlib -import json import platform import sqlite3 import time diff --git a/src/nancy/program.py b/src/nancy/program.py new file mode 100644 index 0000000..b5ca5ab --- /dev/null +++ b/src/nancy/program.py @@ -0,0 +1,206 @@ +"""Programs and tasks.""" + +from loguru import logger + +from . import environment + +from dataclasses import dataclass +import datetime +import hashlib +import os +import sqlite3 +from typing import Any, Callable, List, Optional, Type, TYPE_CHECKING +import uuid + +if TYPE_CHECKING: # avoid cyclic imports but enable proper type checking + from .store import Store + + +@dataclass(slots=True, frozen=True) +class Package: + name: str + version: str + language: str = "Python" + sha256: bytes = b"" + + def __post_init__(self) -> None: + m = hashlib.sha256() + upstr: Callable[[str], None] = lambda s: m.update(bytes(s, "utf-8")) + upstr("Package:") + upstr(self.name) + upstr(self.version) + upstr(self.language) + object.__setattr__(self, "sha256", m.digest()) + + +@dataclass(slots=True, frozen=True) +class Module: + name: str + code: Optional[str] + package: Optional[Package] + sha256: bytes = b"" + + def __post_init__(self) -> None: + m = hashlib.sha256() + upstr: Callable[[str], None] = lambda s: m.update(bytes(s, "utf-8")) + upstr("Module:") + upstr(self.name) + if self.code is not None: + upstr(self.code) + if self.package is not None: + m.update(self.package.sha256) + object.__setattr__(self, "sha256", m.digest()) + + +@dataclass(slots=True, frozen=True) +class Function: + name: str + module: Module + func: Callable[[Any], Any] + inputs: List["FunctionInput"] # not included in hash + outputs: List["FunctionOutput"] + sha256: bytes = b"" + + def __post_init__(self) -> None: + m = hashlib.sha256() + upstr: Callable[[str], None] = lambda s: m.update(bytes(s, "utf-8")) + upstr("Function:") + upstr(self.name) + m.update(self.module.sha256) + object.__setattr__(self, "sha256", m.digest()) + + +@dataclass(slots=True, frozen=True) +class FunctionInput: + name: str + description: str + required: bool + function: Function + position: int + argtype: Type[Any] + const: bool = False + sha256: bytes = b"" + + def __post_init__(self) -> None: + m = hashlib.sha256() + upstr: Callable[[str], None] = lambda s: m.update(bytes(s, "utf-8")) + upstr("FunctionInput") + upstr(self.name) + upstr(self.description) + m.update(self.function.sha256) + object.__setattr__(self, "sha256", m.digest()) + + +@dataclass(slots=True, frozen=True) +class FunctionOutput: + name: str + function: Function + position: int + sha256: bytes = b"" + + def __post_init__(self) -> None: + m = hashlib.sha256() + upstr: Callable[[str], None] = lambda s: m.update(bytes(s, "utf-8")) + upstr("FunctionOutput") + upstr(self.name) + object.__setattr__(self, "sha256", m.digest()) + + +@dataclass(slots=True) +class Task: + program: "Program" + uuid: str = "" + py_function: Optional[Function] = None + + def __post_init__(self) -> None: + self.uuid = str(uuid.uuid4()) + + def insert(self, cur: sqlite3.Cursor) -> None: + func_id = None if self.py_function is None else self.py_function.sha256.hex() + cur.execute( + "INSERT INTO task VALUES (?, ?, ?)", + (self.uuid, self.program.uuid, func_id), + ) + + +@dataclass(slots=True) +class Program: + store: "Store" + name: str + message: str + + start_time: Optional[datetime.datetime] = None + evaluated: bool = False + + uuid: str = "" + + def __post_init__(self) -> None: + self.uuid = str(uuid.uuid4()) + + def __enter__(self) -> "Program": + if self.evaluated: + raise RuntimeError("Cannot re-enter a Program context") + + env = environment.Environment.detect() + with self.store.committing() as cur: + env.maybe_insert(cur) + cur.execute( + "INSERT INTO program VALUES (?, ?, ?, ?, ?, ?, ?)", + ( + self.uuid, # uuid + self.name, # name TEXT, + # name of the program, usually written lowercase by calling + # code e.g. cnn_crossval + # -- we use POSIX timestamps for time recording. + # -- e.g. datetime.datetime.now().timestamp() + None, # start_time REAL, + None, # end_time REAL, + os.getpid(), # process_id INTEGER, -- host PID of python process on host OS + env.sha256.hex(), # environment INTEGER NOT NULL, + self.message, # user-defined message to help distinguish similar runs + ), + ) + self.start_time = datetime.datetime.now() + return self + + def new_task( + self, + name: str, + cur: sqlite3.Cursor, + py_function: Optional[Function] = None, + ) -> Task: + """Create a new task and return its uuid""" + t = Task(program=self, py_function=py_function) + t.insert(cur=cur) + return t + + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc: Optional[BaseException], + traceback: Optional[Any], + ) -> None: + end_time = datetime.datetime.now() + # record start and end times in store + + assert self.store.conn is not None + with self.store.committing() as cur: + cur.execute( + """ + UPDATE + program + SET + start_time = ?, + end_time = ? + WHERE + uuid = ? + """, + (self.start_time, end_time, self.uuid), + ) + self.evaluated = True # prevent re-running + assert self.start_time is not None + elapsed = end_time - self.start_time + logger.success( + f"Program [{self.uuid}] {self.name} " + f"(message:{self.message}) ran in {elapsed} seconds." + ) diff --git a/src/nancy/schema/version0.sql b/src/nancy/schema/version0.sql index beb0104..69bbb11 100644 --- a/src/nancy/schema/version0.sql +++ b/src/nancy/schema/version0.sql @@ -87,7 +87,7 @@ CREATE TABLE store ( -- imported and live outside the current store. -- We do not support renaming files. CREATE TABLE filedir ( - uuid TEXT PRIMARY KEY NOT NULL, + sha256 TEXT PRIMARY KEY NOT NULL, store TEXT NOT NULL, name TEXT, -- only a filename, not a path parent TEXT REFERENCES filedir ON UPDATE CASCADE, @@ -96,7 +96,7 @@ CREATE TABLE filedir ( -- Detect cross-store references CREATE TRIGGER insert_filedir BEFORE INSERT ON filedir BEGIN SELECT CASE - WHEN NEW.parent IS NOT NULL AND NEW.store != (SELECT store FROM filedir WHERE uuid = NEW.parent) + WHEN NEW.parent IS NOT NULL AND NEW.store != (SELECT store FROM filedir WHERE sha256 = NEW.parent) THEN RAISE (ABORT, 'Parent resides in different store') END; END; CREATE TRIGGER update_filedir BEFORE UPDATE ON filedir @@ -123,8 +123,7 @@ CREATE TABLE filedir_version ( filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details deleted BOOL NOT NULL, -- set True when recording a deleted file - -- We record the permissions on each file, in a way that enables reloading - -- permissions properly when thawing after a freeze operation. + -- We record the permissions on each file to enable fixing if needed perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--' symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target. i.e. this is the "content" of the symlink. @@ -159,7 +158,7 @@ END; END; CREATE TABLE environment ( sha256 TEXT PRIMARY KEY NOT NULL, - envvars_json TEXT, -- json.dumps(dict(os.environ)) + envvars_json TEXT, -- json.dumps(dict(os.environ), sort_keys=True) python_implementation TEXT, -- platform.python_implementation(): 'cpython' python_strversion TEXT, -- sys.version: '3.9.7 (default, Sep 16 2021, 13:09:58) \n[GCC 7.5.0]' python_hexversion INTEGER, -- sys.hexversion: 50923504 diff --git a/src/nancy/store.py b/src/nancy/store.py index bb1d005..715f993 100644 --- a/src/nancy/store.py +++ b/src/nancy/store.py @@ -2,7 +2,7 @@ from loguru import logger -from . import db, environment, fs +from . import db, environment, fs, program from contextlib import contextmanager from dataclasses import dataclass @@ -14,96 +14,6 @@ from typing import Any, Iterator, Optional, TypeVar, Type, Union import uuid import warnings - -@dataclass(slots=True) -class Program: - store: "Store" - name: str - message: str - - id: Optional[int] = None - start_time: Optional[datetime.datetime] = None - evaluated: bool = False - - uuid: str = "" - - def __post_init__(self) -> None: - self.uuid = str(uuid.uuid4()) - - def __enter__(self) -> "Program": - if self.evaluated: - raise RuntimeError("Cannot re-enter a Program context") - - env = environment.Environment.detect() - with self.store.committing() as cur: - env.maybe_insert(cur) - cur.execute( - "INSERT INTO program VALUES (?, ?, ?, ?, ?, ?, ?)", - ( - self.uuid, # uuid - self.name, # name TEXT, - # name of the program, usually written lowercase by calling - # code e.g. cnn_crossval - # -- we use POSIX timestamps for time recording. - # -- e.g. datetime.datetime.now().timestamp() - None, # start_time REAL, - None, # end_time REAL, - os.getpid(), # process_id INTEGER, -- host PID of python process on host OS - env.sha256.hex(), # environment INTEGER NOT NULL, - self.message, # user-defined message to help distinguish similar runs - ), - ) - self.id = cur.lastrowid - self.start_time = datetime.datetime.now() - return self - - def new_task( - self, - name: str, - cur: sqlite3.Cursor, - py_function_id: Optional[int] = None, - ) -> int: - """Create a new task and return its id""" - cur.execute( - "INSERT INTO task VALUES (?, ?, ?)", - (None, self.id, py_function_id), - ) - taskid = cur.lastrowid - assert isinstance(taskid, int) - return taskid - - def __exit__( - self, - exc_type: Optional[Type[BaseException]], - exc: Optional[BaseException], - traceback: Optional[Any], - ) -> None: - end_time = datetime.datetime.now() - # record start and end times in store - - assert self.store.conn is not None - with self.store.committing() as cur: - cur.execute( - """ - UPDATE - program - SET - start_time = ?, - end_time = ? - WHERE - uuid = ? - """, - (self.start_time, end_time, self.uuid), - ) - self.evaluated = True # prevent re-running - assert self.start_time is not None - elapsed = end_time - self.start_time - logger.success( - f"Program [{self.id}] {self.name} " - f"(message:{self.message}) ran in {elapsed} seconds." - ) - - # see https://stackoverflow.com/questions/44640479/type-annotation-for-classmethod-returning-instance _StoreT = TypeVar("_StoreT", bound="Store") @@ -111,34 +21,25 @@ _StoreT = TypeVar("_StoreT", bound="Store") class Store: """Describes a data directory, holds active connection to nancy.db""" - path: Optional[fs.PathStr] + path: fs.PathStr db_path: fs.PathStr conn: Optional[sqlite3.Connection] + uuid: str def __init__( self, - directory: Optional[fs.PathStr] = None, + directory: fs.PathStr, conn: Optional[sqlite3.Connection] = None, ): - """ - Arguments: - directory (str): Location of existing store directory. If omitted - or None, initialize a store in memory, with no associated - directory. - """ - if directory is None: - self.path = None - self.db_path = ":memory:" - else: - self.path = Path(directory) - self.db_path = self.path / "nancy.db" + self.path = Path(directory) + self.db_path = self.path / "nancy.db" if conn is None: self.connect() else: self.conn = conn - self.store_uuid = self.find_store_uuid() + self.uuid = self.find_store_uuid() def copy(self: _StoreT, store_path: fs.PathStr) -> _StoreT: """Copy this store to a new store path""" @@ -162,7 +63,7 @@ class Store: yield cur self.conn.commit() - def find_store_uuid(self, cur: Optional[sqlite3.Cursor] = None) -> Optional[str]: + def find_store_uuid(self, cur: Optional[sqlite3.Cursor] = None) -> str: assert self.conn is not None if cur is None: cur = self.conn.cursor() @@ -170,25 +71,22 @@ class Store: 'SELECT value FROM local_metadata WHERE key == "store_uuid" LIMIT 1' ) res = cur.fetchone() - return None if res is None else res[0] + assert res is not None + (self.uuid,) = res + return self.uuid @classmethod - def init( - cls: Type[_StoreT], message: str, directory: Optional[fs.PathStr] = None - ) -> _StoreT: + def init(cls: Type[_StoreT], message: str, directory: fs.PathStr) -> _StoreT: start_time = datetime.datetime.now() - if directory is None: # initialize an in-memory store - db_path = ":memory:" - else: - if not os.path.isdir(directory): - raise FileNotFoundError( - f"Directory {directory} must exist before initializing a store there.", - ) - db_path = os.path.join(directory, "nancy.db") - if os.path.isfile(db_path): - raise FileExistsError( - f"File {db_path} exists. Refusing to re-initialize", - ) + if not os.path.isdir(directory): + raise FileNotFoundError( + f"Directory {directory} must exist before initializing a store there.", + ) + db_path = os.path.join(directory, "nancy.db") + if os.path.isfile(db_path): + raise FileExistsError( + f"File {db_path} exists. Refusing to re-initialize", + ) # initialize a database in the target directory conn = sqlite3.connect(db_path, isolation_level="DEFERRED") cur = conn.cursor() @@ -196,36 +94,35 @@ class Store: conn.commit() cur.close() - new_store = cls(directory, conn) + new_store = cls(directory=directory, conn=conn) - with new_store.program("INIT", message) as p: + with new_store.new_program("INIT", message) as p: # set the timing to the actual times it took to initialize the db p.start_time = start_time # generate a new UUID for this store - assert new_store.store_uuid is None - new_store.store_uuid = str(uuid.uuid4()) + assert new_store.uuid is None + new_store.uuid = str(uuid.uuid4()) with new_store.committing() as cur: cur.execute( 'INSERT INTO local_metadata VALUES ("store_uuid", ?)', - (new_store.store_uuid,), + (new_store.uuid,), ) return new_store - def make_readonly(self) -> None: - """Make store directory read-only (except for nancy.db) and return file list""" - fs.make_readonly_recursive(str(self.path), excluded=["./nancy.db"]) - - def filedir_root_index(self, cur: Optional[sqlite3.Cursor] = None) -> Optional[int]: - """Get the database id for the table entry in this store having name '.'""" + def filedir_root_key(self, cur: Optional[sqlite3.Cursor] = None) -> Optional[str]: + """Get the database key for the table entry in this store having name '.'.""" if cur is None: assert self.conn is not None cur = self.conn.cursor() - cur.execute("SELECT id FROM filedir WHERE store=1 AND parent is NULL") - (root_id,) = cur.fetchone() - assert isinstance(root_id, int) - return root_id + cur.execute("SELECT uuid FROM filedir WHERE store=1 AND parent is NULL") + row = cur.fetchone() + if row is None: + return None + (root_key,) = row + assert isinstance(root_key, str) + return root_key def path_to_fsentry(self, path: fs.PathStr) -> Optional[fs.FSEntry]: """Find a path in the filedir database and return it as an fsentry. @@ -242,156 +139,72 @@ class Store: ) # rel tells us how to descend recurively to find the filedir for path - fd_id = self.filedir_root_index(cur) - if fd_id is None: + fd_key = self.filedir_root_key(cur) + if fd_key is None: # Root isn't even inserted into the db yet return None for p in Path(rel).parts: # Path.parts splits a path reliably # get child with that name cur.execute( - "SELECT id, filetype FROM filedir WHERE filename=? AND parent=? LIMIT 1", - (p, fd_id), + "SELECT uuid, filetype FROM filedir WHERE filename=? AND parent=? LIMIT 1", + (p, fd_key), ) row = cur.fetchone() if row is None: return None - fd_id, filetype = row + fd_key, filetype = row - return fs.FSEntry.from_db_index(cur, root_id=fd_id) + return fs.FSEntry.from_db_key(cur, root_key=fd_key, store=self) def fs_entries(self, shallow: bool = False) -> Optional[fs.FSEntry]: """Return recursive structure containing FSEntry objects from db""" - root_id = self.filedir_root_index() - if root_id is None: + root_key = self.filedir_root_key() + if root_key is None: return None else: assert self.conn is not None - return fs.FSEntry.from_db_index(self.conn.cursor(), root_id=root_id) + return fs.FSEntry.from_db_key( + self.conn.cursor(), root_key=root_key, store=self + ) - def program(self, name: str, message: str) -> Program: - p = Program(self, name, message) + def new_program(self, name: str, message: str) -> program.Program: + p = program.Program(self, name, message) return p def diff(self) -> fs.FSDiff: """ - Find changes to files and dirs compared to their recorded versions + Find changes to files and dirs compared to their recorded versions. """ # get info about files currently at the given locations - current = fs.FSEntry.from_path(str(self.path)) + current = fs.FSEntry.from_path(store=self) # then find a listing covering all the expected paths recorded = self.fs_entries(shallow=True) return fs.FSDiff.compute(recorded, current) - def _record_file_version( - self, - cur: sqlite3.Cursor, - ob: fs.FSEntry, - filedir_id: int, - source_task: Optional[int] = None, - ) -> int: - cur.execute( - "INSERT INTO filedir_version VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", - ( - None, - filedir_id, - datetime.datetime.now().timestamp(), - ob.filetype, - False, - ob.unfrozen_perms, - ob.symlink_target, - None if ob.sha256 is None else ob.sha256.hex(), - source_task, - ), - ) - assert isinstance(cur.lastrowid, int) - return cur.lastrowid - - def _record_new_file_recursive( - self, - ob: fs.FSEntry, - cur: sqlite3.Cursor, - parent_id: Optional[int], - source_task: Optional[int], - ) -> None: - # Find entries with this name and parent - cur.execute( - "SELECT id FROM filedir WHERE store = 1 AND name = ? AND parent = ? LIMIT 1", - (ob.filename, None if ob.parent is None else ob.parent.id), - ) - res = cur.fetchall() - if len(res) == 0: - # create filedir entry and get its id - cur.execute( - "INSERT INTO filedir VALUES (?, ?, ?, ?, ?)", - ( - None, - 1, - ob.filename, - parent_id, - False, - ), - ) - thisid = cur.lastrowid - else: - (thisid,) = res[0] - assert isinstance(thisid, int) - - self._record_file_version(cur, ob, thisid, source_task=source_task) - - # descend into children and record all of them anew as well - for c in ob.children: - self._record_new_file_recursive(c, cur, thisid, source_task) - - def _record_recursive( - self, - diff: fs.FSDiff, - cur: sqlite3.Cursor, - parent_id: Optional[int] = None, - source_task: Optional[int] = None, - ) -> None: - """Record this level of a diff.""" - if diff.A is None: - assert diff.B is not None - self._record_new_file_recursive( - diff.B, cur, parent_id, source_task=source_task - ) - elif diff.B is None: - # self._record_deleted_file_recursive(diff.B, cur, parent_id) - pass - else: - # possibly modified, record new version then recurse into children - self._record_new_file_recursive( - diff.B, cur, parent_id, source_task=source_task - ) - assert diff.A.id is not None - self._record_file_version(cur, diff.B, diff.A.id, source_task=source_task) - - # descend into children - def record( self, diff: fs.FSDiff, message: str, - parent_id: Optional[int] = None, + parent_id: Optional[str] = None, cur: Optional[sqlite3.Cursor] = None, ) -> None: if cur is None: assert self.conn is not None cur = self.conn.cursor() - with self.program("RECORD", message) as p: + with self.new_program("RECORD", message) as p: with self.committing() as cur: # entire record operation is one transaction # create a task for this operation - task_id = p.new_task("Store._record_recursive", cur=cur) + task = p.new_task(name="Store._record_recursive", cur=cur) # descend the diff, tracking parent filedir IDs, creating them and # recording new versions of each, when necessary - self._record_recursive(diff, cur, source_task=task_id) + diff.persist(cur=cur, source_task=task) def find_store(path: Union[str, "os.PathLike[str]"]) -> Optional[str]: diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/cli/test_record.py b/tests/cli/test_record.py new file mode 100644 index 0000000..aa0dd0d --- /dev/null +++ b/tests/cli/test_record.py @@ -0,0 +1,38 @@ +from click.testing import CliRunner +from nancy.cli import main +import pytest + +import os +from pathlib import Path +import tempfile +from typing import Iterator + + +@pytest.fixture +def junk_dir() -> Iterator[Path]: + """Create a temp directory with a few files""" + with tempfile.TemporaryDirectory(prefix="nancy_junkdir") as d: + root = Path(d) + open(root / "empty.txt", "w").close() # touch a file + open(root / "full.txt", "w").write("something") # touch a file + os.mkdir(root / "d") + open(root / "foo.txt", "w").write("bar") # touch a file + yield root + + +def test_record(junk_dir: Path) -> None: + runner = CliRunner() + result = runner.invoke( + main, + [ + "record", + "-s", + str(junk_dir), + "-m", + "This is just a test recording", + ], + input="y\n", + ) + print(result.output) + assert result.exit_code == 0 + assert "ERROR" not in result.output diff --git a/tests/test_store.py b/tests/test_store.py index 8429516..dcc6943 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -11,7 +11,7 @@ from typing import Iterator @pytest.fixture def bare_dir() -> Iterator[Path]: - """Create an emptry temp directory""" + """Create an empty temp directory""" with tempfile.TemporaryDirectory(prefix="nancy_testdir") as d: yield Path(d) @@ -33,8 +33,8 @@ def test_record_untracked_dir(filled_dir: Path) -> None: @pytest.fixture -def empty_store() -> Iterator[store.Store]: - s = store.Store.init(message="test init") +def empty_store(bare_dir: Path) -> Iterator[store.Store]: + s = store.Store.init(directory=bare_dir, message="test init") yield s