Many changes to keys and move persist() to fs.py

This fixes a lot of awkwardness that came from having the record()
functionality inside of store.py. It is still broken, but is much closer
to actually working now. I also sketched some data and io functionality,
which has no tests and is not yet working at all.
This commit is contained in:
Jacob Hinkle 2022-10-11 12:27:40 -04:00
parent 07ccef601c
commit 8e69ca3390
15 changed files with 523 additions and 486 deletions

View File

@ -37,7 +37,7 @@ deps =
pytest-cov
coverage
commands =
pytest --cov src/nancy
pytest --cov {envsitepackagesdir}/nancy
[testenv:mypy]
deps =

View File

@ -3,7 +3,6 @@ from loguru import logger
from ..version import __version__
# from .freeze import freeze, thaw
from . import diff
from . import record
@ -50,8 +49,6 @@ def main(log_level: str) -> None:
logger.add(sys.stderr, level=log_level)
# main.add_command(freeze)
# main.add_command(thaw)
main.add_command(diff.status, name="status")
main.add_command(record.record_cli, name="record")
main.add_command(version)

View File

@ -38,6 +38,11 @@ def print_diff(
hashcolor = Fore.MAGENTA if use_color else ""
def _print_row(tag: str, entry: fs.FSEntry, level: int) -> None:
if len(entry.versions) == 0:
print(Fore.RED + "NOVERSIONS" + Style.RESET_ALL + entry.sha256)
else:
ver = entry.versions[-1]
relpath = entry.relpath
# Format relpath using filetype-based colors
@ -48,25 +53,25 @@ def print_diff(
dirstr = (
(filetypecolors["DIR"] + dname + "/" + reset) if dname != "" else ""
)
assert entry.filetype is not None
fname = filetypecolors.get(str(entry.filetype), "") + fname + reset
assert ver.filetype is not None
fname = filetypecolors.get(str(ver.filetype), "") + fname + reset
if entry.filetype == fs.FileType.LNK: # append symlink target
assert entry.symlink_target is not None
fname += " -> " + entry.symlink_target
if ver.filetype == fs.FileType.LNK: # append symlink target
assert ver.symlink_target is not None
fname += " -> " + ver.symlink_target
relpath = dirstr + fname
assert entry.sha256 is not None
hashchange = (
(hashcolor + entry.sha256.hex() + reset + " " + changetags[tag])
(hashcolor + ver.sha256.hex() + reset + " " + changetags[tag])
if show_hashes
else changetags[tag]
)
print(
hashchange,
entry.unfrozen_perms,
ver.perms,
relpath,
)

View File

@ -1,45 +0,0 @@
import click
from .. import store
import os
from typing import List
@click.command()
@click.argument("directory")
def freeze(directory: str) -> None:
    """
    Initialize tracking in a directory or freeze a tracked directory.
    If DIRECTORY is not already part of an existing nancy store, then a new
    'nancy.db' file is created in that directory. On the other hand, if the
    directory is part of an existing store, it will be re-frozen and versions
    of any files changes since thawing will be incremented.
    """
    # Nothing can be tracked unless the target really is a directory.
    if not os.path.isdir(directory):
        raise ValueError(f"Cannot freeze non-existent directory {directory}")
    already_tracked = store.find_store(directory) is not None
    # Re-open the store when one was found, otherwise create a new one here.
    s = store.Store(directory) if already_tracked else store.Store.init(directory)
    # The freeze step itself is not wired up yet, so `s` is currently unused.
    # s.freeze()
@click.command()
@click.argument("files", nargs=-1)  # , help="Files or directories to thaw.")
def thaw(files: List[str]) -> None:
    """
    Enable manual alteration of files within a tracked directory.

    This command is meant to be used in conjunction with the 'freeze'
    subcommand. After thawing, changes may be made in the current directory,
    after which `nancy freeze` should be run. At that point, changes will be
    recorded: new files will be detected and modified files will have their
    version numbers incremented.
    """
    # Placeholder: the command is registered and documented but does nothing
    # yet. The docstring above doubles as the --help text, so the previously
    # duplicated clause was removed from it.
    pass

19
src/nancy/data.py Normal file
View File

@ -0,0 +1,19 @@
from dataclasses import dataclass
from pathlib import Path
from typing import Generic, Optional, TypeVar
import uuid
T = TypeVar("T")
@dataclass(slots=True)
class Box(Generic[T]):
value: Optional[T] = None
uuid: str = ""
version: int = 0 # incremented whenever passed as a non-const argument
def __post_init__(self) -> None:
self.uuid = str(uuid.uuid4())
StrBox = Box[str]

View File

@ -85,13 +85,13 @@ class Environment:
fdor = ""
try:
fdor = json.dumps(platform.freedesktop_os_release())
fdor = json.dumps(platform.freedesktop_os_release(), sort_keys=True)
except AttributeError:
# freedesktop_os_release only available for python >= 3.10
fdor = ""
return cls(
json.dumps(dict(os.environ)),
json.dumps(dict(os.environ), sort_keys=True),
platform.python_implementation(),
sys.version,
sys.hexversion,
@ -99,6 +99,6 @@ class Environment:
timezone=time.tzname[time.daylight],
release=platform.release(),
freedesktop_os_release=fdor,
win32_ver=json.dumps(platform.win32_ver()),
mac_ver=json.dumps(platform.mac_ver()),
win32_ver=json.dumps(platform.win32_ver(), sort_keys=True),
mac_ver=json.dumps(platform.mac_ver(), sort_keys=True),
)

View File

@ -2,7 +2,9 @@
from loguru import logger
from dataclasses import dataclass
from . import program
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
import hashlib
@ -11,68 +13,26 @@ import os
from pathlib import Path
import sqlite3
import stat
from typing import Any, AnyStr, List, Optional, Tuple, TypeVar, Type, Union
from typing import (
Any,
AnyStr,
Callable,
List,
Optional,
Tuple,
TypeVar,
Type,
Union,
TYPE_CHECKING,
)
import uuid
import warnings
PathStr = Union[str, Path, "os.PathLike[str]"]
if TYPE_CHECKING:
from .store import Store
def remove_write_perms(path: "PathStr") -> Optional[str]:
    """Remove write permissions for all users while preserving other perms.

    Arguments:
        path: File, directory or symlink whose write bits should be cleared.

    Returns:
        The permission string (``stat.filemode`` format, e.g. '-rw-rw-r--')
        the entry had before the change, or None when ``path`` is a symlink
        and this platform cannot stat the link itself.
    """
    write_bits = stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH

    if not os.path.islink(path):
        s = os.stat(path)
        # `~` (bitwise NOT) builds the mask; unary minus would keep the
        # other-write bit and clear the other-execute bit instead.
        os.chmod(path, stat.S_IMODE(s.st_mode) & ~write_bits)
        return stat.filemode(s.st_mode)

    if os.stat not in os.supports_follow_symlinks:
        # can't stat this thing directly on this platform
        # means we can only stat the content.
        # In this case, we return None and do not lock this link
        warnings.warn(
            "This platform cannot stat symlinks. Will not set them read-only."
        )
        return None

    # Inspect the link itself rather than whatever it points to.
    s = os.stat(path, follow_symlinks=False)
    orig_perm_string = stat.filemode(s.st_mode)
    if os.chmod not in os.supports_follow_symlinks:
        warnings.warn(
            ": Platform does not support chmod of symlinks. "
            "Links will not be set read-only.",
        )
        return orig_perm_string
    # follow_symlinks=False so that the link, not its target, is changed.
    os.chmod(
        path,
        stat.S_IMODE(s.st_mode) & ~write_bits,
        follow_symlinks=False,
    )
    return orig_perm_string
def make_readonly_recursive(
    path: PathStr,
    excluded: Optional[List[PathStr]] = None,
) -> None:
    """Recursively "freeze" a directory by setting all files and directories read-only.

    Arguments:
        path: Root directory to walk. The root itself is not modified, only
            the entries found below it.
        excluded: Entries to leave untouched. Each may be written either as
            the walked path (root joined with the relative location) or
            relative to ``path`` (e.g. "./nancy.db"). Defaults to no
            exclusions.
    """
    base = str(path)
    # Normalise once so "./nancy.db", "nancy.db" and "<root>/nancy.db" all match.
    skip = {os.path.normpath(str(e)) for e in (excluded or ())}

    # traversing bottom-up makes it easier to freeze perms on directories
    for root, dirs, files in os.walk(base, topdown=False):
        for name in files + dirs:
            # `root` already starts with `base`, so this is the usable path;
            # joining `base` on again would break relative roots.
            p = os.path.join(root, name)
            if (
                os.path.normpath(p) in skip
                or os.path.normpath(os.path.relpath(p, base)) in skip
            ):
                continue
            remove_write_perms(p)
PathStr = Union[str, os.PathLike[str]]
class FileType(Enum):
@ -104,34 +64,59 @@ _FSEntryVersionT = TypeVar("_FSEntryVersionT", bound="FSEntryVersion")
class FSEntryVersion:
"""A version of a file or directory."""
id: Optional[int]
filedir: "FSEntry"
recorded_time: datetime # When was this version recorded?
filetype: FileType
deleted: bool # set True when recording a deleted file
unfrozen_perms: str # stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
perms: str # stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
symlink_target: str # if this is a symlink, this is the (read but not fully
# resolved) target. I.e. this is the "content" of the symlink.
sha256: bytes
source_task_id: Optional[int] = None
uuid: str = ""
def __post_init__(self) -> None:
if self.uuid == "":
self.uuid = str(uuid.uuid4())
def persist(
    self,
    cur: sqlite3.Cursor,
    source_task: program.Task,
) -> None:
    """Insert this version as one row of the filedir_version table.

    Arguments:
        cur: Cursor on the store database; the caller owns the transaction.
        source_task: Task recorded as the origin of this version.
    """
    cur.execute(
        "INSERT INTO filedir_version VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
        (
            self.uuid,
            # NOTE(review): passed as raw bytes to stay in step with what
            # FSEntry.persist writes to filedir.sha256, although readers
            # treat these keys as str -- confirm the intended encoding.
            self.filedir.sha256,
            # store this object's own timestamp so from_row() round-trips
            self.recorded_time.timestamp(),
            # sqlite3 cannot bind an Enum member; from_row() rebuilds the
            # member from its value, so that is what gets stored
            self.filetype.value,
            self.deleted,
            self.perms,
            self.symlink_target,
            self.sha256.hex(),
            source_task.uuid,
        ),
    )
@classmethod
def from_row(
cls: Type[_FSEntryVersionT],
row: Tuple[int, int, float, str, bool, str, str, str, Optional[int]],
row: Tuple[str, str, float, str, bool, str, str, str, Optional[int]],
filedir: "FSEntry",
) -> _FSEntryVersionT:
return cls(
row[0], # id
filedir, # filedir
datetime.fromtimestamp(row[2]), # recorded_time
FileType(row[3]), # filetype
row[4], # deleted
row[5], # unfrozen_perms
row[5], # perms
row[6], # symlink_target
bytes.fromhex(row[7]), # sha256
row[8], # source_task_id
uuid=row[0], # uuid
)
@ -143,40 +128,31 @@ _FSEntryT = TypeVar("_FSEntryT", bound="FSEntry")
class FSEntry:
"""A hashed file or directory."""
id: Optional[int] # defaults to None
filename: str # with parent directory stripped. None if this is the root
relpath: str # relative to some root directory
relpath: str # relative to store root directory or / (if store is None)
parent: Optional["FSEntry"] # upward link
# children for dirs only: non-recursive; files/dirs at this level only
children: List["FSEntry"]
filetype: Optional[
FileType
] # regular, symlink, special (block, char, pipe, or socket)
deleted: Optional[bool]
versions: Optional[List[FSEntryVersion]] = None
# these will be filled from the version list automatically
unfrozen_perms: Optional[
str
] = None # stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
symlink_target: Optional[
str
] = None # if this is a symlink, this is the (read but not fully
# resolved) target. I.e. this is the "content" of the symlink.
sha256: Optional[bytes] = None
latest_version: Optional[FSEntryVersion] = None
versions: List[FSEntryVersion] = field(default_factory=list)
sha256: bytes = b""
store: Optional["Store"] = None
def __post_init__(self) -> None:
if self.versions is not None and len(self.versions) > 0:
self.latest_version = self.versions[-1]
self.unfrozen_perms = self.latest_version.unfrozen_perms
self.symlink_target = self.latest_version.symlink_target
self.sha256 = self.latest_version.sha256
# derive hash from store, parent, and filename only (not children)
m = hashlib.sha256()
upstr: Callable[[str], None] = lambda s: m.update(bytes(s, "utf-8"))
upstr("FSEntry:")
if self.store is not None:
upstr(self.store.uuid)
if self.parent is not None:
m.update(self.parent.sha256)
upstr(self.filename)
self.sha256 = m.digest()
@classmethod
def from_path(
cls: Type[_FSEntryT],
root: PathStr,
store: Optional["Store"] = None,
relpath: Optional[str] = None,
exclude: List[str] = ["nancy.db"],
parent: Optional[_FSEntryT] = None,
@ -184,15 +160,11 @@ class FSEntry:
) -> _FSEntryT:
"""
Scan a path to instantiate (recursive).
Arguments:
root (str or PathLike): The root directory of an existing or new store path
relpath (str or PathLike): Path of some directory under the store
path in which to find files and directories. Only these entries
and their children will be included.
"""
m = hashlib.sha256()
root = "/" if store is None else store.path
if relpath is None: # top-level invocation at root
path = root
else:
@ -236,7 +208,7 @@ class FSEntry:
children = [
cls.from_path(
root=root,
store=store,
relpath=rp,
direntry=e,
)
@ -248,10 +220,14 @@ class FSEntry:
# changes without modifying the hashes of individual files,
# which remain content-based for compatibility with
# other tools
if c.unfrozen_perms is not None:
m.update(bytes(c.unfrozen_perms, "utf-8"))
if c.sha256 is not None:
m.update(c.sha256)
assert (
len(c.versions) > 0
) # must have a version since we derived from files
ver = c.versions[-1]
if ver.perms is not None:
m.update(bytes(ver.perms, "utf-8"))
if ver.sha256 is not None:
m.update(ver.sha256)
elif stat.S_ISREG(s):
filetype = FileType.REG
m.update(open(path, "rb").read())
@ -275,118 +251,117 @@ class FSEntry:
sha256 = m.digest()
ob = cls(
id=None,
store=store,
filename="." if relpath is None else os.path.basename(relpath),
relpath="." if relpath is None else relpath,
parent=parent,
children=children,
filetype=None,
deleted=None,
versions=[],
)
# Update versions after the fact to get self-reference
ob.versions = [
FSEntryVersion(
id=None,
filedir=ob,
recorded_time=datetime.now(),
filetype=filetype,
deleted=False,
unfrozen_perms=stat.filemode(s),
perms=stat.filemode(s),
symlink_target=str(symlink_target),
sha256=sha256,
source_task_id=None,
)
]
# now change children's parents to point to this object
# point versions and children back to ob
for v in ob.versions:
v.filedir = ob
if len(ob.versions) > 0:
last_ver = ob.versions[-1]
ob.filetype = last_ver.filetype
ob.deleted = last_ver.deleted
ob.unfrozen_perms = last_ver.unfrozen_perms
ob.symlink_target = last_ver.symlink_target
ob.sha256 = last_ver.sha256
for c in ob.children:
c.parent = ob
return ob
@classmethod
def empty_root(cls: Type[_FSEntryT]) -> _FSEntryT:
"""Just a standardized value indicating an empty root directory"""
return cls(
id=None,
filename=".",
relpath=".",
parent=None,
children=[],
filetype=FileType.DIR,
unfrozen_perms="----------",
sha256=hashlib.sha256().digest(),
deleted=False,
def persist(
    self,
    cur: sqlite3.Cursor,
    source_task: program.Task,
    parent_key: Optional[str] = None,
) -> None:
    """Record this entry, its latest version and all of its children.

    Arguments:
        cur: Cursor on the store database; the caller owns the transaction.
        source_task: Task recorded as the origin of the written versions.
        parent_key: Hex key of the parent filedir row. When omitted it is
            derived from ``self.parent``, so an entry persisted on its own
            is not mistaken for a root (NULL parent).
    """
    store_key = None if self.store is None else self.store.uuid
    if parent_key is None and self.parent is not None:
        parent_key = self.parent.sha256.hex()

    # Find entries with this name and parent. `IS` is used instead of `=` so
    # a NULL parent (the root entry) can match, and the store is compared to
    # this store's uuid rather than a leftover integer id.
    cur.execute(
        "SELECT sha256 FROM filedir WHERE store IS ? AND name = ? AND parent IS ? LIMIT 1",
        (store_key, self.filename, parent_key),
    )
    if cur.fetchone() is None:
        # create the filedir entry
        # NOTE(review): the key is written as raw bytes while parent keys are
        # hex text -- left as is to match FSEntryVersion.persist; confirm the
        # intended encoding and align both.
        cur.execute(
            "INSERT INTO filedir VALUES (?, ?, ?, ?)",
            (
                self.sha256,
                store_key,
                self.filename,
                parent_key,
            ),
        )

    # NOTE(review): assumes the version and the children are recorded whether
    # or not the filedir row already existed -- confirm against the intended
    # nesting of the original statements.
    if self.versions:
        self.versions[-1].persist(cur=cur, source_task=source_task)

    # descend into children and record all of them anew as well
    for c in self.children:
        c.persist(cur=cur, source_task=source_task, parent_key=self.sha256.hex())
def persist_delete(self, cur: sqlite3.Cursor, source_task: program.Task) -> None:
    """Record the deletion of this entry (not implemented yet).

    Planned behaviour: add a new version with deleted=True to this entry and
    to every child.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError()
# @logger.catch
@classmethod
def from_db_index(
def from_db_key(
cls: Type[_FSEntryT],
cursor: sqlite3.Cursor,
root_id: Optional[int] = None,
store: "Store",
root_key: Optional[str] = None,
root_row: Optional[
Tuple[int, str, bool]
Tuple[str, str, str] # sha256, name, store
] = None, # TODO: Type the expected sqlite rows
parent: Optional[_FSEntryT] = None,
) -> _FSEntryT:
"""Given id of an entry in filedir, recursively fill this object"""
"""Given key of an entry in filedir, recursively fill this object"""
if root_row is None:
assert root_id is not None
assert root_key is not None
cursor.execute(
"SELECT id, name, frozen FROM filedir WHERE id=?",
(root_id,),
"SELECT sha256, name, store FROM filedir WHERE sha256=?",
(root_key,),
)
root_row = cursor.fetchone()
root_id, filename, frozen = root_row
root_key, filename, store_key = root_row
assert store_key == store.uuid
relpath = filename if parent is None else os.path.join(parent.relpath, filename)
# instantiate class before filling children
# instantiate class before filling children and versions
ob = cls(
id=root_id,
filename=filename,
relpath=relpath,
parent=parent,
children=[],
filetype=None,
unfrozen_perms=None,
sha256=None,
deleted=None,
versions=[],
store=store,
)
cursor.execute(
"SELECT id, name, frozen FROM filedir WHERE parent=?",
(root_id,),
"SELECT sha256, name, store FROM filedir WHERE parent=?",
(root_key,),
)
rows = cursor.fetchall()
ob.children = [cls.from_db_index(cursor, root_row=r, parent=ob) for r in rows]
ob.children = [
cls.from_db_key(cursor, root_row=r, parent=ob, store=store) for r in rows
]
# get all versions
cursor.execute(
"SELECT * FROM filedir_version WHERE filedir=? ORDER BY recorded_time",
(root_id,),
(root_key,),
)
matches = cursor.fetchall()
versions = [FSEntryVersion.from_row(row, filedir=ob) for row in matches]
if len(versions) > 0:
last_ver = versions[-1]
ob.filetype = last_ver.filetype
ob.deleted = last_ver.deleted
ob.unfrozen_perms = last_ver.unfrozen_perms
ob.symlink_target = last_ver.symlink_target
ob.sha256 = last_ver.sha256
ob.latest_version = last_ver
ob.versions = [FSEntryVersion.from_row(row, filedir=ob) for row in matches]
return ob
@ -409,19 +384,13 @@ class FSEntry:
childsec = childsep + childsep.join(c for c in childstrs)
# TODO: list versions in str()
# versions: [FSEntryVersion] = []
return "\n".join(
(" " * level) + line
for line in f"""id: {self.id}
for line in f"""sha256: {self.sha256.hex()}
filename: {self.filename}
relpath: {self.relpath}
parent (relpath): {'None' if self.parent is None else self.parent.relpath}
filetype: {self.filetype}
deleted: {self.deleted}
unfrozen_perms: {self.unfrozen_perms}
symlink_target: {self.symlink_target}
sha256: {'None' if self.sha256 is None else self.sha256.hex()}
num versions: {len(self.versions)}
children: {childsec}
""".splitlines()
)
@ -448,12 +417,18 @@ class FSDiff:
@staticmethod
def compare(A: FSEntry, B: FSEntry) -> bool:
return (
A.sha256 == B.sha256
and A.unfrozen_perms == B.unfrozen_perms
and A.filetype == B.filetype
and A.deleted == B.deleted
)
# get latest versions
Alatest = None if len(A.versions) == 0 else A.versions[-1]
Blatest = None if len(B.versions) == 0 else B.versions[-1]
if Alatest is None or Blatest is None:
return False
else:
return (
Alatest.sha256 == Blatest.sha256
and Alatest.perms == Blatest.perms
and Alatest.filetype == Blatest.filetype
and Alatest.deleted == Blatest.deleted
)
def filename(self) -> str:
if self.A is not None:
@ -462,14 +437,6 @@ class FSDiff:
assert self.B is not None
return self.B.filename
def filetype(self) -> Optional[FileType]:
    """Return the file type of side A if present, else of side B, else None."""
    for side in (self.A, self.B):
        if side is not None:
            return side.filetype
    return None
@classmethod
def compute(
cls: Type[_FSDiffT], A: Optional[FSEntry], B: Optional[FSEntry]
@ -528,6 +495,24 @@ class FSDiff:
return cls(A, B, modified_children)
def persist(
    self,
    cur: sqlite3.Cursor,
    source_task: program.Task,
    parent: Optional[_FSDiffT] = None,
) -> None:
    """Record this level of a diff.

    Arguments:
        cur: Cursor on the store database; the caller owns the transaction.
        source_task: Task recorded as the origin of the written versions.
        parent: Diff node one level up. Passed along during recursion but
            not read here.
    """
    if self.A is None:  # new file
        assert self.B is not None
        self.B.persist(cur, source_task)
    elif self.B is None:  # deleted file
        self.A.persist_delete(cur, source_task)
    else:
        # either this node modified, or children are
        # A node present on both sides but no longer equal needs a version of
        # its own; recursing into the children alone would never record a
        # modified leaf, which has no children.
        if self.B.versions and not self.compare(self.A, self.B):
            self.B.versions[-1].persist(cur=cur, source_task=source_task)
        for c in self.modified_children:
            c.persist(cur=cur, source_task=source_task, parent=self)
def flatten_tree(self, level: int = 0) -> List[Tuple[int, "FSDiff"]]:
"""Return list of all entries, with level, in pairs"""
pairs = [(level, self)]

21
src/nancy/io.py Normal file
View File

@ -0,0 +1,21 @@
from .data import Box
from dataclasses import dataclass, InitVar
from pathlib import Path
@dataclass
class FileDir(Box[Path]):
    """A Box holding a Path, tagged with the access mode it is used with.

    Arguments:
        mode: Init-only string; "r" in it sets ``reads`` and "w" in it sets
            ``writes``. Defaults to "rw".
    """

    mode: InitVar[str] = "rw"

    def __post_init__(self, mode: str = "rw") -> None:
        # Chain to Box.__post_init__, which assigns the uuid; overriding it
        # without chaining would leave the uuid as the empty string.
        super().__post_init__()
        self.reads = "r" in mode
        self.writes = "w" in mode
class Dir(FileDir):
    """Marker subclass of FileDir; adds no fields or behaviour of its own.

    Presumably denotes a directory-valued box (inferred from the name only).
    """
class File(FileDir):
    """Marker subclass of FileDir; adds no fields or behaviour of its own.

    Presumably denotes a file-valued box (inferred from the name only).
    """

View File

@ -1,6 +1,5 @@
from dataclasses import dataclass
import hashlib
import json
import platform
import sqlite3
import time

206
src/nancy/program.py Normal file
View File

@ -0,0 +1,206 @@
"""Programs and tasks."""
from loguru import logger
from . import environment
from dataclasses import dataclass
import datetime
import hashlib
import os
import sqlite3
from typing import Any, Callable, List, Optional, Type, TYPE_CHECKING
import uuid
if TYPE_CHECKING: # avoid cyclic imports but enable proper type checking
from .store import Store
@dataclass(slots=True, frozen=True)
class Package:
name: str
version: str
language: str = "Python"
sha256: bytes = b""
def __post_init__(self) -> None:
m = hashlib.sha256()
upstr: Callable[[str], None] = lambda s: m.update(bytes(s, "utf-8"))
upstr("Package:")
upstr(self.name)
upstr(self.version)
upstr(self.language)
object.__setattr__(self, "sha256", m.digest())
@dataclass(slots=True, frozen=True)
class Module:
    """Immutable description of a module, keyed by a derived SHA-256 digest.

    ``sha256`` is always recomputed from the tag "Module:", ``name``, the
    ``code`` text when given, and the owning package's digest when given.
    """

    name: str
    code: Optional[str]
    package: Optional[Package]
    sha256: bytes = b""

    def __post_init__(self) -> None:
        # A missing code text contributes nothing, exactly like an empty one.
        keyed_text = "Module:" + self.name + (self.code or "")
        digest = hashlib.sha256(keyed_text.encode("utf-8"))
        if self.package is not None:
            digest.update(self.package.sha256)
        # The dataclass is frozen, so the field has to be set behind its back.
        object.__setattr__(self, "sha256", digest.digest())
@dataclass(slots=True, frozen=True)
class Function:
    """Immutable description of a callable, keyed by a derived SHA-256 digest.

    ``sha256`` is always recomputed from the tag "Function:", ``name`` and
    the digest of ``module``. ``func``, ``inputs`` and ``outputs`` do not
    take part in the digest.
    """

    name: str
    module: Module
    func: Callable[[Any], Any]
    inputs: List["FunctionInput"]  # not included in hash
    outputs: List["FunctionOutput"]
    sha256: bytes = b""

    def __post_init__(self) -> None:
        digest = hashlib.sha256(("Function:" + self.name).encode("utf-8"))
        digest.update(self.module.sha256)
        # The dataclass is frozen, so the field has to be set behind its back.
        object.__setattr__(self, "sha256", digest.digest())
@dataclass(slots=True, frozen=True)
class FunctionInput:
    """Immutable description of one function parameter, keyed by a SHA-256 digest.

    ``sha256`` is always recomputed from the tag "FunctionInput", ``name``,
    ``description`` and the digest of ``function``. ``required``,
    ``position``, ``argtype`` and ``const`` do not take part in the digest.
    """

    name: str
    description: str
    required: bool
    function: Function
    position: int
    argtype: Type[Any]
    const: bool = False
    sha256: bytes = b""

    def __post_init__(self) -> None:
        keyed_text = "FunctionInput" + self.name + self.description
        digest = hashlib.sha256(keyed_text.encode("utf-8"))
        digest.update(self.function.sha256)
        # The dataclass is frozen, so the field has to be set behind its back.
        object.__setattr__(self, "sha256", digest.digest())
@dataclass(slots=True, frozen=True)
class FunctionOutput:
    """Immutable description of one function output, keyed by a SHA-256 digest.

    ``sha256`` is always recomputed from the tag "FunctionOutput", ``name``
    and the digest of ``function``. ``position`` does not take part in the
    digest.
    """

    name: str
    function: Function
    position: int
    sha256: bytes = b""

    def __post_init__(self) -> None:
        m = hashlib.sha256()
        upstr: Callable[[str], None] = lambda s: m.update(bytes(s, "utf-8"))
        upstr("FunctionOutput")
        upstr(self.name)
        # Mix in the owning function, as FunctionInput does; hashing the name
        # alone gives identically named outputs of different functions the
        # same key.
        m.update(self.function.sha256)
        # The dataclass is frozen, so the field has to be set behind its back.
        object.__setattr__(self, "sha256", m.digest())
@dataclass(slots=True)
class Task:
    """One task of a program run, identified by a UUID string.

    A fresh UUID4 is generated only when no ``uuid`` was supplied, so a
    caller-provided identity is kept.
    """

    program: "Program"
    uuid: str = ""
    py_function: Optional[Function] = None

    def __post_init__(self) -> None:
        # Inside a method, `uuid` resolves to the imported module, not to the
        # field of the same name declared above.
        if self.uuid == "":
            self.uuid = str(uuid.uuid4())

    def insert(self, cur: sqlite3.Cursor) -> None:
        """Insert this task into the task table.

        The row holds the task uuid, the owning program's uuid and the hex
        digest of the associated function (NULL when there is none).
        """
        func_id = None if self.py_function is None else self.py_function.sha256.hex()
        cur.execute(
            "INSERT INTO task VALUES (?, ?, ?)",
            (self.uuid, self.program.uuid, func_id),
        )
@dataclass(slots=True)
class Program:
    """Context manager that records one run of a named program in a store.

    Entering inserts the detected environment (via ``maybe_insert``) and a
    program row; leaving writes the start and end times to that row and logs
    the elapsed time. An instance can be entered only once.
    """

    store: "Store"
    name: str
    message: str
    start_time: Optional[datetime.datetime] = None
    evaluated: bool = False
    uuid: str = ""

    def __post_init__(self) -> None:
        # Keep a caller-supplied uuid; only generate one when none was given.
        if self.uuid == "":
            self.uuid = str(uuid.uuid4())

    def __enter__(self) -> "Program":
        """Insert the program row and start timing.

        Raises:
            RuntimeError: if this instance has already completed a run.
        """
        if self.evaluated:
            raise RuntimeError("Cannot re-enter a Program context")
        env = environment.Environment.detect()
        with self.store.committing() as cur:
            env.maybe_insert(cur)
            cur.execute(
                "INSERT INTO program VALUES (?, ?, ?, ?, ?, ?, ?)",
                (
                    self.uuid,  # uuid
                    self.name,  # name TEXT,
                    # name of the program, usually written lowercase by calling
                    # code e.g. cnn_crossval
                    # -- we use POSIX timestamps for time recording.
                    # -- e.g. datetime.datetime.now().timestamp()
                    None,  # start_time REAL, filled in by __exit__
                    None,  # end_time REAL, filled in by __exit__
                    os.getpid(),  # process_id INTEGER, -- host PID of python process on host OS
                    env.sha256.hex(),  # environment, hex digest of the environment row
                    self.message,  # user-defined message to help distinguish similar runs
                ),
            )
        self.start_time = datetime.datetime.now()
        return self

    def new_task(
        self,
        name: str,
        cur: sqlite3.Cursor,
        py_function: Optional[Function] = None,
    ) -> Task:
        """Create a new task for this program, insert it and return it.

        Arguments:
            name: Currently unused; the inserted task row has no name value.
            cur: Cursor used for the insert; the caller owns the transaction.
            py_function: Function the task evaluates, if any.
        """
        t = Task(program=self, py_function=py_function)
        t.insert(cur=cur)
        return t

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc: Optional[BaseException],
        traceback: Optional[Any],
    ) -> None:
        """Store start/end times for this run and mark it as evaluated.

        Returns None, so an exception raised inside the ``with`` body is not
        suppressed.
        """
        end_time = datetime.datetime.now()
        # record start and end times in store
        assert self.store.conn is not None
        assert self.start_time is not None  # set by __enter__
        with self.store.committing() as cur:
            cur.execute(
                """
                UPDATE
                    program
                SET
                    start_time = ?,
                    end_time = ?
                WHERE
                    uuid = ?
                """,
                # POSIX timestamps, as the column comments in __enter__ state;
                # binding datetime objects would store text via the default
                # sqlite3 adapter instead.
                (self.start_time.timestamp(), end_time.timestamp(), self.uuid),
            )
        self.evaluated = True  # prevent re-running
        elapsed = (end_time - self.start_time).total_seconds()
        logger.success(
            f"Program [{self.uuid}] {self.name} "
            f"(message:{self.message}) ran in {elapsed} seconds."
        )

View File

@ -87,7 +87,7 @@ CREATE TABLE store (
-- imported and live outside the current store.
-- We do not support renaming files.
CREATE TABLE filedir (
uuid TEXT PRIMARY KEY NOT NULL,
sha256 TEXT PRIMARY KEY NOT NULL,
store TEXT NOT NULL,
name TEXT, -- only a filename, not a path
parent TEXT REFERENCES filedir ON UPDATE CASCADE,
@ -96,7 +96,7 @@ CREATE TABLE filedir (
-- Detect cross-store references
CREATE TRIGGER insert_filedir BEFORE INSERT ON filedir
BEGIN SELECT CASE
WHEN NEW.parent IS NOT NULL AND NEW.store != (SELECT store FROM filedir WHERE uuid = NEW.parent)
WHEN NEW.parent IS NOT NULL AND NEW.store != (SELECT store FROM filedir WHERE sha256 = NEW.parent)
THEN RAISE (ABORT, 'Parent resides in different store')
END; END;
CREATE TRIGGER update_filedir BEFORE UPDATE ON filedir
@ -123,8 +123,7 @@ CREATE TABLE filedir_version (
filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
deleted BOOL NOT NULL, -- set True when recording a deleted file
-- We record the permissions on each file, in a way that enables reloading
-- permissions properly when thawing after a freeze operation.
-- We record the permissions on each file to enable fixing if needed
perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target. i.e. this is the "content" of the symlink.
@ -159,7 +158,7 @@ END; END;
CREATE TABLE environment (
sha256 TEXT PRIMARY KEY NOT NULL,
envvars_json TEXT, -- json.dumps(dict(os.environ))
envvars_json TEXT, -- json.dumps(dict(os.environ), sort_keys=True)
python_implementation TEXT, -- platform.python_implementation(): 'cpython'
python_strversion TEXT, -- sys.version: '3.9.7 (default, Sep 16 2021, 13:09:58) \n[GCC 7.5.0]'
python_hexversion INTEGER, -- sys.hexversion: 50923504

View File

@ -2,7 +2,7 @@
from loguru import logger
from . import db, environment, fs
from . import db, environment, fs, program
from contextlib import contextmanager
from dataclasses import dataclass
@ -14,96 +14,6 @@ from typing import Any, Iterator, Optional, TypeVar, Type, Union
import uuid
import warnings
@dataclass(slots=True)
class Program:
    """Context manager that records one run of a named program in a store.

    Entering inserts the detected environment (via ``maybe_insert``) and a
    program row; leaving writes the start and end times to that row and logs
    the elapsed time. An instance can be entered only once.
    """

    store: "Store"
    name: str
    message: str
    id: Optional[int] = None  # rowid of the program row, set by __enter__
    start_time: Optional[datetime.datetime] = None
    evaluated: bool = False
    uuid: str = ""

    def __post_init__(self) -> None:
        # Keep a caller-supplied uuid; only generate one when none was given.
        if self.uuid == "":
            self.uuid = str(uuid.uuid4())

    def __enter__(self) -> "Program":
        """Insert the program row and start timing.

        Raises:
            RuntimeError: if this instance has already completed a run.
        """
        if self.evaluated:
            raise RuntimeError("Cannot re-enter a Program context")
        env = environment.Environment.detect()
        with self.store.committing() as cur:
            env.maybe_insert(cur)
            cur.execute(
                "INSERT INTO program VALUES (?, ?, ?, ?, ?, ?, ?)",
                (
                    self.uuid,  # uuid
                    self.name,  # name TEXT,
                    # name of the program, usually written lowercase by calling
                    # code e.g. cnn_crossval
                    # -- we use POSIX timestamps for time recording.
                    # -- e.g. datetime.datetime.now().timestamp()
                    None,  # start_time REAL, filled in by __exit__
                    None,  # end_time REAL, filled in by __exit__
                    os.getpid(),  # process_id INTEGER, -- host PID of python process on host OS
                    env.sha256.hex(),  # environment, hex digest of the environment row
                    self.message,  # user-defined message to help distinguish similar runs
                ),
            )
            self.id = cur.lastrowid
        self.start_time = datetime.datetime.now()
        return self

    def new_task(
        self,
        name: str,
        cur: sqlite3.Cursor,
        py_function_id: Optional[int] = None,
    ) -> int:
        """Create a new task and return its id.

        Arguments:
            name: Currently unused; the inserted task row has no name value.
            cur: Cursor used for the insert; the caller owns the transaction.
            py_function_id: Id of the function the task evaluates, if any.
        """
        cur.execute(
            "INSERT INTO task VALUES (?, ?, ?)",
            (None, self.id, py_function_id),
        )
        taskid = cur.lastrowid
        assert isinstance(taskid, int)
        return taskid

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc: Optional[BaseException],
        traceback: Optional[Any],
    ) -> None:
        """Store start/end times for this run and mark it as evaluated.

        Returns None, so an exception raised inside the ``with`` body is not
        suppressed.
        """
        end_time = datetime.datetime.now()
        # record start and end times in store
        assert self.store.conn is not None
        assert self.start_time is not None  # set by __enter__
        with self.store.committing() as cur:
            cur.execute(
                """
                UPDATE
                    program
                SET
                    start_time = ?,
                    end_time = ?
                WHERE
                    uuid = ?
                """,
                # POSIX timestamps, as the column comments in __enter__ state;
                # binding datetime objects would store text via the default
                # sqlite3 adapter instead.
                (self.start_time.timestamp(), end_time.timestamp(), self.uuid),
            )
        self.evaluated = True  # prevent re-running
        elapsed = (end_time - self.start_time).total_seconds()
        logger.success(
            f"Program [{self.id}] {self.name} "
            f"(message:{self.message}) ran in {elapsed} seconds."
        )
# see https://stackoverflow.com/questions/44640479/type-annotation-for-classmethod-returning-instance
_StoreT = TypeVar("_StoreT", bound="Store")
@ -111,34 +21,25 @@ _StoreT = TypeVar("_StoreT", bound="Store")
class Store:
"""Describes a data directory, holds active connection to nancy.db"""
path: Optional[fs.PathStr]
path: fs.PathStr
db_path: fs.PathStr
conn: Optional[sqlite3.Connection]
uuid: str
def __init__(
self,
directory: Optional[fs.PathStr] = None,
directory: fs.PathStr,
conn: Optional[sqlite3.Connection] = None,
):
"""
Arguments:
directory (str): Location of existing store directory. If omitted
or None, initialize a store in memory, with no associated
directory.
"""
if directory is None:
self.path = None
self.db_path = ":memory:"
else:
self.path = Path(directory)
self.db_path = self.path / "nancy.db"
self.path = Path(directory)
self.db_path = self.path / "nancy.db"
if conn is None:
self.connect()
else:
self.conn = conn
self.store_uuid = self.find_store_uuid()
self.uuid = self.find_store_uuid()
def copy(self: _StoreT, store_path: fs.PathStr) -> _StoreT:
"""Copy this store to a new store path"""
@ -162,7 +63,7 @@ class Store:
yield cur
self.conn.commit()
def find_store_uuid(self, cur: Optional[sqlite3.Cursor] = None) -> Optional[str]:
def find_store_uuid(self, cur: Optional[sqlite3.Cursor] = None) -> str:
assert self.conn is not None
if cur is None:
cur = self.conn.cursor()
@ -170,25 +71,22 @@ class Store:
'SELECT value FROM local_metadata WHERE key == "store_uuid" LIMIT 1'
)
res = cur.fetchone()
return None if res is None else res[0]
assert res is not None
(self.uuid,) = res
return self.uuid
@classmethod
def init(
cls: Type[_StoreT], message: str, directory: Optional[fs.PathStr] = None
) -> _StoreT:
def init(cls: Type[_StoreT], message: str, directory: fs.PathStr) -> _StoreT:
start_time = datetime.datetime.now()
if directory is None: # initialize an in-memory store
db_path = ":memory:"
else:
if not os.path.isdir(directory):
raise FileNotFoundError(
f"Directory {directory} must exist before initializing a store there.",
)
db_path = os.path.join(directory, "nancy.db")
if os.path.isfile(db_path):
raise FileExistsError(
f"File {db_path} exists. Refusing to re-initialize",
)
if not os.path.isdir(directory):
raise FileNotFoundError(
f"Directory {directory} must exist before initializing a store there.",
)
db_path = os.path.join(directory, "nancy.db")
if os.path.isfile(db_path):
raise FileExistsError(
f"File {db_path} exists. Refusing to re-initialize",
)
# initialize a database in the target directory
conn = sqlite3.connect(db_path, isolation_level="DEFERRED")
cur = conn.cursor()
@ -196,36 +94,35 @@ class Store:
conn.commit()
cur.close()
new_store = cls(directory, conn)
new_store = cls(directory=directory, conn=conn)
with new_store.program("INIT", message) as p:
with new_store.new_program("INIT", message) as p:
# set the timing to the actual times it took to initialize the db
p.start_time = start_time
# generate a new UUID for this store
assert new_store.store_uuid is None
new_store.store_uuid = str(uuid.uuid4())
assert new_store.uuid is None
new_store.uuid = str(uuid.uuid4())
with new_store.committing() as cur:
cur.execute(
'INSERT INTO local_metadata VALUES ("store_uuid", ?)',
(new_store.store_uuid,),
(new_store.uuid,),
)
return new_store
def make_readonly(self) -> None:
    """Make everything under the store directory read-only, except nancy.db.

    Nothing is returned.
    """
    root = str(self.path)
    # The exclusion is written as the walked path (root joined with the file
    # name); a bare "./nancy.db" only matches when the store path is ".".
    fs.make_readonly_recursive(root, excluded=[os.path.join(root, "nancy.db")])
def filedir_root_index(self, cur: Optional[sqlite3.Cursor] = None) -> Optional[int]:
"""Get the database id for the table entry in this store having name '.'"""
def filedir_root_key(self, cur: Optional[sqlite3.Cursor] = None) -> Optional[str]:
"""Get the database key for the table entry in this store having name '.'."""
if cur is None:
assert self.conn is not None
cur = self.conn.cursor()
cur.execute("SELECT id FROM filedir WHERE store=1 AND parent is NULL")
(root_id,) = cur.fetchone()
assert isinstance(root_id, int)
return root_id
cur.execute("SELECT uuid FROM filedir WHERE store=1 AND parent is NULL")
row = cur.fetchone()
if row is None:
return None
(root_key,) = row
assert isinstance(root_key, str)
return root_key
def path_to_fsentry(self, path: fs.PathStr) -> Optional[fs.FSEntry]:
"""Find a path in the filedir database and return it as an fsentry.
@ -242,156 +139,72 @@ class Store:
)
# rel tells us how to descend recurively to find the filedir for path
fd_id = self.filedir_root_index(cur)
if fd_id is None:
fd_key = self.filedir_root_key(cur)
if fd_key is None:
# Root isn't even inserted into the db yet
return None
for p in Path(rel).parts: # Path.parts splits a path reliably
# get child with that name
cur.execute(
"SELECT id, filetype FROM filedir WHERE filename=? AND parent=? LIMIT 1",
(p, fd_id),
"SELECT uuid, filetype FROM filedir WHERE filename=? AND parent=? LIMIT 1",
(p, fd_key),
)
row = cur.fetchone()
if row is None:
return None
fd_id, filetype = row
fd_key, filetype = row
return fs.FSEntry.from_db_index(cur, root_id=fd_id)
return fs.FSEntry.from_db_key(cur, root_key=fd_key, store=self)
def fs_entries(self, shallow: bool = False) -> Optional[fs.FSEntry]:
    """Return recursive structure containing FSEntry objects from db.

    Args:
        shallow: Currently unused by this method.

    Returns:
        The entry built by ``fs.FSEntry.from_db_key`` starting at the root
        key, or ``None`` when ``filedir_root_key()`` finds no root.
    """
    root_key = self.filedir_root_key()
    if root_key is None:
        return None
    assert self.conn is not None
    return fs.FSEntry.from_db_key(
        self.conn.cursor(), root_key=root_key, store=self
    )
def new_program(self, name: str, message: str) -> program.Program:
    """Create a ``program.Program`` bound to this store.

    Args:
        name: Program name, passed through to the constructor.
        message: Message passed through to the constructor.

    Returns:
        The new ``program.Program``; other methods here use it in a
        ``with`` statement.
    """
    p = program.Program(self, name, message)
    return p
def diff(self) -> fs.FSDiff:
    """
    Find changes to files and dirs compared to their recorded versions.

    Returns:
        The result of ``fs.FSDiff.compute(recorded, current)``, where
        ``recorded`` comes from ``fs_entries`` and ``current`` from
        ``fs.FSEntry.from_path``.
    """
    # get info about files currently at the given locations
    current = fs.FSEntry.from_path(store=self)
    # then find a listing covering all the expected paths
    recorded = self.fs_entries(shallow=True)
    return fs.FSDiff.compute(recorded, current)
def _record_file_version(
self,
cur: sqlite3.Cursor,
ob: fs.FSEntry,
filedir_id: int,
source_task: Optional[int] = None,
) -> int:
cur.execute(
"INSERT INTO filedir_version VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
(
None,
filedir_id,
datetime.datetime.now().timestamp(),
ob.filetype,
False,
ob.unfrozen_perms,
ob.symlink_target,
None if ob.sha256 is None else ob.sha256.hex(),
source_task,
),
)
assert isinstance(cur.lastrowid, int)
return cur.lastrowid
def _record_new_file_recursive(
self,
ob: fs.FSEntry,
cur: sqlite3.Cursor,
parent_id: Optional[int],
source_task: Optional[int],
) -> None:
# Find entries with this name and parent
cur.execute(
"SELECT id FROM filedir WHERE store = 1 AND name = ? AND parent = ? LIMIT 1",
(ob.filename, None if ob.parent is None else ob.parent.id),
)
res = cur.fetchall()
if len(res) == 0:
# create filedir entry and get its id
cur.execute(
"INSERT INTO filedir VALUES (?, ?, ?, ?, ?)",
(
None,
1,
ob.filename,
parent_id,
False,
),
)
thisid = cur.lastrowid
else:
(thisid,) = res[0]
assert isinstance(thisid, int)
self._record_file_version(cur, ob, thisid, source_task=source_task)
# descend into children and record all of them anew as well
for c in ob.children:
self._record_new_file_recursive(c, cur, thisid, source_task)
def _record_recursive(
self,
diff: fs.FSDiff,
cur: sqlite3.Cursor,
parent_id: Optional[int] = None,
source_task: Optional[int] = None,
) -> None:
"""Record this level of a diff."""
if diff.A is None:
assert diff.B is not None
self._record_new_file_recursive(
diff.B, cur, parent_id, source_task=source_task
)
elif diff.B is None:
# self._record_deleted_file_recursive(diff.B, cur, parent_id)
pass
else:
# possibly modified, record new version then recurse into children
self._record_new_file_recursive(
diff.B, cur, parent_id, source_task=source_task
)
assert diff.A.id is not None
self._record_file_version(cur, diff.B, diff.A.id, source_task=source_task)
# descend into children
def record(
    self,
    diff: fs.FSDiff,
    message: str,
    parent_id: Optional[str] = None,
    cur: Optional[sqlite3.Cursor] = None,
) -> None:
    """Persist ``diff`` to the database as a single "RECORD" program.

    Args:
        diff: The diff to persist; its ``persist`` method does the writing.
        message: Message attached to the "RECORD" program.
        parent_id: Not used by this method.
        cur: Optional cursor; see the review note below.
    """
    if cur is None:
        assert self.conn is not None
        cur = self.conn.cursor()
    # NOTE(review): ``cur`` is rebound by ``committing()`` below, so neither a
    # caller-supplied cursor nor the one opened above is used for the writes.
    # Confirm whether that is intended.
    with self.new_program("RECORD", message) as p:
        with self.committing() as cur:  # entire record operation is one transaction
            # create a task for this operation
            task = p.new_task(name="Store._record_recursive", cur=cur)

            # persist the diff, attributing every write to that task
            diff.persist(cur=cur, source_task=task)
def find_store(path: Union[str, "os.PathLike[str]"]) -> Optional[str]:

0
tests/__init__.py Normal file
View File

38
tests/cli/test_record.py Normal file
View File

@ -0,0 +1,38 @@
from click.testing import CliRunner
from nancy.cli import main
import pytest
import os
from pathlib import Path
import tempfile
from typing import Iterator
@pytest.fixture
def junk_dir() -> Iterator[Path]:
    """Create a temp directory with a few files.

    Yields the directory root, which holds ``empty.txt`` (empty),
    ``full.txt``, ``foo.txt`` and an empty subdirectory ``d``. The directory
    is removed when the fixture is torn down.
    """
    with tempfile.TemporaryDirectory(prefix="nancy_junkdir") as d:
        root = Path(d)
        # Path helpers close their file handles; a bare open(...).write(...)
        # leaves that to the garbage collector.
        (root / "empty.txt").touch()
        (root / "full.txt").write_text("something")
        os.mkdir(root / "d")
        (root / "foo.txt").write_text("bar")
        yield root
def test_record(junk_dir: Path) -> None:
    """Invoke the ``record`` command on a populated directory and expect success."""
    cli_args = [
        "record",
        "-s",
        str(junk_dir),
        "-m",
        "This is just a test recording",
    ]
    # "y" is fed on stdin for any prompt the command issues
    outcome = CliRunner().invoke(main, cli_args, input="y\n")
    print(outcome.output)
    assert outcome.exit_code == 0
    assert "ERROR" not in outcome.output

View File

@ -11,7 +11,7 @@ from typing import Iterator
@pytest.fixture
def bare_dir() -> Iterator[Path]:
    """Create an empty temp directory"""
    with tempfile.TemporaryDirectory(prefix="nancy_testdir") as d:
        yield Path(d)
@ -33,8 +33,8 @@ def test_record_untracked_dir(filled_dir: Path) -> None:
@pytest.fixture
def empty_store(bare_dir: Path) -> Iterator[store.Store]:
    """Yield a store freshly initialized in the ``bare_dir`` temp directory."""
    s = store.Store.init(directory=bare_dir, message="test init")
    yield s