Change to using UUID and SHA256 as DB keys

This is important for distributed settings and will make merging
databases much simpler. This change is fairly extensive: among other
things, it moves some fields between tables and introduces the
`local_metadata` table, a key-value store that holds the UUID of the
"local" store, i.e. the one corresponding to the directory holding that
particular nancy.db.
This commit is contained in:
Jacob Hinkle 2022-10-10 15:14:47 -04:00
parent c4648ec042
commit 07ccef601c
13 changed files with 425 additions and 468 deletions

View File

@ -5,7 +5,7 @@ description = "Composable tracking of scientific data provenance"
authors = ["Jacob Hinkle <jacob.hinkle@jhink.org>"]
[tool.poetry.dependencies]
python = "^3.8"
python = "^3.10"
click = "^8.1.3"
colorama = "^0.4.5"
loguru = "^0.6.0"
@ -28,7 +28,7 @@ build-backend = "poetry.core.masonry.api"
[tool.tox]
legacy_tox_ini = """
[tox]
envlist = py38,py39,py310,mypy
envlist = mypy,py310
isolated_build = true
[testenv]
@ -40,7 +40,10 @@ commands =
pytest --cov src/nancy
[testenv:mypy]
deps = mypy
deps =
mypy
pytest
commands =
mypy --strict -p nancy
mypy --strict --ignore-missing-imports --no-incremental -p nancy
mypy --strict --no-incremental tests/
"""

View File

@ -48,13 +48,16 @@ def print_diff(
dirstr = (
(filetypecolors["DIR"] + dname + "/" + reset) if dname != "" else ""
)
fname = filetypecolors.get(entry.filetype, "") + fname + reset
assert entry.filetype is not None
fname = filetypecolors.get(str(entry.filetype), "") + fname + reset
if entry.filetype == "LNK": # append symlink target
if entry.filetype == fs.FileType.LNK: # append symlink target
assert entry.symlink_target is not None
fname += " -> " + entry.symlink_target
relpath = dirstr + fname
assert entry.sha256 is not None
hashchange = (
(hashcolor + entry.sha256.hex() + reset + " " + changetags[tag])
if show_hashes

View File

@ -3,11 +3,12 @@ import click
from .. import store
import os
from typing import List
@click.command()
@click.argument("directory")
def freeze(directory):
def freeze(directory: str) -> None:
"""
Initialize tracking in a directory or freeze a tracked directory.
@ -25,12 +26,12 @@ def freeze(directory):
else: # this is an existing store
s = store.Store(directory)
s.freeze()
# s.freeze()
@click.command()
@click.argument("files", nargs=-1) # , help="Files or directories to thaw.")
def thaw(files):
def thaw(files: List[str]) -> None:
"""
Enable manual alteration of files within a tracked directory.

View File

@ -32,7 +32,7 @@ def record(
if not os.path.exists(os.path.join(store_path, "nancy.db")):
# this is a new store
logger.info(f"Initializing new store in {store_path}...")
s = store.Store.init(store_path)
s = store.Store.init(message=message, directory=store_path)
else: # this is an existing store
s = store.Store(store_path)
@ -78,7 +78,7 @@ def record(
"of given paths. If given the path to a non-store directory, a new "
"store is initialized there.",
)
def record_cli(show_hashes, no_color, message, store):
def record_cli(show_hashes: bool, no_color: bool, message: str, store: str) -> None:
"""
Initialize tracking or record changes to a tracked directory.
"""

View File

@ -1,83 +1,79 @@
from . import user
from dataclasses import dataclass
import hashlib
import json
import os
import platform
import sqlite3
import sys
from typing import NamedTuple, Optional, TypeVar, Type
import time
from typing import Callable, Optional, TypeVar, Type
# see https://stackoverflow.com/questions/44640479/type-annotation-for-classmethod-returning-instance
_EnvironmentT = TypeVar("_EnvironmentT", bound="Environment")
class Environment(NamedTuple):
id: Optional[int]
@dataclass(frozen=True, slots=True)
class Environment:
envvars_json: str
python_implementation: str
python_strversion: str
python_hexversion: int
user: user.User
timezone: str
release: str
freedesktop_os_release: str
win32_ver: str
mac_ver: str
sha256: bytes = b""
@classmethod
def find_or_insert(
cls: Type[_EnvironmentT],
cur: sqlite3.Cursor,
env: Optional[_EnvironmentT] = None,
) -> _EnvironmentT:
"""Given a DB cursor, find or create row in environment table and fill"""
if env is None:
env = cls.detect()
def __post_init__(self) -> None:
"""Derive sha256 from the other attributes.

The digest covers, in this order: envvars_json, python_implementation,
python_strversion, python_hexversion, the raw sha256 bytes of ``user``,
timezone, release, freedesktop_os_release, win32_ver and mac_ver.
"""
m = hashlib.sha256()
# upstr feeds a str into the hash as its UTF-8 encoding.
upstr: Callable[[str], None] = lambda s: m.update(bytes(s, "utf-8"))
# upint feeds an int into the hash as big-endian bytes, using the fewest
# bytes that hold its bit length (so 0 contributes no bytes at all).
upint: Callable[[int], None] = lambda n: m.update(
n.to_bytes((n.bit_length() + 7) // 8, byteorder="big"),
)
upstr(self.envvars_json)
upstr(self.python_implementation)
upstr(self.python_strversion)
upint(self.python_hexversion)
# The user contributes through its own digest rather than its fields.
m.update(self.user.sha256)
upstr(self.timezone)
upstr(self.release)
upstr(self.freedesktop_os_release)
upstr(self.win32_ver)
upstr(self.mac_ver)
# circumvent freezing to set sha256
object.__setattr__(self, "sha256", m.digest())
u = user.User.find_or_insert(cur)
def maybe_insert(self, cur: sqlite3.Cursor) -> str:
"""Insert if not exists, without committing."""
self.user.maybe_insert(cur)
env = env._replace(user=u)
key = self.sha256.hex()
# insert or ignore, handle each case to set id
cur.execute(
"""
SELECT
id
FROM
environment
WHERE
envvars_json = ? AND
python_implementation = ? AND
python_strversion = ? AND
python_hexversion = ? AND
user = ?
LIMIT 1
INSERT OR IGNORE INTO environment VALUES (?,?,?,?,?,?,?,?,?,?,?);
""",
(
env.envvars_json,
env.python_implementation,
env.python_strversion,
env.python_hexversion,
env.user.id,
key,
self.envvars_json,
self.python_implementation,
self.python_strversion,
self.python_hexversion,
self.user.sha256.hex(),
self.timezone,
self.release,
self.freedesktop_os_release,
self.win32_ver,
self.mac_ver,
),
)
res = cur.fetchone()
if res is None:
cur.execute(
"""
INSERT INTO environment VALUES (?,?,?,?,?,?);
""",
(
env.id,
env.envvars_json,
env.python_implementation,
env.python_strversion,
env.python_hexversion,
env.user.id,
),
)
id = cur.lastrowid
cur.connection.commit()
else:
id = res[0]
return env._replace(id=id)
return key
@classmethod
def detect(cls: Type[_EnvironmentT]) -> _EnvironmentT:
@ -87,11 +83,22 @@ class Environment(NamedTuple):
"""
u = user.User.detect()
fdor = ""
try:
fdor = json.dumps(platform.freedesktop_os_release())
except AttributeError:
# freedesktop_os_release only available for python >= 3.10
fdor = ""
return cls(
None,
json.dumps(dict(os.environ)),
platform.python_implementation(),
sys.version,
sys.hexversion,
u,
timezone=time.tzname[time.daylight],
release=platform.release(),
freedesktop_os_release=fdor,
win32_ver=json.dumps(platform.win32_ver()),
mac_ver=json.dumps(platform.mac_ver()),
)

View File

@ -203,7 +203,7 @@ class FSEntry:
)
s = filestat.st_mode
children = []
children: List[FSEntry] = []
symlink_target: Optional[Union[str, bytes]] = None
if os.path.islink(path):
# Check links first, since it is not exclusive with dir or file checks
@ -518,7 +518,7 @@ class FSDiff:
allnames = set(list(Alist.keys()) + list(Blist.keys()))
modified_children = [
modified_children: List[FSDiff] = [
cls.compute(
Alist.get(n, None),
Blist.get(n, None),

View File

@ -1,104 +1,83 @@
from typing import NamedTuple, Optional, Type, TypeVar
from dataclasses import dataclass
import hashlib
import json
import platform
import sqlite3
import time
from typing import Callable, NamedTuple, Optional, Type, TypeVar
# see https://stackoverflow.com/questions/44640479/type-annotation-for-classmethod-returning-instance
_MachineT = TypeVar("_MachineT", bound="Machine")
class Machine(NamedTuple):
id: Optional[int]
@dataclass(frozen=True, slots=True)
class Machine:
machine_id: Optional[str]
hostname: str
processor: str
system: str
release: str
cpu_type: str
timezone: str
freedesktop_os_release: str
win32_ver: str
mac_ver: str
processor: str
sha256: bytes = b""
@classmethod
def find_or_insert(
cls: Type[_MachineT], cur: sqlite3.Cursor, machine: Optional[_MachineT] = None
) -> _MachineT:
"""Given a DB cursor, find or create row in machine table and fill"""
if machine is None:
machine = cls.detect()
def __post_init__(self) -> None:
"""Derive sha256 from other attributes.

The digest covers machine_id (skipped when it is None), hostname,
processor, system and cpu_type, in that order, each UTF-8 encoded.
"""
m = hashlib.sha256()
# upstr feeds a str into the hash as its UTF-8 encoding.
upstr: Callable[[str], None] = lambda s: m.update(bytes(s, "utf-8"))
# machine_id is optional; when absent it contributes nothing to the digest.
if self.machine_id is not None:
upstr(self.machine_id)
upstr(self.hostname)
upstr(self.processor)
upstr(self.system)
upstr(self.cpu_type)
# circumvent freezing to set sha256
object.__setattr__(self, "sha256", m.digest())
def maybe_insert(self, cur: sqlite3.Cursor) -> str:
"""Insert if not exists, without committing."""
key = self.sha256.hex()
# insert or ignore, handle each case to set id
cur.execute(
"""
SELECT
id
FROM
machine
WHERE
machine_id = ? AND
hostname = ? AND
processor = ? AND
system = ? AND
release = ? AND
cpu_type = ? AND
timezone = ? AND
freedesktop_os_release = ? AND
win32_ver = ? AND
mac_ver = ?
LIMIT 1
INSERT OR IGNORE INTO machine VALUES (?,?,?,?,?,?);
""",
machine[1:],
(
key,
self.machine_id,
self.hostname,
self.system,
self.cpu_type,
self.processor,
),
)
res = cur.fetchone()
if res is None:
cur.execute(
"""
INSERT INTO machine VALUES (?,?,?,?,?,?,?,?,?,?,?);
""",
machine,
)
id = cur.lastrowid
cur.connection.commit()
else:
id = res[0]
return machine._replace(id=id)
return key
@classmethod
def detect(cls: Type[_MachineT]) -> _MachineT:
"""Formats machine-specific information into a MachineInfo object.
"""Format machine-specific information into a MachineInfo object.
Note that 'MachineInfo' objects are properly formatted to be inserted into
the `machine` table.
Raises:
NotImplementedError: if system != "Linux"
"""
fdor = ""
try:
fdor = json.dumps(platform.freedesktop_os_release())
except AttributeError:
# freedesktop_os_release only available for python >= 3.10
fdor = ""
system = platform.system()
mid = None
if system == "Linux":
if system.lower() == "linux":
try:
mid = open("/etc/machine-id", "r").read()
except FileNotFoundError:
pass
else:
raise NotImplementedError()
return cls(
None,
machine_id=mid,
hostname=platform.node(),
processor=platform.processor(),
system=system,
release=platform.release(),
cpu_type=platform.machine(),
timezone=time.tzname[time.daylight],
freedesktop_os_release=fdor,
win32_ver=json.dumps(platform.win32_ver()),
mac_ver=json.dumps(platform.mac_ver()),
processor=platform.processor(),
)

View File

@ -8,6 +8,15 @@ PRAGMA foreign_keys=ON;
-- facilitate easy importing of one database into another
-- Simple key/value table describing _this_ store (not imported ones).
-- In particular, the key "store_uuid" should be the UUID of the store in this
-- directory.
CREATE TABLE local_metadata(
key TEXT PRIMARY KEY NOT NULL,
value TEXT
);
-- This lets us enable or disable triggers. For example, when importing we need
-- to update some tables to account for changing primary keys using ON UPDATE
-- CASCADE. However, some dependent tables like filedir may have a trigger that
@ -18,58 +27,38 @@ PRAGMA foreign_keys=ON;
-- Note that entries in this table only need to be inserted when we know we have
-- a need to disable the trigger. Otherwise, this table can be ignored.
CREATE TABLE triggers(
name TEXT PRIMARY KEY,
name TEXT PRIMARY KEY NOT NULL,
enabled BOOL
);
-- We track host machines, so that we can give better information about file
-- locations. This also allows us to see whether platform-dependent behavior may
-- come into play. Note that these are not reliable identifiers for purposes of
-- tracking down data, but may be helpful context.
CREATE TABLE machine(id INTEGER PRIMARY KEY NOT NULL,
-- come into play.
-- A row of this table should describe the aspects of a node that are unlikely
-- to change during regular software updates. For example, the hostname and CPU
-- type are included but not the kernel version. Software that changes due to
-- updates should be included in the "environment" table instead.
CREATE TABLE machine(
sha256 TEXT PRIMARY KEY NOT NULL,
machine_id TEXT, -- platform-dependent unique hardware id
-- Linux: open('/etc/machine-id', 'r').read() (assumes systemd)
-- OSX: `ioreg -rd1 -c IOPlatformExpertDevice | grep IOPlatformUUID`
-- OSX: `ioreg -rd1 -c IOPlatformExpertDevice | grep IOPlatformUUID | awk '{print $3}' | tr -d \"`
-- Windows: `reg query HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Cryptography /v MachineGuid`
hostname TEXT, -- platform.node(): 'lucky'
processor TEXT, -- platform.processor():
system TEXT, -- platform.system(): 'Linux'
release TEXT, -- platform.release(): '5.15.64'
cpu_type TEXT, -- platform.machine(): 'x86_64'
timezone TEXT, -- timezone, for interpreting event times
freedesktop_os_release TEXT, -- requires python 3.10
-- platform.freedesktop_os_release() as JSON
-- "{'NAME': 'NixOS', 'ID': 'nixos',
-- 'PRETTY_NAME': 'NixOS 22.05 (Quokka)', 'BUG_REPORT_URL':
-- 'https://github.com/NixOS/nixpkgs/issues', 'BUILD_ID':
-- '22.05.20220902.67e4507', 'DOCUMENTATION_URL':
-- 'https://nixos.org/learn.html', 'HOME_URL': 'https://nixos.org/', 'LOGO':
-- 'nix-snowflake', 'SUPPORT_URL': 'https://nixos.org/community.html',
-- 'VERSION': '22.05 (Quokka)', 'VERSION_CODENAME': 'quokka', 'VERSION_ID':
-- '22.05'}"
win32_ver TEXT, -- platform.win32_ver() as JSON
mac_ver TEXT, -- platform.mac_ver() as JSON
-- disallow duplicate rows
UNIQUE(hostname, processor, system, release, cpu_type, timezone,
freedesktop_os_release, win32_ver, mac_ver)
processor TEXT -- platform.processor():
);
-- Programs are run by users on machines
CREATE TABLE user(id INTEGER PRIMARY KEY NOT NULL,
CREATE TABLE user(
sha256 TEXT PRIMARY KEY NOT NULL,
username TEXT NOT NULL, -- getpass.getuser()
userid INTEGER, -- os.getuid()
fullname TEXT, -- on Linux/OSX: pwd.getpwuid(os.getuid()).pw_gecos
-- on Windows: see https://stackoverflow.com/questions/21766954/how-to-get-windows-users-full-name-in-python
machine INTEGER NOT NULL,
-- usernames and fullnames change rarely, but it happens. A more restrictive
-- constraint would not enable tracking of this info. The following only
-- guarantees no duplicate full rows.
UNIQUE(username, userid, fullname, machine),
FOREIGN KEY (machine) REFERENCES machine (id) ON UPDATE CASCADE
machine TEXT NOT NULL REFERENCES machine ON UPDATE CASCADE
);
@ -88,41 +77,32 @@ CREATE TABLE user(id INTEGER PRIMARY KEY NOT NULL,
-- Client code should generate random UUIDs in the RFC 4122 variant layout.
-- https://datatracker.ietf.org/doc/html/rfc4122.html
-- This is possible in Python by simply calling uuid.uuid4() with no arguments
CREATE TABLE store (id INTEGER PRIMARY KEY NOT NULL,
machine INTEGER,
dbpath TEXT NOT NULL,
uuid TEXT NOT NULL, -- UUID generated by str(uuid.uuid4())
imported BOOL, -- is this the store for the current directory? If not, then it was
-- imported in order to describe a dependency.
FOREIGN KEY (machine) REFERENCES machine (id) ON UPDATE CASCADE
CREATE TABLE store (
uuid TEXT PRIMARY KEY NOT NULL -- UUID generated by str(uuid.uuid4())
);
-- The filedir table holds all files and directories that are tracked by the
-- store. This table also holds tracked files and directories that have been
-- imported and live outside the current store.
-- We do not support renaming files. Once an entry is created here, it should
-- only be updated to reflect frozen/thawed status.
CREATE TABLE filedir (id INTEGER PRIMARY KEY NOT NULL,
store INTEGER NOT NULL,
-- We do not support renaming files.
CREATE TABLE filedir (
uuid TEXT PRIMARY KEY NOT NULL,
store TEXT NOT NULL,
name TEXT, -- only a filename, not a path
parent INTEGER REFERENCES filedir ON UPDATE CASCADE,
frozen BOOL NOT NULL,
parent TEXT REFERENCES filedir ON UPDATE CASCADE,
UNIQUE(store, name, parent)
);
-- Detect cross-store references
CREATE TRIGGER insert_filedir BEFORE INSERT ON filedir
BEGIN SELECT CASE
WHEN NEW.parent IS NOT NULL AND NEW.store != (SELECT store FROM filedir WHERE id = NEW.parent)
WHEN NEW.parent IS NOT NULL AND NEW.store != (SELECT store FROM filedir WHERE uuid = NEW.parent)
THEN RAISE (ABORT, 'Parent resides in different store')
END; END;
CREATE TRIGGER update_filedir BEFORE UPDATE ON filedir
BEGIN SELECT CASE
WHEN (NEW.id != OLD.id OR NEW.store != OLD.store OR NEW.parent != OLD.parent)
THEN RAISE (ABORT, 'The only updates to filedir allowed are to the frozen column')
END; END;
BEGIN
SELECT RAISE (ABORT, 'No updates to filedir allowed');
END;
-- This table holds _versions_ of files and directories. This table holds
@ -131,8 +111,9 @@ END; END;
-- independent of filetype (though the computation of content hashes is of
-- course dependent on filetype). Each version has a number, and was provided by
-- some program (and potentially a datum).
CREATE TABLE filedir_version (id INTEGER PRIMARY KEY NOT NULL,
filedir INTEGER NOT NULL
CREATE TABLE filedir_version (
uuid TEXT PRIMARY KEY NOT NULL,
filedir TEXT NOT NULL
REFERENCES filedir ON UPDATE CASCADE, -- parent filedir entry
recorded_time REAL, -- When was this version recorded?
@ -144,7 +125,7 @@ CREATE TABLE filedir_version (id INTEGER PRIMARY KEY NOT NULL,
-- We record the permissions on each file, in a way that enables reloading
-- permissions properly when thawing after a freeze operation.
unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target. i.e. this is the "content" of the symlink.
@ -158,7 +139,7 @@ CREATE TABLE filedir_version (id INTEGER PRIMARY KEY NOT NULL,
-- can defer by only hashing files and directories at the end of a program.
sha256 TEXT,
source_task INTEGER REFERENCES task (id) ON UPDATE CASCADE
source_task TEXT REFERENCES task ON UPDATE CASCADE
);
-- Disallow UPDATING filedir_version. Instead, new version should be created.
-- One exception is during importing, in which case we can disable the trigger
@ -166,8 +147,7 @@ INSERT INTO triggers VALUES('update_filedir_version', TRUE);
CREATE TRIGGER update_filedir_version BEFORE UPDATE ON filedir_version
BEGIN SELECT CASE
WHEN (SELECT enabled FROM triggers WHERE name = 'update_filedir_version')
THEN RAISE (ABORT,
'Updating filedir versions is prohibited, other than for id changes')
THEN RAISE (ABORT, 'Updating filedir versions is prohibited')
END; END;
-- TODO: check for inserting inconsistent version info re. deleted flag/filetype
@ -176,17 +156,28 @@ END; END;
-- A computational environment which can execute "programs". Note that the
-- python executable being used, and environment variables are recorded here.
-- Other info is available in the parent "machine" table.
CREATE TABLE environment (id INTEGER PRIMARY KEY NOT NULL,
CREATE TABLE environment (
sha256 TEXT PRIMARY KEY NOT NULL,
envvars_json TEXT, -- json.dumps(dict(os.environ))
python_implementation TEXT, -- platform.python_implementation(): 'cpython'
python_strversion TEXT, -- sys.version: '3.9.7 (default, Sep 16 2021, 13:09:58) \n[GCC 7.5.0]'
python_hexversion INTEGER, -- sys.hexversion: 50923504
user INTEGER,
-- disallow duplicate rows
UNIQUE(envvars_json, python_implementation, python_strversion, python_hexversion, user),
FOREIGN KEY (user) REFERENCES user (id) ON UPDATE CASCADE
user TEXT REFERENCES user ON UPDATE CASCADE,
timezone TEXT, -- timezone, for interpreting event times
platform_release TEXT, -- platform.release(): '5.15.64'
freedesktop_os_release TEXT, -- requires python 3.10
-- platform.freedesktop_os_release() as JSON
-- "{'NAME': 'NixOS', 'ID': 'nixos',
-- 'PRETTY_NAME': 'NixOS 22.05 (Quokka)', 'BUG_REPORT_URL':
-- 'https://github.com/NixOS/nixpkgs/issues', 'BUILD_ID':
-- '22.05.20220902.67e4507', 'DOCUMENTATION_URL':
-- 'https://nixos.org/learn.html', 'HOME_URL': 'https://nixos.org/', 'LOGO':
-- 'nix-snowflake', 'SUPPORT_URL': 'https://nixos.org/community.html',
-- 'VERSION': '22.05 (Quokka)', 'VERSION_CODENAME': 'quokka', 'VERSION_ID':
-- '22.05'}"
win32_ver TEXT, -- platform.win32_ver() as JSON
mac_ver TEXT -- platform.mac_ver() as JSON
);
@ -195,7 +186,8 @@ CREATE TABLE environment (id INTEGER PRIMARY KEY NOT NULL,
-- python script, multiple "programs" may be created. Also note that historical
-- programs are automatically imported and merged when possible when loading a
-- "datum" from disk.
CREATE TABLE program (id INTEGER PRIMARY KEY NOT NULL,
CREATE TABLE program (
uuid TEXT PRIMARY KEY NOT NULL,
name TEXT, -- name of the program, usually written lowercase by calling code e.g. cnn_crossval
-- Names of built-in operations will be shown in upper case: e.g. 'FREEZE'
@ -205,65 +197,57 @@ CREATE TABLE program (id INTEGER PRIMARY KEY NOT NULL,
end_time REAL,
process_id INTEGER, -- host PID of python process on host OS
environment INTEGER NOT NULL,
message TEXT, -- user-defined message to help distinguish similar runs
FOREIGN KEY (environment) REFERENCES environment (id) ON UPDATE CASCADE
environment TEXT NOT NULL REFERENCES environment ON UPDATE CASCADE,
message TEXT NOT NULL -- user-defined message to help distinguish similar runs
);
-- We try to track all python packages that impact execution by traversing a
-- copy of sys.modules. This is done once before a "program" and once after in
-- case some calling code winds up calling a previously-unloaded module.
CREATE TABLE py_package (id INTEGER PRIMARY KEY NOT NULL,
CREATE TABLE py_package (
sha256 TEXT PRIMARY KEY NOT NULL,
name TEXT NOT NULL,
version TEXT
version TEXT,
UNIQUE (name, version)
);
-- A py_module describes any python module file containing decorated Functions.
-- Modules are tracked since they impact the global scope of function calls.
CREATE TABLE py_module(id INTEGER PRIMARY KEY NOT NULL,
CREATE TABLE py_module(
sha256 TEXT PRIMARY KEY NOT NULL,
name TEXT NOT NULL,
code TEXT,
py_package INTEGER,
FOREIGN KEY (py_package) REFERENCES py_package (id) ON UPDATE CASCADE
code TEXT, -- code doesn't have to be included, but should be used to create sha256
py_package TEXT REFERENCES py_package ON UPDATE CASCADE
);
-- A py_function just describes a function, without reference to its arguments.
-- It can have inputs and outputs, which are described in the py_function_input
-- and py_function_output child tables.
CREATE TABLE py_function(id INTEGER PRIMARY KEY NOT NULL,
CREATE TABLE py_function(
sha256 TEXT PRIMARY KEY NOT NULL,
name TEXT NOT NULL,
py_module INTEGER NOT NULL,
-- we use POSIX timestamps for time recording.
-- e.g. datetime.datetime.now().timestamp()
start_time REAL,
end_time REAL,
FOREIGN KEY (py_module) REFERENCES py_module (id) ON UPDATE CASCADE
py_module TEXT NOT NULL REFERENCES py_module ON UPDATE CASCADE
);
CREATE TABLE py_function_input(id INTEGER PRIMARY KEY NOT NULL,
CREATE TABLE py_function_input(
uuid TEXT PRIMARY KEY NOT NULL,
name TEXT NOT NULL,
typename TEXT NOT NULL,
py_function INTEGER NOT NULL,
py_function TEXT NOT NULL REFERENCES py_function ON UPDATE CASCADE,
position INTEGER,
posonly BOOL,
kwonly BOOL,
description TEXT,
FOREIGN KEY (py_function) REFERENCES py_function (id) ON UPDATE CASCADE
description TEXT
);
CREATE TABLE py_function_output(id INTEGER PRIMARY KEY NOT NULL,
CREATE TABLE py_function_output(
uuid TEXT PRIMARY KEY NOT NULL,
name TEXT,
typename TEXT NOT NULL,
py_function INTEGER NOT NULL,
py_function TEXT NOT NULL REFERENCES py_function ON UPDATE CASCADE,
position INTEGER NOT NULL,
description TEXT,
FOREIGN KEY (py_function) REFERENCES py_function (id) ON UPDATE CASCADE
description TEXT
);
@ -271,47 +255,42 @@ CREATE TABLE py_function_output(id INTEGER PRIMARY KEY NOT NULL,
-- particular set of inputs which are themselves data (see datum table). A task
-- is executed in the context of a "program". Within a program, tasks are
-- typically evaluated in a serial manner.
CREATE TABLE task(id INTEGER PRIMARY KEY NOT NULL,
program INTEGER NOT NULL,
CREATE TABLE task(
uuid TEXT PRIMARY KEY NOT NULL,
program TEXT NOT NULL REFERENCES program ON UPDATE CASCADE,
py_function INTEGER, -- func is NULL for some built-in functionality like "record" programs
FOREIGN KEY (py_function) REFERENCES py_function (id) ON UPDATE CASCADE,
FOREIGN KEY (program) REFERENCES program (id) ON UPDATE CASCADE
-- py_function is NULL for some built-in functionality like "RECORD" programs
py_function TEXT REFERENCES py_function ON UPDATE CASCADE
);
-- A datum is an object that is computed as the output of a task, given as a
-- literal value in a config file, or loaded from a file.
CREATE TABLE datum(id INTEGER PRIMARY KEY NOT NULL,
-- provider_type describes where the datum came from. Choices are:
-- - COMPUTED: output of a decorated Function
-- - IMPORTED: imported output from a prior program
-- - LITERAL: literal value provided in a config file
-- Note that literals are implicitly imported (hash computed) at runtime
CREATE TABLE datum(
uuid TEXT PRIMARY KEY NOT NULL,
-- provider_type describes where the datum came from. Choices are:
-- - COMPUTED: output of a decorated Function
-- - IMPORTED: imported output from a prior program
-- - LITERAL: literal value provided in a config file
-- Note that literals are implicitly imported (hash computed) at runtime
provider_type TEXT,
-- task and task_output are only used when provider_type='COMPUTED'
task INTEGER,
task_output INTEGER,
task TEXT REFERENCES task ON UPDATE CASCADE,
task_output TEXT REFERENCES func_output ON UPDATE CASCADE,
literal_json TEXT, -- if provider_type='LITERAL', a JSON representation of the value
typename TEXT NOT NULL, -- string representation of the data type
FOREIGN KEY (task) REFERENCES task (id) ON UPDATE CASCADE,
FOREIGN KEY (task_output) REFERENCES func_output (id) ON UPDATE CASCADE
typename TEXT NOT NULL -- string representation of the data type
);
-- A task_input records the version of a Datum that is passed to a function
CREATE TABLE task_input(id INTEGER PRIMARY KEY NOT NULL,
task INTEGER NOT NULL,
py_function_input INTEGER, -- if this was a python function, reference which input
CREATE TABLE task_input(
uuid TEXT PRIMARY KEY NOT NULL,
task TEXT NOT NULL REFERENCES task ON UPDATE CASCADE,
-- if this was a python function, reference which input
py_function_input TEXT REFERENCES py_function_input ON UPDATE CASCADE,
datum INTEGER NOT NULL,
datum TEXT NOT NULL REFERENCES datum ON UPDATE CASCADE,
-- Data have versions to facilitate tracking non-const operations. If a datum
-- is passed to a non-const operation, it must increment its internal
-- version
datum_version INTEGER NOT NULL,
FOREIGN KEY (task) REFERENCES task (id) ON UPDATE CASCADE,
FOREIGN KEY (py_function_input) REFERENCES py_function_input (id) ON UPDATE CASCADE,
FOREIGN KEY (datum) REFERENCES datum (id) ON UPDATE CASCADE
datum_version INTEGER NOT NULL
);

View File

@ -4,16 +4,18 @@ from loguru import logger
from . import db, environment, fs
from contextlib import contextmanager
from dataclasses import dataclass
import datetime
import os
from pathlib import Path
import sqlite3
from typing import Any, Optional, TypeVar, Type, Union
from typing import Any, Iterator, Optional, TypeVar, Type, Union
import uuid
import warnings
@dataclass
@dataclass(slots=True)
class Program:
store: "Store"
name: str
@ -23,40 +25,45 @@ class Program:
start_time: Optional[datetime.datetime] = None
evaluated: bool = False
uuid: str = ""
def __post_init__(self) -> None:
# Assign a freshly generated random UUID (uuid4) to this program. This
# unconditionally overwrites whatever value the `uuid` field was given.
self.uuid = str(uuid.uuid4())
def __enter__(self) -> "Program":
if self.evaluated:
raise RuntimeError("Cannot re-enter a Program context")
assert self.store.conn is not None
cur = self.store.conn.cursor()
env = environment.Environment.find_or_insert(cur)
cur.execute(
"INSERT INTO program VALUES (?, ?, ?, ?, ?, ?, ?)",
(
None, # id INTEGER PRIMARY KEY NOT NULL,
self.name, # name TEXT,
# name of the program, usually written lowercase by calling
# code e.g. cnn_crossval
# -- we use POSIX timestamps for time recording.
# -- e.g. datetime.datetime.now().timestamp()
None, # start_time REAL,
None, # end_time REAL,
os.getpid(), # process_id INTEGER, -- host PID of python process on host OS
env.id, # environment INTEGER NOT NULL,
self.message, # user-defined message to help distinguish similar runs
),
)
self.id = cur.lastrowid
env = environment.Environment.detect()
with self.store.committing() as cur:
env.maybe_insert(cur)
cur.execute(
"INSERT INTO program VALUES (?, ?, ?, ?, ?, ?, ?)",
(
self.uuid, # uuid
self.name, # name TEXT,
# name of the program, usually written lowercase by calling
# code e.g. cnn_crossval
# -- we use POSIX timestamps for time recording.
# -- e.g. datetime.datetime.now().timestamp()
None, # start_time REAL,
None, # end_time REAL,
os.getpid(), # process_id INTEGER, -- host PID of python process on host OS
env.sha256.hex(), # environment INTEGER NOT NULL,
self.message, # user-defined message to help distinguish similar runs
),
)
self.id = cur.lastrowid
self.start_time = datetime.datetime.now()
return self
def new_task(self, name: str, py_function_id: Optional[int] = None) -> int:
def new_task(
self,
name: str,
cur: sqlite3.Cursor,
py_function_id: Optional[int] = None,
) -> int:
"""Create a new task and return its id"""
assert self.store.conn is not None
cur = self.store.conn.cursor()
cur.execute(
"INSERT INTO task VALUES (?, ?, ?)",
(None, self.id, py_function_id),
@ -75,21 +82,20 @@ class Program:
# record start and end times in store
assert self.store.conn is not None
cur = self.store.conn.cursor()
cur.execute(
"""
UPDATE
program
SET
start_time = ?,
end_time = ?
WHERE
id = ?
""",
(self.start_time, end_time, self.id),
)
cur.connection.commit()
self._evaluated = True # prevent re-running
with self.store.committing() as cur:
cur.execute(
"""
UPDATE
program
SET
start_time = ?,
end_time = ?
WHERE
uuid = ?
""",
(self.start_time, end_time, self.uuid),
)
self.evaluated = True # prevent re-running
assert self.start_time is not None
elapsed = end_time - self.start_time
logger.success(
@ -126,11 +132,14 @@ class Store:
else:
self.path = Path(directory)
self.db_path = self.path / "nancy.db"
if conn is None:
self.connect()
else:
self.conn = conn
self.store_uuid = self.find_store_uuid()
def copy(self: _StoreT, store_path: fs.PathStr) -> _StoreT:
"""Copy this store to a new store path"""
assert self.conn is not None
@ -141,10 +150,28 @@ class Store:
return self.__class__(store_path)
def connect(self) -> sqlite3.Connection:
self.conn = sqlite3.connect(self.db_path)
self.conn = sqlite3.connect(self.db_path, isolation_level="DEFERRED")
self.conn.cursor().execute("PRAGMA foreign_keys = ON;")
return self.conn
@contextmanager
def committing(self) -> Iterator[sqlite3.Cursor]:
    """Yield a cursor on this store's connection and commit when the block exits.

    If the ``with`` body raises, the pending transaction is rolled back and
    the exception is re-raised, so a failed block never leaves partial
    writes pending on the shared connection for a later commit to persist.

    Yields:
        A new cursor created from ``self.conn``.
    """
    assert self.conn is not None
    cur = self.conn.cursor()
    try:
        yield cur
    except BaseException:
        # Discard everything written inside the failed block.
        self.conn.rollback()
        raise
    else:
        self.conn.commit()
def find_store_uuid(self, cur: Optional[sqlite3.Cursor] = None) -> Optional[str]:
    """Return the UUID recorded for the local store, or None if there is none.

    Looks up the ``store_uuid`` key in the ``local_metadata`` table.

    Args:
        cur: Cursor to run the query on. When omitted, a new cursor is
            created from ``self.conn``.

    Returns:
        The stored value, or None when no ``store_uuid`` row exists.
    """
    assert self.conn is not None
    if cur is None:
        cur = self.conn.cursor()
    # Bind the key as a parameter. A double-quoted "store_uuid" inside the SQL
    # text is parsed by SQLite as an identifier first and only treated as a
    # string literal as a legacy fallback, which some builds disable.
    cur.execute(
        "SELECT value FROM local_metadata WHERE key = ? LIMIT 1",
        ("store_uuid",),
    )
    res = cur.fetchone()
    return None if res is None else res[0]
@classmethod
def init(
cls: Type[_StoreT], message: str, directory: Optional[fs.PathStr] = None
@ -163,14 +190,27 @@ class Store:
f"File {db_path} exists. Refusing to re-initialize",
)
# initialize a database in the target directory
conn = sqlite3.connect(db_path)
db.init_schema(conn.cursor())
conn = sqlite3.connect(db_path, isolation_level="DEFERRED")
cur = conn.cursor()
db.init_schema(cur)
conn.commit()
cur.close()
new_store = cls(directory, conn)
with new_store.program("INIT", message) as p:
# set the timing to the actual times it took to initialize the db
p.start_time = start_time
# generate a new UUID for this store
assert new_store.store_uuid is None
new_store.store_uuid = str(uuid.uuid4())
with new_store.committing() as cur:
cur.execute(
'INSERT INTO local_metadata VALUES ("store_uuid", ?)',
(new_store.store_uuid,),
)
return new_store
def make_readonly(self) -> None:
@ -345,24 +385,13 @@ class Store:
cur = self.conn.cursor()
with self.program("RECORD", message) as p:
# create a task for this operation
task_id = p.new_task("Store._record_recursive")
with self.committing() as cur: # entire record operation is one transaction
# create a task for this operation
task_id = p.new_task("Store._record_recursive", cur=cur)
# descend the diff, tracking parent filedir IDs, creating them and
# recording new versions of each, when necessary
self._record_recursive(diff, cur, source_task=task_id)
class StoreFile:
    """A file that is recorded in a store, identified by its path."""

    def __init__(self, store: Store, rel_path: fs.PathStr):
        """Keep references to the owning store and the file's path.

        Args:
            store: The store this file belongs to.
            rel_path: Path of the file (presumably relative to the store
                directory -- confirm against callers).
        """
        self.store, self.rel_path = store, rel_path

    def save(self) -> None:
        """Placeholder; currently performs no work."""
        # TODO: call the appropriate save method
        return None
# descend the diff, tracking parent filedir IDs, creating them and
# recording new versions of each, when necessary
self._record_recursive(diff, cur, source_task=task_id)
def find_store(path: Union[str, "os.PathLike[str]"]) -> Optional[str]:

View File

@ -1,76 +1,59 @@
from . import machine
from dataclasses import dataclass
import hashlib
import getpass
import os
import pwd
import sqlite3
from typing import NamedTuple, Optional, Type, TypeVar
from typing import Callable, NamedTuple, Optional, Type, TypeVar
# see https://stackoverflow.com/questions/44640479/type-annotation-for-classmethod-returning-instance
_UserT = TypeVar("_UserT", bound="User")
class User(NamedTuple):
id: Optional[int] # if not None, this is `id` in the `machine` table
@dataclass(frozen=True, slots=True)
class User:
username: str
userid: int
fullname: str
machine: machine.Machine
sha256: bytes = b""
@classmethod
def find_or_insert(
cls: Type[_UserT], cur: sqlite3.Cursor, user: Optional[_UserT] = None
) -> _UserT:
"""Given a DB cursor, find or create row in user table and fill"""
if user is None:
user = cls.detect()
def __post_init__(self) -> None:
    """Compute ``sha256`` from username, userid, fullname and machine hash.

    The digest covers, in order: the UTF-8 bytes of ``username``, the
    minimal big-endian encoding of ``userid``, the UTF-8 bytes of
    ``fullname``, and ``machine.sha256``.
    """
    uid = self.userid
    # Minimal-width big-endian encoding; a uid of 0 contributes no bytes.
    uid_bytes = uid.to_bytes((uid.bit_length() + 7) // 8, byteorder="big")
    pieces = (
        bytes(self.username, "utf-8"),
        uid_bytes,
        bytes(self.fullname, "utf-8"),
        self.machine.sha256,
    )
    hasher = hashlib.sha256()
    for piece in pieces:
        hasher.update(piece)
    # Go through object.__setattr__ to get around freezing when storing
    # the digest.
    object.__setattr__(self, "sha256", hasher.digest())
m = machine.Machine.find_or_insert(cur)
def maybe_insert(self, cur: sqlite3.Cursor) -> str:
"""Insert if not exists, without committing."""
self.machine.maybe_insert(cur)
user = user._replace(machine=m)
key = self.sha256.hex()
# insert or ignore, handle each case to set id
cur.execute(
"""
SELECT
id
FROM
user
WHERE
username = ? AND
userid = ? AND
fullname = ? AND
machine = ?
LIMIT 1
INSERT OR IGNORE INTO user VALUES (?,?,?,?,?);
""",
(
user.username,
user.userid,
user.fullname,
user.machine.id,
key,
self.username,
self.userid,
self.fullname,
self.machine.sha256.hex(),
),
)
res = cur.fetchone()
if res is None:
cur.execute(
"""
INSERT INTO user VALUES (?,?,?,?,?);
""",
(
user.id,
user.username,
user.userid,
user.fullname,
user.machine.id,
),
)
id = cur.lastrowid
cur.connection.commit()
else:
id = res[0]
return user._replace(id=id)
return key
@classmethod
def detect(cls: Type[_UserT]) -> _UserT:
@ -81,12 +64,9 @@ class User(NamedTuple):
# TODO: will this fail on Windows/OSX?
fullname = pwd.getpwuid(os.getuid()).pw_gecos
m = machine.Machine.detect()
return cls(
None,
getpass.getuser(),
os.getuid(),
fullname,
m,
machine=machine.Machine.detect(),
)

View File

@ -1,3 +1,3 @@
from .db import schema_version # NOQA
from .db import schema_version as schema_version # NOQA
__version__ = "0.1.0"

View File

@ -6,10 +6,11 @@ import pytest
import datetime
import os
import sqlite3
from typing import Iterator
@pytest.fixture
def temp_db():
def temp_db() -> Iterator[sqlite3.Cursor]:
"""Create an in-memory database that follow's the nancy schema"""
with sqlite3.connect(":memory:") as conn:
cur = conn.cursor()
@ -18,47 +19,42 @@ def temp_db():
db.init_schema(cur)
cur.execute(
'INSERT INTO local_metadata VALUES ("store_uuid", ?)',
("78dc0b93-0e22-45ee-ae90-49d3575dd70f",),
)
yield cur
@pytest.fixture
def insert_machine(temp_db):
def insert_machine(temp_db: sqlite3.Cursor) -> sqlite3.Cursor:
cur = temp_db
cur.executemany(
"INSERT INTO machine VALUES " "(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
"INSERT INTO machine VALUES " "(?, ?, ?, ?, ?, ?)",
[
(
None, # id INTEGER PRIMARY KEY NOT NULL,
"a5d97c08a15c4db69f5fded523a1bfe3", # machine_id TEXT, -- platform-dependent unique hardware id
"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", # sha256
"a5d97c08a15c4db69f5fded523a1bfe3", # machine_id TEXT
"lucky", # hostname TEXT, -- platform.node(): 'lucky'
"", # processor TEXT, -- platform.processor():
"Linux", # system TEXT, -- platform.system(): 'Linux'
"5.15.64", # release TEXT, -- platform.release(): '5.15.64'
"aarch64", # machine TEXT, -- platform.machine(): 'x86_64'
"EDT", # timezone TEXT, -- timezone, for interpreting event times
"", # freedesktop_os_release TEXT, -- requires python 3.10
"", # win32_ver TEXT, -- platform.win32_ver() as JSON
"", # mac_ver TEXT -- platform.mac_ver() as JSON
"x86_64", # cpu_type TEXT, -- platform.machine():
"i386", # processor TEXT, -- platform.processor():
),
(
None, # id INTEGER PRIMARY KEY NOT NULL,
"afc9b06a23b74341b29d42b8312a4f8a",
"2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae", # sha256
"b5d97c08a15c4db69f5fded523a1bfe3", # machine_id TEXT
"a100", # hostname TEXT, -- platform.node(): 'lucky'
"", # processor TEXT, -- platform.processor():
"Linux", # system TEXT, -- platform.system(): 'Linux'
"5.15.63", # release TEXT, -- platform.release(): '5.15.64'
"x86_64", # machine TEXT, -- platform.machine(): 'x86_64'
"EST", # timezone TEXT, -- timezone, for interpreting event times
"", # freedesktop_os_release TEXT, -- requires python 3.10
"", # win32_ver TEXT, -- platform.win32_ver() as JSON
"", # mac_ver TEXT -- platform.mac_ver() as JSON
"x86_64", # cpu_type TEXT, -- platform.machine():
"i386", # processor TEXT, -- platform.processor():
),
],
)
return cur
def test_insert_machine(insert_machine):
def test_insert_machine(insert_machine: sqlite3.Cursor) -> None:
cur = insert_machine
cur.execute("SELECT * FROM machine")
machines = cur.fetchall()
@ -66,45 +62,45 @@ def test_insert_machine(insert_machine):
@pytest.fixture
def insert_user(insert_machine):
def insert_user(insert_machine: sqlite3.Cursor) -> sqlite3.Cursor:
cur = insert_machine
cur.executemany(
"INSERT INTO user VALUES " "(?, ?, ?, ?, ?)",
"INSERT INTO user VALUES (?, ?, ?, ?, ?)",
[
(
None, # id INTEGER PRIMARY KEY NOT NULL,
( # jacob@lucky
"f2ee9db3526d36b4346980f12a49cb404b924cf784dbd3d5db8a362e363a8070", # sha256
"jacob", # username TEXT NOT NULL,
101, # userid INTEGER,
"Jacob Hinkle", # fullname TEXT,
1, # machine INTEGER NOT NULL,
"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", # machine
),
(
None, # id INTEGER PRIMARY KEY NOT NULL,
( # jacob@a100
"3219c598f4e6e84bea3d0c295f0d2c35f3ce01747f4839fb698b32646b037d3c", # sha256
"jacob", # username TEXT NOT NULL,
10301, # userid INTEGER,
"Jacob Hinkle", # fullname TEXT,
2, # machine INTEGER NOT NULL,
"2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae", # machine
),
(
None, # id INTEGER PRIMARY KEY NOT NULL,
( # bob@a100
"99389acaef44ed99c16bcaf1bff0c261b35ad110f0544092f711a1354e616f61", # sha256
"bob", # username TEXT NOT NULL,
2035, # userid INTEGER,
"Just Bob", # fullname TEXT,
2, # machine INTEGER NOT NULL,
"2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae", # machine
),
],
)
return cur
def test_insert_user(insert_user):
def test_insert_user(insert_user: sqlite3.Cursor) -> None:
cur = insert_user
cur.execute("SELECT * FROM user")
users = cur.fetchall()
assert len(users) == 3
def test_invalid_user_machine(insert_user):
def test_invalid_user_machine(insert_user: sqlite3.Cursor) -> None:
cur = insert_user
with pytest.raises(sqlite3.IntegrityError):
# should fail foreign key constraint
@ -133,66 +129,44 @@ def test_invalid_user_machine(insert_user):
@pytest.fixture
def insert_store(insert_machine):
def insert_store(insert_machine: sqlite3.Cursor) -> sqlite3.Cursor:
import uuid
cur = insert_machine
cur.executemany(
"INSERT INTO store VALUES " "(?, ?, ?, ?, ?)",
"INSERT INTO store VALUES " "(?)",
[
(
None, # id INTEGER PRIMARY KEY NOT NULL,
1, # machine INTEGER,
"/path/to/first/store", # dbpath TEXT NOT NULL,
str(uuid.uuid4()), # -- UUID generated by str(uuid.uuid4())
False, # imported BOOL,
),
(
None, # id INTEGER PRIMARY KEY NOT NULL,
1, # machine INTEGER,
"/path/to/dependency/store", # dbpath TEXT NOT NULL,
str(uuid.uuid4()), # -- UUID generated by str(uuid.uuid4())
True, # imported BOOL,
),
(
None, # id INTEGER PRIMARY KEY NOT NULL,
2, # machine INTEGER,
# same path but on a separate machine
"/path/to/first/store", # dbpath TEXT NOT NULL,
str(uuid.uuid4()), # -- UUID generated by str(uuid.uuid4())
True, # imported BOOL,
),
("63c2a84a-524f-46ad-b512-b99e80f7385b",),
("3be35ff6-6a7b-49cf-ba3e-c610b781b54e",),
("b8f685d8-2322-403d-a59c-fb8301229788",),
],
)
return cur
@pytest.fixture
def insert_directories(insert_store):
def insert_directories(insert_store: sqlite3.Cursor) -> sqlite3.Cursor:
cur = insert_store
cur.executemany(
"INSERT INTO filedir VALUES " "(?, ?, ?, ?, ?)",
"INSERT INTO filedir VALUES (?, ?, ?, ?)",
[
(
None, # id INTEGER PRIMARY KEY NOT NULL,
1, # store INTEGER NOT NULL,
".", # filename TEXT, -- only a filename, not a path
None, # parent INTEGER REFERENCES filedir ON UPDATE CASCADE,
False, # frozen BOOL NOT NULL,
"71e75275-0847-4e47-9df5-1f2bbe01da91", # uuid
"63c2a84a-524f-46ad-b512-b99e80f7385b", # store
".", # filename
None, # parent
),
(
None, # id INTEGER PRIMARY KEY NOT NULL,
1, # store INTEGER NOT NULL,
"foo", # filename TEXT, -- only a filename, not a path
1, # parent INTEGER REFERENCES filedir ON UPDATE CASCADE,
False, # frozen BOOL NOT NULL,
"6051a251-d38a-4d8c-ba57-ac74023ec2f4", # uuid
"63c2a84a-524f-46ad-b512-b99e80f7385b", # store
"foo", # filename
"71e75275-0847-4e47-9df5-1f2bbe01da91", # parent
),
(
None, # id INTEGER PRIMARY KEY NOT NULL,
2, # store INTEGER NOT NULL,
".", # filename TEXT, -- only a filename, not a path
None, # parent INTEGER REFERENCES filedir ON UPDATE CASCADE,
False, # frozen BOOL NOT NULL,
"5f74b5f2-eac8-4a86-90f6-4f7c7981564d", # uuid
"3be35ff6-6a7b-49cf-ba3e-c610b781b54e", # store
".", # filename
None, # parent
),
],
)
@ -200,34 +174,34 @@ def insert_directories(insert_store):
"INSERT INTO filedir_version VALUES " "(?, ?, ?, ?, ?, ?, ?, ?, ?)",
[
(
None, # id INTEGER PRIMARY KEY NOT NULL,
1, # INTEGER REFERENCES filedir ON UPDATE CASCADE, -- parent filedir entry
"baad81ef-7cc6-48df-973c-e9f32ceda19a", # uuid
"71e75275-0847-4e47-9df5-1f2bbe01da91", # filedir
datetime.datetime.now().timestamp(),
"DIR", # filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
False, # deleted BOOL NOT NULL, -- set True when recording a deleted file
"drwxrwxr-x", # unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
"drwxrwxr-x", # perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
None, # symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target. I.e. this is the "content" of the symlink.
"a84ed33864d06615a87bc8da5258d841163f1e7969367ecd07b041ae1a18febd", # sha256 TEXT,
None, # source_task INTEGER,
),
(
None, # id INTEGER PRIMARY KEY NOT NULL,
1, # INTEGER REFERENCES filedir ON UPDATE CASCADE, -- parent filedir entry
"c4037a74-88cd-4a1d-b158-4ebebb25f10d", # uuid
"71e75275-0847-4e47-9df5-1f2bbe01da91", # filedir
datetime.datetime.now().timestamp(),
"DIR", # filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
False, # deleted BOOL NOT NULL, -- set True when recording a deleted file
"drwxrwxr-x", # unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
"drwxrwxr-x", # perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
None, # symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target. I.e. this is the "content" of the symlink.
"a84ed33864d06615a87bc8da5258d841163f1e7969367ecd07b041ae1a18febd", # sha256 TEXT,
None, # source_task INTEGER,
),
(
None, # id INTEGER PRIMARY KEY NOT NULL,
1, # INTEGER REFERENCES filedir ON UPDATE CASCADE, -- parent filedir entry
"dcfbe086-3c4b-43b8-a550-974610b86267", # uuid
"5f74b5f2-eac8-4a86-90f6-4f7c7981564d", # filedir
datetime.datetime.now().timestamp(),
"DIR", # filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
False, # deleted BOOL NOT NULL, -- set True when recording a deleted file
"drwxrwxr-x", # unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
"drwxrwxr-x", # perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
None, # symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target. I.e. this is the "content" of the symlink.
"a84ed33864d06615a87bc8da5258d841163f1e7969367ecd07b041ae1a18febd", # sha256 TEXT,
None, # source_task INTEGER,
@ -237,18 +211,17 @@ def insert_directories(insert_store):
return cur
def test_crossstore_directory_insert(insert_directories):
def test_crossstore_directory_insert(insert_directories: sqlite3.Cursor) -> None:
cur = insert_directories
with pytest.raises(sqlite3.IntegrityError):
# declaring directory as belonging to store 2, but parent's store is 1
cur.execute(
"INSERT INTO filedir VALUES " "(?, ?, ?, ?, ?)",
"INSERT INTO filedir VALUES (?, ?, ?, ?)",
(
None, # id INTEGER PRIMARY KEY NOT NULL,
2, # store INTEGER NOT NULL,
"some_dir", # filename TEXT, -- only a filename, not a path
1, # parent INTEGER REFERENCES filedir ON UPDATE CASCADE,
False, # frozen BOOL NOT NULL,
"b9774e78-2646-476e-b4a8-f6df0b10ba8e", # uuid
"3be35ff6-6a7b-49cf-ba3e-c610b781b54e", # store
"some_dir", # filename
"baad81ef-7cc6-48df-973c-e9f32ceda19a", # parent
),
)
for row in cur.connection.iterdump():
@ -258,7 +231,7 @@ def test_crossstore_directory_insert(insert_directories):
@pytest.fixture
def insert_files(insert_directories):
def insert_files(insert_directories: sqlite3.Cursor) -> sqlite3.Cursor:
cur = insert_directories
cur.execute("SELECT COUNT(*) FROM filedir")
(nprev,) = cur.fetchone()
@ -328,7 +301,7 @@ def insert_files(insert_directories):
# TODO: This test is disabled until triggers are added to check for these types
# of constraints. These became much more complicated to check when I added
# filedir_version.
def disabled_test_nondir_parent_directory_insert(insert_files):
def disabled_test_nondir_parent_directory_insert(insert_files: sqlite3.Cursor) -> None:
cur = insert_files
with pytest.raises(sqlite3.IntegrityError):
# declaring parent as 5, but 5 is a file (plots.png)

View File

@ -1,19 +1,23 @@
"""Test functionality of the Store class."""
from nancy import store
import os
from pathlib import Path
import pytest
import sys
import tempfile
from typing import Iterator
@pytest.fixture
def bare_dir():
def bare_dir() -> Iterator[Path]:
"""Create an emptry temp directory"""
with tempfile.TemporaryDirectory(prefix="nancy_testdir") as d:
yield Path(d)
@pytest.fixture
def filled_dir(bare_dir):
def filled_dir(bare_dir: Path) -> Path:
open(bare_dir / "a.txt", "w").write("foo")
os.makedirs(bare_dir / "stats")
open(bare_dir / "stats" / "metrics.csv", "w").write("bar,baz")
@ -22,24 +26,23 @@ def filled_dir(bare_dir):
return bare_dir
def test_record_untracked_dir(filled_dir):
def test_record_untracked_dir(filled_dir: Path) -> None:
from nancy.cli.record import record
record(filled_dir, message="test_record_untracked_dir")
record(store_path=filled_dir, message="test_record_untracked_dir")
@pytest.fixture
def store():
from nancy import store
def empty_store() -> Iterator[store.Store]:
s = store.Store.init(message="test init")
yield s
def test_schema_version_match(store):
def test_schema_version_match(empty_store: store.Store) -> None:
from nancy.version import schema_version
cur = store.conn.cursor()
assert empty_store.conn is not None
cur = empty_store.conn.cursor()
(db_schema_ver,) = cur.execute("PRAGMA user_version;").fetchone()
assert schema_version == db_schema_ver