Change to using UUID and SHA256 as DB keys
This is important for distributed settings and will make merging databases much simpler. The change is extensive: besides switching the keys, it moves some fields between tables and introduces the `local_metadata` table, a key-value store that holds the UUID of the "local" store, i.e. the store corresponding to the directory that holds that particular nancy.db.
This commit is contained in:
parent
c4648ec042
commit
07ccef601c
@ -5,7 +5,7 @@ description = "Composable tracking of scientific data provenance"
|
||||
authors = ["Jacob Hinkle <jacob.hinkle@jhink.org>"]
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.8"
|
||||
python = "^3.10"
|
||||
click = "^8.1.3"
|
||||
colorama = "^0.4.5"
|
||||
loguru = "^0.6.0"
|
||||
@ -28,7 +28,7 @@ build-backend = "poetry.core.masonry.api"
|
||||
[tool.tox]
|
||||
legacy_tox_ini = """
|
||||
[tox]
|
||||
envlist = py38,py39,py310,mypy
|
||||
envlist = mypy,py310
|
||||
isolated_build = true
|
||||
|
||||
[testenv]
|
||||
@ -40,7 +40,10 @@ commands =
|
||||
pytest --cov src/nancy
|
||||
|
||||
[testenv:mypy]
|
||||
deps = mypy
|
||||
deps =
|
||||
mypy
|
||||
pytest
|
||||
commands =
|
||||
mypy --strict -p nancy
|
||||
mypy --strict --ignore-missing-imports --no-incremental -p nancy
|
||||
mypy --strict --no-incremental tests/
|
||||
"""
|
||||
|
||||
@ -48,13 +48,16 @@ def print_diff(
|
||||
dirstr = (
|
||||
(filetypecolors["DIR"] + dname + "/" + reset) if dname != "" else ""
|
||||
)
|
||||
fname = filetypecolors.get(entry.filetype, "") + fname + reset
|
||||
assert entry.filetype is not None
|
||||
fname = filetypecolors.get(str(entry.filetype), "") + fname + reset
|
||||
|
||||
if entry.filetype == "LNK": # append symlink target
|
||||
if entry.filetype == fs.FileType.LNK: # append symlink target
|
||||
assert entry.symlink_target is not None
|
||||
fname += " -> " + entry.symlink_target
|
||||
|
||||
relpath = dirstr + fname
|
||||
|
||||
assert entry.sha256 is not None
|
||||
hashchange = (
|
||||
(hashcolor + entry.sha256.hex() + reset + " " + changetags[tag])
|
||||
if show_hashes
|
||||
|
||||
@ -3,11 +3,12 @@ import click
|
||||
from .. import store
|
||||
|
||||
import os
|
||||
from typing import List
|
||||
|
||||
|
||||
@click.command()
|
||||
@click.argument("directory")
|
||||
def freeze(directory):
|
||||
def freeze(directory: str) -> None:
|
||||
"""
|
||||
Initialize tracking in a directory or freeze a tracked directory.
|
||||
|
||||
@ -25,12 +26,12 @@ def freeze(directory):
|
||||
else: # this is an existing store
|
||||
s = store.Store(directory)
|
||||
|
||||
s.freeze()
|
||||
# s.freeze()
|
||||
|
||||
|
||||
@click.command()
|
||||
@click.argument("files", nargs=-1) # , help="Files or directories to thaw.")
|
||||
def thaw(files):
|
||||
def thaw(files: List[str]) -> None:
|
||||
"""
|
||||
Enable manual alteration of files within a tracked directory.
|
||||
|
||||
|
||||
@ -32,7 +32,7 @@ def record(
|
||||
if not os.path.exists(os.path.join(store_path, "nancy.db")):
|
||||
# this is a new store
|
||||
logger.info(f"Initializing new store in {store_path}...")
|
||||
s = store.Store.init(store_path)
|
||||
s = store.Store.init(message=message, directory=store_path)
|
||||
else: # this is an existing store
|
||||
s = store.Store(store_path)
|
||||
|
||||
@ -78,7 +78,7 @@ def record(
|
||||
"of given paths. If given the path to a non-store directory, a new "
|
||||
"store is initialized there.",
|
||||
)
|
||||
def record_cli(show_hashes, no_color, message, store):
|
||||
def record_cli(show_hashes: bool, no_color: bool, message: str, store: str) -> None:
|
||||
"""
|
||||
Initialize tracking or record changes to a tracked directory.
|
||||
"""
|
||||
|
||||
@ -1,83 +1,79 @@
|
||||
from . import user
|
||||
|
||||
from dataclasses import dataclass
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import platform
|
||||
import sqlite3
|
||||
import sys
|
||||
from typing import NamedTuple, Optional, TypeVar, Type
|
||||
import time
|
||||
from typing import Callable, Optional, TypeVar, Type
|
||||
|
||||
|
||||
# see https://stackoverflow.com/questions/44640479/type-annotation-for-classmethod-returning-instance
|
||||
_EnvironmentT = TypeVar("_EnvironmentT", bound="Environment")
|
||||
|
||||
|
||||
class Environment(NamedTuple):
|
||||
id: Optional[int]
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class Environment:
|
||||
envvars_json: str
|
||||
python_implementation: str
|
||||
python_strversion: str
|
||||
python_hexversion: int
|
||||
user: user.User
|
||||
timezone: str
|
||||
release: str
|
||||
freedesktop_os_release: str
|
||||
win32_ver: str
|
||||
mac_ver: str
|
||||
sha256: bytes = b""
|
||||
|
||||
@classmethod
|
||||
def find_or_insert(
|
||||
cls: Type[_EnvironmentT],
|
||||
cur: sqlite3.Cursor,
|
||||
env: Optional[_EnvironmentT] = None,
|
||||
) -> _EnvironmentT:
|
||||
"""Given a DB cursor, find or create row in environment table and fill"""
|
||||
if env is None:
|
||||
env = cls.detect()
|
||||
def __post_init__(self) -> None:
|
||||
m = hashlib.sha256()
|
||||
upstr: Callable[[str], None] = lambda s: m.update(bytes(s, "utf-8"))
|
||||
upint: Callable[[int], None] = lambda n: m.update(
|
||||
n.to_bytes((n.bit_length() + 7) // 8, byteorder="big"),
|
||||
)
|
||||
upstr(self.envvars_json)
|
||||
upstr(self.python_implementation)
|
||||
upstr(self.python_strversion)
|
||||
upint(self.python_hexversion)
|
||||
m.update(self.user.sha256)
|
||||
upstr(self.timezone)
|
||||
upstr(self.release)
|
||||
upstr(self.freedesktop_os_release)
|
||||
upstr(self.win32_ver)
|
||||
upstr(self.mac_ver)
|
||||
# circumvent freezing to set sha256
|
||||
object.__setattr__(self, "sha256", m.digest())
|
||||
|
||||
u = user.User.find_or_insert(cur)
|
||||
def maybe_insert(self, cur: sqlite3.Cursor) -> str:
|
||||
"""Insert if not exists, without committing."""
|
||||
self.user.maybe_insert(cur)
|
||||
|
||||
env = env._replace(user=u)
|
||||
key = self.sha256.hex()
|
||||
|
||||
# insert or ignore, handle each case to set id
|
||||
cur.execute(
|
||||
"""
|
||||
SELECT
|
||||
id
|
||||
FROM
|
||||
environment
|
||||
WHERE
|
||||
envvars_json = ? AND
|
||||
python_implementation = ? AND
|
||||
python_strversion = ? AND
|
||||
python_hexversion = ? AND
|
||||
user = ?
|
||||
LIMIT 1
|
||||
INSERT OR IGNORE INTO environment VALUES (?,?,?,?,?,?,?,?,?,?,?);
|
||||
""",
|
||||
(
|
||||
env.envvars_json,
|
||||
env.python_implementation,
|
||||
env.python_strversion,
|
||||
env.python_hexversion,
|
||||
env.user.id,
|
||||
key,
|
||||
self.envvars_json,
|
||||
self.python_implementation,
|
||||
self.python_strversion,
|
||||
self.python_hexversion,
|
||||
self.user.sha256.hex(),
|
||||
self.timezone,
|
||||
self.release,
|
||||
self.freedesktop_os_release,
|
||||
self.win32_ver,
|
||||
self.mac_ver,
|
||||
),
|
||||
)
|
||||
res = cur.fetchone()
|
||||
if res is None:
|
||||
cur.execute(
|
||||
"""
|
||||
INSERT INTO environment VALUES (?,?,?,?,?,?);
|
||||
""",
|
||||
(
|
||||
env.id,
|
||||
env.envvars_json,
|
||||
env.python_implementation,
|
||||
env.python_strversion,
|
||||
env.python_hexversion,
|
||||
env.user.id,
|
||||
),
|
||||
)
|
||||
id = cur.lastrowid
|
||||
cur.connection.commit()
|
||||
else:
|
||||
id = res[0]
|
||||
|
||||
return env._replace(id=id)
|
||||
return key
|
||||
|
||||
@classmethod
|
||||
def detect(cls: Type[_EnvironmentT]) -> _EnvironmentT:
|
||||
@ -87,11 +83,22 @@ class Environment(NamedTuple):
|
||||
"""
|
||||
u = user.User.detect()
|
||||
|
||||
fdor = ""
|
||||
try:
|
||||
fdor = json.dumps(platform.freedesktop_os_release())
|
||||
except AttributeError:
|
||||
# freedesktop_os_release only available for python >= 3.10
|
||||
fdor = ""
|
||||
|
||||
return cls(
|
||||
None,
|
||||
json.dumps(dict(os.environ)),
|
||||
platform.python_implementation(),
|
||||
sys.version,
|
||||
sys.hexversion,
|
||||
u,
|
||||
timezone=time.tzname[time.daylight],
|
||||
release=platform.release(),
|
||||
freedesktop_os_release=fdor,
|
||||
win32_ver=json.dumps(platform.win32_ver()),
|
||||
mac_ver=json.dumps(platform.mac_ver()),
|
||||
)
|
||||
|
||||
@ -203,7 +203,7 @@ class FSEntry:
|
||||
)
|
||||
s = filestat.st_mode
|
||||
|
||||
children = []
|
||||
children: List[FSEntry] = []
|
||||
symlink_target: Optional[Union[str, bytes]] = None
|
||||
if os.path.islink(path):
|
||||
# Check links first, since it is not exclusive with dir or file checks
|
||||
@ -518,7 +518,7 @@ class FSDiff:
|
||||
|
||||
allnames = set(list(Alist.keys()) + list(Blist.keys()))
|
||||
|
||||
modified_children = [
|
||||
modified_children: List[FSDiff] = [
|
||||
cls.compute(
|
||||
Alist.get(n, None),
|
||||
Blist.get(n, None),
|
||||
|
||||
@ -1,104 +1,83 @@
|
||||
from typing import NamedTuple, Optional, Type, TypeVar
|
||||
from dataclasses import dataclass
|
||||
import hashlib
|
||||
import json
|
||||
import platform
|
||||
import sqlite3
|
||||
import time
|
||||
from typing import Callable, NamedTuple, Optional, Type, TypeVar
|
||||
|
||||
|
||||
# see https://stackoverflow.com/questions/44640479/type-annotation-for-classmethod-returning-instance
|
||||
_MachineT = TypeVar("_MachineT", bound="Machine")
|
||||
|
||||
|
||||
class Machine(NamedTuple):
|
||||
id: Optional[int]
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class Machine:
|
||||
machine_id: Optional[str]
|
||||
hostname: str
|
||||
processor: str
|
||||
system: str
|
||||
release: str
|
||||
cpu_type: str
|
||||
timezone: str
|
||||
freedesktop_os_release: str
|
||||
win32_ver: str
|
||||
mac_ver: str
|
||||
processor: str
|
||||
sha256: bytes = b""
|
||||
|
||||
@classmethod
|
||||
def find_or_insert(
|
||||
cls: Type[_MachineT], cur: sqlite3.Cursor, machine: Optional[_MachineT] = None
|
||||
) -> _MachineT:
|
||||
"""Given a DB cursor, find or create row in machine table and fill"""
|
||||
if machine is None:
|
||||
machine = cls.detect()
|
||||
def __post_init__(self) -> None:
|
||||
"""Derive sha256 from other attributes."""
|
||||
m = hashlib.sha256()
|
||||
upstr: Callable[[str], None] = lambda s: m.update(bytes(s, "utf-8"))
|
||||
if self.machine_id is not None:
|
||||
upstr(self.machine_id)
|
||||
upstr(self.hostname)
|
||||
upstr(self.processor)
|
||||
upstr(self.system)
|
||||
upstr(self.cpu_type)
|
||||
# circumvent freezing to set sha256
|
||||
object.__setattr__(self, "sha256", m.digest())
|
||||
|
||||
def maybe_insert(self, cur: sqlite3.Cursor) -> str:
|
||||
"""Insert if not exists, without committing."""
|
||||
key = self.sha256.hex()
|
||||
|
||||
# insert or ignore, handle each case to set id
|
||||
cur.execute(
|
||||
"""
|
||||
SELECT
|
||||
id
|
||||
FROM
|
||||
machine
|
||||
WHERE
|
||||
machine_id = ? AND
|
||||
hostname = ? AND
|
||||
processor = ? AND
|
||||
system = ? AND
|
||||
release = ? AND
|
||||
cpu_type = ? AND
|
||||
timezone = ? AND
|
||||
freedesktop_os_release = ? AND
|
||||
win32_ver = ? AND
|
||||
mac_ver = ?
|
||||
LIMIT 1
|
||||
INSERT OR IGNORE INTO machine VALUES (?,?,?,?,?,?);
|
||||
""",
|
||||
machine[1:],
|
||||
(
|
||||
key,
|
||||
self.machine_id,
|
||||
self.hostname,
|
||||
self.system,
|
||||
self.cpu_type,
|
||||
self.processor,
|
||||
),
|
||||
)
|
||||
res = cur.fetchone()
|
||||
if res is None:
|
||||
cur.execute(
|
||||
"""
|
||||
INSERT INTO machine VALUES (?,?,?,?,?,?,?,?,?,?,?);
|
||||
""",
|
||||
machine,
|
||||
)
|
||||
id = cur.lastrowid
|
||||
cur.connection.commit()
|
||||
else:
|
||||
id = res[0]
|
||||
|
||||
return machine._replace(id=id)
|
||||
return key
|
||||
|
||||
@classmethod
|
||||
def detect(cls: Type[_MachineT]) -> _MachineT:
|
||||
"""Formats machine-specific information into a MachineInfo object.
|
||||
"""Format machine-specific information into a MachineInfo object.
|
||||
|
||||
Note that 'MachineInfo' objects are properly formatted to be inserted into
|
||||
the `machine` table.
|
||||
|
||||
Raises:
|
||||
NotImplementedError: if system != "Linux"
|
||||
"""
|
||||
fdor = ""
|
||||
try:
|
||||
fdor = json.dumps(platform.freedesktop_os_release())
|
||||
except AttributeError:
|
||||
# freedesktop_os_release only available for python >= 3.10
|
||||
fdor = ""
|
||||
system = platform.system()
|
||||
|
||||
mid = None
|
||||
if system == "Linux":
|
||||
if system.lower() == "linux":
|
||||
try:
|
||||
mid = open("/etc/machine-id", "r").read()
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
else:
|
||||
raise NotImplementedError()
|
||||
|
||||
return cls(
|
||||
None,
|
||||
machine_id=mid,
|
||||
hostname=platform.node(),
|
||||
processor=platform.processor(),
|
||||
system=system,
|
||||
release=platform.release(),
|
||||
cpu_type=platform.machine(),
|
||||
timezone=time.tzname[time.daylight],
|
||||
freedesktop_os_release=fdor,
|
||||
win32_ver=json.dumps(platform.win32_ver()),
|
||||
mac_ver=json.dumps(platform.mac_ver()),
|
||||
processor=platform.processor(),
|
||||
)
|
||||
|
||||
@ -8,6 +8,15 @@ PRAGMA foreign_keys=ON;
|
||||
-- facilitate easy importing of one database into another
|
||||
|
||||
|
||||
-- Simple key/value table describing _this_ store (not imported ones).
|
||||
-- In particular, the value under the key "local_store" should be the UUID of the store in this
|
||||
-- directory.
|
||||
CREATE TABLE local_metadata(
|
||||
key TEXT PRIMARY KEY NOT NULL,
|
||||
value TEXT
|
||||
);
|
||||
|
||||
|
||||
-- This lets us enable or disable triggers. For example, when importing we need
|
||||
-- to update some tables to account for changing primary keys using ON UPDATE
|
||||
-- CASCADE. However, some dependent tables like filedir may have a trigger that
|
||||
@ -18,58 +27,38 @@ PRAGMA foreign_keys=ON;
|
||||
-- Note that entries in this table only need to be inserted when we know we have
|
||||
-- a need to disable the trigger. Otherwise, this table can be ignored.
|
||||
CREATE TABLE triggers(
|
||||
name TEXT PRIMARY KEY,
|
||||
name TEXT PRIMARY KEY NOT NULL,
|
||||
enabled BOOL
|
||||
);
|
||||
|
||||
|
||||
-- We track host machines, so that we can give better information about file
|
||||
-- locations. This also allows us to see whether platform-dependent behavior may
|
||||
-- come into play. Note that these are not reliable identifiers for purposes of
|
||||
-- tracking down data, but may be helpful context.
|
||||
CREATE TABLE machine(id INTEGER PRIMARY KEY NOT NULL,
|
||||
-- come into play.
|
||||
-- A row of this table should describe the aspects of a node that are unlikely
|
||||
-- to change during regular software updates. For example, the hostname and CPU
|
||||
-- type are included but not the kernel version. Software that changes due to
|
||||
-- updates should be included in the "environment" table instead.
|
||||
CREATE TABLE machine(
|
||||
sha256 TEXT PRIMARY KEY NOT NULL,
|
||||
machine_id TEXT, -- platform-dependent unique hardware id
|
||||
-- Linux: open('/etc/machine-id', 'r').read() (assumes systemd)
|
||||
-- OSX: `ioreg -rd1 -c IOPlatformExpertDevice | grep IOPlatformUUID`
|
||||
-- OSX: `ioreg -rd1 -c IOPlatformExpertDevice | grep IOPlatformUUID | awk '{print $3}' | tr -d \"`
|
||||
-- Windows: `reg query HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Cryptography /v MachineGuid`
|
||||
|
||||
hostname TEXT, -- platform.node(): 'lucky'
|
||||
processor TEXT, -- platform.processor():
|
||||
system TEXT, -- platform.system(): 'Linux'
|
||||
release TEXT, -- platform.release(): '5.15.64'
|
||||
cpu_type TEXT, -- platform.machine(): 'x86_64'
|
||||
timezone TEXT, -- timezone, for interpreting event times
|
||||
freedesktop_os_release TEXT, -- requires python 3.10
|
||||
-- platform.freedesktop_os_release() as JSON
|
||||
-- "{'NAME': 'NixOS', 'ID': 'nixos',
|
||||
-- 'PRETTY_NAME': 'NixOS 22.05 (Quokka)', 'BUG_REPORT_URL':
|
||||
-- 'https://github.com/NixOS/nixpkgs/issues', 'BUILD_ID':
|
||||
-- '22.05.20220902.67e4507', 'DOCUMENTATION_URL':
|
||||
-- 'https://nixos.org/learn.html', 'HOME_URL': 'https://nixos.org/', 'LOGO':
|
||||
-- 'nix-snowflake', 'SUPPORT_URL': 'https://nixos.org/community.html',
|
||||
-- 'VERSION': '22.05 (Quokka)', 'VERSION_CODENAME': 'quokka', 'VERSION_ID':
|
||||
-- '22.05'}"
|
||||
win32_ver TEXT, -- platform.win32_ver() as JSON
|
||||
mac_ver TEXT, -- platform.mac_ver() as JSON
|
||||
|
||||
-- disallow duplicate rows
|
||||
UNIQUE(hostname, processor, system, release, cpu_type, timezone,
|
||||
freedesktop_os_release, win32_ver, mac_ver)
|
||||
processor TEXT -- platform.processor():
|
||||
);
|
||||
-- Programs are run by users on machines
|
||||
CREATE TABLE user(id INTEGER PRIMARY KEY NOT NULL,
|
||||
CREATE TABLE user(
|
||||
sha256 TEXT PRIMARY KEY NOT NULL,
|
||||
username TEXT NOT NULL, -- getpass.getuser()
|
||||
userid INTEGER, -- os.getuid()
|
||||
fullname TEXT, -- on Linux/OSX: pwd.getpwuid(os.getuid()).pw_gecos
|
||||
-- on Windows: see https://stackoverflow.com/questions/21766954/how-to-get-windows-users-full-name-in-python
|
||||
machine INTEGER NOT NULL,
|
||||
|
||||
-- usernames and fullnames change rarely, but it happens. A more restrictive
|
||||
-- constraint would not enable tracking of this info. The following only
|
||||
-- guarantees no duplicate full rows.
|
||||
UNIQUE(username, userid, fullname, machine),
|
||||
|
||||
FOREIGN KEY (machine) REFERENCES machine (id) ON UPDATE CASCADE
|
||||
machine TEXT NOT NULL REFERENCES machine ON UPDATE CASCADE
|
||||
);
|
||||
|
||||
|
||||
@ -88,41 +77,32 @@ CREATE TABLE user(id INTEGER PRIMARY KEY NOT NULL,
|
||||
-- Client code should generate random UUIDs in the RFC 4122 variant layout.
|
||||
-- https://datatracker.ietf.org/doc/html/rfc4122.html
|
||||
-- This is possible in Python by simply calling uuid.uuid4() with no arguments
|
||||
CREATE TABLE store (id INTEGER PRIMARY KEY NOT NULL,
|
||||
machine INTEGER,
|
||||
dbpath TEXT NOT NULL,
|
||||
uuid TEXT NOT NULL, -- UUID generated by str(uuid.uuid4())
|
||||
imported BOOL, -- is this the store for the current directory? If not, then it was
|
||||
-- imported in order to describe a dependency.
|
||||
|
||||
FOREIGN KEY (machine) REFERENCES machine (id) ON UPDATE CASCADE
|
||||
CREATE TABLE store (
|
||||
uuid TEXT PRIMARY KEY NOT NULL -- UUID generated by str(uuid.uuid4())
|
||||
);
|
||||
|
||||
|
||||
-- The filedir table holds all files and directories that are tracked by the
|
||||
-- store. This table also holds tracked files and directories that have been
|
||||
-- imported and live outside the current store.
|
||||
-- We do not support renaming files. Once an entry is created here, it should
|
||||
-- only be updated to reflect frozen/thawed status.
|
||||
CREATE TABLE filedir (id INTEGER PRIMARY KEY NOT NULL,
|
||||
store INTEGER NOT NULL,
|
||||
-- We do not support renaming files.
|
||||
CREATE TABLE filedir (
|
||||
uuid TEXT PRIMARY KEY NOT NULL,
|
||||
store TEXT NOT NULL,
|
||||
name TEXT, -- only a filename, not a path
|
||||
parent INTEGER REFERENCES filedir ON UPDATE CASCADE,
|
||||
frozen BOOL NOT NULL,
|
||||
|
||||
parent TEXT REFERENCES filedir ON UPDATE CASCADE,
|
||||
UNIQUE(store, name, parent)
|
||||
);
|
||||
-- Detect cross-store references
|
||||
CREATE TRIGGER insert_filedir BEFORE INSERT ON filedir
|
||||
BEGIN SELECT CASE
|
||||
WHEN NEW.parent IS NOT NULL AND NEW.store != (SELECT store FROM filedir WHERE id = NEW.parent)
|
||||
WHEN NEW.parent IS NOT NULL AND NEW.store != (SELECT store FROM filedir WHERE uuid = NEW.parent)
|
||||
THEN RAISE (ABORT, 'Parent resides in different store')
|
||||
END; END;
|
||||
CREATE TRIGGER update_filedir BEFORE UPDATE ON filedir
|
||||
BEGIN SELECT CASE
|
||||
WHEN (NEW.id != OLD.id OR NEW.store != OLD.store OR NEW.parent != OLD.parent)
|
||||
THEN RAISE (ABORT, 'The only updates to filedir allowed are to the frozen column')
|
||||
END; END;
|
||||
BEGIN
|
||||
SELECT RAISE (ABORT, 'No updates to filedir allowed');
|
||||
END;
|
||||
|
||||
|
||||
-- This table holds _versions_ of files and directories. This table holds
|
||||
@ -131,8 +111,9 @@ END; END;
|
||||
-- independent of filetype (though the computation of content hashes is of
|
||||
-- course dependent on filetype). Each version has a number, and was provided by
|
||||
-- some program (and potentially a datum).
|
||||
CREATE TABLE filedir_version (id INTEGER PRIMARY KEY NOT NULL,
|
||||
filedir INTEGER NOT NULL
|
||||
CREATE TABLE filedir_version (
|
||||
uuid TEXT PRIMARY KEY NOT NULL,
|
||||
filedir TEXT NOT NULL
|
||||
REFERENCES filedir ON UPDATE CASCADE, -- parent filedir entry
|
||||
|
||||
recorded_time REAL, -- When was this version recorded?
|
||||
@ -144,7 +125,7 @@ CREATE TABLE filedir_version (id INTEGER PRIMARY KEY NOT NULL,
|
||||
|
||||
-- We record the permissions on each file, in a way that enables reloading
|
||||
-- permissions properly when thawing after a freeze operation.
|
||||
unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
|
||||
perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
|
||||
|
||||
symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target. i.e. this is the "content" of the symlink.
|
||||
|
||||
@ -158,7 +139,7 @@ CREATE TABLE filedir_version (id INTEGER PRIMARY KEY NOT NULL,
|
||||
-- can defer by only hashing files and directories at the end of a program.
|
||||
sha256 TEXT,
|
||||
|
||||
source_task INTEGER REFERENCES task (id) ON UPDATE CASCADE
|
||||
source_task TEXT REFERENCES task ON UPDATE CASCADE
|
||||
);
|
||||
-- Disallow UPDATING filedir_version. Instead, new version should be created.
|
||||
-- One exception is during importing, in which case we can disable the trigger
|
||||
@ -166,8 +147,7 @@ INSERT INTO triggers VALUES('update_filedir_version', TRUE);
|
||||
CREATE TRIGGER update_filedir_version BEFORE UPDATE ON filedir_version
|
||||
BEGIN SELECT CASE
|
||||
WHEN (SELECT enabled FROM triggers WHERE name = 'update_filedir_version')
|
||||
THEN RAISE (ABORT,
|
||||
'Updating filedir versions is prohibited, other than for id changes')
|
||||
THEN RAISE (ABORT, 'Updating filedir versions is prohibited')
|
||||
END; END;
|
||||
-- TODO: check for inserting inconsistent version info re. deleted flag/filetype
|
||||
|
||||
@ -176,17 +156,28 @@ END; END;
|
||||
-- A computational environment which can execute "programs". Note that the
|
||||
-- python executable being used, and environment variables are recorded here.
|
||||
-- Other info is available in the parent "machine" table.
|
||||
CREATE TABLE environment (id INTEGER PRIMARY KEY NOT NULL,
|
||||
CREATE TABLE environment (
|
||||
sha256 TEXT PRIMARY KEY NOT NULL,
|
||||
|
||||
envvars_json TEXT, -- json.dumps(dict(os.environ))
|
||||
python_implementation TEXT, -- platform.python_implementation(): 'cpython'
|
||||
python_strversion TEXT, -- sys.version: '3.9.7 (default, Sep 16 2021, 13:09:58) \n[GCC 7.5.0]'
|
||||
python_hexversion INTEGER, -- sys.hexversion: 50923504
|
||||
user INTEGER,
|
||||
|
||||
-- disallow duplicate rows
|
||||
UNIQUE(envvars_json, python_implementation, python_strversion, python_hexversion, user),
|
||||
|
||||
FOREIGN KEY (user) REFERENCES user (id) ON UPDATE CASCADE
|
||||
user TEXT REFERENCES user ON UPDATE CASCADE,
|
||||
timezone TEXT, -- timezone, for interpreting event times
|
||||
platform_release TEXT, -- platform.release(): '5.15.64'
|
||||
freedesktop_os_release TEXT, -- requires python 3.10
|
||||
-- platform.freedesktop_os_release() as JSON
|
||||
-- "{'NAME': 'NixOS', 'ID': 'nixos',
|
||||
-- 'PRETTY_NAME': 'NixOS 22.05 (Quokka)', 'BUG_REPORT_URL':
|
||||
-- 'https://github.com/NixOS/nixpkgs/issues', 'BUILD_ID':
|
||||
-- '22.05.20220902.67e4507', 'DOCUMENTATION_URL':
|
||||
-- 'https://nixos.org/learn.html', 'HOME_URL': 'https://nixos.org/', 'LOGO':
|
||||
-- 'nix-snowflake', 'SUPPORT_URL': 'https://nixos.org/community.html',
|
||||
-- 'VERSION': '22.05 (Quokka)', 'VERSION_CODENAME': 'quokka', 'VERSION_ID':
|
||||
-- '22.05'}"
|
||||
win32_ver TEXT, -- platform.win32_ver() as JSON
|
||||
mac_ver TEXT -- platform.mac_ver() as JSON
|
||||
);
|
||||
|
||||
|
||||
@ -195,7 +186,8 @@ CREATE TABLE environment (id INTEGER PRIMARY KEY NOT NULL,
|
||||
-- python script, multiple "programs" may be created. Also note that historical
|
||||
-- programs are automatically imported and merged when possible when loading a
|
||||
-- "datum" from disk.
|
||||
CREATE TABLE program (id INTEGER PRIMARY KEY NOT NULL,
|
||||
CREATE TABLE program (
|
||||
uuid TEXT PRIMARY KEY NOT NULL,
|
||||
name TEXT, -- name of the program, usually written lowercase by calling code e.g. cnn_crossval
|
||||
-- Names of built-in operations will be shown in upper case: e.g. 'FREEZE'
|
||||
|
||||
@ -205,65 +197,57 @@ CREATE TABLE program (id INTEGER PRIMARY KEY NOT NULL,
|
||||
end_time REAL,
|
||||
|
||||
process_id INTEGER, -- host PID of python process on host OS
|
||||
environment INTEGER NOT NULL,
|
||||
message TEXT, -- user-defined message to help distinguish similar runs
|
||||
|
||||
FOREIGN KEY (environment) REFERENCES environment (id) ON UPDATE CASCADE
|
||||
environment TEXT NOT NULL REFERENCES environment ON UPDATE CASCADE,
|
||||
message TEXT NOT NULL -- user-defined message to help distinguish similar runs
|
||||
);
|
||||
|
||||
|
||||
-- We try to track all python packages that impact execution by traversing a
|
||||
-- copy of sys.modules. This is done once before a "program" and once after in
|
||||
-- case some calling code winds up calling a previously-unloaded module.
|
||||
CREATE TABLE py_package (id INTEGER PRIMARY KEY NOT NULL,
|
||||
CREATE TABLE py_package (
|
||||
sha256 TEXT PRIMARY KEY NOT NULL,
|
||||
name TEXT NOT NULL,
|
||||
version TEXT
|
||||
version TEXT,
|
||||
|
||||
UNIQUE (name, version)
|
||||
);
|
||||
-- A py_module describes any python module file containing decorated Functions.
|
||||
-- Modules are tracked since they impact the global scope of function calls.
|
||||
CREATE TABLE py_module(id INTEGER PRIMARY KEY NOT NULL,
|
||||
CREATE TABLE py_module(
|
||||
sha256 TEXT PRIMARY KEY NOT NULL,
|
||||
name TEXT NOT NULL,
|
||||
code TEXT,
|
||||
py_package INTEGER,
|
||||
|
||||
FOREIGN KEY (py_package) REFERENCES py_package (id) ON UPDATE CASCADE
|
||||
code TEXT, -- code doesn't have to be included, but should be used to create sha256
|
||||
py_package TEXT REFERENCES py_package ON UPDATE CASCADE
|
||||
);
|
||||
-- A py_function just describes a function, without reference to its arguments.
|
||||
-- It can have inputs and outputs, which are described in the func_inputs and
|
||||
-- func_outputs children tables.
|
||||
CREATE TABLE py_function(id INTEGER PRIMARY KEY NOT NULL,
|
||||
CREATE TABLE py_function(
|
||||
sha256 TEXT PRIMARY KEY NOT NULL,
|
||||
name TEXT NOT NULL,
|
||||
py_module INTEGER NOT NULL,
|
||||
|
||||
-- we use POSIX timestamps for time recording.
|
||||
-- e.g. datetime.datetime.now().timestamp()
|
||||
start_time REAL,
|
||||
end_time REAL,
|
||||
|
||||
FOREIGN KEY (py_module) REFERENCES py_module (id) ON UPDATE CASCADE
|
||||
py_module TEXT NOT NULL REFERENCES py_module ON UPDATE CASCADE
|
||||
);
|
||||
CREATE TABLE py_function_input(id INTEGER PRIMARY KEY NOT NULL,
|
||||
CREATE TABLE py_function_input(
|
||||
uuid TEXT PRIMARY KEY NOT NULL,
|
||||
name TEXT NOT NULL,
|
||||
typename TEXT NOT NULL,
|
||||
py_function INTEGER NOT NULL,
|
||||
py_function TEXT NOT NULL REFERENCES py_function ON UPDATE CASCADE,
|
||||
|
||||
position INTEGER,
|
||||
posonly BOOL,
|
||||
kwonly BOOL,
|
||||
|
||||
description TEXT,
|
||||
|
||||
FOREIGN KEY (py_function) REFERENCES py_function (id) ON UPDATE CASCADE
|
||||
description TEXT
|
||||
);
|
||||
CREATE TABLE py_function_output(id INTEGER PRIMARY KEY NOT NULL,
|
||||
CREATE TABLE py_function_output(
|
||||
uuid TEXT PRIMARY KEY NOT NULL,
|
||||
name TEXT,
|
||||
typename TEXT NOT NULL,
|
||||
py_function INTEGER NOT NULL,
|
||||
py_function TEXT NOT NULL REFERENCES py_function ON UPDATE CASCADE,
|
||||
|
||||
position INTEGER NOT NULL,
|
||||
description TEXT,
|
||||
|
||||
FOREIGN KEY (py_function) REFERENCES py_function (id) ON UPDATE CASCADE
|
||||
description TEXT
|
||||
);
|
||||
|
||||
|
||||
@ -271,17 +255,17 @@ CREATE TABLE py_function_output(id INTEGER PRIMARY KEY NOT NULL,
|
||||
-- particular set of inputs which are themselves data (see datum table). A task
|
||||
-- is executed in the context of a "program". Within a program, tasks are
|
||||
-- typically evaluated in a serial manner.
|
||||
CREATE TABLE task(id INTEGER PRIMARY KEY NOT NULL,
|
||||
program INTEGER NOT NULL,
|
||||
CREATE TABLE task(
|
||||
uuid TEXT PRIMARY KEY NOT NULL,
|
||||
program TEXT NOT NULL REFERENCES program ON UPDATE CASCADE,
|
||||
|
||||
py_function INTEGER, -- func is NULL for some built-in functionality like "record" programs
|
||||
|
||||
FOREIGN KEY (py_function) REFERENCES py_function (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (program) REFERENCES program (id) ON UPDATE CASCADE
|
||||
-- py_function is NULL for some built-in functionality like "RECORD" programs
|
||||
py_function TEXT REFERENCES py_function ON UPDATE CASCADE
|
||||
);
|
||||
-- A datum is an object that is computed as the output of a task, given as a
|
||||
-- literal value in a config file, or loaded from a file.
|
||||
CREATE TABLE datum(id INTEGER PRIMARY KEY NOT NULL,
|
||||
CREATE TABLE datum(
|
||||
uuid TEXT PRIMARY KEY NOT NULL,
|
||||
-- provider_type describes where the datum came from. Choices are:
|
||||
-- - COMPUTED: output of a decorated Function
|
||||
-- - IMPORTED: imported output from a prior program
|
||||
@ -290,28 +274,23 @@ CREATE TABLE datum(id INTEGER PRIMARY KEY NOT NULL,
|
||||
provider_type TEXT,
|
||||
|
||||
-- task and task_output are only used when provider_type='COMPUTED'
|
||||
task INTEGER,
|
||||
task_output INTEGER,
|
||||
task TEXT REFERENCES task ON UPDATE CASCADE,
|
||||
task_output TEXT REFERENCES func_output ON UPDATE CASCADE,
|
||||
|
||||
literal_json TEXT, -- if provider_type='LITERAL', a JSON representation of the value
|
||||
|
||||
typename TEXT NOT NULL, -- string representation of the data type
|
||||
|
||||
FOREIGN KEY (task) REFERENCES task (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (task_output) REFERENCES func_output (id) ON UPDATE CASCADE
|
||||
typename TEXT NOT NULL -- string representation of the data type
|
||||
);
|
||||
-- A task_input records the version of a Datum that is passed to a function
|
||||
CREATE TABLE task_input(id INTEGER PRIMARY KEY NOT NULL,
|
||||
task INTEGER NOT NULL,
|
||||
py_function_input INTEGER, -- if this was a python function, reference which input
|
||||
CREATE TABLE task_input(
|
||||
uuid TEXT PRIMARY KEY NOT NULL,
|
||||
task TEXT NOT NULL REFERENCES task ON UPDATE CASCADE,
|
||||
-- if this was a python function, reference which input
|
||||
py_function_input TEXT REFERENCES py_function_input ON UPDATE CASCADE,
|
||||
|
||||
datum INTEGER NOT NULL,
|
||||
datum TEXT NOT NULL REFERENCES datum ON UPDATE CASCADE,
|
||||
-- Data have versions to facilitate tracking non-const operations. If a datum
|
||||
-- is passed to a non-const operation, it must increment its internal
|
||||
-- version
|
||||
datum_version INTEGER NOT NULL,
|
||||
|
||||
FOREIGN KEY (task) REFERENCES task (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (py_function_input) REFERENCES py_function_input (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (datum) REFERENCES datum (id) ON UPDATE CASCADE
|
||||
datum_version INTEGER NOT NULL
|
||||
);
|
||||
|
||||
@ -4,16 +4,18 @@ from loguru import logger
|
||||
|
||||
from . import db, environment, fs
|
||||
|
||||
from contextlib import contextmanager
|
||||
from dataclasses import dataclass
|
||||
import datetime
|
||||
import os
|
||||
from pathlib import Path
|
||||
import sqlite3
|
||||
from typing import Any, Optional, TypeVar, Type, Union
|
||||
from typing import Any, Iterator, Optional, TypeVar, Type, Union
|
||||
import uuid
|
||||
import warnings
|
||||
|
||||
|
||||
@dataclass
|
||||
@dataclass(slots=True)
|
||||
class Program:
|
||||
store: "Store"
|
||||
name: str
|
||||
@ -23,18 +25,22 @@ class Program:
|
||||
start_time: Optional[datetime.datetime] = None
|
||||
evaluated: bool = False
|
||||
|
||||
uuid: str = ""
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
self.uuid = str(uuid.uuid4())
|
||||
|
||||
def __enter__(self) -> "Program":
|
||||
if self.evaluated:
|
||||
raise RuntimeError("Cannot re-enter a Program context")
|
||||
|
||||
assert self.store.conn is not None
|
||||
cur = self.store.conn.cursor()
|
||||
env = environment.Environment.find_or_insert(cur)
|
||||
|
||||
env = environment.Environment.detect()
|
||||
with self.store.committing() as cur:
|
||||
env.maybe_insert(cur)
|
||||
cur.execute(
|
||||
"INSERT INTO program VALUES (?, ?, ?, ?, ?, ?, ?)",
|
||||
(
|
||||
None, # id INTEGER PRIMARY KEY NOT NULL,
|
||||
self.uuid, # uuid
|
||||
self.name, # name TEXT,
|
||||
# name of the program, usually written lowercase by calling
|
||||
# code e.g. cnn_crossval
|
||||
@ -43,20 +49,21 @@ class Program:
|
||||
None, # start_time REAL,
|
||||
None, # end_time REAL,
|
||||
os.getpid(), # process_id INTEGER, -- host PID of python process on host OS
|
||||
env.id, # environment INTEGER NOT NULL,
|
||||
env.sha256.hex(), # environment INTEGER NOT NULL,
|
||||
self.message, # user-defined message to help distinguish similar runs
|
||||
),
|
||||
)
|
||||
self.id = cur.lastrowid
|
||||
|
||||
self.start_time = datetime.datetime.now()
|
||||
|
||||
return self
|
||||
|
||||
def new_task(self, name: str, py_function_id: Optional[int] = None) -> int:
|
||||
def new_task(
|
||||
self,
|
||||
name: str,
|
||||
cur: sqlite3.Cursor,
|
||||
py_function_id: Optional[int] = None,
|
||||
) -> int:
|
||||
"""Create a new task and return its id"""
|
||||
assert self.store.conn is not None
|
||||
cur = self.store.conn.cursor()
|
||||
cur.execute(
|
||||
"INSERT INTO task VALUES (?, ?, ?)",
|
||||
(None, self.id, py_function_id),
|
||||
@ -75,7 +82,7 @@ class Program:
|
||||
# record start and end times in store
|
||||
|
||||
assert self.store.conn is not None
|
||||
cur = self.store.conn.cursor()
|
||||
with self.store.committing() as cur:
|
||||
cur.execute(
|
||||
"""
|
||||
UPDATE
|
||||
@ -84,12 +91,11 @@ class Program:
|
||||
start_time = ?,
|
||||
end_time = ?
|
||||
WHERE
|
||||
id = ?
|
||||
uuid = ?
|
||||
""",
|
||||
(self.start_time, end_time, self.id),
|
||||
(self.start_time, end_time, self.uuid),
|
||||
)
|
||||
cur.connection.commit()
|
||||
self._evaluated = True # prevent re-running
|
||||
self.evaluated = True # prevent re-running
|
||||
assert self.start_time is not None
|
||||
elapsed = end_time - self.start_time
|
||||
logger.success(
|
||||
@ -126,11 +132,14 @@ class Store:
|
||||
else:
|
||||
self.path = Path(directory)
|
||||
self.db_path = self.path / "nancy.db"
|
||||
|
||||
if conn is None:
|
||||
self.connect()
|
||||
else:
|
||||
self.conn = conn
|
||||
|
||||
self.store_uuid = self.find_store_uuid()
|
||||
|
||||
def copy(self: _StoreT, store_path: fs.PathStr) -> _StoreT:
|
||||
"""Copy this store to a new store path"""
|
||||
assert self.conn is not None
|
||||
@ -141,10 +150,28 @@ class Store:
|
||||
return self.__class__(store_path)
|
||||
|
||||
def connect(self) -> sqlite3.Connection:
|
||||
self.conn = sqlite3.connect(self.db_path)
|
||||
self.conn = sqlite3.connect(self.db_path, isolation_level="DEFERRED")
|
||||
self.conn.cursor().execute("PRAGMA foreign_keys = ON;")
|
||||
return self.conn
|
||||
|
||||
@contextmanager
def committing(self) -> Iterator[sqlite3.Cursor]:
    """Yield a fresh cursor; commit on clean exit, roll back on error.

    Intended use is ``with store.committing() as cur: ...`` so that all
    statements executed inside the ``with`` body form a single transaction.

    Yields:
        A new cursor on this store's connection.

    Raises:
        Whatever the ``with`` body raises; the exception is re-raised
        unchanged after the pending transaction has been rolled back.
    """
    assert self.conn is not None
    cur = self.conn.cursor()
    try:
        yield cur
    except BaseException:
        # Without this, a failed body would leave its partial writes pending
        # on the shared connection, and the next unrelated commit() would
        # silently persist them.
        self.conn.rollback()
        raise
    else:
        self.conn.commit()
|
||||
|
||||
def find_store_uuid(self, cur: Optional[sqlite3.Cursor] = None) -> Optional[str]:
    """Look up the ``store_uuid`` entry in the ``local_metadata`` table.

    Args:
        cur: Cursor to run the query on. When omitted, a new cursor is
            created on this store's connection.

    Returns:
        The stored value for the ``store_uuid`` key, or ``None`` when the
        table has no such row.
    """
    assert self.conn is not None
    if cur is None:
        cur = self.conn.cursor()
    # Bind the key as a parameter: in SQLite a double-quoted token is an
    # identifier and is only treated as a string literal via a legacy
    # fallback that can be disabled at build/run time.
    cur.execute(
        "SELECT value FROM local_metadata WHERE key = ? LIMIT 1",
        ("store_uuid",),
    )
    res = cur.fetchone()
    return None if res is None else res[0]
|
||||
|
||||
@classmethod
|
||||
def init(
|
||||
cls: Type[_StoreT], message: str, directory: Optional[fs.PathStr] = None
|
||||
@ -163,14 +190,27 @@ class Store:
|
||||
f"File {db_path} exists. Refusing to re-initialize",
|
||||
)
|
||||
# initialize a database in the target directory
|
||||
conn = sqlite3.connect(db_path)
|
||||
db.init_schema(conn.cursor())
|
||||
conn = sqlite3.connect(db_path, isolation_level="DEFERRED")
|
||||
cur = conn.cursor()
|
||||
db.init_schema(cur)
|
||||
conn.commit()
|
||||
cur.close()
|
||||
|
||||
new_store = cls(directory, conn)
|
||||
|
||||
with new_store.program("INIT", message) as p:
|
||||
# set the timing to the actual times it took to initialize the db
|
||||
p.start_time = start_time
|
||||
|
||||
# generate a new UUID for this store
|
||||
assert new_store.store_uuid is None
|
||||
new_store.store_uuid = str(uuid.uuid4())
|
||||
with new_store.committing() as cur:
|
||||
cur.execute(
|
||||
'INSERT INTO local_metadata VALUES ("store_uuid", ?)',
|
||||
(new_store.store_uuid,),
|
||||
)
|
||||
|
||||
return new_store
|
||||
|
||||
def make_readonly(self) -> None:
|
||||
@ -345,26 +385,15 @@ class Store:
|
||||
cur = self.conn.cursor()
|
||||
|
||||
with self.program("RECORD", message) as p:
|
||||
with self.committing() as cur: # entire record operation is one transaction
|
||||
# create a task for this operation
|
||||
task_id = p.new_task("Store._record_recursive")
|
||||
task_id = p.new_task("Store._record_recursive", cur=cur)
|
||||
|
||||
# descend the diff, tracking parent filedir IDs, creating them and
|
||||
# recording new versions of each, when necessary
|
||||
self._record_recursive(diff, cur, source_task=task_id)
|
||||
|
||||
|
||||
class StoreFile:
|
||||
"""Describes a file that is recorded in the store."""
|
||||
|
||||
def __init__(self, store: Store, rel_path: fs.PathStr):
|
||||
self.store = store
|
||||
self.rel_path = rel_path
|
||||
|
||||
def save(self) -> None:
|
||||
# call the appropriate save method
|
||||
pass
|
||||
|
||||
|
||||
def find_store(path: Union[str, "os.PathLike[str]"]) -> Optional[str]:
|
||||
"""
|
||||
Given a path, find a store dir containing nancy.db at any level above it.
|
||||
|
||||
@ -1,76 +1,59 @@
|
||||
from . import machine
|
||||
|
||||
from dataclasses import dataclass
|
||||
import hashlib
|
||||
import getpass
|
||||
import os
|
||||
import pwd
|
||||
import sqlite3
|
||||
from typing import NamedTuple, Optional, Type, TypeVar
|
||||
from typing import Callable, NamedTuple, Optional, Type, TypeVar
|
||||
|
||||
|
||||
# see https://stackoverflow.com/questions/44640479/type-annotation-for-classmethod-returning-instance
|
||||
_UserT = TypeVar("_UserT", bound="User")
|
||||
|
||||
|
||||
class User(NamedTuple):
|
||||
id: Optional[int] # if not None, this is `id` in the `machine` table
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class User:
|
||||
username: str
|
||||
userid: int
|
||||
fullname: str
|
||||
machine: machine.Machine
|
||||
sha256: bytes = b""
|
||||
|
||||
@classmethod
|
||||
def find_or_insert(
|
||||
cls: Type[_UserT], cur: sqlite3.Cursor, user: Optional[_UserT] = None
|
||||
) -> _UserT:
|
||||
"""Given a DB cursor, find or create row in user table and fill"""
|
||||
if user is None:
|
||||
user = cls.detect()
|
||||
def __post_init__(self) -> None:
|
||||
m = hashlib.sha256()
|
||||
upstr: Callable[[str], None] = lambda s: m.update(bytes(s, "utf-8"))
|
||||
upint: Callable[[int], None] = lambda n: m.update(
|
||||
n.to_bytes((n.bit_length() + 7) // 8, byteorder="big"),
|
||||
)
|
||||
upstr(self.username)
|
||||
upint(self.userid)
|
||||
upstr(self.fullname)
|
||||
m.update(self.machine.sha256)
|
||||
# circumvent freezing to set sha256
|
||||
object.__setattr__(self, "sha256", m.digest())
|
||||
|
||||
m = machine.Machine.find_or_insert(cur)
|
||||
def maybe_insert(self, cur: sqlite3.Cursor) -> str:
|
||||
"""Insert if not exists, without committing."""
|
||||
self.machine.maybe_insert(cur)
|
||||
|
||||
user = user._replace(machine=m)
|
||||
key = self.sha256.hex()
|
||||
|
||||
# insert or ignore, handle each case to set id
|
||||
cur.execute(
|
||||
"""
|
||||
SELECT
|
||||
id
|
||||
FROM
|
||||
user
|
||||
WHERE
|
||||
username = ? AND
|
||||
userid = ? AND
|
||||
fullname = ? AND
|
||||
machine = ?
|
||||
LIMIT 1
|
||||
INSERT OR IGNORE INTO user VALUES (?,?,?,?,?);
|
||||
""",
|
||||
(
|
||||
user.username,
|
||||
user.userid,
|
||||
user.fullname,
|
||||
user.machine.id,
|
||||
key,
|
||||
self.username,
|
||||
self.userid,
|
||||
self.fullname,
|
||||
self.machine.sha256.hex(),
|
||||
),
|
||||
)
|
||||
res = cur.fetchone()
|
||||
if res is None:
|
||||
cur.execute(
|
||||
"""
|
||||
INSERT INTO user VALUES (?,?,?,?,?);
|
||||
""",
|
||||
(
|
||||
user.id,
|
||||
user.username,
|
||||
user.userid,
|
||||
user.fullname,
|
||||
user.machine.id,
|
||||
),
|
||||
)
|
||||
id = cur.lastrowid
|
||||
cur.connection.commit()
|
||||
else:
|
||||
id = res[0]
|
||||
|
||||
return user._replace(id=id)
|
||||
return key
|
||||
|
||||
@classmethod
|
||||
def detect(cls: Type[_UserT]) -> _UserT:
|
||||
@ -81,12 +64,9 @@ class User(NamedTuple):
|
||||
# TODO: will this fail on Windows/OSX?
|
||||
fullname = pwd.getpwuid(os.getuid()).pw_gecos
|
||||
|
||||
m = machine.Machine.detect()
|
||||
|
||||
return cls(
|
||||
None,
|
||||
getpass.getuser(),
|
||||
os.getuid(),
|
||||
fullname,
|
||||
m,
|
||||
machine=machine.Machine.detect(),
|
||||
)
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
from .db import schema_version # NOQA
|
||||
from .db import schema_version as schema_version # NOQA
|
||||
|
||||
__version__ = "0.1.0"
|
||||
|
||||
161
tests/test_db.py
161
tests/test_db.py
@ -6,10 +6,11 @@ import pytest
|
||||
import datetime
|
||||
import os
|
||||
import sqlite3
|
||||
from typing import Iterator
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def temp_db():
|
||||
def temp_db() -> Iterator[sqlite3.Cursor]:
|
||||
"""Create an in-memory database that follow's the nancy schema"""
|
||||
with sqlite3.connect(":memory:") as conn:
|
||||
cur = conn.cursor()
|
||||
@ -18,47 +19,42 @@ def temp_db():
|
||||
|
||||
db.init_schema(cur)
|
||||
|
||||
cur.execute(
|
||||
'INSERT INTO local_metadata VALUES ("store_uuid", ?)',
|
||||
("78dc0b93-0e22-45ee-ae90-49d3575dd70f",),
|
||||
)
|
||||
|
||||
yield cur
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def insert_machine(temp_db):
|
||||
def insert_machine(temp_db: sqlite3.Cursor) -> sqlite3.Cursor:
|
||||
cur = temp_db
|
||||
cur.executemany(
|
||||
"INSERT INTO machine VALUES " "(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
|
||||
"INSERT INTO machine VALUES " "(?, ?, ?, ?, ?, ?)",
|
||||
[
|
||||
(
|
||||
None, # id INTEGER PRIMARY KEY NOT NULL,
|
||||
"a5d97c08a15c4db69f5fded523a1bfe3", # machine_id TEXT, -- platform-dependent unique hardware id
|
||||
"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", # sha256
|
||||
"a5d97c08a15c4db69f5fded523a1bfe3", # machine_id TEXT
|
||||
"lucky", # hostname TEXT, -- platform.node(): 'lucky'
|
||||
"", # processor TEXT, -- platform.processor():
|
||||
"Linux", # system TEXT, -- platform.system(): 'Linux'
|
||||
"5.15.64", # release TEXT, -- platform.release(): '5.15.64'
|
||||
"aarch64", # machine TEXT, -- platform.machine(): 'x86_64'
|
||||
"EDT", # timezone TEXT, -- timezone, for interpreting event times
|
||||
"", # freedesktop_os_release TEXT, -- requires python 3.10
|
||||
"", # win32_ver TEXT, -- platform.win32_ver() as JSON
|
||||
"", # mac_ver TEXT -- platform.mac_ver() as JSON
|
||||
"x86_64", # cpu_type TEXT, -- platform.machine():
|
||||
"i386", # processor TEXT, -- platform.processor():
|
||||
),
|
||||
(
|
||||
None, # id INTEGER PRIMARY KEY NOT NULL,
|
||||
"afc9b06a23b74341b29d42b8312a4f8a",
|
||||
"2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae", # sha256
|
||||
"b5d97c08a15c4db69f5fded523a1bfe3", # machine_id TEXT
|
||||
"a100", # hostname TEXT, -- platform.node(): 'lucky'
|
||||
"", # processor TEXT, -- platform.processor():
|
||||
"Linux", # system TEXT, -- platform.system(): 'Linux'
|
||||
"5.15.63", # release TEXT, -- platform.release(): '5.15.64'
|
||||
"x86_64", # machine TEXT, -- platform.machine(): 'x86_64'
|
||||
"EST", # timezone TEXT, -- timezone, for interpreting event times
|
||||
"", # freedesktop_os_release TEXT, -- requires python 3.10
|
||||
"", # win32_ver TEXT, -- platform.win32_ver() as JSON
|
||||
"", # mac_ver TEXT -- platform.mac_ver() as JSON
|
||||
"x86_64", # cpu_type TEXT, -- platform.machine():
|
||||
"i386", # processor TEXT, -- platform.processor():
|
||||
),
|
||||
],
|
||||
)
|
||||
return cur
|
||||
|
||||
|
||||
def test_insert_machine(insert_machine):
|
||||
def test_insert_machine(insert_machine: sqlite3.Cursor) -> None:
|
||||
cur = insert_machine
|
||||
cur.execute("SELECT * FROM machine")
|
||||
machines = cur.fetchall()
|
||||
@ -66,45 +62,45 @@ def test_insert_machine(insert_machine):
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def insert_user(insert_machine):
|
||||
def insert_user(insert_machine: sqlite3.Cursor) -> sqlite3.Cursor:
|
||||
cur = insert_machine
|
||||
cur.executemany(
|
||||
"INSERT INTO user VALUES " "(?, ?, ?, ?, ?)",
|
||||
"INSERT INTO user VALUES (?, ?, ?, ?, ?)",
|
||||
[
|
||||
(
|
||||
None, # id INTEGER PRIMARY KEY NOT NULL,
|
||||
( # jacob@lucky
|
||||
"f2ee9db3526d36b4346980f12a49cb404b924cf784dbd3d5db8a362e363a8070", # sha256
|
||||
"jacob", # username TEXT NOT NULL,
|
||||
101, # userid INTEGER,
|
||||
"Jacob Hinkle", # fullname TEXT,
|
||||
1, # machine INTEGER NOT NULL,
|
||||
"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", # machine
|
||||
),
|
||||
(
|
||||
None, # id INTEGER PRIMARY KEY NOT NULL,
|
||||
( # jacob@a100
|
||||
"3219c598f4e6e84bea3d0c295f0d2c35f3ce01747f4839fb698b32646b037d3c", # sha256
|
||||
"jacob", # username TEXT NOT NULL,
|
||||
10301, # userid INTEGER,
|
||||
"Jacob Hinkle", # fullname TEXT,
|
||||
2, # machine INTEGER NOT NULL,
|
||||
"2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae", # machine
|
||||
),
|
||||
(
|
||||
None, # id INTEGER PRIMARY KEY NOT NULL,
|
||||
( # bob@a100
|
||||
"99389acaef44ed99c16bcaf1bff0c261b35ad110f0544092f711a1354e616f61", # sha256
|
||||
"bob", # username TEXT NOT NULL,
|
||||
2035, # userid INTEGER,
|
||||
"Just Bob", # fullname TEXT,
|
||||
2, # machine INTEGER NOT NULL,
|
||||
"2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae", # machine
|
||||
),
|
||||
],
|
||||
)
|
||||
return cur
|
||||
|
||||
|
||||
def test_insert_user(insert_user):
|
||||
def test_insert_user(insert_user: sqlite3.Cursor) -> None:
|
||||
cur = insert_user
|
||||
cur.execute("SELECT * FROM user")
|
||||
users = cur.fetchall()
|
||||
assert len(users) == 3
|
||||
|
||||
|
||||
def test_invalid_user_machine(insert_user):
|
||||
def test_invalid_user_machine(insert_user: sqlite3.Cursor) -> None:
|
||||
cur = insert_user
|
||||
with pytest.raises(sqlite3.IntegrityError):
|
||||
# should fail foreign key constraint
|
||||
@ -133,66 +129,44 @@ def test_invalid_user_machine(insert_user):
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def insert_store(insert_machine):
|
||||
def insert_store(insert_machine: sqlite3.Cursor) -> sqlite3.Cursor:
|
||||
import uuid
|
||||
|
||||
cur = insert_machine
|
||||
cur.executemany(
|
||||
"INSERT INTO store VALUES " "(?, ?, ?, ?, ?)",
|
||||
"INSERT INTO store VALUES " "(?)",
|
||||
[
|
||||
(
|
||||
None, # id INTEGER PRIMARY KEY NOT NULL,
|
||||
1, # machine INTEGER,
|
||||
"/path/to/first/store", # dbpath TEXT NOT NULL,
|
||||
str(uuid.uuid4()), # -- UUID generated by str(uuid.uuid4())
|
||||
False, # imported BOOL,
|
||||
),
|
||||
(
|
||||
None, # id INTEGER PRIMARY KEY NOT NULL,
|
||||
1, # machine INTEGER,
|
||||
"/path/to/dependency/store", # dbpath TEXT NOT NULL,
|
||||
str(uuid.uuid4()), # -- UUID generated by str(uuid.uuid4())
|
||||
True, # imported BOOL,
|
||||
),
|
||||
(
|
||||
None, # id INTEGER PRIMARY KEY NOT NULL,
|
||||
2, # machine INTEGER,
|
||||
# same path but on a separate machine
|
||||
"/path/to/first/store", # dbpath TEXT NOT NULL,
|
||||
str(uuid.uuid4()), # -- UUID generated by str(uuid.uuid4())
|
||||
True, # imported BOOL,
|
||||
),
|
||||
("63c2a84a-524f-46ad-b512-b99e80f7385b",),
|
||||
("3be35ff6-6a7b-49cf-ba3e-c610b781b54e",),
|
||||
("b8f685d8-2322-403d-a59c-fb8301229788",),
|
||||
],
|
||||
)
|
||||
return cur
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def insert_directories(insert_store):
|
||||
def insert_directories(insert_store: sqlite3.Cursor) -> sqlite3.Cursor:
|
||||
cur = insert_store
|
||||
cur.executemany(
|
||||
"INSERT INTO filedir VALUES " "(?, ?, ?, ?, ?)",
|
||||
"INSERT INTO filedir VALUES (?, ?, ?, ?)",
|
||||
[
|
||||
(
|
||||
None, # id INTEGER PRIMARY KEY NOT NULL,
|
||||
1, # store INTEGER NOT NULL,
|
||||
".", # filename TEXT, -- only a filename, not a path
|
||||
None, # parent INTEGER REFERENCES filedir ON UPDATE CASCADE,
|
||||
False, # frozen BOOL NOT NULL,
|
||||
"71e75275-0847-4e47-9df5-1f2bbe01da91", # uuid
|
||||
"63c2a84a-524f-46ad-b512-b99e80f7385b", # store
|
||||
".", # filename
|
||||
None, # parent
|
||||
),
|
||||
(
|
||||
None, # id INTEGER PRIMARY KEY NOT NULL,
|
||||
1, # store INTEGER NOT NULL,
|
||||
"foo", # filename TEXT, -- only a filename, not a path
|
||||
1, # parent INTEGER REFERENCES filedir ON UPDATE CASCADE,
|
||||
False, # frozen BOOL NOT NULL,
|
||||
"6051a251-d38a-4d8c-ba57-ac74023ec2f4", # uuid
|
||||
"63c2a84a-524f-46ad-b512-b99e80f7385b", # store
|
||||
"foo", # filename
|
||||
"71e75275-0847-4e47-9df5-1f2bbe01da91", # parent
|
||||
),
|
||||
(
|
||||
None, # id INTEGER PRIMARY KEY NOT NULL,
|
||||
2, # store INTEGER NOT NULL,
|
||||
".", # filename TEXT, -- only a filename, not a path
|
||||
None, # parent INTEGER REFERENCES filedir ON UPDATE CASCADE,
|
||||
False, # frozen BOOL NOT NULL,
|
||||
"5f74b5f2-eac8-4a86-90f6-4f7c7981564d", # uuid
|
||||
"3be35ff6-6a7b-49cf-ba3e-c610b781b54e", # store
|
||||
".", # filename
|
||||
None, # parent
|
||||
),
|
||||
],
|
||||
)
|
||||
@ -200,34 +174,34 @@ def insert_directories(insert_store):
|
||||
"INSERT INTO filedir_version VALUES " "(?, ?, ?, ?, ?, ?, ?, ?, ?)",
|
||||
[
|
||||
(
|
||||
None, # id INTEGER PRIMARY KEY NOT NULL,
|
||||
1, # INTEGER REFERENCES filedir ON UPDATE CASCADE, -- parent filedir entry
|
||||
"baad81ef-7cc6-48df-973c-e9f32ceda19a", # uuid
|
||||
"71e75275-0847-4e47-9df5-1f2bbe01da91", # filedir
|
||||
datetime.datetime.now().timestamp(),
|
||||
"DIR", # filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
|
||||
False, # deleted BOOL NOT NULL, -- set True when recording a deleted file
|
||||
"drwxrwxr-x", # unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
|
||||
"drwxrwxr-x", # perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
|
||||
None, # symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target. I.e. this is the "content" of the symlink.
|
||||
"a84ed33864d06615a87bc8da5258d841163f1e7969367ecd07b041ae1a18febd", # sha256 TEXT,
|
||||
None, # source_task INTEGER,
|
||||
),
|
||||
(
|
||||
None, # id INTEGER PRIMARY KEY NOT NULL,
|
||||
1, # INTEGER REFERENCES filedir ON UPDATE CASCADE, -- parent filedir entry
|
||||
"c4037a74-88cd-4a1d-b158-4ebebb25f10d", # uuid
|
||||
"71e75275-0847-4e47-9df5-1f2bbe01da91", # filedir
|
||||
datetime.datetime.now().timestamp(),
|
||||
"DIR", # filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
|
||||
False, # deleted BOOL NOT NULL, -- set True when recording a deleted file
|
||||
"drwxrwxr-x", # unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
|
||||
"drwxrwxr-x", # perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
|
||||
None, # symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target. I.e. this is the "content" of the symlink.
|
||||
"a84ed33864d06615a87bc8da5258d841163f1e7969367ecd07b041ae1a18febd", # sha256 TEXT,
|
||||
None, # source_task INTEGER,
|
||||
),
|
||||
(
|
||||
None, # id INTEGER PRIMARY KEY NOT NULL,
|
||||
1, # INTEGER REFERENCES filedir ON UPDATE CASCADE, -- parent filedir entry
|
||||
"dcfbe086-3c4b-43b8-a550-974610b86267", # uuid
|
||||
"5f74b5f2-eac8-4a86-90f6-4f7c7981564d", # filedir
|
||||
datetime.datetime.now().timestamp(),
|
||||
"DIR", # filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
|
||||
False, # deleted BOOL NOT NULL, -- set True when recording a deleted file
|
||||
"drwxrwxr-x", # unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
|
||||
"drwxrwxr-x", # perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
|
||||
None, # symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target. I.e. this is the "content" of the symlink.
|
||||
"a84ed33864d06615a87bc8da5258d841163f1e7969367ecd07b041ae1a18febd", # sha256 TEXT,
|
||||
None, # source_task INTEGER,
|
||||
@ -237,18 +211,17 @@ def insert_directories(insert_store):
|
||||
return cur
|
||||
|
||||
|
||||
def test_crossstore_directory_insert(insert_directories):
|
||||
def test_crossstore_directory_insert(insert_directories: sqlite3.Cursor) -> None:
|
||||
cur = insert_directories
|
||||
with pytest.raises(sqlite3.IntegrityError):
|
||||
# declaring directory as belonging to store 2, but parent's store is 1
|
||||
cur.execute(
|
||||
"INSERT INTO filedir VALUES " "(?, ?, ?, ?, ?)",
|
||||
"INSERT INTO filedir VALUES (?, ?, ?, ?)",
|
||||
(
|
||||
None, # id INTEGER PRIMARY KEY NOT NULL,
|
||||
2, # store INTEGER NOT NULL,
|
||||
"some_dir", # filename TEXT, -- only a filename, not a path
|
||||
1, # parent INTEGER REFERENCES filedir ON UPDATE CASCADE,
|
||||
False, # frozen BOOL NOT NULL,
|
||||
"b9774e78-2646-476e-b4a8-f6df0b10ba8e", # uuid
|
||||
"3be35ff6-6a7b-49cf-ba3e-c610b781b54e", # store
|
||||
"some_dir", # filename
|
||||
"baad81ef-7cc6-48df-973c-e9f32ceda19a", # parent
|
||||
),
|
||||
)
|
||||
for row in cur.connection.iterdump():
|
||||
@ -258,7 +231,7 @@ def test_crossstore_directory_insert(insert_directories):
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def insert_files(insert_directories):
|
||||
def insert_files(insert_directories: sqlite3.Cursor) -> sqlite3.Cursor:
|
||||
cur = insert_directories
|
||||
cur.execute("SELECT COUNT(*) FROM filedir")
|
||||
(nprev,) = cur.fetchone()
|
||||
@ -328,7 +301,7 @@ def insert_files(insert_directories):
|
||||
# TODO: This test is disabled until triggers are added to check for these types
|
||||
# of constraints. These became much more complicated to check when I added
|
||||
# filedir_version.
|
||||
def disabled_test_nondir_parent_directory_insert(insert_files):
|
||||
def disabled_test_nondir_parent_directory_insert(insert_files: sqlite3.Cursor) -> None:
|
||||
cur = insert_files
|
||||
with pytest.raises(sqlite3.IntegrityError):
|
||||
# declaring parent as 5, but 5 is a file (plots.png)
|
||||
|
||||
@ -1,19 +1,23 @@
|
||||
"""Test functionality of the Store class."""
|
||||
from nancy import store
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
import pytest
|
||||
import sys
|
||||
import tempfile
|
||||
from typing import Iterator
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def bare_dir():
|
||||
def bare_dir() -> Iterator[Path]:
|
||||
"""Create an emptry temp directory"""
|
||||
with tempfile.TemporaryDirectory(prefix="nancy_testdir") as d:
|
||||
yield Path(d)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def filled_dir(bare_dir):
|
||||
def filled_dir(bare_dir: Path) -> Path:
|
||||
open(bare_dir / "a.txt", "w").write("foo")
|
||||
os.makedirs(bare_dir / "stats")
|
||||
open(bare_dir / "stats" / "metrics.csv", "w").write("bar,baz")
|
||||
@ -22,24 +26,23 @@ def filled_dir(bare_dir):
|
||||
return bare_dir
|
||||
|
||||
|
||||
def test_record_untracked_dir(filled_dir):
|
||||
def test_record_untracked_dir(filled_dir: Path) -> None:
|
||||
from nancy.cli.record import record
|
||||
|
||||
record(filled_dir, message="test_record_untracked_dir")
|
||||
record(store_path=filled_dir, message="test_record_untracked_dir")
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def store():
|
||||
from nancy import store
|
||||
|
||||
def empty_store() -> Iterator[store.Store]:
|
||||
s = store.Store.init(message="test init")
|
||||
yield s
|
||||
|
||||
|
||||
def test_schema_version_match(store):
|
||||
def test_schema_version_match(empty_store: store.Store) -> None:
|
||||
from nancy.version import schema_version
|
||||
|
||||
cur = store.conn.cursor()
|
||||
assert empty_store.conn is not None
|
||||
cur = empty_store.conn.cursor()
|
||||
(db_schema_ver,) = cur.execute("PRAGMA user_version;").fetchone()
|
||||
|
||||
assert schema_version == db_schema_ver
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user