From e75af527656fda021be4966a2fa7592e56e07781 Mon Sep 17 00:00:00 2001 From: Jacob Hinkle Date: Wed, 21 Sep 2022 13:37:05 -0400 Subject: [PATCH] Create {machine,fs}.py, file scanning, and updated tests This also fixes the pytest fixture setup (I think). --- src/nancy/fs.py | 209 +++++++++++++++++++++++++++++++++++++++++++ src/nancy/machine.py | 50 +++++++++++ src/nancy/schema.sql | 129 ++++++++++++++------------ src/nancy/store.py | 97 +++++++------------- tests/test_db.py | 162 +++++++++++++++++++++------------ 5 files changed, 469 insertions(+), 178 deletions(-) create mode 100644 src/nancy/fs.py create mode 100644 src/nancy/machine.py diff --git a/src/nancy/fs.py b/src/nancy/fs.py new file mode 100644 index 0000000..cdfff09 --- /dev/null +++ b/src/nancy/fs.py @@ -0,0 +1,209 @@ +"""Interaction with the filesystem and with file database entries""" + +from dataclasses import dataclass +import hashlib +import operator +import os +import stat +from typing import List +import warnings + + +def remove_write_perms(path): + """Remove write permissions for all users while preserving other perms""" + if not os.path.islink(path): + s = os.stat(path) + orig_perm_string = stat.filemode(s.st_mode) + os.chmod( + path, + s.st_mode & -(stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH), + ) + follow_symlinks = False + else: + if os.stat not in os.supports_follow_symlinks: + # can't stat this thing directly on this platform + # means we can only stat the content. + # In this case, we return None and do not lock this link + warnings.warn("This platform cannot stat symlinks. Will not set them read-only.") + return None + s = os.stat(path) + orig_perm_string = stat.filemode(s.st_mode) + if os.chmod in os.supports_follow_symlinks: + follow_symlinks = True + else: + warnings.warn( + ": Platform does not support chmod of symlinks. " + "Links will not be set read-only.", + ) + return orig_perm_string + os.chmod( + path, + s.st_mode & -(stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH), + follow_symlinks=follow_symlinks, + ) + return orig_perm_string + + +def make_readonly_recursive(path, excluded=[]): + """Recursively "freeze" a directory by setting all files and directories read-only""" + # traversing bottom-up makes it easier to freeze perms on directories + for root, dirs, files in os.walk(self.path, topdown=False): + for f in files: + p = os.path.join(root, f) + if p in excluded: + continue + remove_write_perms(os.path.join(self.path, p)) + + for d in dirs: + p = os.path.join(root, d) + if p in excluded: + continue + remove_write_perms(os.path.join(self.path, p)) + + +@dataclass +class FSEntry: + """A hashed file or directory.""" + name: str # with parent directory stripped. None if this is the root + relpath: str # relative to some root directory + # children for dirs only: non-recursive; files/dirs at this level only + children: List['FSEntry'] + filetype: str # regular, symlink, special (block, char, pipe, or socket) + perms: str # e.g. '-rw-rw-r--' + # For directories, sha256 is computed from concatenation of all children + # sorted alphabetically regardless of type using "{name}{hash}" format + sha256: bytes # sha256 of content for regular files, or of target for links + + def __post_init__(self): + # compute sha256 if it's missing + if self.sha256 is None: + m = hashlib.sha256() + for d in self.subdirs: + assert d.sha256 is not None + m.update(bytes(d.dirname, 'utf-8')) + m.update(d.sha256) + for f in self.files: + assert f.sha256 is not None + m.update(bytes(f.filename, 'utf-8')) + m.update(f.sha256) + self.sha256 = m.digest() + + @classmethod + def from_path(cls, root, relpath='.', filestat=None): + """ + Scan a path to instantiate (recursive). + """ + m = hashlib.sha256() + + path = os.path.join(root, relpath) + + filestat = os.stat(path) if filestat is None else filestat + s = filestat.st_mode + + children = [] + if stat.S_ISLNK(s): + # Check links first, since it is not exclusive with dir or file checks + filetype = 'LNK' + m.update(bytes(os.readlink(path), 'utf-8')) + print(path, "is link") + elif stat.S_ISDIR(s): + filetype = 'DIR' + + # this prevents a directory's hash from colliding with a file hash + # in cases where it only holds a single file + m.update(bytes(relpath, 'utf-8')) + + # we use os.scandir which returns a DirEntry for each child + # excluding "." and "..". These variables hold a .stat which we can + # use to avoid having to query the filesystem twice. + direntries = list(os.scandir(path)) + direntries.sort(key=operator.attrgetter('name')) + + children = [ + cls.from_path( + root=root, + relpath=os.path.join(relpath, e.name), + filestat=e.stat(), + ) + for e in direntries + ] + for c in children: # now hash concatenated sorted hashes + m.update(c.sha256) + elif stat.S_ISREG(s): + filetype = 'REG' + m.update(open(path, 'rb').read()) + elif stat.S_ISSOCK(s): + filetype = 'SOCK' + elif stat.S_ISCHR(s): + filetype = 'CHR' + elif stat.S_ISBLK(s): + filetype = 'BLK' + elif stat.S_ISFIFO(s): + filetype = 'FIFO' + elif stat.S_ISDOOR(s): + filetype = 'DOOR' + elif stat.S_ISPORT(s): + filetype = 'PORT' + elif stat.S_ISWHT(s): + filetype = 'WHT' + else: + filetype = 'OTHER' + + sha256 = m.digest() + + return cls( + name=os.path.basename(relpath), + relpath=relpath, + children=children, + filetype=filetype, + perms=stat.filemode(filestat.st_mode), + sha256=sha256, + ) + + @classmethod + def empty_root(cls): + """Just a standardized value indicating an empty root directory""" + return cls( + name='.', + relpath='.', + children=[], + filetype='DIR', + perms='----------', + sha256=hashlib.sha256().digest(), + ) + + @classmethod + def from_db_index(cls, cursor, root_id=None, root_row=None): + """Given id of an entry in store_file, recursively fill this object""" + fields = '(id, filename, filetype, unfrozen_perms, frozen, sha256, symlink_target)' + if root_row is None: + cursor.execute(f'SELECT {fields} FROM store_file WHERE id={root_id};') + root_row = cursor.fetchone() + root_id, name, filetype, unfrozen_perms, frozen, sha256, symlink_target = root_row + # get children + cursor.execute(f'SELECT {fields} FROM store_file WHERE parent={root_id} ORDER BY name;') + rows = cursor.fetchall() + return cls( + name=name, + relpath=relpath, + children=[cls.from_db_index(cursor, root_row=r) for r in rows], + filetype=filetype, + perms=unfrozen_perms, + sha256=bytes.fromhex(sha256), + ) + + +class FSDiff: + def __init__(A, B): + """Given two hashed directories, efficiently compute difference. + + This assumes the hashes are consistent, so that directories with + matching hashes need not be inspected deeply. + + Returns: + deleted (Directory): overlay with deleted entries from self + modified (Directory): overlay with modified entries from other + new (Directory): overlay with new entries from other + """ + self.A = A + self.B = B diff --git a/src/nancy/machine.py b/src/nancy/machine.py new file mode 100644 index 0000000..ac52420 --- /dev/null +++ b/src/nancy/machine.py @@ -0,0 +1,50 @@ +from collections import namedtuple +import json +import platform +import time + +MachineInfo = namedtuple( + 'MachineInfo', + [ + 'machine_id', + 'hostname', + 'processor', + 'system', + 'release', + 'cpu_type', + 'timezone', + 'freedesktop_os_release', + 'win32_ver', + 'mac_ver', + ], +) +def get_machine_info(): + fdor = '' + try: + fdor = json.dumps(platform.freedesktop_os_release()) + except AttributeError: + # freedesktop_os_release only available for python >= 3.10 + fdor = '' + system = platform.system() + + mid = None + if system == 'Linux': + try: + mid = open('/etc/machine-id', 'r').read() + except FileNotFoundError: + pass + + return MachineInfo( + machine_id=mid, + hostname=platform.node(), + processor=platform.processor(), + system=system, + release=platform.release(), + cpu_type=platform.machine(), + timezone=time.tzname[time.daylight], + freedesktop_os_release=fdor, + win32_ver=json.dumps(platform.win32_ver()), + mac_ver=json.dumps(platform.mac_ver()), + ) + + diff --git a/src/nancy/schema.sql b/src/nancy/schema.sql index 5d28841..4641980 100644 --- a/src/nancy/schema.sql +++ b/src/nancy/schema.sql @@ -13,11 +13,16 @@ PRAGMA foreign_keys=ON; -- come into play. Note that these are not reliable identifiers for purposes of -- tracking down data, but may be helpful context. CREATE TABLE machine(id INTEGER PRIMARY KEY NOT NULL, + machine_id TEXT, -- platform-dependent unique hardware id + -- Linux: open('/etc/machine-id', 'r').read() (assumes systemd) + -- OSX: `ioreg -rd1 -c IOPlatformExpertDevice | grep IOPlatformUUID` + -- Windows: `reg query HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Cryptography /v MachineGuid` + hostname TEXT, -- platform.node(): 'lucky' processor TEXT, -- platform.processor(): system TEXT, -- platform.system(): 'Linux' release TEXT, -- platform.release(): '5.15.64' - machine TEXT, -- platform.machine(): 'x86_64' + cpu_type TEXT, -- platform.machine(): 'x86_64' timezone TEXT, -- timezone, for interpreting event times freedesktop_os_release TEXT, -- requires python 3.10 -- platform.freedesktop_os_release() as JSON @@ -34,9 +39,10 @@ CREATE TABLE machine(id INTEGER PRIMARY KEY NOT NULL, ); -- Programs are run by users on machines CREATE TABLE user(id INTEGER PRIMARY KEY NOT NULL, - username TEXT NOT NULL, - userid INTEGER, - fullname TEXT, + username TEXT NOT NULL, -- getpass.getuser() + userid INTEGER, -- os.getuid() + fullname TEXT, -- on Linux/OSX: pwd.getpwuid(os.getuid()).pw_gecos + -- on Windows: see https://stackoverflow.com/questions/21766954/how-to-get-windows-users-full-name-in-python machine INTEGER NOT NULL, UNIQUE(userid, machine), @@ -44,7 +50,7 @@ CREATE TABLE user(id INTEGER PRIMARY KEY NOT NULL, ); --- Stores, directories, and files +-- Stores and files (and directories) -- These are the primary objects tracked by nancy. -- A store is a directory containing a file called nancy.db (e.g. the dir holding this database) CREATE TABLE store (id INTEGER PRIMARY KEY NOT NULL, @@ -55,50 +61,54 @@ CREATE TABLE store (id INTEGER PRIMARY KEY NOT NULL, FOREIGN KEY (machine) REFERENCES machine (id) ON UPDATE CASCADE ); --- Within the store are directories that contain files. --- Note that there should be one entry with relpath='.' for the non-imported store. -CREATE TABLE store_directory (id INTEGER PRIMARY KEY NOT NULL, - name TEXT, - store INTEGER NOT NULL, - parent INTEGER, -- parent directory (should be) in same store - frozen BOOL NOT NULL, - - UNIQUE(store, name, parent), - FOREIGN KEY (store) REFERENCES store (id) ON UPDATE CASCADE, - -- parent is a recursive key within this table. - FOREIGN KEY (parent) REFERENCES store_directory (id) ON UPDATE CASCADE -); -CREATE TRIGGER insert_store_directory BEFORE INSERT ON store_directory -BEGIN - SELECT - CASE - WHEN NEW.parent IS NOT NULL AND NEW.store != (SELECT store FROM store_directory WHERE id = NEW.parent) - THEN RAISE (ABORT, 'Parent directory resides in different store') - END; -END; -CREATE TRIGGER update_store_directory BEFORE UPDATE ON store_directory -BEGIN - SELECT - CASE - WHEN NEW.parent IS NOT NULL AND NEW.store != (SELECT store FROM store_directory WHERE id = NEW.parent) - THEN RAISE (ABORT, 'Parent directory resides in different store') - END; -END; +-- The store_file table holds all files AND DIRECTORIES that are tracked by the +-- store. Files and directories are distinguished by the filetype column. This +-- table also holds tracked files and directories that have been imported and +-- live outside the current store. CREATE TABLE store_file (id INTEGER PRIMARY KEY NOT NULL, - directory INTEGER NOT NULL, -- reference a directory inside a store + store INTEGER NOT NULL, filename TEXT, -- only a filename, not a path - frozen BOOL NOT NULL, - unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--' + parent INTEGER REFERENCES store_file ON UPDATE CASCADE, + frozen BOOL NOT NULL, + filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details + unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--' + symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target + + sha256 TEXT NOT NULL, + + source_program INTEGER, -- Note that this is redundant since datum points to a program... source_datum INTEGER, - UNIQUE(filename, directory), - FOREIGN KEY (directory) REFERENCES store_directory (id) ON UPDATE CASCADE, - FOREIGN KEY (source_datum) REFERENCES datum (id) ON UPDATE CASCADE -); + UNIQUE(store, filename, parent), --- A computational environment which can execute "processes". Note that the + FOREIGN KEY (store) REFERENCES store (id) ON UPDATE CASCADE, + FOREIGN KEY (source_datum) REFERENCES datum (id) ON UPDATE CASCADE, + FOREIGN KEY (source_program) REFERENCES program (id) ON UPDATE CASCADE +); +CREATE TRIGGER insert_store_file BEFORE INSERT ON store_file +BEGIN + SELECT + CASE + --WHEN NEW.parent IS NOT NULL AND (SELECT filetype FROM store_file WHERE id = NEW.parent) != 'DIR' + --THEN RAISE (ABORT, 'Parent is not listed as a directory') + WHEN NEW.parent IS NOT NULL AND NEW.store != (SELECT store FROM store_file WHERE id = NEW.parent) + THEN RAISE (ABORT, 'Parent directory resides in different store') + END; +END; +CREATE TRIGGER update_store_file BEFORE UPDATE ON store_file +BEGIN + SELECT + CASE + --WHEN NEW.parent IS NOT NULL AND (SELECT filetype FROM store_file WHERE id = NEW.parent) != 'DIR' + --THEN RAISE (ABORT, 'Parent is not listed as a directory') + WHEN NEW.parent IS NOT NULL AND NEW.store != (SELECT store FROM store_file WHERE id = NEW.parent) + THEN RAISE (ABORT, 'Parent directory resides in different store') + END; +END; + +-- A computational environment which can execute "programs". Note that the -- python executable being used, and environment variables are recorded here. -- Other info is available in the parent "machine" table. CREATE TABLE environment (id INTEGER PRIMARY KEY NOT NULL, @@ -111,30 +121,36 @@ CREATE TABLE environment (id INTEGER PRIMARY KEY NOT NULL, FOREIGN KEY (user) REFERENCES user (id) ON UPDATE CASCADE ); + -- A "program" is an execution of a single graph, starting from a number of roots --- It corresponds to a single host parent process. Note that if multiple calls to --- nancy.run() are provided within a single python script, multiple "programs" --- may be created. Also note that historical programs are automatically --- imported and merged when possible when loading a "datum" from disk. +-- Note that if multiple calls to nancy.run() are provided within a single +-- python script, multiple "programs" may be created. Also note that historical +-- programs are automatically imported and merged when possible when loading a +-- "datum" from disk. CREATE TABLE program (id INTEGER PRIMARY KEY NOT NULL, - name TEXT, -- name of the program, usually written in code e.g. cnn_crossval + name TEXT, -- name of the program, usually written lowercase by calling code e.g. cnn_crossval + -- Names of built-in operations will be shown in upper case: e.g. 'FREEZE' + -- we use POSIX timestamps in UTC for time recording. -- e.g. datetime.datetime.now().timestamp() start_time REAL, end_time REAL, + process_id INTEGER, -- host PID of python process on host OS environment INTEGER NOT NULL, message TEXT, -- user-defined message to help distinguish similar runs FOREIGN KEY (environment) REFERENCES environment (id) ON UPDATE CASCADE ); + + -- We try to track all python packages that impact execution by traversing a --- copy of sys.modules. This is done once before a "process" and once after in +-- copy of sys.modules. This is done once before a "program" and once after in -- case some calling code winds up calling a previously-unloaded module. CREATE TABLE py_package (id INTEGER PRIMARY KEY NOT NULL, name TEXT NOT NULL, version TEXT, - process INTEGER NOT NULL, - FOREIGN KEY (process) REFERENCES process (id) ON UPDATE CASCADE + program INTEGER NOT NULL, + FOREIGN KEY (program) REFERENCES program (id) ON UPDATE CASCADE ); -- A py_module describes any python module file containing decorated Functions. -- Modules are tracked since they impact the global scope of function calls. @@ -183,20 +199,20 @@ CREATE TABLE func_output(id INTEGER PRIMARY KEY NOT NULL, -- Tasks are executed calls to Functions: they correspond to a particular set of -- inputs which are themselves data (see datum table). A task is executed in --- the context of a "process". Within a process, tasks are typically evaluated +-- the context of a "program". Within a program, tasks are typically evaluated -- in a serial manner. CREATE TABLE task(id INTEGER PRIMARY KEY NOT NULL, func INTEGER NOT NULL, - process INTEGER NOT NULL, + program INTEGER NOT NULL, FOREIGN KEY (func) REFERENCES func (id) ON UPDATE CASCADE, - FOREIGN KEY (process) REFERENCES process (id) ON UPDATE CASCADE + FOREIGN KEY (program) REFERENCES program (id) ON UPDATE CASCADE ); -- A datum is an object that is computed as the output of a task, given as a -- literal value in a config file, or loaded from a file. CREATE TABLE datum(id INTEGER PRIMARY KEY NOT NULL, -- provider_type describes where the datum came from. Choices are: -- - COMPUTED: output of a decorated Function - -- - IMPORTED: imported output from a prior process + -- - IMPORTED: imported output from a prior program -- - LITERAL: literal value provided in a config file -- Note that literals are implicitly imported (hash computed) at runtime provider_type TEXT, @@ -231,8 +247,9 @@ CREATE TABLE task_input(id INTEGER PRIMARY KEY NOT NULL, func_input INTEGER NOT NULL, datum INTEGER NOT NULL, - -- Data have versions to facilitate tacking non-const operations. If a datum - -- is passed to a non-const operation, a new datum is provided + -- Data have versions to facilitate tracking non-const operations. If a datum + -- is passed to a non-const operation, it must increment its internal + -- version datum_version INTEGER NOT NULL, FOREIGN KEY (task) REFERENCES task (id) ON UPDATE CASCADE, diff --git a/src/nancy/store.py b/src/nancy/store.py index 4515e5b..200e5f2 100644 --- a/src/nancy/store.py +++ b/src/nancy/store.py @@ -1,26 +1,14 @@ """Utilities for creating new stores and linking between them.""" -from . import db +from . import db, fs, machine import importlib import json import os from pathlib import Path import sqlite3 -import stat -def remove_write_perms(path): - """Remove write permissions for all users while preserving other perms""" - s = os.stat(path) - orig_perm_string = stat.filemode(s) - os.chmod( - path, - s.st_mode ^ stat.S_IWUSR ^ stat.S_IWGRP ^ stat.S_IWOTH, - follow_symlinks=False, - ) - return orig_perm_string - class Store: """Describes a data directory, holds active connection to nancy.db""" @@ -82,63 +70,26 @@ class Store: def record_machine_description(self): """Record machine-specific information""" - import platform, time cur = self.conn.cursor() - fdor = '' - try: - fdor = json.dumps(platform.freedesktop_os_release()) - except AttributeError: - # freedesktop_os_release only available for python >= 3.10 - fdor = '' - machine_info = ( - platform.node(), - platform.processor(), - platform.system(), - platform.release(), - platform.machine(), - time.tzname[time.daylight], - fdor, - json.dumps(platform.win32_ver()), - json.dumps(platform.mac_ver()), - ) - cur.execute('INSERT INTO machine VALUES(NULL,?,?,?,?,?,?,?,?,?)', machine_info) + machine_info = machine.get_machine_info() + cur.execute('INSERT INTO machine VALUES(NULL,?,?,?,?,?,?,?,?,?,?)', machine_info) + self.machine_id = cur.lastrowid self.conn.commit() def make_readonly(self): """Make store directory read-only (except for nancy.db) and return file list""" - dirpaths = {} - regfilepaths = {} - symlinkpaths = {} - irregfilepaths = {} # any file that's not regular or symlink. e.g. inodes or sockets + fs.make_readonly_recursive(self.path, excluded='./nancy.db') - # traversing bottom-up makes it easier to freeze perms on directories - for root, dirs, files in os.walk(self.path, topdown=False): - for f in files: - p = os.path.join(root, f) - if p == './nancy.db': - continue - - orig_perms = remove_write_perms(os.path.join(self.path, p)) - - if not os.path.isfile(p): - # not a link or regular file - irregfilepaths[p] = orig_perms - - if os.path.islink(p): - symlinkpaths[p] = orig_perms - else: - regfilepaths[p] = orig_perms - - for d in dirs: - p = os.path.join(root, d) - - orig_perms = remove_write_perms(os.path.join(self.path, p)) - - dirpaths[p] = orig_perms - return dirpaths, regfilepaths, symlinkpaths, irregfilepaths - - def file_hashes(self, local_only=True): - """Get recorded hashes of all files currently tracked in this store""" + def fs_entries(self, shallow=False): + """Return recursive structure containing FSEntry objects from db""" + # get the database id for the table entry in this store having name '.' + cur = self.conn.cursor() + cur.execute('SELECT id FROM store_file WHERE store=0 AND parent is NULL;') + root_id = cur.fetchone() + if root_id is None: + return fs.FSEntry.empty_root() + else: + return fs.FSEntry.from_db_index(cur, root_id=root_id) def freeze(self): @@ -152,7 +103,7 @@ class Store: ", ".join(irreg), ) for p, perms in symlinks.items(): - target = os.path.readlink(p) # gives possibly relative path to tgt + target = os.readlink(p) # gives possibly relative path to target resolved = os.path.realpath(p) if not resolved.startswith(os.path.abspath(self.path)): raise Exception( @@ -160,10 +111,24 @@ class Store: f"{target} which is outside store path ({self.path})", ) - # compute checksums on all files + # get hashes of current directory (recursive) + current = fs.FSEntry.from_path(self.path) + + # extract hashes for all previously-cataloged files/dirs + recorded = self.fs_entries(shallow=True) + + d = current.diff(recorded) + + # select all dirs # detect new and deleted files, and those that have changed type + # create entries for all directories if they do not yet exist (top + # down) + + # insert files and symlinks into store_file, computing checksums on + # each + # update versions in nancy.db as appropriate # remove write permissions on nancy.db diff --git a/tests/test_db.py b/tests/test_db.py index 4911e0d..7bcd11e 100644 --- a/tests/test_db.py +++ b/tests/test_db.py @@ -8,7 +8,7 @@ import sqlite3 @pytest.fixture -def db(): +def temp_db(): """Create an in-memory database that follow's the nancy schema""" conn = sqlite3.connect(':memory:') cur = conn.cursor() @@ -23,13 +23,15 @@ def db(): conn.close() -@pytest.fixture(autouse=True) -def insert_machine(db): - db.executemany( +@pytest.fixture +def insert_machine(temp_db): + cur = temp_db + cur.executemany( 'INSERT INTO machine VALUES ' - '(?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', + '(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', [( - None, + None, #id INTEGER PRIMARY KEY NOT NULL, + 'a5d97c08a15c4db69f5fded523a1bfe3', #machine_id TEXT, -- platform-dependent unique hardware id 'lucky', #hostname TEXT, -- platform.node(): 'lucky' '', #processor TEXT, -- platform.processor(): 'Linux', #system TEXT, -- platform.system(): 'Linux' @@ -40,7 +42,8 @@ def insert_machine(db): '', #win32_ver TEXT, -- platform.win32_ver() as JSON '', #mac_ver TEXT -- platform.mac_ver() as JSON ), ( - None, + None, #id INTEGER PRIMARY KEY NOT NULL, + 'afc9b06a23b74341b29d42b8312a4f8a', 'a100', #hostname TEXT, -- platform.node(): 'lucky' '', #processor TEXT, -- platform.processor(): 'Linux', #system TEXT, -- platform.system(): 'Linux' @@ -52,48 +55,55 @@ def insert_machine(db): '', #mac_ver TEXT -- platform.mac_ver() as JSON )], ) -def test_insert_machine(db): - db.execute('SELECT * FROM machine') - machines = db.fetchall() + return cur + +def test_insert_machine(insert_machine): + cur = insert_machine + cur.execute('SELECT * FROM machine') + machines = cur.fetchall() assert len(machines) == 2 -@pytest.fixture(autouse=True) -def insert_user(db): - db.executemany( +@pytest.fixture +def insert_user(insert_machine): + cur = insert_machine + cur.executemany( 'INSERT INTO user VALUES ' '(?, ?, ?, ?, ?)', [( - None, + None, #id INTEGER PRIMARY KEY NOT NULL, 'jacob', #username TEXT NOT NULL, 101, #userid INTEGER, 'Jacob Hinkle', #fullname TEXT, 1, #machine INTEGER NOT NULL, ), ( - None, + None, #id INTEGER PRIMARY KEY NOT NULL, 'jacob', #username TEXT NOT NULL, 10301, #userid INTEGER, 'Jacob Hinkle', #fullname TEXT, 2, #machine INTEGER NOT NULL, ), ( - None, + None, #id INTEGER PRIMARY KEY NOT NULL, 'bob', #username TEXT NOT NULL, 2035, #userid INTEGER, 'Just Bob', #fullname TEXT, 2, #machine INTEGER NOT NULL, )], ) -def test_insert_user(db): - db.execute('SELECT * FROM user') - users = db.fetchall() + return cur +def test_insert_user(insert_user): + cur = insert_user + cur.execute('SELECT * FROM user') + users = cur.fetchall() assert len(users) == 3 -def test_invalid_user_machine(db): +def test_invalid_user_machine(insert_user): + cur = insert_user with pytest.raises(sqlite3.IntegrityError): # should fail foreign key constraint - db.execute( + cur.execute( 'INSERT INTO user VALUES ' '(?, ?, ?, ?, ?)', ( - None, + None, #id INTEGER PRIMARY KEY NOT NULL, 'bozo', #username TEXT NOT NULL, 100, #userid INTEGER, 'Bozo the Clown', #fullname TEXT, @@ -102,11 +112,11 @@ def test_invalid_user_machine(db): ) with pytest.raises(sqlite3.IntegrityError): # should fail uniqueness constraint - db.execute( + cur.execute( 'INSERT INTO user VALUES ' '(?, ?, ?, ?, ?)', ( - None, + None, #id INTEGER PRIMARY KEY NOT NULL, 'jacob', #username TEXT NOT NULL, 101, #userid INTEGER, 'Bozo the Clown', #fullname TEXT, @@ -115,81 +125,118 @@ def test_invalid_user_machine(db): ) -@pytest.fixture(autouse=True) -def insert_store(db): - db.executemany( +@pytest.fixture +def insert_store(insert_machine): + cur = insert_machine + cur.executemany( 'INSERT INTO store VALUES ' '(?, ?, ?, ?)', [( - None, + None, #id INTEGER PRIMARY KEY NOT NULL, 1, #machine INTEGER, '/path/to/first/store', #dbpath TEXT NOT NULL, False, #imported BOOL, ), ( - None, + None, #id INTEGER PRIMARY KEY NOT NULL, 1, #machine INTEGER, - '/path/to/dependencys/store', #dbpath TEXT NOT NULL, + '/path/to/dependency/store', #dbpath TEXT NOT NULL, True, #imported BOOL, ), ( - None, + None, #id INTEGER PRIMARY KEY NOT NULL, 2, #machine INTEGER, # same path but on a separate machine '/path/to/first/store', #dbpath TEXT NOT NULL, True, #imported BOOL, )], ) -@pytest.fixture(autouse=True) -def insert_store_directory(db): - db.executemany( - 'INSERT INTO store_directory VALUES ' - '(?, ?, ?, ?, ?)', + return cur + +@pytest.fixture +def insert_directories(insert_store): + cur = insert_store + cur.executemany( + 'INSERT INTO store_file VALUES ' + '(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', [( - None, - '.', #name TEXT, + None, #id INTEGER PRIMARY KEY NOT NULL, 1, #store INTEGER NOT NULL, - None, #parent INTEGER, -- parent directory (should be) in same store + '.', #filename TEXT, -- only a filename, not a path + None, #parent INTEGER REFERENCES store_file ON UPDATE CASCADE, False, #frozen BOOL NOT NULL, + 'DIR', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details + 'dr-xr-xr--', #unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--' + None, #symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target + 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855', #sha256 TEXT NOT NULL, + None, #source_program INTEGER, -- Note that this is redundant since datum points to a program... + None, #source_datum INTEGER, ), ( - None, - 'foo', #name TEXT, + None, #id INTEGER PRIMARY KEY NOT NULL, 1, #store INTEGER NOT NULL, - 1, #parent INTEGER, -- parent directory (should be) in same store + 'foo', #filename TEXT, -- only a filename, not a path + 1, #parent INTEGER REFERENCES store_file ON UPDATE CASCADE, False, #frozen BOOL NOT NULL, + 'DIR', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details + 'dr-xr-xr--', #unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--' + None, #symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target + '5ad4e9e3090de8de781e4a35ce6ba16ad8eaba7a5456cd230a36f77143885396', #sha256 TEXT NOT NULL, + None, #source_program INTEGER, -- Note that this is redundant since datum points to a program... + None, #source_datum INTEGER, ), ( - None, - '.', #name TEXT, + None, #id INTEGER PRIMARY KEY NOT NULL, 2, #store INTEGER NOT NULL, - None, #parent INTEGER, -- parent directory (should be) in same store + '.', #filename TEXT, -- only a filename, not a path + None, #parent INTEGER REFERENCES store_file ON UPDATE CASCADE, False, #frozen BOOL NOT NULL, + 'DIR', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details + 'dr-xr-xr--', #unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--' + None, #symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target + '15c5e8d80a48803c18e72cd274532d608b8026dcbc192afc490fe1c289ec6ff1', #sha256 TEXT NOT NULL, + None, #source_program INTEGER, -- Note that this is redundant since datum points to a program... + None, #source_datum INTEGER, )], ) + return cur -def test_crossstore_directory_insert(db): +def test_crossstore_directory_insert(insert_directories): + cur = insert_directories with pytest.raises(sqlite3.IntegrityError): # declaring directory as belonging to store 2, but parent's store is 1 - db.execute( - 'INSERT INTO store_directory VALUES ' - '(?, ?, ?, ?, ?)', + cur.execute( + 'INSERT INTO store_file VALUES ' + '(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', ( - None, - '.', #name TEXT, + None, #id INTEGER PRIMARY KEY NOT NULL, 2, #store INTEGER NOT NULL, - 1, #parent INTEGER, -- parent directory (should be) in same store + 'xstore_file', #filename TEXT, -- only a filename, not a path + 1, #parent INTEGER REFERENCES store_file ON UPDATE CASCADE, False, #frozen BOOL NOT NULL, + 'DIR', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details + 'dr-xr-xr--', #unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--' + None, #symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target + '15c5e8d80a48803c18e72cd274532d608b8026dcbc192afc490fe1c289ec6ff1', #sha256 TEXT NOT NULL, + None, #source_program INTEGER, -- Note that this is redundant since datum points to a program... + None, #source_datum INTEGER, )) + for row in cur.connection.iterdump(): + print(row) + cur.execute('SELECT * FROM store_file') + print(cur.fetchall()) -@pytest.fixture(autouse=True) -def insert_store_file(db): - db.executemany( +@pytest.fixture +def insert_files(insert_directories): + cur = insert_directories + cur.executemany( 'INSERT INTO store_file VALUES ' - '(?, ?, ?, ?, ?, ?)', + '(?, ?, ?, ?, ?, ?, ?, ?)', [( None, 1, #directory INTEGER NOT NULL 'example.csv', #filename TEXT 1, #frozen BOOL NOT NULL '-rw-rw-r--', #unfrozen_perms TEXT + '9aafde8f9dbec34c694b86333f746f58958c44247c474904e06d1f07f94292b4', #sha256 TEXT NOT NULL, + None, #source_program INTEGER, None, #source_datum INTEGER, ), ( None, @@ -197,7 +244,10 @@ def insert_store_file(db): 'plots.png', #filename TEXT 1, #frozen BOOL NOT NULL '-rw-r--r--', #unfrozen_perms TEXT + '9add10cc3a6f0e4618dfed005ddfbeafdf268c58b773ba0021963c856d00235b', #sha256 TEXT NOT NULL, + None, #source_program INTEGER, None, #source_datum INTEGER, )] ) + return cur