Create {machine,fs}.py, file scanning, and updated tests

This also fixes the pytest fixture setup (I think).
This commit is contained in:
Jacob Hinkle 2022-09-21 13:37:05 -04:00
parent c30ca6a083
commit e75af52765
5 changed files with 469 additions and 178 deletions

209
src/nancy/fs.py Normal file
View File

@ -0,0 +1,209 @@
"""Interaction with the filesystem and with file database entries"""
from dataclasses import dataclass
import hashlib
import operator
import os
import stat
from typing import List
import warnings
def remove_write_perms(path):
    """Remove write permissions for all users while preserving other perms.

    Regular files and directories are chmod-ed directly. For a symbolic link
    the link itself (never its target) is handled, and only when the platform
    can both stat and chmod a link without following it; otherwise a warning
    is emitted and the link is left untouched.

    Args:
        path: path of the file, directory, or symlink to make read-only.

    Returns:
        The permission string from *before* the change, as produced by
        ``stat.filemode`` (e.g. ``'-rw-rw-r--'``), or ``None`` when ``path``
        is a symlink and the platform cannot stat links directly.
    """
    write_bits = stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH

    if not os.path.islink(path):
        s = os.stat(path)
        # ``& ~write_bits`` clears exactly the three write bits; arithmetic
        # negation would clear an unrelated set of bits instead.
        os.chmod(path, stat.S_IMODE(s.st_mode) & ~write_bits)
        return stat.filemode(s.st_mode)

    if os.stat not in os.supports_follow_symlinks:
        # can't stat this thing directly on this platform
        # means we can only stat the content.
        # In this case, we return None and do not lock this link
        warnings.warn("This platform cannot stat symlinks. Will not set them read-only.")
        return None

    # Stat the link itself, not whatever it points to.
    s = os.stat(path, follow_symlinks=False)
    orig_perm_string = stat.filemode(s.st_mode)

    if os.chmod not in os.supports_follow_symlinks:
        warnings.warn(
            f"{path}: Platform does not support chmod of symlinks. "
            "Links will not be set read-only.",
        )
        return orig_perm_string

    # follow_symlinks=False so the link is changed, never its target.
    os.chmod(
        path,
        stat.S_IMODE(s.st_mode) & ~write_bits,
        follow_symlinks=False,
    )
    return orig_perm_string
def make_readonly_recursive(path, excluded=()):
    """Recursively "freeze" a directory by setting all files and directories read-only.

    Every file and directory found below ``path`` has its write permission
    removed with ``remove_write_perms``. The directory ``path`` itself is not
    modified.

    Args:
        path: root directory to freeze.
        excluded: entries to leave untouched. Either a single path or an
            iterable of paths; each may be written relative to ``path``
            (e.g. ``'./nancy.db'``) or as the joined path that ``os.walk``
            produces. Excluding a directory does not exclude its contents.
    """
    # A lone string must be treated as one path, not as a sequence of
    # characters (``p in 'some string'`` would be a substring test).
    if isinstance(excluded, (str, os.PathLike)):
        excluded = [excluded]
    skip = {os.path.normpath(e) for e in excluded}

    # traversing bottom-up makes it easier to freeze perms on directories
    for root, dirs, files in os.walk(path, topdown=False):
        for name in files + dirs:
            # ``root`` already starts with ``path``; no second join is needed.
            p = os.path.join(root, name)
            if (
                os.path.normpath(p) in skip
                or os.path.normpath(os.path.relpath(p, path)) in skip
            ):
                continue
            remove_write_perms(p)
@dataclass
class FSEntry:
    """A hashed file or directory.

    Instances form a tree: a directory entry holds its direct children, and
    every entry carries a sha256 digest. Trees are built either by scanning
    the filesystem (``from_path``) or by reading rows of the ``store_file``
    table (``from_db_index``).
    """
    name: str  # with parent directory stripped; '.' for a root built by from_path/empty_root
    relpath: str  # relative to some root directory
    # children for dirs only: non-recursive; files/dirs at this level only
    children: List['FSEntry']
    filetype: str  # 'REG', 'DIR', 'LNK', or a special type ('SOCK', 'CHR', 'BLK', 'FIFO', ...)
    perms: str  # e.g. '-rw-rw-r--'
    # sha256 of content for regular files, or of target for links.
    # Pass None to have __post_init__ derive it from the children, sorted
    # alphabetically regardless of type, using "{name}{hash}" format.
    # NOTE(review): from_path hashes directories as relpath + child hashes,
    # which is a different scheme from the one above -- confirm which is wanted.
    sha256: bytes

    def __post_init__(self):
        """Compute ``sha256`` from the children when it was given as None."""
        if self.sha256 is None:
            m = hashlib.sha256()
            for child in sorted(self.children, key=operator.attrgetter('name')):
                if child.sha256 is None:
                    raise ValueError(f'child {child.name!r} has no sha256')
                m.update(bytes(child.name, 'utf-8'))
                m.update(child.sha256)
            self.sha256 = m.digest()

    @classmethod
    def from_path(cls, root, relpath='.', filestat=None):
        """Scan a path to instantiate (recursive).

        Args:
            root: directory that ``relpath`` is relative to.
            relpath: path of the entry to scan, relative to ``root``.
            filestat: optional ``os.stat_result`` for the entry, to avoid a
                second filesystem query. It must describe the entry itself
                (not a symlink's target), otherwise links cannot be detected.

        Returns:
            FSEntry for the path, with ``children`` filled in recursively for
            directories (sorted by name).
        """
        path = os.path.join(root, relpath)
        if filestat is None:
            # lstat: a symlink must be seen as a link, not as its target.
            filestat = os.lstat(path)
        mode = filestat.st_mode

        m = hashlib.sha256()
        children = []
        if stat.S_ISLNK(mode):
            # Links are hashed by their (unresolved) target string.
            filetype = 'LNK'
            m.update(bytes(os.readlink(path), 'utf-8'))
        elif stat.S_ISDIR(mode):
            filetype = 'DIR'
            # this prevents a directory's hash from colliding with a file hash
            # in cases where it only holds a single file
            m.update(bytes(relpath, 'utf-8'))
            # os.scandir returns a DirEntry for each child excluding "." and
            # ".."; its .stat() lets us avoid querying the filesystem twice.
            with os.scandir(path) as it:
                direntries = sorted(it, key=operator.attrgetter('name'))
            children = [
                cls.from_path(
                    root=root,
                    relpath=os.path.join(relpath, e.name),
                    filestat=e.stat(follow_symlinks=False),
                )
                for e in direntries
            ]
            for c in children:  # now hash concatenated sorted hashes
                m.update(c.sha256)
        elif stat.S_ISREG(mode):
            filetype = 'REG'
            # Hash in chunks so large files are never held in memory at once.
            with open(path, 'rb') as f:
                for chunk in iter(lambda: f.read(1 << 20), b''):
                    m.update(chunk)
        else:
            # Special files contribute no content to the hash.
            special_types = (
                ('SOCK', stat.S_ISSOCK),
                ('CHR', stat.S_ISCHR),
                ('BLK', stat.S_ISBLK),
                ('FIFO', stat.S_ISFIFO),
                ('DOOR', stat.S_ISDOOR),
                ('PORT', stat.S_ISPORT),
                ('WHT', stat.S_ISWHT),
            )
            for label, predicate in special_types:
                if predicate(mode):
                    filetype = label
                    break
            else:
                filetype = 'OTHER'

        return cls(
            name=os.path.basename(relpath),
            relpath=relpath,
            children=children,
            filetype=filetype,
            perms=stat.filemode(mode),
            sha256=m.digest(),
        )

    @classmethod
    def empty_root(cls):
        """Just a standardized value indicating an empty root directory"""
        return cls(
            name='.',
            relpath='.',
            children=[],
            filetype='DIR',
            perms='----------',
            sha256=hashlib.sha256().digest(),
        )

    @classmethod
    def from_db_index(cls, cursor, root_id=None, root_row=None, relpath='.'):
        """Given id of an entry in store_file, recursively fill this object.

        Args:
            cursor: database cursor on a database holding ``store_file``.
                Queries use ``?`` placeholders (sqlite3 "qmark" style).
            root_id: ``store_file.id`` of the entry to load. Ignored when
                ``root_row`` is given.
            root_row: an already-fetched row for the entry, with the columns
                (id, filename, filetype, unfrozen_perms, frozen, sha256,
                symlink_target).
            relpath: relative path to record for this entry; children get
                ``os.path.join(relpath, filename)``.

        Returns:
            FSEntry for the row, with children loaded recursively and ordered
            by filename.

        Raises:
            LookupError: if ``root_row`` is not given and no row has id
                ``root_id``.
        """
        # A bare column list: wrapping it in parentheses would make it a
        # single row value, which is not valid as a result column.
        fields = 'id, filename, filetype, unfrozen_perms, frozen, sha256, symlink_target'
        if root_row is None:
            cursor.execute(f'SELECT {fields} FROM store_file WHERE id=?;', (root_id,))
            root_row = cursor.fetchone()
            if root_row is None:
                raise LookupError(f'No store_file entry with id {root_id!r}')
        root_id, name, filetype, unfrozen_perms, _frozen, sha256, _symlink_target = root_row

        # get children; fetchall() so the cursor can be reused while recursing
        cursor.execute(
            f'SELECT {fields} FROM store_file WHERE parent=? ORDER BY filename;',
            (root_id,),
        )
        rows = cursor.fetchall()

        return cls(
            name=name,
            relpath=relpath,
            children=[
                cls.from_db_index(
                    cursor,
                    root_row=r,
                    relpath=os.path.join(relpath, r[1]),
                )
                for r in rows
            ],
            filetype=filetype,
            perms=unfrozen_perms,
            sha256=bytes.fromhex(sha256),
        )
class FSDiff:
    """Holds the two hashed directory trees that a difference is taken between."""

    def __init__(self, A, B):
        """Record two hashed directories to be compared.

        The intended comparison assumes the hashes are consistent, so that
        directories with matching hashes need not be inspected deeply.

        NOTE(review): the constructor only stores its arguments. The
        deleted/modified/new overlays that the comparison is meant to produce
        are not computed here.

        Args:
            A: first hashed directory.
            B: second hashed directory.
        """
        self.A = A
        self.B = B

50
src/nancy/machine.py Normal file
View File

@ -0,0 +1,50 @@
from collections import namedtuple
import json
import platform
import time
# Immutable description of the host machine. Field order is significant
# wherever the record is consumed positionally.
MachineInfo = namedtuple(
    'MachineInfo',
    'machine_id hostname processor system release cpu_type timezone '
    'freedesktop_os_release win32_ver mac_ver',
)
def get_machine_info():
    """Collect identifying information about the machine running this code.

    Returns:
        MachineInfo whose ``freedesktop_os_release``, ``win32_ver`` and
        ``mac_ver`` fields are JSON strings. ``freedesktop_os_release`` is
        ``''`` when unavailable. ``machine_id`` is the stripped content of
        ``/etc/machine-id`` on Linux, or ``None`` when it cannot be read or
        the system is not Linux.
    """
    try:
        fdor = json.dumps(platform.freedesktop_os_release())
    except (AttributeError, OSError):
        # AttributeError: freedesktop_os_release only available for
        # python >= 3.10. OSError: no os-release file on this system.
        fdor = ''

    system = platform.system()
    mid = None
    if system == 'Linux':
        try:
            with open('/etc/machine-id', 'r') as f:
                # strip the trailing newline the file ends with
                mid = f.read().strip()
        except OSError:
            # Best effort: a missing or unreadable file leaves the id unset.
            pass

    # time.daylight only says whether the zone *defines* DST; tm_isdst says
    # whether DST is in effect right now, which selects the right name.
    dst_active = time.localtime().tm_isdst > 0

    return MachineInfo(
        machine_id=mid,
        hostname=platform.node(),
        processor=platform.processor(),
        system=system,
        release=platform.release(),
        cpu_type=platform.machine(),
        timezone=time.tzname[1 if dst_active else 0],
        freedesktop_os_release=fdor,
        win32_ver=json.dumps(platform.win32_ver()),
        mac_ver=json.dumps(platform.mac_ver()),
    )

View File

@ -13,11 +13,16 @@ PRAGMA foreign_keys=ON;
-- come into play. Note that these are not reliable identifiers for purposes of
-- tracking down data, but may be helpful context.
CREATE TABLE machine(id INTEGER PRIMARY KEY NOT NULL,
machine_id TEXT, -- platform-dependent unique hardware id
-- Linux: open('/etc/machine-id', 'r').read() (assumes systemd)
-- OSX: `ioreg -rd1 -c IOPlatformExpertDevice | grep IOPlatformUUID`
-- Windows: `reg query HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Cryptography /v MachineGuid`
hostname TEXT, -- platform.node(): 'lucky'
processor TEXT, -- platform.processor():
system TEXT, -- platform.system(): 'Linux'
release TEXT, -- platform.release(): '5.15.64'
machine TEXT, -- platform.machine(): 'x86_64'
cpu_type TEXT, -- platform.machine(): 'x86_64'
timezone TEXT, -- timezone, for interpreting event times
freedesktop_os_release TEXT, -- requires python 3.10
-- platform.freedesktop_os_release() as JSON
@ -34,9 +39,10 @@ CREATE TABLE machine(id INTEGER PRIMARY KEY NOT NULL,
);
-- Programs are run by users on machines
CREATE TABLE user(id INTEGER PRIMARY KEY NOT NULL,
username TEXT NOT NULL,
userid INTEGER,
fullname TEXT,
username TEXT NOT NULL, -- getpass.getuser()
userid INTEGER, -- os.getuid()
fullname TEXT, -- on Linux/OSX: pwd.getpwuid(os.getuid()).pw_gecos
-- on Windows: see https://stackoverflow.com/questions/21766954/how-to-get-windows-users-full-name-in-python
machine INTEGER NOT NULL,
UNIQUE(userid, machine),
@ -44,7 +50,7 @@ CREATE TABLE user(id INTEGER PRIMARY KEY NOT NULL,
);
-- Stores, directories, and files
-- Stores and files (and directories)
-- These are the primary objects tracked by nancy.
-- A store is a directory containing a file called nancy.db (e.g. the dir holding this database)
CREATE TABLE store (id INTEGER PRIMARY KEY NOT NULL,
@ -55,50 +61,54 @@ CREATE TABLE store (id INTEGER PRIMARY KEY NOT NULL,
FOREIGN KEY (machine) REFERENCES machine (id) ON UPDATE CASCADE
);
-- Within the store are directories that contain files.
-- Note that there should be one entry with relpath='.' for the non-imported store.
CREATE TABLE store_directory (id INTEGER PRIMARY KEY NOT NULL,
name TEXT,
store INTEGER NOT NULL,
parent INTEGER, -- parent directory (should be) in same store
frozen BOOL NOT NULL,
UNIQUE(store, name, parent),
FOREIGN KEY (store) REFERENCES store (id) ON UPDATE CASCADE,
-- parent is a recursive key within this table.
FOREIGN KEY (parent) REFERENCES store_directory (id) ON UPDATE CASCADE
);
CREATE TRIGGER insert_store_directory BEFORE INSERT ON store_directory
BEGIN
SELECT
CASE
WHEN NEW.parent IS NOT NULL AND NEW.store != (SELECT store FROM store_directory WHERE id = NEW.parent)
THEN RAISE (ABORT, 'Parent directory resides in different store')
END;
END;
CREATE TRIGGER update_store_directory BEFORE UPDATE ON store_directory
BEGIN
SELECT
CASE
WHEN NEW.parent IS NOT NULL AND NEW.store != (SELECT store FROM store_directory WHERE id = NEW.parent)
THEN RAISE (ABORT, 'Parent directory resides in different store')
END;
END;
-- The store_file table holds all files AND DIRECTORIES that are tracked by the
-- store. Files and directories are distinguished by the filetype column. This
-- table also holds tracked files and directories that have been imported and
-- live outside the current store.
CREATE TABLE store_file (id INTEGER PRIMARY KEY NOT NULL,
directory INTEGER NOT NULL, -- reference a directory inside a store
store INTEGER NOT NULL,
filename TEXT, -- only a filename, not a path
frozen BOOL NOT NULL,
unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
parent INTEGER REFERENCES store_file ON UPDATE CASCADE,
frozen BOOL NOT NULL,
filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See fs.FSEntry.from_path for details
unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target
sha256 TEXT NOT NULL,
source_program INTEGER, -- Note that this is redundant since datum points to a program...
source_datum INTEGER,
UNIQUE(filename, directory),
FOREIGN KEY (directory) REFERENCES store_directory (id) ON UPDATE CASCADE,
FOREIGN KEY (source_datum) REFERENCES datum (id) ON UPDATE CASCADE
);
UNIQUE(store, filename, parent),
-- A computational environment which can execute "processes". Note that the
FOREIGN KEY (store) REFERENCES store (id) ON UPDATE CASCADE,
FOREIGN KEY (source_datum) REFERENCES datum (id) ON UPDATE CASCADE,
FOREIGN KEY (source_program) REFERENCES program (id) ON UPDATE CASCADE
);
CREATE TRIGGER insert_store_file BEFORE INSERT ON store_file
BEGIN
SELECT
CASE
--WHEN NEW.parent IS NOT NULL AND (SELECT filetype FROM store_file WHERE id = NEW.parent) != 'DIR'
--THEN RAISE (ABORT, 'Parent is not listed as a directory')
WHEN NEW.parent IS NOT NULL AND NEW.store != (SELECT store FROM store_file WHERE id = NEW.parent)
THEN RAISE (ABORT, 'Parent directory resides in different store')
END;
END;
CREATE TRIGGER update_store_file BEFORE UPDATE ON store_file
BEGIN
SELECT
CASE
--WHEN NEW.parent IS NOT NULL AND (SELECT filetype FROM store_file WHERE id = NEW.parent) != 'DIR'
--THEN RAISE (ABORT, 'Parent is not listed as a directory')
WHEN NEW.parent IS NOT NULL AND NEW.store != (SELECT store FROM store_file WHERE id = NEW.parent)
THEN RAISE (ABORT, 'Parent directory resides in different store')
END;
END;
-- A computational environment which can execute "programs". Note that the
-- python executable being used, and environment variables are recorded here.
-- Other info is available in the parent "machine" table.
CREATE TABLE environment (id INTEGER PRIMARY KEY NOT NULL,
@ -111,30 +121,36 @@ CREATE TABLE environment (id INTEGER PRIMARY KEY NOT NULL,
FOREIGN KEY (user) REFERENCES user (id) ON UPDATE CASCADE
);
-- A "program" is an execution of a single graph, starting from a number of roots
-- It corresponds to a single host parent process. Note that if multiple calls to
-- nancy.run() are provided within a single python script, multiple "programs"
-- may be created. Also note that historical programs are automatically
-- imported and merged when possible when loading a "datum" from disk.
-- Note that if multiple calls to nancy.run() are provided within a single
-- python script, multiple "programs" may be created. Also note that historical
-- programs are automatically imported and merged when possible when loading a
-- "datum" from disk.
CREATE TABLE program (id INTEGER PRIMARY KEY NOT NULL,
name TEXT, -- name of the program, usually written in code e.g. cnn_crossval
name TEXT, -- name of the program, usually written lowercase by calling code e.g. cnn_crossval
-- Names of built-in operations will be shown in upper case: e.g. 'FREEZE'
-- we use POSIX timestamps in UTC for time recording.
-- e.g. datetime.datetime.now().timestamp()
start_time REAL,
end_time REAL,
process_id INTEGER, -- host PID of python process on host OS
environment INTEGER NOT NULL,
message TEXT, -- user-defined message to help distinguish similar runs
FOREIGN KEY (environment) REFERENCES environment (id) ON UPDATE CASCADE
);
-- We try to track all python packages that impact execution by traversing a
-- copy of sys.modules. This is done once before a "process" and once after in
-- copy of sys.modules. This is done once before a "program" and once after in
-- case some calling code winds up calling a previously-unloaded module.
CREATE TABLE py_package (id INTEGER PRIMARY KEY NOT NULL,
name TEXT NOT NULL,
version TEXT,
process INTEGER NOT NULL,
FOREIGN KEY (process) REFERENCES process (id) ON UPDATE CASCADE
program INTEGER NOT NULL,
FOREIGN KEY (program) REFERENCES program (id) ON UPDATE CASCADE
);
-- A py_module describes any python module file containing decorated Functions.
-- Modules are tracked since they impact the global scope of function calls.
@ -183,20 +199,20 @@ CREATE TABLE func_output(id INTEGER PRIMARY KEY NOT NULL,
-- Tasks are executed calls to Functions: they correspond to a particular set of
-- inputs which are themselves data (see datum table). A task is executed in
-- the context of a "process". Within a process, tasks are typically evaluated
-- the context of a "program". Within a program, tasks are typically evaluated
-- in a serial manner.
CREATE TABLE task(id INTEGER PRIMARY KEY NOT NULL,
func INTEGER NOT NULL,
process INTEGER NOT NULL,
program INTEGER NOT NULL,
FOREIGN KEY (func) REFERENCES func (id) ON UPDATE CASCADE,
FOREIGN KEY (process) REFERENCES process (id) ON UPDATE CASCADE
FOREIGN KEY (program) REFERENCES program (id) ON UPDATE CASCADE
);
-- A datum is an object that is computed as the output of a task, given as a
-- literal value in a config file, or loaded from a file.
CREATE TABLE datum(id INTEGER PRIMARY KEY NOT NULL,
-- provider_type describes where the datum came from. Choices are:
-- - COMPUTED: output of a decorated Function
-- - IMPORTED: imported output from a prior process
-- - IMPORTED: imported output from a prior program
-- - LITERAL: literal value provided in a config file
-- Note that literals are implicitly imported (hash computed) at runtime
provider_type TEXT,
@ -231,8 +247,9 @@ CREATE TABLE task_input(id INTEGER PRIMARY KEY NOT NULL,
func_input INTEGER NOT NULL,
datum INTEGER NOT NULL,
-- Data have versions to facilitate tacking non-const operations. If a datum
-- is passed to a non-const operation, a new datum is provided
-- Data have versions to facilitate tracking non-const operations. If a datum
-- is passed to a non-const operation, it must increment its internal
-- version
datum_version INTEGER NOT NULL,
FOREIGN KEY (task) REFERENCES task (id) ON UPDATE CASCADE,

View File

@ -1,26 +1,14 @@
"""Utilities for creating new stores and linking between them."""
from . import db
from . import db, fs, machine
import importlib
import json
import os
from pathlib import Path
import sqlite3
import stat
def remove_write_perms(path):
"""Remove write permissions for all users while preserving other perms"""
s = os.stat(path)
orig_perm_string = stat.filemode(s)
os.chmod(
path,
s.st_mode ^ stat.S_IWUSR ^ stat.S_IWGRP ^ stat.S_IWOTH,
follow_symlinks=False,
)
return orig_perm_string
class Store:
"""Describes a data directory, holds active connection to nancy.db"""
@ -82,63 +70,26 @@ class Store:
def record_machine_description(self):
"""Record machine-specific information"""
import platform, time
cur = self.conn.cursor()
fdor = ''
try:
fdor = json.dumps(platform.freedesktop_os_release())
except AttributeError:
# freedesktop_os_release only available for python >= 3.10
fdor = ''
machine_info = (
platform.node(),
platform.processor(),
platform.system(),
platform.release(),
platform.machine(),
time.tzname[time.daylight],
fdor,
json.dumps(platform.win32_ver()),
json.dumps(platform.mac_ver()),
)
cur.execute('INSERT INTO machine VALUES(NULL,?,?,?,?,?,?,?,?,?)', machine_info)
machine_info = machine.get_machine_info()
cur.execute('INSERT INTO machine VALUES(NULL,?,?,?,?,?,?,?,?,?,?)', machine_info)
self.machine_id = cur.lastrowid
self.conn.commit()
def make_readonly(self):
"""Make store directory read-only (except for nancy.db) and return file list"""
dirpaths = {}
regfilepaths = {}
symlinkpaths = {}
irregfilepaths = {} # any file that's not regular or symlink. e.g. inodes or sockets
fs.make_readonly_recursive(self.path, excluded='./nancy.db')
# traversing bottom-up makes it easier to freeze perms on directories
for root, dirs, files in os.walk(self.path, topdown=False):
for f in files:
p = os.path.join(root, f)
if p == './nancy.db':
continue
orig_perms = remove_write_perms(os.path.join(self.path, p))
if not os.path.isfile(p):
# not a link or regular file
irregfilepaths[p] = orig_perms
if os.path.islink(p):
symlinkpaths[p] = orig_perms
else:
regfilepaths[p] = orig_perms
for d in dirs:
p = os.path.join(root, d)
orig_perms = remove_write_perms(os.path.join(self.path, p))
dirpaths[p] = orig_perms
return dirpaths, regfilepaths, symlinkpaths, irregfilepaths
def file_hashes(self, local_only=True):
"""Get recorded hashes of all files currently tracked in this store"""
def fs_entries(self, shallow=False):
"""Return recursive structure containing FSEntry objects from db"""
# get the database id for the table entry in this store having name '.'
cur = self.conn.cursor()
cur.execute('SELECT id FROM store_file WHERE store=0 AND parent is NULL;')
root_id = cur.fetchone()
if root_id is None:
return fs.FSEntry.empty_root()
else:
return fs.FSEntry.from_db_index(cur, root_id=root_id)
def freeze(self):
@ -152,7 +103,7 @@ class Store:
", ".join(irreg),
)
for p, perms in symlinks.items():
target = os.path.readlink(p) # gives possibly relative path to tgt
target = os.readlink(p) # gives possibly relative path to target
resolved = os.path.realpath(p)
if not resolved.startswith(os.path.abspath(self.path)):
raise Exception(
@ -160,10 +111,24 @@ class Store:
f"{target} which is outside store path ({self.path})",
)
# compute checksums on all files
# get hashes of current directory (recursive)
current = fs.FSEntry.from_path(self.path)
# extract hashes for all previously-cataloged files/dirs
recorded = self.fs_entries(shallow=True)
d = current.diff(recorded)
# select all dirs
# detect new and deleted files, and those that have changed type
# create entries for all directories if they do not yet exist (top
# down)
# insert files and symlinks into store_file, computing checksums on
# each
# update versions in nancy.db as appropriate
# remove write permissions on nancy.db

View File

@ -8,7 +8,7 @@ import sqlite3
@pytest.fixture
def db():
def temp_db():
"""Create an in-memory database that follow's the nancy schema"""
conn = sqlite3.connect(':memory:')
cur = conn.cursor()
@ -23,13 +23,15 @@ def db():
conn.close()
@pytest.fixture(autouse=True)
def insert_machine(db):
db.executemany(
@pytest.fixture
def insert_machine(temp_db):
cur = temp_db
cur.executemany(
'INSERT INTO machine VALUES '
'(?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
'(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
[(
None,
None, #id INTEGER PRIMARY KEY NOT NULL,
'a5d97c08a15c4db69f5fded523a1bfe3', #machine_id TEXT, -- platform-dependent unique hardware id
'lucky', #hostname TEXT, -- platform.node(): 'lucky'
'', #processor TEXT, -- platform.processor():
'Linux', #system TEXT, -- platform.system(): 'Linux'
@ -40,7 +42,8 @@ def insert_machine(db):
'', #win32_ver TEXT, -- platform.win32_ver() as JSON
'', #mac_ver TEXT -- platform.mac_ver() as JSON
), (
None,
None, #id INTEGER PRIMARY KEY NOT NULL,
'afc9b06a23b74341b29d42b8312a4f8a',
'a100', #hostname TEXT, -- platform.node(): 'lucky'
'', #processor TEXT, -- platform.processor():
'Linux', #system TEXT, -- platform.system(): 'Linux'
@ -52,48 +55,55 @@ def insert_machine(db):
'', #mac_ver TEXT -- platform.mac_ver() as JSON
)],
)
def test_insert_machine(db):
db.execute('SELECT * FROM machine')
machines = db.fetchall()
return cur
def test_insert_machine(insert_machine):
cur = insert_machine
cur.execute('SELECT * FROM machine')
machines = cur.fetchall()
assert len(machines) == 2
@pytest.fixture(autouse=True)
def insert_user(db):
db.executemany(
@pytest.fixture
def insert_user(insert_machine):
cur = insert_machine
cur.executemany(
'INSERT INTO user VALUES '
'(?, ?, ?, ?, ?)',
[(
None,
None, #id INTEGER PRIMARY KEY NOT NULL,
'jacob', #username TEXT NOT NULL,
101, #userid INTEGER,
'Jacob Hinkle', #fullname TEXT,
1, #machine INTEGER NOT NULL,
), (
None,
None, #id INTEGER PRIMARY KEY NOT NULL,
'jacob', #username TEXT NOT NULL,
10301, #userid INTEGER,
'Jacob Hinkle', #fullname TEXT,
2, #machine INTEGER NOT NULL,
), (
None,
None, #id INTEGER PRIMARY KEY NOT NULL,
'bob', #username TEXT NOT NULL,
2035, #userid INTEGER,
'Just Bob', #fullname TEXT,
2, #machine INTEGER NOT NULL,
)],
)
def test_insert_user(db):
db.execute('SELECT * FROM user')
users = db.fetchall()
return cur
def test_insert_user(insert_user):
cur = insert_user
cur.execute('SELECT * FROM user')
users = cur.fetchall()
assert len(users) == 3
def test_invalid_user_machine(db):
def test_invalid_user_machine(insert_user):
cur = insert_user
with pytest.raises(sqlite3.IntegrityError):
# should fail foreign key constraint
db.execute(
cur.execute(
'INSERT INTO user VALUES '
'(?, ?, ?, ?, ?)',
(
None,
None, #id INTEGER PRIMARY KEY NOT NULL,
'bozo', #username TEXT NOT NULL,
100, #userid INTEGER,
'Bozo the Clown', #fullname TEXT,
@ -102,11 +112,11 @@ def test_invalid_user_machine(db):
)
with pytest.raises(sqlite3.IntegrityError):
# should fail uniqueness constraint
db.execute(
cur.execute(
'INSERT INTO user VALUES '
'(?, ?, ?, ?, ?)',
(
None,
None, #id INTEGER PRIMARY KEY NOT NULL,
'jacob', #username TEXT NOT NULL,
101, #userid INTEGER,
'Bozo the Clown', #fullname TEXT,
@ -115,81 +125,118 @@ def test_invalid_user_machine(db):
)
@pytest.fixture(autouse=True)
def insert_store(db):
db.executemany(
@pytest.fixture
def insert_store(insert_machine):
cur = insert_machine
cur.executemany(
'INSERT INTO store VALUES '
'(?, ?, ?, ?)',
[(
None,
None, #id INTEGER PRIMARY KEY NOT NULL,
1, #machine INTEGER,
'/path/to/first/store', #dbpath TEXT NOT NULL,
False, #imported BOOL,
), (
None,
None, #id INTEGER PRIMARY KEY NOT NULL,
1, #machine INTEGER,
'/path/to/dependencys/store', #dbpath TEXT NOT NULL,
'/path/to/dependency/store', #dbpath TEXT NOT NULL,
True, #imported BOOL,
), (
None,
None, #id INTEGER PRIMARY KEY NOT NULL,
2, #machine INTEGER,
# same path but on a separate machine
'/path/to/first/store', #dbpath TEXT NOT NULL,
True, #imported BOOL,
)],
)
@pytest.fixture(autouse=True)
def insert_store_directory(db):
db.executemany(
'INSERT INTO store_directory VALUES '
'(?, ?, ?, ?, ?)',
return cur
@pytest.fixture
def insert_directories(insert_store):
cur = insert_store
cur.executemany(
'INSERT INTO store_file VALUES '
'(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
[(
None,
'.', #name TEXT,
None, #id INTEGER PRIMARY KEY NOT NULL,
1, #store INTEGER NOT NULL,
None, #parent INTEGER, -- parent directory (should be) in same store
'.', #filename TEXT, -- only a filename, not a path
None, #parent INTEGER REFERENCES store_file ON UPDATE CASCADE,
False, #frozen BOOL NOT NULL,
'DIR', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
'dr-xr-xr--', #unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
None, #symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target
'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855', #sha256 TEXT NOT NULL,
None, #source_program INTEGER, -- Note that this is redundant since datum points to a program...
None, #source_datum INTEGER,
), (
None,
'foo', #name TEXT,
None, #id INTEGER PRIMARY KEY NOT NULL,
1, #store INTEGER NOT NULL,
1, #parent INTEGER, -- parent directory (should be) in same store
'foo', #filename TEXT, -- only a filename, not a path
1, #parent INTEGER REFERENCES store_file ON UPDATE CASCADE,
False, #frozen BOOL NOT NULL,
'DIR', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
'dr-xr-xr--', #unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
None, #symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target
'5ad4e9e3090de8de781e4a35ce6ba16ad8eaba7a5456cd230a36f77143885396', #sha256 TEXT NOT NULL,
None, #source_program INTEGER, -- Note that this is redundant since datum points to a program...
None, #source_datum INTEGER,
), (
None,
'.', #name TEXT,
None, #id INTEGER PRIMARY KEY NOT NULL,
2, #store INTEGER NOT NULL,
None, #parent INTEGER, -- parent directory (should be) in same store
'.', #filename TEXT, -- only a filename, not a path
None, #parent INTEGER REFERENCES store_file ON UPDATE CASCADE,
False, #frozen BOOL NOT NULL,
'DIR', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
'dr-xr-xr--', #unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
None, #symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target
'15c5e8d80a48803c18e72cd274532d608b8026dcbc192afc490fe1c289ec6ff1', #sha256 TEXT NOT NULL,
None, #source_program INTEGER, -- Note that this is redundant since datum points to a program...
None, #source_datum INTEGER,
)],
)
return cur
def test_crossstore_directory_insert(db):
def test_crossstore_directory_insert(insert_directories):
cur = insert_directories
with pytest.raises(sqlite3.IntegrityError):
# declaring directory as belonging to store 2, but parent's store is 1
db.execute(
'INSERT INTO store_directory VALUES '
'(?, ?, ?, ?, ?)',
cur.execute(
'INSERT INTO store_file VALUES '
'(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
(
None,
'.', #name TEXT,
None, #id INTEGER PRIMARY KEY NOT NULL,
2, #store INTEGER NOT NULL,
1, #parent INTEGER, -- parent directory (should be) in same store
'xstore_file', #filename TEXT, -- only a filename, not a path
1, #parent INTEGER REFERENCES store_file ON UPDATE CASCADE,
False, #frozen BOOL NOT NULL,
'DIR', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
'dr-xr-xr--', #unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
None, #symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target
'15c5e8d80a48803c18e72cd274532d608b8026dcbc192afc490fe1c289ec6ff1', #sha256 TEXT NOT NULL,
None, #source_program INTEGER, -- Note that this is redundant since datum points to a program...
None, #source_datum INTEGER,
))
for row in cur.connection.iterdump():
print(row)
cur.execute('SELECT * FROM store_file')
print(cur.fetchall())
@pytest.fixture(autouse=True)
def insert_store_file(db):
db.executemany(
@pytest.fixture
def insert_files(insert_directories):
cur = insert_directories
cur.executemany(
'INSERT INTO store_file VALUES '
'(?, ?, ?, ?, ?, ?)',
'(?, ?, ?, ?, ?, ?, ?, ?)',
[(
None,
1, #directory INTEGER NOT NULL
'example.csv', #filename TEXT
1, #frozen BOOL NOT NULL
'-rw-rw-r--', #unfrozen_perms TEXT
'9aafde8f9dbec34c694b86333f746f58958c44247c474904e06d1f07f94292b4', #sha256 TEXT NOT NULL,
None, #source_program INTEGER,
None, #source_datum INTEGER,
), (
None,
@ -197,7 +244,10 @@ def insert_store_file(db):
'plots.png', #filename TEXT
1, #frozen BOOL NOT NULL
'-rw-r--r--', #unfrozen_perms TEXT
'9add10cc3a6f0e4618dfed005ddfbeafdf268c58b773ba0021963c856d00235b', #sha256 TEXT NOT NULL,
None, #source_program INTEGER,
None, #source_datum INTEGER,
)]
)
return cur