Update schema and add tests which don't rely on python
This commit is contained in:
parent
9e7af6b8a1
commit
311990d5f6
@ -2,56 +2,130 @@
|
||||
-- Version of this schema. Increment it whenever meaningful updates are made to
-- this file.
PRAGMA user_version = 0;

-- Ask SQLite to enforce the FOREIGN KEY clauses declared below.
PRAGMA foreign_keys = ON;
|
||||
-- NOTE that ON UPDATE CASCADE is used for all foreign keys, in order to
|
||||
-- facilitate easy importing of one database into another
|
||||
|
||||
|
||||
-- We track host machines, so that we can give better information about file
|
||||
-- locations. This also allows us to see whether platform-dependent behavior may
|
||||
-- come into play. Note that these are not reliable identifiers for purposes of
|
||||
-- tracking down data, but may be helpful context.
|
||||
-- One row per host machine. Every column except id is nullable; the values are
-- taken from Python's platform module as noted on each column.
CREATE TABLE machine (
    id                     INTEGER PRIMARY KEY NOT NULL,

    -- platform.node(): 'lucky'
    hostname               TEXT,

    -- platform.processor():
    processor              TEXT,

    -- platform.system(): 'Linux'
    system                 TEXT,

    -- platform.release(): '5.15.64'
    release                TEXT,

    -- platform.machine(): 'x86_64'
    machine                TEXT,

    -- timezone, for interpreting event times
    timezone               TEXT,

    -- platform.freedesktop_os_release() as JSON (requires python 3.10), e.g.
    --   "{'NAME': 'NixOS',
    --     'ID': 'nixos',
    --     'PRETTY_NAME': 'NixOS 22.05 (Quokka)',
    --     'BUG_REPORT_URL': 'https://github.com/NixOS/nixpkgs/issues',
    --     'BUILD_ID': '22.05.20220902.67e4507',
    --     'DOCUMENTATION_URL': 'https://nixos.org/learn.html',
    --     'HOME_URL': 'https://nixos.org/',
    --     'LOGO': 'nix-snowflake',
    --     'SUPPORT_URL': 'https://nixos.org/community.html',
    --     'VERSION': '22.05 (Quokka)',
    --     'VERSION_CODENAME': 'quokka',
    --     'VERSION_ID': '22.05'}"
    freedesktop_os_release TEXT,

    -- platform.win32_ver() as JSON
    win32_ver              TEXT,

    -- platform.mac_ver() as JSON
    mac_ver                TEXT
);
|
||||
-- Programs are run by users on machines
|
||||
-- A user account as seen on one particular machine.
CREATE TABLE user (
    id       INTEGER PRIMARY KEY NOT NULL,
    username TEXT NOT NULL,
    userid   INTEGER,
    fullname TEXT,
    machine  INTEGER NOT NULL,  -- machine.id of the host the account belongs to

    -- A given userid may be recorded only once per machine.
    UNIQUE (userid, machine),

    FOREIGN KEY (machine) REFERENCES machine (id) ON UPDATE CASCADE
);
|
||||
|
||||
|
||||
-- Stores, directories, and files
|
||||
-- These are the primary objects tracked by nancy.
|
||||
-- A store is a directory containing a file called nancy.db
|
||||
-- A store is a directory containing a file called nancy.db (e.g. the dir holding this database)
|
||||
CREATE TABLE store (id INTEGER PRIMARY KEY NOT NULL,
|
||||
name TEXT,
|
||||
dbpath TEXT NOT NULL
|
||||
machine INTEGER,
|
||||
dbpath TEXT NOT NULL,
|
||||
imported BOOL, -- is this the store for the current directory? If not, then it was
|
||||
-- imported in order to describe a dependency.
|
||||
|
||||
FOREIGN KEY (machine) REFERENCES machine (id) ON UPDATE CASCADE
|
||||
);
|
||||
-- Within the store are directories that contain files.
|
||||
-- Note that there should be one entry with name='.' (and no parent) for the non-imported store.
|
||||
CREATE TABLE store_directory (id INTEGER PRIMARY KEY NOT NULL,
|
||||
name TEXT,
|
||||
store INTEGER NOT NULL,
|
||||
relpath TEXT,
|
||||
parent INTEGER, -- parent directory (should be) in same store
|
||||
frozen BOOL NOT NULL,
|
||||
FOREIGN KEY (store) REFERENCES store (id)
|
||||
|
||||
UNIQUE(store, name, parent),
|
||||
FOREIGN KEY (store) REFERENCES store (id) ON UPDATE CASCADE,
|
||||
-- parent is a recursive key within this table.
|
||||
FOREIGN KEY (parent) REFERENCES store_directory (id) ON UPDATE CASCADE
|
||||
);
|
||||
-- Abort the insert of a directory whose parent row belongs to another store.
-- Rows with a NULL parent, or whose parent id matches no existing row, are not
-- rejected by this trigger.
CREATE TRIGGER insert_store_directory BEFORE INSERT ON store_directory
BEGIN
    SELECT RAISE (ABORT, 'Parent directory resides in different store')
    WHERE EXISTS (
        SELECT 1
        FROM store_directory AS parent_dir
        WHERE parent_dir.id = NEW.parent
          AND parent_dir.store <> NEW.store
    );
END;
|
||||
-- Abort an update that would leave a directory with a parent row belonging to
-- another store. Rows with a NULL parent, or whose parent id matches no
-- existing row, are not rejected by this trigger.
CREATE TRIGGER update_store_directory BEFORE UPDATE ON store_directory
BEGIN
    SELECT RAISE (ABORT, 'Parent directory resides in different store')
    WHERE EXISTS (
        SELECT 1
        FROM store_directory AS parent_dir
        WHERE parent_dir.id = NEW.parent
          AND parent_dir.store <> NEW.store
    );
END;
|
||||
|
||||
CREATE TABLE store_file (id INTEGER PRIMARY KEY NOT NULL,
|
||||
directory INTEGER NOT NULL,
|
||||
relpath TEXT, -- path relative to top-level of _store_, not dir
|
||||
directory INTEGER NOT NULL, -- reference a directory inside a store
|
||||
filename TEXT, -- only a filename, not a path
|
||||
frozen BOOL NOT NULL,
|
||||
unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
|
||||
|
||||
source_datum INTEGER,
|
||||
|
||||
FOREIGN KEY (directory) REFERENCES store_directory (id)
|
||||
FOREIGN KEY (source_datum) REFERENCES datum (id)
|
||||
UNIQUE(filename, directory),
|
||||
FOREIGN KEY (directory) REFERENCES store_directory (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (source_datum) REFERENCES datum (id) ON UPDATE CASCADE
|
||||
);
|
||||
|
||||
-- computational environment which can execute "runs"
|
||||
-- A computational environment which can execute "processes". Note that the
|
||||
-- python executable being used, and environment variables are recorded here.
|
||||
-- Other info is available in the parent "machine" table.
|
||||
CREATE TABLE environment (id INTEGER PRIMARY KEY NOT NULL,
|
||||
hostname TEXT, -- platform.node(): computer's network name
|
||||
envvars_json TEXT, -- json.dumps(dict(os.environ))
|
||||
python_implementation TEXT, -- platform.python_implementation(): 'cpython'
|
||||
python_strversion TEXT, -- sys.version: '3.9.7 (default, Sep 16 2021, 13:09:58) \n[GCC 7.5.0]'
|
||||
python_hexversion INTEGER, -- sys.hexversion: 50923504
|
||||
python_host_platform TEXT -- platform.platform(): Linux-5.15.0-46-generic-x86_64-with-glibc2.31
|
||||
user INTEGER,
|
||||
|
||||
FOREIGN KEY (user) REFERENCES user (id) ON UPDATE CASCADE
|
||||
);
|
||||
|
||||
-- A "process" is an execution of a single graph, starting from a number of roots
|
||||
-- It corresponds to a single host process. Note that if multiple calls to
|
||||
-- nancy.run() are provided within a single python script, multiple "processes"
|
||||
-- may be created. Also note that historical processes are automatically
|
||||
-- A "program" is an execution of a single graph, starting from a number of roots
|
||||
-- It corresponds to a single host parent process. Note that if multiple calls to
|
||||
-- nancy.run() are provided within a single python script, multiple "programs"
|
||||
-- may be created. Also note that historical programs are automatically
|
||||
-- imported and merged when possible when loading a "datum" from disk.
|
||||
CREATE TABLE process (id INTEGER PRIMARY KEY NOT NULL,
|
||||
CREATE TABLE program (id INTEGER PRIMARY KEY NOT NULL,
|
||||
name TEXT, -- name of the program, usually written in code e.g. cnn_crossval
|
||||
-- we use POSIX timestamps in UTC for time recording.
|
||||
-- e.g. datetime.datetime.now().timestamp()
|
||||
start_time REAL,
|
||||
end_time REAL,
|
||||
process_id INTEGER, -- host PID of python process on host OS
|
||||
environment INTEGER NOT NULL,
|
||||
FOREIGN KEY (environment) REFERENCES environment (id)
|
||||
message TEXT, -- user-defined message to help distinguish similar runs
|
||||
FOREIGN KEY (environment) REFERENCES environment (id) ON UPDATE CASCADE
|
||||
);
|
||||
-- We try to track all python packages that impact execution by traversing a
|
||||
-- copy of sys.modules. This is done once before a "process" and once after in
|
||||
@ -60,7 +134,7 @@ CREATE TABLE py_package (id INTEGER PRIMARY KEY NOT NULL,
|
||||
name TEXT NOT NULL,
|
||||
version TEXT,
|
||||
process INTEGER NOT NULL,
|
||||
FOREIGN KEY (process) REFERENCES process (id)
|
||||
FOREIGN KEY (process) REFERENCES process (id) ON UPDATE CASCADE
|
||||
);
|
||||
-- A py_module describes any python module file containing decorated Functions.
|
||||
-- Modules are tracked since they impact the global scope of function calls.
|
||||
@ -68,7 +142,7 @@ CREATE TABLE py_module(id INTEGER PRIMARY KEY NOT NULL,
|
||||
name TEXT NOT NULL,
|
||||
code TEXT,
|
||||
package INTEGER,
|
||||
FOREIGN KEY (package) REFERENCES py_package (id)
|
||||
FOREIGN KEY (package) REFERENCES py_package (id) ON UPDATE CASCADE
|
||||
);
|
||||
-- A "func" just describes a function, without reference to its arguments. It
|
||||
-- can have inputs and outputs, which are described in the func_inputs and
|
||||
@ -80,7 +154,7 @@ CREATE TABLE func(id INTEGER PRIMARY KEY NOT NULL,
|
||||
-- e.g. datetime.datetime.now().timestamp()
|
||||
start_time REAL,
|
||||
end_time REAL,
|
||||
FOREIGN KEY (module) REFERENCES py_module (id)
|
||||
FOREIGN KEY (module) REFERENCES py_module (id) ON UPDATE CASCADE
|
||||
);
|
||||
CREATE TABLE func_input(id INTEGER PRIMARY KEY NOT NULL,
|
||||
name TEXT NOT NULL,
|
||||
@ -93,7 +167,7 @@ CREATE TABLE func_input(id INTEGER PRIMARY KEY NOT NULL,
|
||||
|
||||
description TEXT,
|
||||
|
||||
FOREIGN KEY (func) REFERENCES func (id)
|
||||
FOREIGN KEY (func) REFERENCES func (id) ON UPDATE CASCADE
|
||||
);
|
||||
CREATE TABLE func_output(id INTEGER PRIMARY KEY NOT NULL,
|
||||
name TEXT,
|
||||
@ -103,7 +177,7 @@ CREATE TABLE func_output(id INTEGER PRIMARY KEY NOT NULL,
|
||||
position INTEGER NOT NULL,
|
||||
description TEXT,
|
||||
|
||||
FOREIGN KEY (func) REFERENCES func (id)
|
||||
FOREIGN KEY (func) REFERENCES func (id) ON UPDATE CASCADE
|
||||
);
|
||||
|
||||
|
||||
@ -114,8 +188,8 @@ CREATE TABLE func_output(id INTEGER PRIMARY KEY NOT NULL,
|
||||
CREATE TABLE task(id INTEGER PRIMARY KEY NOT NULL,
|
||||
func INTEGER NOT NULL,
|
||||
process INTEGER NOT NULL,
|
||||
FOREIGN KEY (func) REFERENCES func (id),
|
||||
FOREIGN KEY (process) REFERENCES process (id)
|
||||
FOREIGN KEY (func) REFERENCES func (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (process) REFERENCES process (id) ON UPDATE CASCADE
|
||||
);
|
||||
-- A datum is an object that is computed as the output of a task, given as a
|
||||
-- literal value in a config file, or loaded from a file.
|
||||
@ -141,15 +215,15 @@ CREATE TABLE datum(id INTEGER PRIMARY KEY NOT NULL,
|
||||
-- hash of provenance summarizes all dependencies, independent of environment
|
||||
-- computed _before_ computation
|
||||
hash_provenance TEXT,
|
||||
hash_content TEXT, -- hash of content
|
||||
hash_content TEXT, -- hash of content is only computed when this datum is saved to disk
|
||||
|
||||
-- Version starts at 0 when provided/first computed. When passed as
|
||||
-- a non-const input to a non-const function, this version recorded and
|
||||
-- incremented
|
||||
current_version INTEGER NOT NULL,
|
||||
|
||||
FOREIGN KEY (task) REFERENCES task (id),
|
||||
FOREIGN KEY (task_output) REFERENCES func_output (id)
|
||||
FOREIGN KEY (task) REFERENCES task (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (task_output) REFERENCES func_output (id) ON UPDATE CASCADE
|
||||
);
|
||||
-- A task input records the version of a Datum that is passed to the function
|
||||
CREATE TABLE task_input(id INTEGER PRIMARY KEY NOT NULL,
|
||||
@ -161,7 +235,7 @@ CREATE TABLE task_input(id INTEGER PRIMARY KEY NOT NULL,
|
||||
-- is passed to a non-const operation, a new datum is provided
|
||||
datum_version INTEGER NOT NULL,
|
||||
|
||||
FOREIGN KEY (task) REFERENCES task (id),
|
||||
FOREIGN KEY (func_input) REFERENCES func_input (id),
|
||||
FOREIGN KEY (datum) REFERENCES datum (id)
|
||||
FOREIGN KEY (task) REFERENCES task (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (func_input) REFERENCES func_input (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (datum) REFERENCES datum (id) ON UPDATE CASCADE
|
||||
);
|
||||
|
||||
207
tests/test_db.py
207
tests/test_db.py
@ -1,8 +1,203 @@
|
||||
def test_schema_version_match():
|
||||
import nancy
|
||||
from nancy.version import schema_version
|
||||
"""
|
||||
Pure SQL tests that don't depend on nancy's Python code
|
||||
"""
|
||||
import pytest
|
||||
|
||||
cur = nancy._conn.cursor()
|
||||
(db_schema_ver,) = cur.execute("PRAGMA user_version;").fetchone()
|
||||
import os
|
||||
import sqlite3
|
||||
|
||||
|
||||
@pytest.fixture
def db():
    """Yield a cursor on a fresh in-memory database that follows the nancy schema.

    The schema text is read from ``../src/nancy/schema.sql`` (relative to this
    test file) and run with ``executescript``. The connection is closed when
    the requesting test finishes, and also if applying the schema fails.
    """
    # TODO: consider importlib.resources.open_text("nancy", "schema.sql")
    # instead of a path relative to this file.
    schema_path = os.path.join(
        os.path.dirname(__file__),
        '../src/nancy/schema.sql',
    )
    # Use a context manager so the file handle is closed deterministically
    # rather than whenever the garbage collector gets to it.
    with open(schema_path, 'r') as schema_file:
        schema = schema_file.read()

    conn = sqlite3.connect(':memory:')
    try:
        cur = conn.cursor()
        cur.executescript(schema)
        yield cur
    finally:
        conn.close()
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def insert_machine(db):
    """Insert two machine rows ('lucky' and 'a100') before every test.

    Columns are named explicitly so the fixture does not depend on the order
    in which the schema declares them; ``id`` is left for SQLite to assign.
    """
    db.executemany(
        'INSERT INTO machine ('
        'hostname, processor, system, release, machine, timezone, '
        'freedesktop_os_release, win32_ver, mac_ver'
        ') VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)',
        [
            (
                'lucky',    # hostname
                '',         # processor
                'Linux',    # system
                '5.15.64',  # release
                'aarch64',  # machine
                'EDT',      # timezone
                '',         # freedesktop_os_release
                '',         # win32_ver
                '',         # mac_ver
            ),
            (
                'a100',     # hostname
                '',         # processor
                'Linux',    # system
                '5.15.63',  # release
                'x86_64',   # machine
                'EST',      # timezone
                '',         # freedesktop_os_release
                '',         # win32_ver
                '',         # mac_ver
            ),
        ],
    )
|
||||
def test_insert_machine(db):
|
||||
db.execute('SELECT * FROM machine')
|
||||
machines = db.fetchall()
|
||||
assert len(machines) == 2
|
||||
|
||||
@pytest.fixture(autouse=True)
def insert_user(db, insert_machine):
    """Insert three user rows before every test.

    Two accounts named 'jacob' (one on machine 1, one on machine 2) and one
    account 'bob' on machine 2. ``insert_machine`` is requested explicitly so
    the machine rows referenced by the foreign key exist first, instead of
    relying on the order in which pytest happens to run autouse fixtures.
    Columns are named so the statement does not depend on schema column order.
    """
    db.executemany(
        'INSERT INTO user (username, userid, fullname, machine) '
        'VALUES (?, ?, ?, ?)',
        [
            # (username, userid, fullname, machine)
            ('jacob', 101, 'Jacob Hinkle', 1),
            ('jacob', 10301, 'Jacob Hinkle', 2),
            ('bob', 2035, 'Just Bob', 2),
        ],
    )
|
||||
def test_insert_user(db):
|
||||
db.execute('SELECT * FROM user')
|
||||
users = db.fetchall()
|
||||
assert len(users) == 3
|
||||
def test_invalid_user_machine(db):
    """Inserting a user that violates a constraint raises IntegrityError.

    Two cases are exercised: a machine id that does not exist, and a
    (userid, machine) pair that duplicates an existing row.
    """
    insert_sql = 'INSERT INTO user VALUES (?, ?, ?, ?, ?)'

    # (id, username, userid, fullname, machine)
    unknown_machine = (None, 'bozo', 100, 'Bozo the Clown', 3)
    duplicate_userid = (None, 'jacob', 101, 'Bozo the Clown', 1)

    # should fail foreign key constraint
    with pytest.raises(sqlite3.IntegrityError):
        db.execute(insert_sql, unknown_machine)

    # should fail uniqueness constraint
    with pytest.raises(sqlite3.IntegrityError):
        db.execute(insert_sql, duplicate_userid)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def insert_store(db, insert_machine):
    """Insert three store rows before every test.

    One non-imported and one imported store on machine 1, plus an imported
    store on machine 2 that reuses the first store's path. ``insert_machine``
    is requested explicitly so the referenced machine rows exist first.
    Columns are named so the statement does not depend on schema column order.
    """
    db.executemany(
        'INSERT INTO store (machine, dbpath, imported) VALUES (?, ?, ?)',
        [
            # (machine, dbpath, imported)
            (1, '/path/to/first/store', False),
            (1, '/path/to/dependencys/store', True),
            # same path but on a separate machine
            (2, '/path/to/first/store', True),
        ],
    )
|
||||
@pytest.fixture(autouse=True)
def insert_store_directory(db, insert_store):
    """Insert three store_directory rows before every test.

    A top-level '.' directory in store 1, a 'foo' directory whose parent is
    that first row, and a top-level '.' directory in store 2. ``insert_store``
    is requested explicitly so the referenced store rows exist first.
    Columns are named so the statement does not depend on schema column order.
    """
    db.executemany(
        'INSERT INTO store_directory (name, store, parent, frozen) '
        'VALUES (?, ?, ?, ?)',
        [
            # (name, store, parent, frozen); parent must be in the same store
            ('.', 1, None, False),
            ('foo', 1, 1, False),
            ('.', 2, None, False),
        ],
    )
|
||||
|
||||
def test_crossstore_directory_insert(db):
    """A directory whose parent lives in another store must be rejected."""
    # declaring directory as belonging to store 2, but parent's store is 1
    # (id, name, store, parent, frozen)
    cross_store_row = (None, '.', 2, 1, False)
    insert_sql = 'INSERT INTO store_directory VALUES (?, ?, ?, ?, ?)'

    with pytest.raises(sqlite3.IntegrityError):
        db.execute(insert_sql, cross_store_row)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def insert_store_file(db, insert_store_directory):
    """Insert two store_file rows before every test.

    'example.csv' in directory 1 and 'plots.png' in directory 2, both marked
    frozen and with no source datum. ``insert_store_directory`` is requested
    explicitly so the referenced directory rows exist first. Columns are named
    so the statement does not depend on schema column order.
    """
    db.executemany(
        'INSERT INTO store_file '
        '(directory, filename, frozen, unfrozen_perms, source_datum) '
        'VALUES (?, ?, ?, ?, ?)',
        [
            # (directory, filename, frozen, unfrozen_perms, source_datum)
            (1, 'example.csv', 1, '-rw-rw-r--', None),
            (2, 'plots.png', 1, '-rw-r--r--', None),
        ],
    )
|
||||
|
||||
assert schema_version == db_schema_ver
|
||||
|
||||
19
tests/test_store.py
Normal file
19
tests/test_store.py
Normal file
@ -0,0 +1,19 @@
|
||||
import pytest
|
||||
|
||||
@pytest.fixture
def store():
    """Yield the object returned by ``nancy.store.Store.init()``."""
    from nancy import store as store_module

    yield store_module.Store.init()
|
||||
|
||||
|
||||
|
||||
def test_schema_version_match(store):
    """The store database's ``user_version`` must equal nancy's ``schema_version``."""
    from nancy.version import schema_version

    version_row = store.conn.cursor().execute("PRAGMA user_version;").fetchone()
    (db_schema_ver,) = version_row

    assert schema_version == db_schema_ver
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user