From cea5d5abf8ae84c7f21257cbcc2eabe47830e9bc Mon Sep 17 00:00:00 2001 From: Jacob Hinkle Date: Wed, 12 Oct 2022 10:20:53 -0400 Subject: [PATCH] Add indexes and rename a few tables --- src/nancy/fs.py | 18 +++++++------- src/nancy/schema/version0.sql | 45 +++++++++++++++++++++++------------ src/nancy/store.py | 17 +++++++------ 3 files changed, 49 insertions(+), 31 deletions(-) diff --git a/src/nancy/fs.py b/src/nancy/fs.py index ea70e61..ceb431c 100644 --- a/src/nancy/fs.py +++ b/src/nancy/fs.py @@ -91,7 +91,7 @@ class FSEntryVersion: self.uuid, self.filedir.sha256.hex(), datetime.now().timestamp(), - str(self.filetype), + self.filetype.name, False, self.perms, self.symlink_target, @@ -316,7 +316,7 @@ class FSEntry: @classmethod def from_db_key( cls: Type[_FSEntryT], - cursor: sqlite3.Cursor, + cur: sqlite3.Cursor, store: "Store", root_key: Optional[str] = None, root_row: Optional[ @@ -327,11 +327,11 @@ class FSEntry: """Given key of an entry in filedir, recursively fill this object""" if root_row is None: assert root_key is not None - cursor.execute( + cur.execute( "SELECT sha256, name, store FROM filedir WHERE sha256=?", (root_key,), ) - root_row = cursor.fetchone() + root_row = cur.fetchone() root_key, filename, store_key = root_row assert store_key == store.uuid @@ -348,21 +348,21 @@ class FSEntry: store=store, ) - cursor.execute( + cur.execute( "SELECT sha256, name, store FROM filedir WHERE parent=?", (root_key,), ) - rows = cursor.fetchall() + rows = cur.fetchall() ob.children = [ - cls.from_db_key(cursor, root_row=r, parent=ob, store=store) for r in rows + cls.from_db_key(cur=cur, root_row=r, parent=ob, store=store) for r in rows ] # get all versions - cursor.execute( + cur.execute( "SELECT * FROM filedir_version WHERE filedir=? ORDER BY recorded_time", (root_key,), ) - matches = cursor.fetchall() + matches = cur.fetchall() ob.versions = [FSEntryVersion.from_row(row, filedir=ob) for row in matches] return ob diff --git a/src/nancy/schema/version0.sql b/src/nancy/schema/version0.sql index da0e5f5..743e0f7 100644 --- a/src/nancy/schema/version0.sql +++ b/src/nancy/schema/version0.sql @@ -60,7 +60,7 @@ CREATE TABLE user( -- on Windows: see https://stackoverflow.com/questions/21766954/how-to-get-windows-users-full-name-in-python machine TEXT NOT NULL REFERENCES machine ON UPDATE CASCADE ); - +CREATE INDEX FK_user_machine ON user (machine); -- Stores and files (and directories) -- These are the primary objects tracked by nancy. @@ -93,6 +93,8 @@ CREATE TABLE filedir ( parent TEXT REFERENCES filedir ON UPDATE CASCADE, UNIQUE(store, name, parent) ); +CREATE INDEX FK_filedir_store ON filedir (store); +CREATE INDEX FK_filedir_parent ON filedir (parent); -- Detect cross-store references CREATE TRIGGER insert_filedir BEFORE INSERT ON filedir BEGIN SELECT CASE @@ -140,6 +142,8 @@ CREATE TABLE filedir_version ( source_task TEXT REFERENCES task ON UPDATE CASCADE ); +CREATE INDEX FK_filedir_version_filedir ON filedir_version (filedir); +CREATE INDEX FK_filedir_version_source_task ON filedir_version (source_task); -- Disallow UPDATING filedir_version. Instead, new version should be created. -- One exception is during importing, in which case we can disable the trigger INSERT INTO triggers VALUES('update_filedir_version', TRUE); @@ -199,39 +203,42 @@ CREATE TABLE program ( environment TEXT NOT NULL REFERENCES environment ON UPDATE CASCADE, message TEXT NOT NULL -- user-defined message to help distinguish similar runs ); +CREATE INDEX FK_program_environment ON program (environment); -- We try to track all python packages that impact execution by traversing a -- copy of sys.modules. This is done once before a "program" and once after in -- case some calling code winds up calling a previously-unloaded module. -CREATE TABLE py_package ( +CREATE TABLE package ( sha256 TEXT PRIMARY KEY NOT NULL, name TEXT NOT NULL, version TEXT, UNIQUE (name, version) ); --- A py_module describes any python module file containing decorated Functions. +-- A module describes any python module file containing decorated Functions. -- Modules are tracked since they impact the global scope of function calls. -CREATE TABLE py_module( +CREATE TABLE module( sha256 TEXT PRIMARY KEY NOT NULL, name TEXT NOT NULL, code TEXT, -- code doesn't have to be included, but should be used to create sha256 - py_package TEXT REFERENCES py_package ON UPDATE CASCADE + package TEXT REFERENCES package ON UPDATE CASCADE ); --- A py_function just describes a function, without reference to its arguments. +CREATE INDEX FK_module_package ON module (package); +-- A func just describes a function, without reference to its arguments. -- It can have inputs and outputs, which are described in the func_inputs and -- func_outputs children tables. -CREATE TABLE py_function( +CREATE TABLE func( sha256 TEXT PRIMARY KEY NOT NULL, name TEXT NOT NULL, - py_module TEXT NOT NULL REFERENCES py_module ON UPDATE CASCADE + module TEXT NOT NULL REFERENCES module ON UPDATE CASCADE ); -CREATE TABLE py_function_input( +CREATE INDEX FK_func_module ON func (module); +CREATE TABLE func_input( uuid TEXT PRIMARY KEY NOT NULL, name TEXT NOT NULL, typename TEXT NOT NULL, - py_function TEXT NOT NULL REFERENCES py_function ON UPDATE CASCADE, + func TEXT NOT NULL REFERENCES func ON UPDATE CASCADE, position INTEGER, posonly BOOL, @@ -239,15 +246,17 @@ CREATE TABLE py_function_input( description TEXT ); -CREATE TABLE py_function_output( +CREATE INDEX FK_func_input_func ON func_input (func); +CREATE TABLE func_output( uuid TEXT PRIMARY KEY NOT NULL, name TEXT, typename TEXT NOT NULL, - py_function TEXT NOT NULL REFERENCES py_function ON UPDATE CASCADE, + func TEXT NOT NULL REFERENCES func ON UPDATE CASCADE, position INTEGER NOT NULL, description TEXT ); +CREATE INDEX FK_func_output_func ON func_output (func); -- Tasks are usually executed calls to Functions: they correspond to a @@ -258,9 +267,11 @@ CREATE TABLE task( uuid TEXT PRIMARY KEY NOT NULL, program TEXT NOT NULL REFERENCES program ON UPDATE CASCADE, - -- py_function is NULL for some built-in functionality like "RECORD" programs - py_function TEXT REFERENCES py_function ON UPDATE CASCADE + -- func is NULL for some built-in functionality like "RECORD" programs + func TEXT REFERENCES func ON UPDATE CASCADE ); +CREATE INDEX FK_task_program ON task (program); +CREATE INDEX FK_task_func ON task (func); -- A datum is an object that is computed as the output of a task, given as a -- literal value in a config file, or loaded from a file. CREATE TABLE datum( @@ -280,12 +291,14 @@ CREATE TABLE datum( typename TEXT NOT NULL -- string representation of the data type ); +CREATE INDEX FK_datum_task ON datum (task); +CREATE INDEX FK_datum_task_output ON datum (task_output); -- A task_input records the version of a Datum that is passed to a function CREATE TABLE task_input( uuid TEXT PRIMARY KEY NOT NULL, task TEXT NOT NULL REFERENCES task ON UPDATE CASCADE, -- if this was a python function, reference which input - py_function_input TEXT REFERENCES py_function_input ON UPDATE CASCADE, + func_input TEXT REFERENCES func_input ON UPDATE CASCADE, datum TEXT NOT NULL REFERENCES datum ON UPDATE CASCADE, -- Data have versions to facilitate tracking non-const operations. If a datum @@ -293,3 +306,5 @@ CREATE TABLE task_input( -- version datum_version INTEGER NOT NULL ); +CREATE INDEX FK_task_input_func_input ON task_input (func_input); +CREATE INDEX FK_task_input_datum ON task_input (datum); diff --git a/src/nancy/store.py b/src/nancy/store.py index 89515c4..b67bc44 100644 --- a/src/nancy/store.py +++ b/src/nancy/store.py @@ -113,7 +113,10 @@ class Store: if cur is None: assert self.conn is not None cur = self.conn.cursor() - cur.execute("SELECT sha256 FROM filedir WHERE store=1 AND parent is NULL") + cur.execute( + "SELECT sha256 FROM filedir WHERE store=? AND parent is NULL", + (self.uuid,), + ) row = cur.fetchone() if row is None: return None @@ -154,18 +157,18 @@ class Store: fd_key, filetype = row - return fs.FSEntry.from_db_key(cur, root_key=fd_key, store=self) + return fs.FSEntry.from_db_key(cur=cur, root_key=fd_key, store=self) def fs_entries(self, shallow: bool = False) -> Optional[fs.FSEntry]: """Return recursive structure containing FSEntry objects from db""" - root_key = self.filedir_root_key() + assert self.conn is not None + cur = self.conn.cursor() + root_key = self.filedir_root_key(cur=cur) + logger.debug("root_key: {}", root_key) if root_key is None: return None else: - assert self.conn is not None - return fs.FSEntry.from_db_key( - self.conn.cursor(), root_key=root_key, store=self - ) + return fs.FSEntry.from_db_key(cur=cur, root_key=root_key, store=self) def new_program(self, name: str, message: str) -> program.Program: p = program.Program(self, name, message)