Add indexes and rename a few tables

This commit is contained in:
Jacob Hinkle 2022-10-12 10:20:53 -04:00
parent cf926658ee
commit cea5d5abf8
3 changed files with 49 additions and 31 deletions

View File

@ -91,7 +91,7 @@ class FSEntryVersion:
self.uuid, self.uuid,
self.filedir.sha256.hex(), self.filedir.sha256.hex(),
datetime.now().timestamp(), datetime.now().timestamp(),
str(self.filetype), self.filetype.name,
False, False,
self.perms, self.perms,
self.symlink_target, self.symlink_target,
@ -316,7 +316,7 @@ class FSEntry:
@classmethod @classmethod
def from_db_key( def from_db_key(
cls: Type[_FSEntryT], cls: Type[_FSEntryT],
cursor: sqlite3.Cursor, cur: sqlite3.Cursor,
store: "Store", store: "Store",
root_key: Optional[str] = None, root_key: Optional[str] = None,
root_row: Optional[ root_row: Optional[
@ -327,11 +327,11 @@ class FSEntry:
"""Given key of an entry in filedir, recursively fill this object""" """Given key of an entry in filedir, recursively fill this object"""
if root_row is None: if root_row is None:
assert root_key is not None assert root_key is not None
cursor.execute( cur.execute(
"SELECT sha256, name, store FROM filedir WHERE sha256=?", "SELECT sha256, name, store FROM filedir WHERE sha256=?",
(root_key,), (root_key,),
) )
root_row = cursor.fetchone() root_row = cur.fetchone()
root_key, filename, store_key = root_row root_key, filename, store_key = root_row
assert store_key == store.uuid assert store_key == store.uuid
@ -348,21 +348,21 @@ class FSEntry:
store=store, store=store,
) )
cursor.execute( cur.execute(
"SELECT sha256, name, store FROM filedir WHERE parent=?", "SELECT sha256, name, store FROM filedir WHERE parent=?",
(root_key,), (root_key,),
) )
rows = cursor.fetchall() rows = cur.fetchall()
ob.children = [ ob.children = [
cls.from_db_key(cursor, root_row=r, parent=ob, store=store) for r in rows cls.from_db_key(cur=cur, root_row=r, parent=ob, store=store) for r in rows
] ]
# get all versions # get all versions
cursor.execute( cur.execute(
"SELECT * FROM filedir_version WHERE filedir=? ORDER BY recorded_time", "SELECT * FROM filedir_version WHERE filedir=? ORDER BY recorded_time",
(root_key,), (root_key,),
) )
matches = cursor.fetchall() matches = cur.fetchall()
ob.versions = [FSEntryVersion.from_row(row, filedir=ob) for row in matches] ob.versions = [FSEntryVersion.from_row(row, filedir=ob) for row in matches]
return ob return ob

View File

@ -60,7 +60,7 @@ CREATE TABLE user(
-- on Windows: see https://stackoverflow.com/questions/21766954/how-to-get-windows-users-full-name-in-python -- on Windows: see https://stackoverflow.com/questions/21766954/how-to-get-windows-users-full-name-in-python
machine TEXT NOT NULL REFERENCES machine ON UPDATE CASCADE machine TEXT NOT NULL REFERENCES machine ON UPDATE CASCADE
); );
CREATE INDEX FK_user_machine ON user (machine);
-- Stores and files (and directories) -- Stores and files (and directories)
-- These are the primary objects tracked by nancy. -- These are the primary objects tracked by nancy.
@ -93,6 +93,8 @@ CREATE TABLE filedir (
parent TEXT REFERENCES filedir ON UPDATE CASCADE, parent TEXT REFERENCES filedir ON UPDATE CASCADE,
UNIQUE(store, name, parent) UNIQUE(store, name, parent)
); );
CREATE INDEX FK_filedir_store ON filedir (store);
CREATE INDEX FK_filedir_parent ON filedir (parent);
-- Detect cross-store references -- Detect cross-store references
CREATE TRIGGER insert_filedir BEFORE INSERT ON filedir CREATE TRIGGER insert_filedir BEFORE INSERT ON filedir
BEGIN SELECT CASE BEGIN SELECT CASE
@ -140,6 +142,8 @@ CREATE TABLE filedir_version (
source_task TEXT REFERENCES task ON UPDATE CASCADE source_task TEXT REFERENCES task ON UPDATE CASCADE
); );
CREATE INDEX FK_filedir_version_filedir ON filedir_version (filedir);
CREATE INDEX FK_filedir_version_source_task ON filedir_version (source_task);
-- Disallow UPDATING filedir_version. Instead, a new version should be created. -- Disallow UPDATING filedir_version. Instead, a new version should be created.
-- One exception is during importing, in which case we can disable the trigger -- One exception is during importing, in which case we can disable the trigger
INSERT INTO triggers VALUES('update_filedir_version', TRUE); INSERT INTO triggers VALUES('update_filedir_version', TRUE);
@ -199,39 +203,42 @@ CREATE TABLE program (
environment TEXT NOT NULL REFERENCES environment ON UPDATE CASCADE, environment TEXT NOT NULL REFERENCES environment ON UPDATE CASCADE,
message TEXT NOT NULL -- user-defined message to help distinguish similar runs message TEXT NOT NULL -- user-defined message to help distinguish similar runs
); );
CREATE INDEX FK_program_environment ON program (environment);
-- We try to track all python packages that impact execution by traversing a -- We try to track all python packages that impact execution by traversing a
-- copy of sys.modules. This is done once before a "program" and once after in -- copy of sys.modules. This is done once before a "program" and once after in
-- case some calling code winds up calling a previously-unloaded module. -- case some calling code winds up calling a previously-unloaded module.
CREATE TABLE py_package ( CREATE TABLE package (
sha256 TEXT PRIMARY KEY NOT NULL, sha256 TEXT PRIMARY KEY NOT NULL,
name TEXT NOT NULL, name TEXT NOT NULL,
version TEXT, version TEXT,
UNIQUE (name, version) UNIQUE (name, version)
); );
-- A py_module describes any python module file containing decorated Functions. -- A module describes any python module file containing decorated Functions.
-- Modules are tracked since they impact the global scope of function calls. -- Modules are tracked since they impact the global scope of function calls.
CREATE TABLE py_module( CREATE TABLE module(
sha256 TEXT PRIMARY KEY NOT NULL, sha256 TEXT PRIMARY KEY NOT NULL,
name TEXT NOT NULL, name TEXT NOT NULL,
code TEXT, -- code doesn't have to be included, but should be used to create sha256 code TEXT, -- code doesn't have to be included, but should be used to create sha256
py_package TEXT REFERENCES py_package ON UPDATE CASCADE package TEXT REFERENCES package ON UPDATE CASCADE
); );
-- A py_function just describes a function, without reference to its arguments. CREATE INDEX FK_module_package ON module (package);
-- A func just describes a function, without reference to its arguments.
-- It can have inputs and outputs, which are described in the func_input and -- It can have inputs and outputs, which are described in the func_input and
-- func_output child tables. -- func_output child tables.
CREATE TABLE py_function( CREATE TABLE func(
sha256 TEXT PRIMARY KEY NOT NULL, sha256 TEXT PRIMARY KEY NOT NULL,
name TEXT NOT NULL, name TEXT NOT NULL,
py_module TEXT NOT NULL REFERENCES py_module ON UPDATE CASCADE module TEXT NOT NULL REFERENCES module ON UPDATE CASCADE
); );
CREATE TABLE py_function_input( CREATE INDEX FK_func_module ON func (module);
CREATE TABLE func_input(
uuid TEXT PRIMARY KEY NOT NULL, uuid TEXT PRIMARY KEY NOT NULL,
name TEXT NOT NULL, name TEXT NOT NULL,
typename TEXT NOT NULL, typename TEXT NOT NULL,
py_function TEXT NOT NULL REFERENCES py_function ON UPDATE CASCADE, func TEXT NOT NULL REFERENCES func ON UPDATE CASCADE,
position INTEGER, position INTEGER,
posonly BOOL, posonly BOOL,
@ -239,15 +246,17 @@ CREATE TABLE py_function_input(
description TEXT description TEXT
); );
CREATE TABLE py_function_output( CREATE INDEX FK_func_input_func ON func_input (func);
CREATE TABLE func_output(
uuid TEXT PRIMARY KEY NOT NULL, uuid TEXT PRIMARY KEY NOT NULL,
name TEXT, name TEXT,
typename TEXT NOT NULL, typename TEXT NOT NULL,
py_function TEXT NOT NULL REFERENCES py_function ON UPDATE CASCADE, func TEXT NOT NULL REFERENCES func ON UPDATE CASCADE,
position INTEGER NOT NULL, position INTEGER NOT NULL,
description TEXT description TEXT
); );
CREATE INDEX FK_func_output_func ON func_output (func);
-- Tasks are usually executed calls to Functions: they correspond to a -- Tasks are usually executed calls to Functions: they correspond to a
@ -258,9 +267,11 @@ CREATE TABLE task(
uuid TEXT PRIMARY KEY NOT NULL, uuid TEXT PRIMARY KEY NOT NULL,
program TEXT NOT NULL REFERENCES program ON UPDATE CASCADE, program TEXT NOT NULL REFERENCES program ON UPDATE CASCADE,
-- py_function is NULL for some built-in functionality like "RECORD" programs -- func is NULL for some built-in functionality like "RECORD" programs
py_function TEXT REFERENCES py_function ON UPDATE CASCADE func TEXT REFERENCES func ON UPDATE CASCADE
); );
CREATE INDEX FK_task_program ON task (program);
CREATE INDEX FK_task_func ON task (func);
-- A datum is an object that is computed as the output of a task, given as a -- A datum is an object that is computed as the output of a task, given as a
-- literal value in a config file, or loaded from a file. -- literal value in a config file, or loaded from a file.
CREATE TABLE datum( CREATE TABLE datum(
@ -280,12 +291,14 @@ CREATE TABLE datum(
typename TEXT NOT NULL -- string representation of the data type typename TEXT NOT NULL -- string representation of the data type
); );
CREATE INDEX FK_datum_task ON datum (task);
CREATE INDEX FK_datum_task_output ON datum (task_output);
-- A task_input records the version of a Datum that is passed to a function -- A task_input records the version of a Datum that is passed to a function
CREATE TABLE task_input( CREATE TABLE task_input(
uuid TEXT PRIMARY KEY NOT NULL, uuid TEXT PRIMARY KEY NOT NULL,
task TEXT NOT NULL REFERENCES task ON UPDATE CASCADE, task TEXT NOT NULL REFERENCES task ON UPDATE CASCADE,
-- if this was a python function, reference which input -- if this was a python function, reference which input
py_function_input TEXT REFERENCES py_function_input ON UPDATE CASCADE, func_input TEXT REFERENCES func_input ON UPDATE CASCADE,
datum TEXT NOT NULL REFERENCES datum ON UPDATE CASCADE, datum TEXT NOT NULL REFERENCES datum ON UPDATE CASCADE,
-- Data have versions to facilitate tracking non-const operations. If a datum -- Data have versions to facilitate tracking non-const operations. If a datum
@ -293,3 +306,5 @@ CREATE TABLE task_input(
-- version -- version
datum_version INTEGER NOT NULL datum_version INTEGER NOT NULL
); );
CREATE INDEX FK_task_input_func_input ON task_input (func_input);
CREATE INDEX FK_task_input_datum ON task_input (datum);

View File

@ -113,7 +113,10 @@ class Store:
if cur is None: if cur is None:
assert self.conn is not None assert self.conn is not None
cur = self.conn.cursor() cur = self.conn.cursor()
cur.execute("SELECT sha256 FROM filedir WHERE store=1 AND parent is NULL") cur.execute(
"SELECT sha256 FROM filedir WHERE store=? AND parent is NULL",
(self.uuid,),
)
row = cur.fetchone() row = cur.fetchone()
if row is None: if row is None:
return None return None
@ -154,18 +157,18 @@ class Store:
fd_key, filetype = row fd_key, filetype = row
return fs.FSEntry.from_db_key(cur, root_key=fd_key, store=self) return fs.FSEntry.from_db_key(cur=cur, root_key=fd_key, store=self)
def fs_entries(self, shallow: bool = False) -> Optional[fs.FSEntry]: def fs_entries(self, shallow: bool = False) -> Optional[fs.FSEntry]:
"""Return recursive structure containing FSEntry objects from db""" """Return recursive structure containing FSEntry objects from db"""
root_key = self.filedir_root_key() assert self.conn is not None
cur = self.conn.cursor()
root_key = self.filedir_root_key(cur=cur)
logger.debug("root_key: {}", root_key)
if root_key is None: if root_key is None:
return None return None
else: else:
assert self.conn is not None return fs.FSEntry.from_db_key(cur=cur, root_key=root_key, store=self)
return fs.FSEntry.from_db_key(
self.conn.cursor(), root_key=root_key, store=self
)
def new_program(self, name: str, message: str) -> program.Program: def new_program(self, name: str, message: str) -> program.Program:
p = program.Program(self, name, message) p = program.Program(self, name, message)