Add indexes and rename a few tables

This commit is contained in:
Jacob Hinkle 2022-10-12 10:20:53 -04:00
parent cf926658ee
commit cea5d5abf8
3 changed files with 49 additions and 31 deletions

View File

@ -91,7 +91,7 @@ class FSEntryVersion:
self.uuid,
self.filedir.sha256.hex(),
datetime.now().timestamp(),
str(self.filetype),
self.filetype.name,
False,
self.perms,
self.symlink_target,
@ -316,7 +316,7 @@ class FSEntry:
@classmethod
def from_db_key(
cls: Type[_FSEntryT],
cursor: sqlite3.Cursor,
cur: sqlite3.Cursor,
store: "Store",
root_key: Optional[str] = None,
root_row: Optional[
@ -327,11 +327,11 @@ class FSEntry:
"""Given key of an entry in filedir, recursively fill this object"""
if root_row is None:
assert root_key is not None
cursor.execute(
cur.execute(
"SELECT sha256, name, store FROM filedir WHERE sha256=?",
(root_key,),
)
root_row = cursor.fetchone()
root_row = cur.fetchone()
root_key, filename, store_key = root_row
assert store_key == store.uuid
@ -348,21 +348,21 @@ class FSEntry:
store=store,
)
cursor.execute(
cur.execute(
"SELECT sha256, name, store FROM filedir WHERE parent=?",
(root_key,),
)
rows = cursor.fetchall()
rows = cur.fetchall()
ob.children = [
cls.from_db_key(cursor, root_row=r, parent=ob, store=store) for r in rows
cls.from_db_key(cur=cur, root_row=r, parent=ob, store=store) for r in rows
]
# get all versions
cursor.execute(
cur.execute(
"SELECT * FROM filedir_version WHERE filedir=? ORDER BY recorded_time",
(root_key,),
)
matches = cursor.fetchall()
matches = cur.fetchall()
ob.versions = [FSEntryVersion.from_row(row, filedir=ob) for row in matches]
return ob

View File

@ -60,7 +60,7 @@ CREATE TABLE user(
-- on Windows: see https://stackoverflow.com/questions/21766954/how-to-get-windows-users-full-name-in-python
machine TEXT NOT NULL REFERENCES machine ON UPDATE CASCADE
);
CREATE INDEX FK_user_machine ON user (machine);
-- Stores and files (and directories)
-- These are the primary objects tracked by nancy.
@ -93,6 +93,8 @@ CREATE TABLE filedir (
parent TEXT REFERENCES filedir ON UPDATE CASCADE,
UNIQUE(store, name, parent)
);
CREATE INDEX FK_filedir_store ON filedir (store);
CREATE INDEX FK_filedir_parent ON filedir (parent);
-- Detect cross-store references
CREATE TRIGGER insert_filedir BEFORE INSERT ON filedir
BEGIN SELECT CASE
@ -140,6 +142,8 @@ CREATE TABLE filedir_version (
source_task TEXT REFERENCES task ON UPDATE CASCADE
);
CREATE INDEX FK_filedir_version_filedir ON filedir_version (filedir);
CREATE INDEX FK_filedir_version_source_task ON filedir_version (source_task);
-- Disallow UPDATING filedir_version. Instead, a new version should be created.
-- One exception is during importing, in which case we can disable the trigger.
INSERT INTO triggers VALUES('update_filedir_version', TRUE);
@ -199,39 +203,42 @@ CREATE TABLE program (
environment TEXT NOT NULL REFERENCES environment ON UPDATE CASCADE,
message TEXT NOT NULL -- user-defined message to help distinguish similar runs
);
CREATE INDEX FK_program_environment ON program (environment);
-- We try to track all python packages that impact execution by traversing a
-- copy of sys.modules. This is done once before a "program" and once after in
-- case some calling code winds up calling a previously-unloaded module.
CREATE TABLE py_package (
CREATE TABLE package (
sha256 TEXT PRIMARY KEY NOT NULL,
name TEXT NOT NULL,
version TEXT,
UNIQUE (name, version)
);
-- A py_module describes any python module file containing decorated Functions.
-- A module describes any python module file containing decorated Functions.
-- Modules are tracked since they impact the global scope of function calls.
CREATE TABLE py_module(
CREATE TABLE module(
sha256 TEXT PRIMARY KEY NOT NULL,
name TEXT NOT NULL,
code TEXT, -- code doesn't have to be included, but should be used to create sha256
py_package TEXT REFERENCES py_package ON UPDATE CASCADE
package TEXT REFERENCES package ON UPDATE CASCADE
);
-- A py_function just describes a function, without reference to its arguments.
CREATE INDEX FK_module_package ON module (package);
-- A func just describes a function, without reference to its arguments.
-- It can have inputs and outputs, which are described in the func_input and
-- func_output child tables.
CREATE TABLE py_function(
CREATE TABLE func(
sha256 TEXT PRIMARY KEY NOT NULL,
name TEXT NOT NULL,
py_module TEXT NOT NULL REFERENCES py_module ON UPDATE CASCADE
module TEXT NOT NULL REFERENCES module ON UPDATE CASCADE
);
CREATE TABLE py_function_input(
CREATE INDEX FK_func_module ON func (module);
CREATE TABLE func_input(
uuid TEXT PRIMARY KEY NOT NULL,
name TEXT NOT NULL,
typename TEXT NOT NULL,
py_function TEXT NOT NULL REFERENCES py_function ON UPDATE CASCADE,
func TEXT NOT NULL REFERENCES func ON UPDATE CASCADE,
position INTEGER,
posonly BOOL,
@ -239,15 +246,17 @@ CREATE TABLE py_function_input(
description TEXT
);
CREATE TABLE py_function_output(
CREATE INDEX FK_func_input_func ON func_input (func);
CREATE TABLE func_output(
uuid TEXT PRIMARY KEY NOT NULL,
name TEXT,
typename TEXT NOT NULL,
py_function TEXT NOT NULL REFERENCES py_function ON UPDATE CASCADE,
func TEXT NOT NULL REFERENCES func ON UPDATE CASCADE,
position INTEGER NOT NULL,
description TEXT
);
CREATE INDEX FK_func_output_func ON func_output (func);
-- Tasks are usually executed calls to Functions: they correspond to a
@ -258,9 +267,11 @@ CREATE TABLE task(
uuid TEXT PRIMARY KEY NOT NULL,
program TEXT NOT NULL REFERENCES program ON UPDATE CASCADE,
-- py_function is NULL for some built-in functionality like "RECORD" programs
py_function TEXT REFERENCES py_function ON UPDATE CASCADE
-- func is NULL for some built-in functionality like "RECORD" programs
func TEXT REFERENCES func ON UPDATE CASCADE
);
CREATE INDEX FK_task_program ON task (program);
CREATE INDEX FK_task_func ON task (func);
-- A datum is an object that is computed as the output of a task, given as a
-- literal value in a config file, or loaded from a file.
CREATE TABLE datum(
@ -280,12 +291,14 @@ CREATE TABLE datum(
typename TEXT NOT NULL -- string representation of the data type
);
CREATE INDEX FK_datum_task ON datum (task);
CREATE INDEX FK_datum_task_output ON datum (task_output);
-- A task_input records the version of a Datum that is passed to a function
CREATE TABLE task_input(
uuid TEXT PRIMARY KEY NOT NULL,
task TEXT NOT NULL REFERENCES task ON UPDATE CASCADE,
-- if this was a python function, reference which input
py_function_input TEXT REFERENCES py_function_input ON UPDATE CASCADE,
func_input TEXT REFERENCES func_input ON UPDATE CASCADE,
datum TEXT NOT NULL REFERENCES datum ON UPDATE CASCADE,
-- Data have versions to facilitate tracking non-const operations. If a datum
@ -293,3 +306,5 @@ CREATE TABLE task_input(
-- version
datum_version INTEGER NOT NULL
);
CREATE INDEX FK_task_input_func_input ON task_input (func_input);
CREATE INDEX FK_task_input_datum ON task_input (datum);

View File

@ -113,7 +113,10 @@ class Store:
if cur is None:
assert self.conn is not None
cur = self.conn.cursor()
cur.execute("SELECT sha256 FROM filedir WHERE store=1 AND parent is NULL")
cur.execute(
"SELECT sha256 FROM filedir WHERE store=? AND parent is NULL",
(self.uuid,),
)
row = cur.fetchone()
if row is None:
return None
@ -154,18 +157,18 @@ class Store:
fd_key, filetype = row
return fs.FSEntry.from_db_key(cur, root_key=fd_key, store=self)
return fs.FSEntry.from_db_key(cur=cur, root_key=fd_key, store=self)
def fs_entries(self, shallow: bool = False) -> Optional[fs.FSEntry]:
"""Return recursive structure containing FSEntry objects from db"""
root_key = self.filedir_root_key()
assert self.conn is not None
cur = self.conn.cursor()
root_key = self.filedir_root_key(cur=cur)
logger.debug("root_key: {}", root_key)
if root_key is None:
return None
else:
assert self.conn is not None
return fs.FSEntry.from_db_key(
self.conn.cursor(), root_key=root_key, store=self
)
return fs.FSEntry.from_db_key(cur=cur, root_key=root_key, store=self)
def new_program(self, name: str, message: str) -> program.Program:
p = program.Program(self, name, message)