Add Program, Environment, User, Machine, with auto insertion

Also removed freeze and thaw, and converted to record().
This commit is contained in:
Jacob Hinkle 2022-09-21 20:51:57 -04:00
parent d0f113645b
commit f18368f1e0
10 changed files with 369 additions and 98 deletions

View File

@ -2,7 +2,8 @@ import click
from ..version import __version__ from ..version import __version__
from .freeze import freeze, thaw #from .freeze import freeze, thaw
from .record import record
# from https://click.palletsprojects.com/en/5.x/advanced/ # from https://click.palletsprojects.com/en/5.x/advanced/
@ -33,6 +34,7 @@ def main():
""" """
pass pass
main.add_command(freeze) #main.add_command(freeze)
main.add_command(thaw) #main.add_command(thaw)
main.add_command(record)
main.add_command(version) main.add_command(version)

29
src/nancy/cli/record.py Normal file
View File

@ -0,0 +1,29 @@
import click
from .. import store
import os
@click.command()
@click.argument("directory", default='.')
def record(directory):
    """
    Initialize tracking or record changes to a tracked directory.

    If DIRECTORY is not already part of an existing nancy store, then a new
    'nancy.db' file is created in that directory. On the other hand, if the
    directory is part of an existing store, it will be updated and versions
    of any files changed since the last recording will be incremented.
    """
    if not os.path.isdir(directory):
        # Report bad input as a click usage error so the user sees a short
        # message and a non-zero exit status rather than a Python traceback.
        raise click.BadParameter(
            f"Cannot record non-existent directory {directory}",
            param_hint="DIRECTORY",
        )

    existing_store = store.find_store(directory)
    if existing_store is None:  # this is a new store
        s = store.Store.init(directory)
    else:  # this is an existing store
        # NOTE(review): the value returned by find_store() is only tested
        # against None; the store is reopened from DIRECTORY itself. Confirm
        # that is intended when DIRECTORY lies below the store's root.
        s = store.Store(directory)
    s.record()

View File

@ -3,10 +3,22 @@ import os
import sqlite3 import sqlite3
# This matches the recorded user_version in any nancy.db initialized in this # This will match the user_version in any nancy.db initialized by this process
# process.
schema_version = 0 schema_version = 0
# SQLite features we rely on:
#   UPSERT (3.24.0, 2018-06-04)
import warnings  # imported here because this check is the only user

min_sqlite_version = (3, 24, 0)
sqlite_version = sqlite3.sqlite_version_info
sqlite_verstr = '.'.join(str(v) for v in sqlite_version)
min_sqlite_verstr = '.'.join(str(v) for v in min_sqlite_version)

# Compare the versions as whole tuples. Testing each component on its own
# would report e.g. 4.0.0 as too old because 0 < 24.
if sqlite_version < min_sqlite_version:
    warnings.warn(f"Minimum sqlite version is {min_sqlite_verstr}. Found {sqlite_verstr}")
def init_schema(cur): def init_schema(cur):
"""Initialize a database following the current schema.""" """Initialize a database following the current schema."""

73
src/nancy/environment.py Normal file
View File

@ -0,0 +1,73 @@
from . import user
from typing import NamedTuple
import json
import os
import platform
import sys
import time
class Environment(NamedTuple):
    """One row of the ``environment`` table: env vars, interpreter and user.

    The field order matches the positional ``?`` placeholders used by
    ``find_or_insert``.
    """

    # Row id in the `environment` table; None until find_or_insert() sets it.
    id: int
    envvars_json: str  # json.dumps(dict(os.environ))
    python_implementation: str  # platform.python_implementation()
    python_strversion: str  # sys.version
    python_hexversion: int  # sys.hexversion
    # Annotated as user.User, but detect() and find_or_insert() store the id
    # of the user's row here (None before the database lookup).
    user: user.User

    @classmethod
    def find_or_insert(cls, cur, env=None):
        """Given a DB cursor, find or create row in environment table and fill.

        Args:
            cur: Database cursor; its connection is committed after an insert.
            env: Environment to look up. Detected from the running process
                when omitted.

        Returns:
            A copy of the environment whose ``id`` is the id of the matching
            (or newly inserted) row and whose ``user`` is the id of the
            current user's row.
        """
        if env is None:
            env = cls.detect()

        # NOTE(review): the reviewed text had lost its indentation; the user
        # lookup is assumed to run on every call, not only when env is None.
        u = user.User.find_or_insert(cur)
        env = env._replace(user=u.id)

        # `IS` behaves like `=` but also matches NULL. With `=` a row holding
        # a NULL column is never found again and would be re-inserted on every
        # call (the UNIQUE constraint treats NULLs as distinct).
        cur.execute(
            '''
            SELECT
                id
            FROM
                environment
            WHERE
                envvars_json IS ? AND
                python_implementation IS ? AND
                python_strversion IS ? AND
                python_hexversion IS ? AND
                user IS ?
            LIMIT 1
            ''',
            env[1:],
        )
        found = cur.fetchone()
        if found is None:
            # The leading None lets SQLite assign the INTEGER PRIMARY KEY.
            cur.execute(
                '''
                INSERT INTO environment VALUES (?,?,?,?,?,?);
                ''',
                env,
            )
            row_id = cur.lastrowid
            cur.connection.commit()
        else:
            row_id = found[0]
        return env._replace(id=row_id)

    @classmethod
    def detect(cls):
        """Detect values for environment independent of the database.

        Note that the user entry will not have a valid id.
        """
        u = user.User.detect()
        return cls(
            None,
            json.dumps(dict(os.environ)),
            platform.python_implementation(),
            sys.version,
            sys.hexversion,
            u.id,
        )

View File

@ -47,18 +47,18 @@ def remove_write_perms(path):
def make_readonly_recursive(path, excluded=()):
    """Recursively "freeze" a directory by setting all files and directories read-only.

    Args:
        path: Directory whose contents are walked.
        excluded: Paths to leave untouched, spelled the way the walk produces
            them (``os.path.join(root, name)``). A single string is treated
            as one path rather than as a collection of characters.
    """
    if isinstance(excluded, str):
        # `p in "some/path"` would be a substring test, not a membership test.
        excluded = (excluded,)

    # traversing bottom-up makes it easier to freeze perms on directories
    for root, dirs, files in os.walk(path, topdown=False):
        # Files first, then directories, for each visited directory.
        for name in (*files, *dirs):
            p = os.path.join(root, name)
            if p in excluded:
                continue
            # `root` already starts with `path`; joining `path` on again would
            # point at e.g. "data/data/x" when `path` is the relative "data".
            remove_write_perms(p)
@dataclass @dataclass

View File

@ -1,24 +1,69 @@
from collections import namedtuple from typing import NamedTuple
import json import json
import platform import platform
import time import time
MachineInfo = namedtuple( class Machine(NamedTuple):
'MachineInfo', id: int
[ machine_id: str
'machine_id', hostname: str
'hostname', processor: str
'processor', system: str
'system', release: str
'release', cpu_type: str
'cpu_type', timezone: str
'timezone', freedesktop_os_release: str
'freedesktop_os_release', win32_ver: str
'win32_ver', mac_ver: str
'mac_ver',
], @classmethod
def find_or_insert(cls, cur, machine=None):
"""Given a DB cursor, find or create row in machine table and fill"""
if machine is None:
machine = cls.detect()
# insert or ignore, handle each case to set id
cur.execute('''
SELECT
id
FROM
machine
WHERE
machine_id = ? AND
hostname = ? AND
processor = ? AND
system = ? AND
release = ? AND
cpu_type = ? AND
timezone = ? AND
freedesktop_os_release = ? AND
win32_ver = ? AND
mac_ver = ?
LIMIT 1
''',
machine[1:]
) )
def get_machine_info(): res = cur.fetchone()
if res is None:
cur.execute('''
INSERT INTO machine VALUES (?,?,?,?,?,?,?,?,?,?,?);
''',
machine,
)
id = cur.lastrowid
cur.connection.commit()
else:
id = res[0]
return machine._replace(id=id)
@classmethod
def detect(cls):
"""Formats machine-specific information into a MachineInfo object.
Note that 'MachineInfo' objects are properly formatted to be inserted into
the `machine` table.
"""
fdor = '' fdor = ''
try: try:
fdor = json.dumps(platform.freedesktop_os_release()) fdor = json.dumps(platform.freedesktop_os_release())
@ -34,7 +79,8 @@ def get_machine_info():
except FileNotFoundError: except FileNotFoundError:
pass pass
return MachineInfo( return cls(
None,
machine_id=mid, machine_id=mid,
hostname=platform.node(), hostname=platform.node(),
processor=platform.processor(), processor=platform.processor(),
@ -47,4 +93,3 @@ def get_machine_info():
mac_ver=json.dumps(platform.mac_ver()), mac_ver=json.dumps(platform.mac_ver()),
) )

View File

@ -35,7 +35,11 @@ CREATE TABLE machine(id INTEGER PRIMARY KEY NOT NULL,
-- 'VERSION': '22.05 (Quokka)', 'VERSION_CODENAME': 'quokka', 'VERSION_ID': -- 'VERSION': '22.05 (Quokka)', 'VERSION_CODENAME': 'quokka', 'VERSION_ID':
-- '22.05'}" -- '22.05'}"
win32_ver TEXT, -- platform.win32_ver() as JSON win32_ver TEXT, -- platform.win32_ver() as JSON
mac_ver TEXT -- platform.mac_ver() as JSON mac_ver TEXT, -- platform.mac_ver() as JSON
-- disallow duplicate rows
UNIQUE(hostname, processor, system, release, cpu_type, timezone,
freedesktop_os_release, win32_ver, mac_ver)
); );
-- Programs are run by users on machines -- Programs are run by users on machines
CREATE TABLE user(id INTEGER PRIMARY KEY NOT NULL, CREATE TABLE user(id INTEGER PRIMARY KEY NOT NULL,
@ -45,7 +49,11 @@ CREATE TABLE user(id INTEGER PRIMARY KEY NOT NULL,
-- on Windows: see https://stackoverflow.com/questions/21766954/how-to-get-windows-users-full-name-in-python -- on Windows: see https://stackoverflow.com/questions/21766954/how-to-get-windows-users-full-name-in-python
machine INTEGER NOT NULL, machine INTEGER NOT NULL,
UNIQUE(userid, machine), -- usernames and fullnames change rarely, but it happens. A more restrictive
-- constraint would not enable tracking of this info. The following only
-- guarantees no duplicate full rows.
UNIQUE(username, userid, fullname, machine),
FOREIGN KEY (machine) REFERENCES machine (id) ON UPDATE CASCADE FOREIGN KEY (machine) REFERENCES machine (id) ON UPDATE CASCADE
); );
@ -118,6 +126,9 @@ CREATE TABLE environment (id INTEGER PRIMARY KEY NOT NULL,
python_hexversion INTEGER, -- sys.hexversion: 50923504 python_hexversion INTEGER, -- sys.hexversion: 50923504
user INTEGER, user INTEGER,
-- disallow duplicate rows
UNIQUE(envvars_json, python_implementation, python_strversion, python_hexversion, user),
FOREIGN KEY (user) REFERENCES user (id) ON UPDATE CASCADE FOREIGN KEY (user) REFERENCES user (id) ON UPDATE CASCADE
); );
@ -131,7 +142,7 @@ CREATE TABLE program (id INTEGER PRIMARY KEY NOT NULL,
name TEXT, -- name of the program, usually written lowercase by calling code e.g. cnn_crossval name TEXT, -- name of the program, usually written lowercase by calling code e.g. cnn_crossval
-- Names of built-in operations will be shown in upper case: e.g. 'FREEZE' -- Names of built-in operations will be shown in upper case: e.g. 'FREEZE'
-- we use POSIX timestamps in UTC for time recording. -- we use POSIX timestamps for time recording.
-- e.g. datetime.datetime.now().timestamp() -- e.g. datetime.datetime.now().timestamp()
start_time REAL, start_time REAL,
end_time REAL, end_time REAL,
@ -139,6 +150,7 @@ CREATE TABLE program (id INTEGER PRIMARY KEY NOT NULL,
process_id INTEGER, -- host PID of python process on host OS process_id INTEGER, -- host PID of python process on host OS
environment INTEGER NOT NULL, environment INTEGER NOT NULL,
message TEXT, -- user-defined message to help distinguish similar runs message TEXT, -- user-defined message to help distinguish similar runs
FOREIGN KEY (environment) REFERENCES environment (id) ON UPDATE CASCADE FOREIGN KEY (environment) REFERENCES environment (id) ON UPDATE CASCADE
); );
@ -166,7 +178,7 @@ CREATE TABLE py_module(id INTEGER PRIMARY KEY NOT NULL,
CREATE TABLE func(id INTEGER PRIMARY KEY NOT NULL, CREATE TABLE func(id INTEGER PRIMARY KEY NOT NULL,
name TEXT NOT NULL, name TEXT NOT NULL,
module INTEGER NOT NULL, module INTEGER NOT NULL,
-- we use POSIX timestamps in UTC for time recording. -- we use POSIX timestamps for time recording.
-- e.g. datetime.datetime.now().timestamp() -- e.g. datetime.datetime.now().timestamp()
start_time REAL, start_time REAL,
end_time REAL, end_time REAL,

View File

@ -1,14 +1,59 @@
"""Utilities for creating new stores and linking between them.""" """Utilities for creating new stores and linking between them."""
from . import db, fs, machine from . import db, environment, fs, machine
import datetime
import importlib import importlib
import json import json
import os import os
from pathlib import Path from pathlib import Path
import sqlite3 import sqlite3
from typing import Callable
class Program:
    """Context manager for one named program run recorded against a store.

    Entering inserts a row into the ``program`` table and notes the start
    time; a Program that has been exited cannot be entered again.
    """

    def __init__(self, store, name, message):
        self._evaluated = False
        self.message = message
        self.name = name
        self.store = store

    def set_start_time(self, t):
        """Override the start time, e.g. to include work done before entry."""
        self.start_time = t

    def __enter__(self):
        if self._evaluated:
            raise RuntimeError("Cannot re-enter a Program context")

        cursor = self.store.conn.cursor()
        env_row = environment.Environment.find_or_insert(cursor)

        # Values in `program` column order:
        #   id, name, start_time, end_time, process_id, environment, message
        # id is left to SQLite; both times are stored as NULL at this point.
        program_row = (
            None,
            self.name,
            None,
            None,
            os.getpid(),
            env_row.id,
            self.message,
        )
        cursor.execute('INSERT INTO program VALUES (?, ?, ?, ?, ?, ?, ?)', program_row)
        # NOTE(review): neither the new row's id nor a commit is kept here.

        self.set_start_time(datetime.datetime.now())
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        finished = datetime.datetime.now()
        # Not stored anywhere yet: writing start and end times to the store
        # is still to be done.
        elapsed = finished - self.start_time
        self._evaluated = True  # prevent re-running
class Store: class Store:
"""Describes a data directory, holds active connection to nancy.db""" """Describes a data directory, holds active connection to nancy.db"""
@ -45,7 +90,8 @@ class Store:
return self.conn return self.conn
@classmethod @classmethod
def init(cls, directory=None): def init(cls, directory=None, message=None):
start_time = datetime.datetime.now()
if directory is None: # initialize an in-memory store if directory is None: # initialize an in-memory store
db_path = ':memory:' db_path = ':memory:'
else: else:
@ -63,18 +109,12 @@ class Store:
db.init_schema(conn.cursor()) db.init_schema(conn.cursor())
new_store = cls(directory, conn) new_store = cls(directory, conn)
new_store.record_machine_description() with new_store.program('INIT', message) as p:
# set the timing to the actual times it took to initialize the db
p.set_start_time(start_time)
return new_store return new_store
def record_machine_description(self):
"""Record machine-specific information"""
cur = self.conn.cursor()
machine_info = machine.get_machine_info()
cur.execute('INSERT INTO machine VALUES(NULL,?,?,?,?,?,?,?,?,?,?)', machine_info)
self.machine_id = cur.lastrowid
self.conn.commit()
def make_readonly(self): def make_readonly(self):
"""Make store directory read-only (except for nancy.db) and return file list""" """Make store directory read-only (except for nancy.db) and return file list"""
fs.make_readonly_recursive(self.path, excluded='./nancy.db') fs.make_readonly_recursive(self.path, excluded='./nancy.db')
@ -90,26 +130,11 @@ class Store:
else: else:
return fs.FSEntry.from_db_index(cur, root_id=root_id) return fs.FSEntry.from_db_index(cur, root_id=root_id)
def program(self, name, message=None):
    """Return a Program context manager for `name`, bound to this store."""
    return Program(store=self, name=name, message=message)
def freeze(self): def record(self, message=None):
# Remove write permissions on all files except nancy.db with self.program('RECORD', message) as p:
# This also catalogs all files, directories, and symlinks
# Note that ./nancy.db is NOT listed in files and is not altered
dirs, files, symlinks, irreg = self.make_readonly()
if len(irreg) > 0:
raise Exception(
"Refusing to freeze directory containing irregular files: "
", ".join(irreg),
)
for p, perms in symlinks.items():
target = os.readlink(p) # gives possibly relative path to target
resolved = os.path.realpath(p)
if not resolved.startswith(os.path.abspath(self.path)):
raise Exception(
f"Freezing {p} failed since it points to location "
f"{target} which is outside store path ({self.path})",
)
# get hashes of current directory (recursive) # get hashes of current directory (recursive)
current = fs.FSEntry.from_path(self.path) current = fs.FSEntry.from_path(self.path)
@ -130,8 +155,9 @@ class Store:
# update versions in nancy.db as appropriate # update versions in nancy.db as appropriate
# remove write permissions on nancy.db #p.unlink_file()
remove_write_perms(os.path.join(self.path, 'nancy.db')) #p.record_file()
# all effected files
#@contextmanager #@contextmanager
def run( def run(

72
src/nancy/user.py Normal file
View File

@ -0,0 +1,72 @@
from . import machine, store
import getpass
import os
import pwd
from typing import NamedTuple
class User(NamedTuple):
    """One row of the ``user`` table: an account on a particular machine.

    The field order matches the positional ``?`` placeholders used by
    ``find_or_insert``.
    """

    id: int  # if not None, this is `id` in the `user` table
    username: str  # getpass.getuser()
    userid: int  # os.getuid()
    fullname: str  # pw_gecos entry of the password database
    # Annotated as machine.Machine, but detect() and find_or_insert() store
    # the id of the machine's row here (None before the database lookup).
    machine: machine.Machine

    @classmethod
    def find_or_insert(cls, cur, user=None):
        """Given a DB cursor, find or create row in user table and fill.

        Args:
            cur: Database cursor; its connection is committed after an insert.
            user: User to look up. Detected from the running process when
                omitted.

        Returns:
            A copy of the user whose ``id`` is the id of the matching (or
            newly inserted) row and whose ``machine`` is the id of the
            current machine's row.
        """
        if user is None:
            user = cls.detect()

        # NOTE(review): the reviewed text had lost its indentation; the
        # machine lookup is assumed to run on every call, not only when user
        # is None.
        m = machine.Machine.find_or_insert(cur)
        user = user._replace(machine=m.id)

        # `IS` behaves like `=` but also matches NULL. With `=` a row holding
        # a NULL column (userid and fullname are nullable) is never found
        # again and would be re-inserted on every call.
        cur.execute(
            '''
            SELECT
                id
            FROM
                user
            WHERE
                username IS ? AND
                userid IS ? AND
                fullname IS ? AND
                machine IS ?
            LIMIT 1
            ''',
            user[1:],
        )
        found = cur.fetchone()
        if found is None:
            # The leading None lets SQLite assign the INTEGER PRIMARY KEY.
            cur.execute(
                '''
                INSERT INTO user VALUES (?,?,?,?,?);
                ''',
                user,
            )
            row_id = cur.lastrowid
            cur.connection.commit()
        else:
            row_id = found[0]
        return user._replace(id=row_id)

    @classmethod
    def detect(cls):
        """Detect values for user independent of the database.

        Note that the machine entry will not have a valid id.
        """
        # TODO: will this fail on Windows/OSX?
        fullname = pwd.getpwuid(os.getuid()).pw_gecos
        m = machine.Machine.detect()
        return cls(
            None,
            getpass.getuser(),
            os.getuid(),
            fullname,
            m.id,
        )

View File

@ -115,7 +115,7 @@ def test_invalid_user_machine(insert_user):
None, #id INTEGER PRIMARY KEY NOT NULL, None, #id INTEGER PRIMARY KEY NOT NULL,
'jacob', #username TEXT NOT NULL, 'jacob', #username TEXT NOT NULL,
101, #userid INTEGER, 101, #userid INTEGER,
'Bozo the Clown', #fullname TEXT, 'Jacob Hinkle', #fullname TEXT,
1, #machine INTEGER NOT NULL, 1, #machine INTEGER NOT NULL,
), ),
) )