Add Program, Environment, User, Machine, with auto insertion

Also remove freeze and thaw, replacing them with record().
This commit is contained in:
Jacob Hinkle 2022-09-21 20:51:57 -04:00
parent d0f113645b
commit f18368f1e0
10 changed files with 369 additions and 98 deletions

View File

@ -2,7 +2,8 @@ import click
from ..version import __version__
from .freeze import freeze, thaw
#from .freeze import freeze, thaw
from .record import record
# from https://click.palletsprojects.com/en/5.x/advanced/
@ -33,6 +34,7 @@ def main():
"""
pass
main.add_command(freeze)
main.add_command(thaw)
#main.add_command(freeze)
#main.add_command(thaw)
main.add_command(record)
main.add_command(version)

29
src/nancy/cli/record.py Normal file
View File

@ -0,0 +1,29 @@
import click
from .. import store
import os
@click.command()
@click.argument("directory", default='.')
def record(directory):
    """
    Initialize tracking or record changes to a tracked directory.

    If DIRECTORY is not already part of an existing nancy store, then a new
    'nancy.db' file is created in that directory. On the other hand, if the
    directory is part of an existing store, it will be updated and versions
    of any files changed since the last recording will be incremented.
    """
    # Reject a missing path up front so the error names the directory the
    # user actually typed.
    if not os.path.isdir(directory):
        raise ValueError(f"Cannot record non-existent directory {directory}")

    if store.find_store(directory) is None:
        # this is a new store
        s = store.Store.init(directory)
    else:
        # this is an existing store
        s = store.Store(directory)
    s.record()

View File

@ -3,10 +3,22 @@ import os
import sqlite3
# This matches the recorded user_version in any nancy.db initialized in this
# process.
# This will match the user_version in any nancy.db initialized by this process
schema_version = 0
# SQLite features we rely on:
#   UPSERT (3.24.0, 2018-06-04)
import warnings  # needed only for the version check below

min_sqlite_version = (3, 24, 0)
sqlite_version = sqlite3.sqlite_version_info
sqlite_verstr = '.'.join(str(v) for v in sqlite_version)
min_sqlite_verstr = '.'.join(str(v) for v in min_sqlite_version)
# Individual components are kept as module-level names, as before.
(major, sub, minor) = sqlite_version

# Compare whole version tuples. Testing each component independently is wrong:
# e.g. 4.0.0 has a smaller second component than 3.24.0 but is a newer release.
if sqlite_version < min_sqlite_version:
    warnings.warn(f"Minimum sqlite version is {min_sqlite_verstr}. Found {sqlite_verstr}")
def init_schema(cur):
"""Initialize a database following the current schema."""

73
src/nancy/environment.py Normal file
View File

@ -0,0 +1,73 @@
from . import user
from typing import NamedTuple
import json
import os
import platform
import sys
import time
class Environment(NamedTuple):
    """One row of the ``environment`` table.

    The field order is the positional order used by the ``INSERT`` and the
    lookup query in :meth:`find_or_insert`, so fields must not be reordered.
    """

    id: int  # primary key; None until the row is found or inserted
    envvars_json: str  # json.dumps(dict(os.environ))
    python_implementation: str  # platform.python_implementation()
    python_strversion: str  # sys.version
    python_hexversion: int  # sys.hexversion
    user: int  # id of the row in the `user` table (None until resolved)

    @classmethod
    def find_or_insert(cls, cur, env=None):
        """Given a DB cursor, find or create the matching `environment` row.

        Args:
            cur: sqlite3 cursor on a database containing the `environment`
                table.
            env: row to look up. When None, the current environment is
                detected and its user is resolved via
                ``user.User.find_or_insert``. A caller-supplied ``env`` is
                used as given.

        Returns:
            A copy of ``env`` whose ``id`` is the id of the existing or newly
            inserted row. A new row is committed immediately.
        """
        if env is None:
            env = cls.detect()
            u = user.User.find_or_insert(cur)
            env = env._replace(user=u.id)

        # `IS` is SQLite's NULL-safe equality: unlike `=`, it matches NULL
        # against NULL, so a row with an unset field is found again instead of
        # being inserted a second time.
        cur.execute('''
            SELECT
                id
            FROM
                environment
            WHERE
                envvars_json IS ? AND
                python_implementation IS ? AND
                python_strversion IS ? AND
                python_hexversion IS ? AND
                user IS ?
            LIMIT 1
            ''',
            env[1:],  # every field except id
        )
        res = cur.fetchone()
        if res is not None:
            return env._replace(id=res[0])

        # env.id is normally None here, which lets SQLite assign the key.
        cur.execute('''
            INSERT INTO environment VALUES (?,?,?,?,?,?);
            ''',
            env,
        )
        row_id = cur.lastrowid
        cur.connection.commit()
        return env._replace(id=row_id)

    @classmethod
    def detect(cls):
        """Detect values for environment independent of the database.

        Note that neither ``id`` nor the user entry will be a valid row id:
        ``id`` is None and ``user`` is whatever ``user.User.detect()`` reports
        as its (not yet stored) id.
        """
        u = user.User.detect()
        return cls(
            None,
            json.dumps(dict(os.environ)),
            platform.python_implementation(),
            sys.version,
            sys.hexversion,
            u.id,
        )

View File

@ -47,18 +47,18 @@ def remove_write_perms(path):
def make_readonly_recursive(path, excluded=()):
    """Recursively "freeze" a directory by setting all files and directories read-only.

    Args:
        path: directory to walk.
        excluded: container of paths to leave untouched. Entries are compared
            (with ``in``) against each entry's path as produced by joining
            the ``os.walk`` root with the entry name.
    """
    # traversing bottom-up makes it easier to freeze perms on directories
    for root, dirs, files in os.walk(path, topdown=False):
        # Files first, then sub-directories, within each visited directory.
        for name in files + dirs:
            p = os.path.join(root, name)
            if p in excluded:
                continue
            # `root` already starts with `path`, so `p` is usable as-is.
            # Joining it onto `path` again would duplicate the prefix for
            # relative paths.
            remove_write_perms(p)
@dataclass

View File

@ -1,24 +1,69 @@
from collections import namedtuple
from typing import NamedTuple
import json
import platform
import time
MachineInfo = namedtuple(
'MachineInfo',
[
'machine_id',
'hostname',
'processor',
'system',
'release',
'cpu_type',
'timezone',
'freedesktop_os_release',
'win32_ver',
'mac_ver',
],
)
def get_machine_info():
class Machine(NamedTuple):
id: int
machine_id: str
hostname: str
processor: str
system: str
release: str
cpu_type: str
timezone: str
freedesktop_os_release: str
win32_ver: str
mac_ver: str
@classmethod
def find_or_insert(cls, cur, machine=None):
"""Given a DB cursor, find or create row in machine table and fill"""
if machine is None:
machine = cls.detect()
# insert or ignore, handle each case to set id
cur.execute('''
SELECT
id
FROM
machine
WHERE
machine_id = ? AND
hostname = ? AND
processor = ? AND
system = ? AND
release = ? AND
cpu_type = ? AND
timezone = ? AND
freedesktop_os_release = ? AND
win32_ver = ? AND
mac_ver = ?
LIMIT 1
''',
machine[1:]
)
res = cur.fetchone()
if res is None:
cur.execute('''
INSERT INTO machine VALUES (?,?,?,?,?,?,?,?,?,?,?);
''',
machine,
)
id = cur.lastrowid
cur.connection.commit()
else:
id = res[0]
return machine._replace(id=id)
@classmethod
def detect(cls):
"""Formats machine-specific information into a MachineInfo object.
Note that 'MachineInfo' objects are properly formatted to be inserted into
the `machine` table.
"""
fdor = ''
try:
fdor = json.dumps(platform.freedesktop_os_release())
@ -34,7 +79,8 @@ def get_machine_info():
except FileNotFoundError:
pass
return MachineInfo(
return cls(
None,
machine_id=mid,
hostname=platform.node(),
processor=platform.processor(),
@ -47,4 +93,3 @@ def get_machine_info():
mac_ver=json.dumps(platform.mac_ver()),
)

View File

@ -35,7 +35,11 @@ CREATE TABLE machine(id INTEGER PRIMARY KEY NOT NULL,
-- 'VERSION': '22.05 (Quokka)', 'VERSION_CODENAME': 'quokka', 'VERSION_ID':
-- '22.05'}"
win32_ver TEXT, -- platform.win32_ver() as JSON
mac_ver TEXT -- platform.mac_ver() as JSON
mac_ver TEXT, -- platform.mac_ver() as JSON
-- disallow duplicate rows
UNIQUE(hostname, processor, system, release, cpu_type, timezone,
freedesktop_os_release, win32_ver, mac_ver)
);
-- Programs are run by users on machines
CREATE TABLE user(id INTEGER PRIMARY KEY NOT NULL,
@ -45,7 +49,11 @@ CREATE TABLE user(id INTEGER PRIMARY KEY NOT NULL,
-- on Windows: see https://stackoverflow.com/questions/21766954/how-to-get-windows-users-full-name-in-python
machine INTEGER NOT NULL,
UNIQUE(userid, machine),
-- usernames and fullnames change rarely, but it happens. A more restrictive
-- constraint would not enable tracking of this info. The following only
-- guarantees no duplicate full rows.
UNIQUE(username, userid, fullname, machine),
FOREIGN KEY (machine) REFERENCES machine (id) ON UPDATE CASCADE
);
@ -118,6 +126,9 @@ CREATE TABLE environment (id INTEGER PRIMARY KEY NOT NULL,
python_hexversion INTEGER, -- sys.hexversion: 50923504
user INTEGER,
-- disallow duplicate rows
UNIQUE(envvars_json, python_implementation, python_strversion, python_hexversion, user),
FOREIGN KEY (user) REFERENCES user (id) ON UPDATE CASCADE
);
@ -131,7 +142,7 @@ CREATE TABLE program (id INTEGER PRIMARY KEY NOT NULL,
name TEXT, -- name of the program, usually written lowercase by calling code e.g. cnn_crossval
-- Names of built-in operations will be shown in upper case: e.g. 'FREEZE'
-- we use POSIX timestamps in UTC for time recording.
-- we use POSIX timestamps for time recording.
-- e.g. datetime.datetime.now().timestamp()
start_time REAL,
end_time REAL,
@ -139,6 +150,7 @@ CREATE TABLE program (id INTEGER PRIMARY KEY NOT NULL,
process_id INTEGER, -- host PID of python process on host OS
environment INTEGER NOT NULL,
message TEXT, -- user-defined message to help distinguish similar runs
FOREIGN KEY (environment) REFERENCES environment (id) ON UPDATE CASCADE
);
@ -166,7 +178,7 @@ CREATE TABLE py_module(id INTEGER PRIMARY KEY NOT NULL,
CREATE TABLE func(id INTEGER PRIMARY KEY NOT NULL,
name TEXT NOT NULL,
module INTEGER NOT NULL,
-- we use POSIX timestamps in UTC for time recording.
-- we use POSIX timestamps for time recording.
-- e.g. datetime.datetime.now().timestamp()
start_time REAL,
end_time REAL,

View File

@ -1,14 +1,59 @@
"""Utilities for creating new stores and linking between them."""
from . import db, fs, machine
from . import db, environment, fs, machine
import datetime
import importlib
import json
import os
from pathlib import Path
import sqlite3
from typing import Callable
class Program:
    """Context manager recording one named operation in a store's `program` table.

    Entering inserts a row for the operation; exiting writes the start and end
    times to that row and commits. An instance can be entered only once.
    """

    def __init__(self, store, name, message):
        self.store = store
        self.name = name
        self.message = message
        self.id = None  # row id in the `program` table, set by __enter__
        self.start_time = None  # datetime, set by __enter__ or set_start_time
        self._evaluated = False

    def set_start_time(self, t):
        """Override the recorded start time with datetime `t`."""
        self.start_time = t

    def __enter__(self):
        if self._evaluated:
            raise RuntimeError("Cannot re-enter a Program context")

        cur = self.store.conn.cursor()

        env = environment.Environment.find_or_insert(cur)

        cur.execute('INSERT INTO program VALUES (?, ?, ?, ?, ?, ?, ?)', (
            None,  # id INTEGER PRIMARY KEY NOT NULL,
            self.name,  # name TEXT
            # we use POSIX timestamps for time recording.
            # e.g. datetime.datetime.now().timestamp()
            None,  # start_time REAL, filled in by __exit__
            None,  # end_time REAL, filled in by __exit__
            os.getpid(),  # process_id INTEGER, -- host PID of python process on host OS
            env.id,  # environment INTEGER NOT NULL,
            self.message,  # message TEXT, -- user-defined message to help distinguish similar runs
        ))
        # Remember which row to update when the context exits.
        self.id = cur.lastrowid

        self.set_start_time(datetime.datetime.now())

        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        end_time = datetime.datetime.now()

        # record start and end times in store; the start time is written here
        # rather than in __enter__ because callers may adjust it with
        # set_start_time() inside the `with` body.
        cur = self.store.conn.cursor()
        cur.execute(
            'UPDATE program SET start_time = ?, end_time = ? WHERE id = ?',
            (self.start_time.timestamp(), end_time.timestamp(), self.id),
        )
        # Without a commit the program row would be dropped when the
        # connection closes.
        self.store.conn.commit()

        self._evaluated = True  # prevent re-running
class Store:
"""Describes a data directory, holds active connection to nancy.db"""
@ -45,7 +90,8 @@ class Store:
return self.conn
@classmethod
def init(cls, directory=None):
def init(cls, directory=None, message=None):
start_time = datetime.datetime.now()
if directory is None: # initialize an in-memory store
db_path = ':memory:'
else:
@ -63,18 +109,12 @@ class Store:
db.init_schema(conn.cursor())
new_store = cls(directory, conn)
new_store.record_machine_description()
with new_store.program('INIT', message) as p:
# set the timing to the actual times it took to initialize the db
p.set_start_time(start_time)
return new_store
def record_machine_description(self):
"""Record machine-specific information"""
cur = self.conn.cursor()
machine_info = machine.get_machine_info()
cur.execute('INSERT INTO machine VALUES(NULL,?,?,?,?,?,?,?,?,?,?)', machine_info)
self.machine_id = cur.lastrowid
self.conn.commit()
def make_readonly(self):
"""Make store directory read-only (except for nancy.db) and return file list"""
fs.make_readonly_recursive(self.path, excluded='./nancy.db')
@ -90,26 +130,11 @@ class Store:
else:
return fs.FSEntry.from_db_index(cur, root_id=root_id)
def program(self, name, message=None):
return Program(self, name, message)
def freeze(self):
# Remove write permissions on all files except nancy.db
# This also catalogs all files, directories, and symlinks
# Note that ./nancy.db is NOT listed in files and is not altered
dirs, files, symlinks, irreg = self.make_readonly()
if len(irreg) > 0:
raise Exception(
"Refusing to freeze directory containing irregular files: "
", ".join(irreg),
)
for p, perms in symlinks.items():
target = os.readlink(p) # gives possibly relative path to target
resolved = os.path.realpath(p)
if not resolved.startswith(os.path.abspath(self.path)):
raise Exception(
f"Freezing {p} failed since it points to location "
f"{target} which is outside store path ({self.path})",
)
def record(self, message=None):
with self.program('RECORD', message) as p:
# get hashes of current directory (recursive)
current = fs.FSEntry.from_path(self.path)
@ -130,8 +155,9 @@ class Store:
# update versions in nancy.db as appropriate
# remove write permissions on nancy.db
remove_write_perms(os.path.join(self.path, 'nancy.db'))
#p.unlink_file()
#p.record_file()
# all effected files
#@contextmanager
def run(

72
src/nancy/user.py Normal file
View File

@ -0,0 +1,72 @@
from . import machine, store
import getpass
import os
import pwd
from typing import NamedTuple
class User(NamedTuple):
    """One row of the ``user`` table.

    The field order is the positional order used by the ``INSERT`` and the
    lookup query in :meth:`find_or_insert`, so fields must not be reordered.
    """

    id: int  # if not None, this is `id` in the `user` table
    username: str  # getpass.getuser()
    userid: int  # os.getuid()
    fullname: str  # pw_gecos entry of the current uid
    machine: int  # id of the row in the `machine` table (None until resolved)

    @classmethod
    def find_or_insert(cls, cur, user=None):
        """Given a DB cursor, find or create the matching `user` row.

        Args:
            cur: sqlite3 cursor on a database containing the `user` table.
            user: row to look up. When None, the current user is detected and
                its machine is resolved via ``machine.Machine.find_or_insert``.
                A caller-supplied ``user`` is used as given.

        Returns:
            A copy of ``user`` whose ``id`` is the id of the existing or newly
            inserted row. A new row is committed immediately.
        """
        if user is None:
            user = cls.detect()
            m = machine.Machine.find_or_insert(cur)
            user = user._replace(machine=m.id)

        # `IS` is SQLite's NULL-safe equality: unlike `=`, it matches NULL
        # against NULL, so a row with e.g. no fullname is found again instead
        # of being inserted a second time.
        cur.execute('''
            SELECT
                id
            FROM
                user
            WHERE
                username IS ? AND
                userid IS ? AND
                fullname IS ? AND
                machine IS ?
            LIMIT 1
            ''',
            user[1:],  # every field except id
        )
        res = cur.fetchone()
        if res is not None:
            return user._replace(id=res[0])

        # user.id is normally None here, which lets SQLite assign the key.
        cur.execute('''
            INSERT INTO user VALUES (?,?,?,?,?);
            ''',
            user,
        )
        row_id = cur.lastrowid
        cur.connection.commit()
        return user._replace(id=row_id)

    @classmethod
    def detect(cls):
        """Detect values for user independent of the database.

        Note that neither ``id`` nor the machine entry will be a valid row id:
        ``id`` is None and ``machine`` is whatever ``machine.Machine.detect()``
        reports as its (not yet stored) id.
        """
        # TODO: will this fail on Windows/OSX?
        fullname = pwd.getpwuid(os.getuid()).pw_gecos
        m = machine.Machine.detect()
        return cls(
            None,
            getpass.getuser(),
            os.getuid(),
            fullname,
            m.id,
        )

View File

@ -115,7 +115,7 @@ def test_invalid_user_machine(insert_user):
None, #id INTEGER PRIMARY KEY NOT NULL,
'jacob', #username TEXT NOT NULL,
101, #userid INTEGER,
'Bozo the Clown', #fullname TEXT,
'Jacob Hinkle', #fullname TEXT,
1, #machine INTEGER NOT NULL,
),
)