Add Program, Environment, User, Machine, with auto insertion
Also removed freeze and thaw, and converted to record().
This commit is contained in:
parent
d0f113645b
commit
f18368f1e0
@ -2,7 +2,8 @@ import click
|
||||
|
||||
from ..version import __version__
|
||||
|
||||
from .freeze import freeze, thaw
|
||||
#from .freeze import freeze, thaw
|
||||
from .record import record
|
||||
|
||||
|
||||
# from https://click.palletsprojects.com/en/5.x/advanced/
|
||||
@ -33,6 +34,7 @@ def main():
|
||||
|
||||
"""
|
||||
pass
|
||||
main.add_command(freeze)
|
||||
main.add_command(thaw)
|
||||
#main.add_command(freeze)
|
||||
#main.add_command(thaw)
|
||||
main.add_command(record)
|
||||
main.add_command(version)
|
||||
|
||||
29
src/nancy/cli/record.py
Normal file
29
src/nancy/cli/record.py
Normal file
@ -0,0 +1,29 @@
|
||||
import click
|
||||
|
||||
from .. import store
|
||||
|
||||
import os
|
||||
|
||||
|
||||
@click.command()
@click.argument("directory", default='.')
def record(directory):
    """
    Initialize tracking or record changes to a tracked directory.

    If DIRECTORY is not already part of an existing nancy store, then a new
    'nancy.db' file is created in that directory. On the other hand, if the
    directory is part of an existing store, it will be updated and versions
    of any files changed since the last recording will be incremented.
    """
    if not os.path.isdir(directory):
        # Raise a click exception so the user gets a usage-style error
        # message and a non-zero exit code instead of a Python traceback.
        raise click.BadParameter(
            f"Cannot record non-existent directory {directory}",
            param_hint="DIRECTORY",
        )

    existing_store = store.find_store(directory)
    if existing_store is None:  # this is a new store
        s = store.Store.init(directory)
    else:  # this is an existing store
        # NOTE(review): the store is opened at DIRECTORY rather than at the
        # value returned by find_store(); confirm that this is intended when
        # DIRECTORY is a sub-directory of an existing store.
        s = store.Store(directory)

    s.record()
|
||||
|
||||
@ -3,10 +3,22 @@ import os
|
||||
import sqlite3
import warnings
|
||||
|
||||
|
||||
# This matches the recorded user_version in any nancy.db initialized in this
|
||||
# process.
|
||||
# This will match the user_version in any nancy.db initialized by this process
|
||||
schema_version = 0
|
||||
|
||||
# SQLite features we rely on:
#   UPSERT (3.24.0, 2018-06-04)
min_sqlite_version = (3, 24, 0)

# Version of the SQLite library the sqlite3 module is linked against.
sqlite_version = sqlite3.sqlite_version_info
sqlite_verstr = '.'.join(str(v) for v in sqlite_version)
min_sqlite_verstr = '.'.join(str(v) for v in min_sqlite_version)

# Tuples compare lexicographically, which is exactly version ordering.
# Comparing each component independently would wrongly flag e.g. 4.0.0 as
# older than 3.24.0 because 0 < 24.
if sqlite_version < min_sqlite_version:
    warnings.warn(f"Minimum sqlite version is {min_sqlite_verstr}. Found {sqlite_verstr}")
|
||||
|
||||
|
||||
def init_schema(cur):
|
||||
"""Initialize a database following the current schema."""
|
||||
|
||||
73
src/nancy/environment.py
Normal file
73
src/nancy/environment.py
Normal file
@ -0,0 +1,73 @@
|
||||
from . import user
|
||||
|
||||
from typing import NamedTuple
|
||||
import json
|
||||
import os
|
||||
import platform
|
||||
import sys
|
||||
import time
|
||||
|
||||
class Environment(NamedTuple):
    """One row of the `environment` table: interpreter details plus env vars.

    Fields are declared in the same order as the values passed to the
    positional INSERT in `find_or_insert`.
    """

    id: int  # row id; None until find_or_insert() has matched or inserted a row
    envvars_json: str  # os.environ serialized as a JSON object
    python_implementation: str  # platform.python_implementation()
    python_strversion: str  # sys.version
    python_hexversion: int  # sys.hexversion
    user: user.User  # holds the `user` row id once find_or_insert() has run

    @classmethod
    def find_or_insert(cls, cur, env=None):
        """Find the matching row in the `environment` table, or insert one.

        Args:
            cur: DB cursor on a database that has the `environment` table.
            env: Environment to look up; detected from the running process
                when None. Its `user` field is always replaced by the id of
                the row returned by `user.User.find_or_insert(cur)`.

        Returns:
            A copy of `env` with `user` and `id` set to the database ids.
            When a new row is inserted, the cursor's connection is committed.
        """
        if env is None:
            env = cls.detect()

        u = user.User.find_or_insert(cur)
        env = env._replace(user=u.id)

        # `IS` is SQLite's NULL-safe equality: unlike `=`, it matches a NULL
        # column against a None parameter, so rows containing NULLs are
        # found again instead of being inserted a second time.
        cur.execute(
            '''
            SELECT
                id
            FROM
                environment
            WHERE
                envvars_json IS ? AND
                python_implementation IS ? AND
                python_strversion IS ? AND
                python_hexversion IS ? AND
                user IS ?
            LIMIT 1
            ''',
            env[1:],
        )
        row = cur.fetchone()
        if row is None:
            cur.execute(
                '''
                INSERT INTO environment VALUES (?,?,?,?,?,?);
                ''',
                env,
            )
            row_id = cur.lastrowid
            cur.connection.commit()
        else:
            row_id = row[0]

        return env._replace(id=row_id)

    @classmethod
    def detect(cls):
        """Detect values for environment independent of the database.

        Note that the user entry will not have a valid id.
        """
        u = user.User.detect()

        return cls(
            None,
            json.dumps(dict(os.environ)),
            platform.python_implementation(),
            sys.version,
            sys.hexversion,
            u.id,
        )
|
||||
@ -47,18 +47,18 @@ def remove_write_perms(path):
|
||||
def make_readonly_recursive(path, excluded=()):
    """Recursively "freeze" a directory by setting all files and directories read-only

    Args:
        path: Root directory to walk. The root itself is not modified, only
            the entries found beneath it.
        excluded: A single path string or an iterable of paths to leave
            untouched. Entries are compared verbatim against
            ``os.path.join(root, name)`` as produced by ``os.walk(path)``.
    """
    # A bare string is treated as one excluded path; otherwise `in` would
    # perform a substring test against it.
    if isinstance(excluded, str):
        excluded = (excluded,)
    else:
        excluded = tuple(excluded)

    # traversing bottom-up makes it easier to freeze perms on directories
    for root, dirs, files in os.walk(path, topdown=False):
        # files first, then directories, within each visited directory
        for name in files + dirs:
            p = os.path.join(root, name)
            if p in excluded:
                continue
            # `root` already starts with `path`, so `p` is usable as-is;
            # joining it onto `path` again would duplicate the prefix
            # whenever `path` is a relative directory name.
            remove_write_perms(p)
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@ -1,24 +1,69 @@
|
||||
from collections import namedtuple
|
||||
from typing import NamedTuple
|
||||
import json
|
||||
import platform
|
||||
import time
|
||||
|
||||
MachineInfo = namedtuple(
|
||||
'MachineInfo',
|
||||
[
|
||||
'machine_id',
|
||||
'hostname',
|
||||
'processor',
|
||||
'system',
|
||||
'release',
|
||||
'cpu_type',
|
||||
'timezone',
|
||||
'freedesktop_os_release',
|
||||
'win32_ver',
|
||||
'mac_ver',
|
||||
],
|
||||
class Machine(NamedTuple):
|
||||
id: int
|
||||
machine_id: str
|
||||
hostname: str
|
||||
processor: str
|
||||
system: str
|
||||
release: str
|
||||
cpu_type: str
|
||||
timezone: str
|
||||
freedesktop_os_release: str
|
||||
win32_ver: str
|
||||
mac_ver: str
|
||||
|
||||
@classmethod
|
||||
def find_or_insert(cls, cur, machine=None):
|
||||
"""Given a DB cursor, find or create row in machine table and fill"""
|
||||
if machine is None:
|
||||
machine = cls.detect()
|
||||
|
||||
# insert or ignore, handle each case to set id
|
||||
cur.execute('''
|
||||
SELECT
|
||||
id
|
||||
FROM
|
||||
machine
|
||||
WHERE
|
||||
machine_id = ? AND
|
||||
hostname = ? AND
|
||||
processor = ? AND
|
||||
system = ? AND
|
||||
release = ? AND
|
||||
cpu_type = ? AND
|
||||
timezone = ? AND
|
||||
freedesktop_os_release = ? AND
|
||||
win32_ver = ? AND
|
||||
mac_ver = ?
|
||||
LIMIT 1
|
||||
''',
|
||||
machine[1:]
|
||||
)
|
||||
def get_machine_info():
|
||||
res = cur.fetchone()
|
||||
if res is None:
|
||||
cur.execute('''
|
||||
INSERT INTO machine VALUES (?,?,?,?,?,?,?,?,?,?,?);
|
||||
''',
|
||||
machine,
|
||||
)
|
||||
id = cur.lastrowid
|
||||
cur.connection.commit()
|
||||
else:
|
||||
id = res[0]
|
||||
|
||||
return machine._replace(id=id)
|
||||
|
||||
@classmethod
|
||||
def detect(cls):
|
||||
"""Formats machine-specific information into a MachineInfo object.
|
||||
|
||||
Note that 'MachineInfo' objects are properly formatted to be inserted into
|
||||
the `machine` table.
|
||||
"""
|
||||
fdor = ''
|
||||
try:
|
||||
fdor = json.dumps(platform.freedesktop_os_release())
|
||||
@ -34,7 +79,8 @@ def get_machine_info():
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
|
||||
return MachineInfo(
|
||||
return cls(
|
||||
None,
|
||||
machine_id=mid,
|
||||
hostname=platform.node(),
|
||||
processor=platform.processor(),
|
||||
@ -47,4 +93,3 @@ def get_machine_info():
|
||||
mac_ver=json.dumps(platform.mac_ver()),
|
||||
)
|
||||
|
||||
|
||||
|
||||
@ -35,7 +35,11 @@ CREATE TABLE machine(id INTEGER PRIMARY KEY NOT NULL,
|
||||
-- 'VERSION': '22.05 (Quokka)', 'VERSION_CODENAME': 'quokka', 'VERSION_ID':
|
||||
-- '22.05'}"
|
||||
win32_ver TEXT, -- platform.win32_ver() as JSON
|
||||
mac_ver TEXT -- platform.mac_ver() as JSON
|
||||
mac_ver TEXT, -- platform.mac_ver() as JSON
|
||||
|
||||
-- disallow duplicate rows
|
||||
UNIQUE(hostname, processor, system, release, cpu_type, timezone,
|
||||
freedesktop_os_release, win32_ver, mac_ver)
|
||||
);
|
||||
-- Programs are run by users on machines
|
||||
CREATE TABLE user(id INTEGER PRIMARY KEY NOT NULL,
|
||||
@ -45,7 +49,11 @@ CREATE TABLE user(id INTEGER PRIMARY KEY NOT NULL,
|
||||
-- on Windows: see https://stackoverflow.com/questions/21766954/how-to-get-windows-users-full-name-in-python
|
||||
machine INTEGER NOT NULL,
|
||||
|
||||
UNIQUE(userid, machine),
|
||||
-- usernames and fullnames change rarely, but it happens. A more restrictive
|
||||
-- constraint would not enable tracking of this info. The following only
|
||||
-- guarantees no duplicate full rows.
|
||||
UNIQUE(username, userid, fullname, machine),
|
||||
|
||||
FOREIGN KEY (machine) REFERENCES machine (id) ON UPDATE CASCADE
|
||||
);
|
||||
|
||||
@ -118,6 +126,9 @@ CREATE TABLE environment (id INTEGER PRIMARY KEY NOT NULL,
|
||||
python_hexversion INTEGER, -- sys.hexversion: 50923504
|
||||
user INTEGER,
|
||||
|
||||
-- disallow duplicate rows
|
||||
UNIQUE(envvars_json, python_implementation, python_strversion, python_hexversion, user),
|
||||
|
||||
FOREIGN KEY (user) REFERENCES user (id) ON UPDATE CASCADE
|
||||
);
|
||||
|
||||
@ -131,7 +142,7 @@ CREATE TABLE program (id INTEGER PRIMARY KEY NOT NULL,
|
||||
name TEXT, -- name of the program, usually written lowercase by calling code e.g. cnn_crossval
|
||||
-- Names of built-in operations will be shown in upper case: e.g. 'FREEZE'
|
||||
|
||||
-- we use POSIX timestamps in UTC for time recording.
|
||||
-- we use POSIX timestamps for time recording.
|
||||
-- e.g. datetime.datetime.now().timestamp()
|
||||
start_time REAL,
|
||||
end_time REAL,
|
||||
@ -139,6 +150,7 @@ CREATE TABLE program (id INTEGER PRIMARY KEY NOT NULL,
|
||||
process_id INTEGER, -- host PID of python process on host OS
|
||||
environment INTEGER NOT NULL,
|
||||
message TEXT, -- user-defined message to help distinguish similar runs
|
||||
|
||||
FOREIGN KEY (environment) REFERENCES environment (id) ON UPDATE CASCADE
|
||||
);
|
||||
|
||||
@ -166,7 +178,7 @@ CREATE TABLE py_module(id INTEGER PRIMARY KEY NOT NULL,
|
||||
CREATE TABLE func(id INTEGER PRIMARY KEY NOT NULL,
|
||||
name TEXT NOT NULL,
|
||||
module INTEGER NOT NULL,
|
||||
-- we use POSIX timestamps in UTC for time recording.
|
||||
-- we use POSIX timestamps for time recording.
|
||||
-- e.g. datetime.datetime.now().timestamp()
|
||||
start_time REAL,
|
||||
end_time REAL,
|
||||
|
||||
@ -1,14 +1,59 @@
|
||||
"""Utilities for creating new stores and linking between them."""
|
||||
|
||||
from . import db, fs, machine
|
||||
from . import db, environment, fs, machine
|
||||
|
||||
import datetime
|
||||
import importlib
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
import sqlite3
|
||||
from typing import Callable
|
||||
|
||||
|
||||
class Program:
    """Context manager that records one program run in a store's `program` table.

    Entering inserts a row for the run; exiting writes the start and end
    times to that row and commits. A Program can only be used once.
    """

    def __init__(self, store, name, message):
        """Remember the owning store, the program name and a free-form message."""
        self.store = store
        self.name = name
        self.message = message

        self.id = None  # row id in the `program` table, set by __enter__
        self.start_time = None  # datetime.datetime, set by __enter__
        self._evaluated = False

    def set_start_time(self, t):
        """Override the recorded start time with the datetime `t`.

        Useful when the work being timed began before the context was
        entered; call it inside the `with` body.
        """
        self.start_time = t

    def __enter__(self):
        """Insert the `program` row for this run and start timing.

        Raises:
            RuntimeError: if this Program has already been run once.
        """
        if self._evaluated:
            raise RuntimeError("Cannot re-enter a Program context")

        cur = self.store.conn.cursor()
        env = environment.Environment.find_or_insert(cur)

        # Values are positional, in `program` table column order.
        cur.execute('INSERT INTO program VALUES (?, ?, ?, ?, ?, ?, ?)', (
            None,  # id: assigned by SQLite
            self.name,  # name
            None,  # start_time: written in __exit__, once final
            None,  # end_time: written in __exit__
            os.getpid(),  # process_id
            env.id,  # environment
            self.message,  # message
        ))
        # keep the row id so __exit__ can update this run's row
        self.id = cur.lastrowid

        self.set_start_time(datetime.datetime.now())

        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        """Write start/end times to the run's row, commit, and mark it done.

        Times are stored as POSIX timestamps. Exceptions raised inside the
        `with` body are not suppressed.
        """
        end_time = datetime.datetime.now()

        cur = self.store.conn.cursor()
        cur.execute(
            'UPDATE program SET start_time = ?, end_time = ? WHERE id = ?',
            (self.start_time.timestamp(), end_time.timestamp(), self.id),
        )
        self.store.conn.commit()

        self._evaluated = True  # prevent re-running
|
||||
|
||||
|
||||
class Store:
|
||||
"""Describes a data directory, holds active connection to nancy.db"""
|
||||
@ -45,7 +90,8 @@ class Store:
|
||||
return self.conn
|
||||
|
||||
@classmethod
|
||||
def init(cls, directory=None):
|
||||
def init(cls, directory=None, message=None):
|
||||
start_time = datetime.datetime.now()
|
||||
if directory is None: # initialize an in-memory store
|
||||
db_path = ':memory:'
|
||||
else:
|
||||
@ -63,18 +109,12 @@ class Store:
|
||||
db.init_schema(conn.cursor())
|
||||
new_store = cls(directory, conn)
|
||||
|
||||
new_store.record_machine_description()
|
||||
with new_store.program('INIT', message) as p:
|
||||
# set the timing to the actual times it took to initialize the db
|
||||
p.set_start_time(start_time)
|
||||
|
||||
return new_store
|
||||
|
||||
def record_machine_description(self):
|
||||
"""Record machine-specific information"""
|
||||
cur = self.conn.cursor()
|
||||
machine_info = machine.get_machine_info()
|
||||
cur.execute('INSERT INTO machine VALUES(NULL,?,?,?,?,?,?,?,?,?,?)', machine_info)
|
||||
self.machine_id = cur.lastrowid
|
||||
self.conn.commit()
|
||||
|
||||
def make_readonly(self):
|
||||
"""Make store directory read-only (except for nancy.db) and return file list"""
|
||||
fs.make_readonly_recursive(self.path, excluded='./nancy.db')
|
||||
@ -90,26 +130,11 @@ class Store:
|
||||
else:
|
||||
return fs.FSEntry.from_db_index(cur, root_id=root_id)
|
||||
|
||||
def program(self, name, message=None):
    """Return a new Program context bound to this store.

    `name` and the optional `message` are handed to Program unchanged.
    """
    run = Program(self, name, message)
    return run
|
||||
|
||||
def freeze(self):
|
||||
# Remove write permissions on all files except nancy.db
|
||||
# This also catalogs all files, directories, and symlinks
|
||||
# Note that ./nancy.db is NOT listed in files and is not altered
|
||||
dirs, files, symlinks, irreg = self.make_readonly()
|
||||
if len(irreg) > 0:
|
||||
raise Exception(
|
||||
"Refusing to freeze directory containing irregular files: "
|
||||
", ".join(irreg),
|
||||
)
|
||||
for p, perms in symlinks.items():
|
||||
target = os.readlink(p) # gives possibly relative path to target
|
||||
resolved = os.path.realpath(p)
|
||||
if not resolved.startswith(os.path.abspath(self.path)):
|
||||
raise Exception(
|
||||
f"Freezing {p} failed since it points to location "
|
||||
f"{target} which is outside store path ({self.path})",
|
||||
)
|
||||
|
||||
def record(self, message=None):
|
||||
with self.program('RECORD', message) as p:
|
||||
# get hashes of current directory (recursive)
|
||||
current = fs.FSEntry.from_path(self.path)
|
||||
|
||||
@ -130,8 +155,9 @@ class Store:
|
||||
|
||||
# update versions in nancy.db as appropriate
|
||||
|
||||
# remove write permissions on nancy.db
|
||||
remove_write_perms(os.path.join(self.path, 'nancy.db'))
|
||||
#p.unlink_file()
|
||||
#p.record_file()
|
||||
# all effected files
|
||||
|
||||
#@contextmanager
|
||||
def run(
|
||||
|
||||
72
src/nancy/user.py
Normal file
72
src/nancy/user.py
Normal file
@ -0,0 +1,72 @@
|
||||
from . import machine, store
|
||||
|
||||
import getpass
|
||||
import os
|
||||
import pwd
|
||||
from typing import NamedTuple
|
||||
|
||||
class User(NamedTuple):
    """One row of the `user` table: an OS account on a particular machine.

    Fields are declared in the same order as the values passed to the
    positional INSERT in `find_or_insert`.
    """

    id: int  # if not None, this is `id` in the `user` table
    username: str
    userid: int
    fullname: str
    machine: machine.Machine  # holds the `machine` row id once find_or_insert() has run

    @classmethod
    def find_or_insert(cls, cur, user=None):
        """Find the matching row in the `user` table, or insert one.

        Args:
            cur: DB cursor on a database that has the `user` table.
            user: User to look up; detected from the running process when
                None. Its `machine` field is always replaced by the id of
                the row returned by `machine.Machine.find_or_insert(cur)`.

        Returns:
            A copy of `user` with `machine` and `id` set to the database
            ids. When a new row is inserted, the cursor's connection is
            committed.
        """
        if user is None:
            user = cls.detect()

        m = machine.Machine.find_or_insert(cur)
        user = user._replace(machine=m.id)

        # `IS` is SQLite's NULL-safe equality: unlike `=`, it matches a NULL
        # column against a None parameter, so rows containing NULLs are
        # found again instead of being inserted a second time.
        cur.execute(
            '''
            SELECT
                id
            FROM
                user
            WHERE
                username IS ? AND
                userid IS ? AND
                fullname IS ? AND
                machine IS ?
            LIMIT 1
            ''',
            user[1:],
        )
        row = cur.fetchone()
        if row is None:
            cur.execute(
                '''
                INSERT INTO user VALUES (?,?,?,?,?);
                ''',
                user,
            )
            row_id = cur.lastrowid
            cur.connection.commit()
        else:
            row_id = row[0]

        return user._replace(id=row_id)

    @classmethod
    def detect(cls):
        """Detect values for user independent of the database.

        Note that the machine entry will not have a valid id.
        """
        # TODO: will this fail on Windows/OSX?
        fullname = pwd.getpwuid(os.getuid()).pw_gecos

        m = machine.Machine.detect()

        return cls(
            None,
            getpass.getuser(),
            os.getuid(),
            fullname,
            m.id,
        )
|
||||
|
||||
@ -115,7 +115,7 @@ def test_invalid_user_machine(insert_user):
|
||||
None, #id INTEGER PRIMARY KEY NOT NULL,
|
||||
'jacob', #username TEXT NOT NULL,
|
||||
101, #userid INTEGER,
|
||||
'Bozo the Clown', #fullname TEXT,
|
||||
'Jacob Hinkle', #fullname TEXT,
|
||||
1, #machine INTEGER NOT NULL,
|
||||
),
|
||||
)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user