Create {machine,fs}.py, file scanning, and updated tests
This also fixes the pytest fixture setup (I think).
This commit is contained in:
parent
c30ca6a083
commit
e75af52765
209
src/nancy/fs.py
Normal file
209
src/nancy/fs.py
Normal file
@ -0,0 +1,209 @@
|
||||
"""Interaction with the filesystem and with file database entries"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
import hashlib
|
||||
import operator
|
||||
import os
|
||||
import stat
|
||||
from typing import List
|
||||
import warnings
|
||||
|
||||
|
||||
def remove_write_perms(path):
    """Remove write permissions for all users while preserving other perms.

    Regular files and directories are chmod-ed directly. For a symbolic link
    the link itself (never its target) is inspected and, where the platform
    supports it, modified.

    Args:
        path: Path of the file, directory or symlink to make read-only.

    Returns:
        The original permission string as produced by ``stat.filemode``
        (e.g. ``'-rw-rw-r--'``), or ``None`` if ``path`` is a symlink and the
        platform cannot stat a link without following it.
    """
    write_bits = stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH

    if not os.path.islink(path):
        mode = os.stat(path).st_mode
        # Bitwise NOT (~) clears exactly the three write bits. Unary minus
        # would produce a different mask (it keeps S_IWOTH and drops S_IXOTH).
        os.chmod(path, stat.S_IMODE(mode) & ~write_bits)
        return stat.filemode(mode)

    if os.stat not in os.supports_follow_symlinks:
        # can't stat this thing directly on this platform
        # means we can only stat the content.
        # In this case, we return None and do not lock this link
        warnings.warn("This platform cannot stat symlinks. Will not set them read-only.")
        return None

    # Inspect the link itself rather than whatever it points to.
    mode = os.stat(path, follow_symlinks=False).st_mode
    orig_perm_string = stat.filemode(mode)

    if os.chmod not in os.supports_follow_symlinks:
        warnings.warn(
            f"{path}: Platform does not support chmod of symlinks. "
            "Links will not be set read-only.",
        )
        return orig_perm_string

    # follow_symlinks=False so the link, not its target, is changed.
    os.chmod(path, stat.S_IMODE(mode) & ~write_bits, follow_symlinks=False)
    return orig_perm_string
|
||||
|
||||
|
||||
def make_readonly_recursive(path, excluded=()):
    """Recursively "freeze" a directory by setting all files and directories read-only.

    Every file and sub-directory below ``path`` has its write permission bits
    removed via ``remove_write_perms``. ``path`` itself is not modified.

    Args:
        path: Directory whose contents are frozen.
        excluded: A single path or an iterable of paths to leave untouched.
            Entries are compared against ``os.path.join(root, name)`` exactly
            as produced by ``os.walk(path)``, so they must be spelled with
            the same prefix as ``path``.
    """
    # A lone string would otherwise be treated as a container of characters
    # and matched by substring, so wrap it.
    if isinstance(excluded, (str, bytes, os.PathLike)):
        excluded = (excluded,)
    skip = {os.fspath(e) for e in excluded}

    # traversing bottom-up makes it easier to freeze perms on directories
    for root, dirs, files in os.walk(path, topdown=False):
        for name in files + dirs:
            # os.walk roots already start with ``path``; do not join it again.
            p = os.path.join(root, name)
            if p in skip:
                continue
            remove_write_perms(p)
|
||||
|
||||
|
||||
@dataclass
class FSEntry:
    """A hashed file or directory."""

    name: str  # with parent directory stripped; '.' for a root built with the default relpath
    relpath: str  # relative to some root directory
    # children for dirs only: non-recursive; files/dirs at this level only
    children: List['FSEntry']
    filetype: str  # 'REG', 'DIR', 'LNK', or a special type ('SOCK', 'CHR', 'BLK', 'FIFO', ...)
    perms: str  # e.g. '-rw-rw-r--'
    # sha256 of content for regular files, or of the target for links.
    # Pass None to have __post_init__ derive it from ``children`` using the
    # "{name}{hash}" format, children sorted by name regardless of type.
    # NOTE(review): from_path hashes directories as relpath followed by the
    # child hashes, which differs from the "{name}{hash}" format -- confirm
    # which scheme is intended before comparing the two.
    sha256: bytes

    def __post_init__(self):
        """Compute ``sha256`` from the children when it was passed as None.

        Raises:
            ValueError: If a child has no hash to fold into the digest.
        """
        if self.sha256 is not None:
            return
        m = hashlib.sha256()
        for child in sorted(self.children, key=operator.attrgetter('name')):
            if child.sha256 is None:
                raise ValueError(f'child {child.name!r} has no sha256')
            m.update(bytes(child.name, 'utf-8'))
            m.update(child.sha256)
        self.sha256 = m.digest()

    @classmethod
    def from_path(cls, root, relpath='.', filestat=None):
        """Scan a path to instantiate (recursive).

        Args:
            root: Directory that ``relpath`` is relative to.
            relpath: Path of the entry to scan, relative to ``root``.
            filestat: Optional ``os.stat_result`` for the entry. It should be
                obtained without following symlinks; when omitted the entry
                is lstat-ed here.

        Returns:
            An ``FSEntry`` for the path, with ``children`` populated
            recursively when the path is a directory.
        """
        m = hashlib.sha256()
        path = os.path.join(root, relpath)

        # lstat, not stat: a followed link would never report S_ISLNK.
        if filestat is None:
            filestat = os.lstat(path)
        mode = filestat.st_mode

        children = []
        if stat.S_ISLNK(mode):
            # Check links first, since it is not exclusive with dir or file checks
            filetype = 'LNK'
            m.update(bytes(os.readlink(path), 'utf-8'))
        elif stat.S_ISDIR(mode):
            filetype = 'DIR'

            # this prevents a directory's hash from colliding with a file hash
            # in cases where it only holds a single file
            m.update(bytes(relpath, 'utf-8'))

            # os.scandir yields a DirEntry for each child (excluding "." and
            # ".."); each carries a stat we can reuse to avoid querying the
            # filesystem twice.
            with os.scandir(path) as it:
                direntries = sorted(it, key=operator.attrgetter('name'))

            children = [
                cls.from_path(
                    root=root,
                    relpath=os.path.join(relpath, e.name),
                    # do not follow links: a link must be recorded as a link,
                    # and a dangling link must not abort the scan
                    filestat=e.stat(follow_symlinks=False),
                )
                for e in direntries
            ]
            for c in children:  # now hash concatenated sorted hashes
                m.update(c.sha256)
        elif stat.S_ISREG(mode):
            filetype = 'REG'
            # stream in chunks so large files are not held in memory and the
            # handle is always closed
            with open(path, 'rb') as f:
                for chunk in iter(lambda: f.read(1 << 20), b''):
                    m.update(chunk)
        else:
            # special files: only the type is recorded, content is not hashed
            special = (
                (stat.S_ISSOCK, 'SOCK'),
                (stat.S_ISCHR, 'CHR'),
                (stat.S_ISBLK, 'BLK'),
                (stat.S_ISFIFO, 'FIFO'),
                (stat.S_ISDOOR, 'DOOR'),
                (stat.S_ISPORT, 'PORT'),
                (stat.S_ISWHT, 'WHT'),
            )
            filetype = next(
                (label for is_type, label in special if is_type(mode)),
                'OTHER',
            )

        return cls(
            name=os.path.basename(relpath),
            relpath=relpath,
            children=children,
            filetype=filetype,
            perms=stat.filemode(mode),
            sha256=m.digest(),
        )

    @classmethod
    def empty_root(cls):
        """Just a standardized value indicating an empty root directory"""
        return cls(
            name='.',
            relpath='.',
            children=[],
            filetype='DIR',
            perms='----------',
            sha256=hashlib.sha256().digest(),
        )

    @classmethod
    def from_db_index(cls, cursor, root_id=None, root_row=None, relpath='.'):
        """Given id of an entry in store_file, recursively fill this object.

        Args:
            cursor: Database cursor on a connection holding the
                ``store_file`` table; must accept ``?`` placeholders.
            root_id: ``store_file.id`` of the entry to load. Used only when
                ``root_row`` is not given.
            root_row: An already-fetched row with the columns
                (id, filename, filetype, unfrozen_perms, frozen, sha256,
                symlink_target).
            relpath: Relative path recorded on the returned entry; children
                get ``os.path.join(relpath, filename)``.

        Returns:
            An ``FSEntry`` whose ``children`` are loaded recursively, ordered
            by filename.

        Raises:
            LookupError: If no row with id ``root_id`` exists.
        """
        # A bare column list; wrapping it in parentheses would make SQLite
        # treat it as a single row value.
        fields = 'id, filename, filetype, unfrozen_perms, frozen, sha256, symlink_target'
        if root_row is None:
            cursor.execute(
                f'SELECT {fields} FROM store_file WHERE id=?;',
                (root_id,),
            )
            root_row = cursor.fetchone()
            if root_row is None:
                raise LookupError(f'No store_file entry with id {root_id!r}')
        root_id, name, filetype, unfrozen_perms, _frozen, sha256, _symlink_target = root_row

        # get children
        cursor.execute(
            f'SELECT {fields} FROM store_file WHERE parent=? ORDER BY filename;',
            (root_id,),
        )
        # fetch everything before recursing: the recursion reuses this cursor
        rows = cursor.fetchall()
        return cls(
            name=name,
            relpath=relpath,
            children=[
                cls.from_db_index(
                    cursor,
                    root_row=r,
                    relpath=os.path.join(relpath, r[1]),
                )
                for r in rows
            ],
            filetype=filetype,
            perms=unfrozen_perms,
            sha256=bytes.fromhex(sha256),
        )
|
||||
|
||||
|
||||
class FSDiff:
    """Holds two hashed directory trees whose difference is to be computed."""

    def __init__(self, A, B):
        """Given two hashed directories, record them for comparison.

        The comparison is meant to assume the hashes are consistent, so that
        directories with matching hashes need not be inspected deeply.

        Args:
            A: First hashed directory (the "self" side of the comparison).
            B: Second hashed directory (the "other" side).

        NOTE(review): the deleted / modified / new overlays described by the
        original design are not computed here yet; only the two inputs are
        stored, as ``self.A`` and ``self.B``.
        """
        self.A = A
        self.B = B
|
||||
50
src/nancy/machine.py
Normal file
50
src/nancy/machine.py
Normal file
@ -0,0 +1,50 @@
|
||||
from collections import namedtuple
|
||||
import json
|
||||
import platform
|
||||
import time
|
||||
|
||||
# Immutable record describing the host a program runs on. Field order is
# significant wherever the tuple is consumed positionally.
MachineInfo = namedtuple(
    'MachineInfo',
    'machine_id hostname processor system release cpu_type timezone '
    'freedesktop_os_release win32_ver mac_ver',
)
|
||||
def get_machine_info():
    """Collect descriptive information about the current host.

    Returns:
        A ``MachineInfo`` tuple. ``machine_id`` is the stripped content of
        ``/etc/machine-id`` on Linux and ``None`` elsewhere or when the file
        cannot be read. ``freedesktop_os_release``, ``win32_ver`` and
        ``mac_ver`` are JSON strings; ``freedesktop_os_release`` is ``''``
        when that information is unavailable.
    """
    try:
        fdor = json.dumps(platform.freedesktop_os_release())
    except (AttributeError, OSError):
        # AttributeError: freedesktop_os_release only available for python >= 3.10
        # OSError: no os-release file can be read on this host
        fdor = ''

    system = platform.system()

    mid = None
    if system == 'Linux':
        try:
            with open('/etc/machine-id', 'r') as f:
                # the file ends with a newline that is not part of the id
                mid = f.read().strip()
        except OSError:
            # best effort: a missing or unreadable file leaves the id unset
            pass

    # time.daylight only says whether a DST zone is *defined*; tm_isdst says
    # whether DST is in effect now (it may be -1 when unknown).
    dst_active = time.localtime().tm_isdst > 0

    return MachineInfo(
        machine_id=mid,
        hostname=platform.node(),
        processor=platform.processor(),
        system=system,
        release=platform.release(),
        cpu_type=platform.machine(),
        timezone=time.tzname[1 if dst_active else 0],
        freedesktop_os_release=fdor,
        win32_ver=json.dumps(platform.win32_ver()),
        mac_ver=json.dumps(platform.mac_ver()),
    )
|
||||
|
||||
|
||||
@ -13,11 +13,16 @@ PRAGMA foreign_keys=ON;
|
||||
-- come into play. Note that these are not reliable identifiers for purposes of
|
||||
-- tracking down data, but may be helpful context.
|
||||
CREATE TABLE machine(id INTEGER PRIMARY KEY NOT NULL,
|
||||
machine_id TEXT, -- platform-dependent unique hardware id
|
||||
-- Linux: open('/etc/machine-id', 'r').read() (assumes systemd)
|
||||
-- OSX: `ioreg -rd1 -c IOPlatformExpertDevice | grep IOPlatformUUID`
|
||||
-- Windows: `reg query HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Cryptography /v MachineGuid`
|
||||
|
||||
hostname TEXT, -- platform.node(): 'lucky'
|
||||
processor TEXT, -- platform.processor():
|
||||
system TEXT, -- platform.system(): 'Linux'
|
||||
release TEXT, -- platform.release(): '5.15.64'
|
||||
machine TEXT, -- platform.machine(): 'x86_64'
|
||||
cpu_type TEXT, -- platform.machine(): 'x86_64'
|
||||
timezone TEXT, -- timezone, for interpreting event times
|
||||
freedesktop_os_release TEXT, -- requires python 3.10
|
||||
-- platform.freedesktop_os_release() as JSON
|
||||
@ -34,9 +39,10 @@ CREATE TABLE machine(id INTEGER PRIMARY KEY NOT NULL,
|
||||
);
|
||||
-- Programs are run by users on machines
|
||||
CREATE TABLE user(id INTEGER PRIMARY KEY NOT NULL,
|
||||
username TEXT NOT NULL,
|
||||
userid INTEGER,
|
||||
fullname TEXT,
|
||||
username TEXT NOT NULL, -- getpass.getuser()
|
||||
userid INTEGER, -- os.getuid()
|
||||
fullname TEXT, -- on Linux/OSX: pwd.getpwuid(os.getuid()).pw_gecos
|
||||
-- on Windows: see https://stackoverflow.com/questions/21766954/how-to-get-windows-users-full-name-in-python
|
||||
machine INTEGER NOT NULL,
|
||||
|
||||
UNIQUE(userid, machine),
|
||||
@ -44,7 +50,7 @@ CREATE TABLE user(id INTEGER PRIMARY KEY NOT NULL,
|
||||
);
|
||||
|
||||
|
||||
-- Stores, directories, and files
|
||||
-- Stores and files (and directories)
|
||||
-- These are the primary objects tracked by nancy.
|
||||
-- A store is a directory containing a file called nancy.db (e.g. the dir holding this database)
|
||||
CREATE TABLE store (id INTEGER PRIMARY KEY NOT NULL,
|
||||
@ -55,50 +61,54 @@ CREATE TABLE store (id INTEGER PRIMARY KEY NOT NULL,
|
||||
|
||||
FOREIGN KEY (machine) REFERENCES machine (id) ON UPDATE CASCADE
|
||||
);
|
||||
-- Within the store are directories that contain files.
|
||||
-- Note that there should be one entry with relpath='.' for the non-imported store.
|
||||
CREATE TABLE store_directory (id INTEGER PRIMARY KEY NOT NULL,
|
||||
name TEXT,
|
||||
store INTEGER NOT NULL,
|
||||
parent INTEGER, -- parent directory (should be) in same store
|
||||
frozen BOOL NOT NULL,
|
||||
|
||||
UNIQUE(store, name, parent),
|
||||
FOREIGN KEY (store) REFERENCES store (id) ON UPDATE CASCADE,
|
||||
-- parent is a recursive key within this table.
|
||||
FOREIGN KEY (parent) REFERENCES store_directory (id) ON UPDATE CASCADE
|
||||
);
|
||||
CREATE TRIGGER insert_store_directory BEFORE INSERT ON store_directory
|
||||
BEGIN
|
||||
SELECT
|
||||
CASE
|
||||
WHEN NEW.parent IS NOT NULL AND NEW.store != (SELECT store FROM store_directory WHERE id = NEW.parent)
|
||||
THEN RAISE (ABORT, 'Parent directory resides in different store')
|
||||
END;
|
||||
END;
|
||||
CREATE TRIGGER update_store_directory BEFORE UPDATE ON store_directory
|
||||
BEGIN
|
||||
SELECT
|
||||
CASE
|
||||
WHEN NEW.parent IS NOT NULL AND NEW.store != (SELECT store FROM store_directory WHERE id = NEW.parent)
|
||||
THEN RAISE (ABORT, 'Parent directory resides in different store')
|
||||
END;
|
||||
END;
|
||||
|
||||
-- The store_file table holds all files AND DIRECTORIES that are tracked by the
|
||||
-- store. Files and directories are distinguished by the filetype column. This
|
||||
-- table also holds tracked files and directories that have been imported and
|
||||
-- live outside the current store.
|
||||
CREATE TABLE store_file (id INTEGER PRIMARY KEY NOT NULL,
|
||||
directory INTEGER NOT NULL, -- reference a directory inside a store
|
||||
store INTEGER NOT NULL,
|
||||
filename TEXT, -- only a filename, not a path
|
||||
frozen BOOL NOT NULL,
|
||||
unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
|
||||
parent INTEGER REFERENCES store_file ON UPDATE CASCADE,
|
||||
|
||||
frozen BOOL NOT NULL,
|
||||
filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See fs.FSEntry.from_path for details
|
||||
unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
|
||||
symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target
|
||||
|
||||
sha256 TEXT NOT NULL,
|
||||
|
||||
source_program INTEGER, -- Note that this is redundant since datum points to a program...
|
||||
source_datum INTEGER,
|
||||
|
||||
UNIQUE(filename, directory),
|
||||
FOREIGN KEY (directory) REFERENCES store_directory (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (source_datum) REFERENCES datum (id) ON UPDATE CASCADE
|
||||
);
|
||||
UNIQUE(store, filename, parent),
|
||||
|
||||
-- A computational environment which can execute "processes". Note that the
|
||||
FOREIGN KEY (store) REFERENCES store (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (source_datum) REFERENCES datum (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (source_program) REFERENCES program (id) ON UPDATE CASCADE
|
||||
);
|
||||
CREATE TRIGGER insert_store_file BEFORE INSERT ON store_file
|
||||
BEGIN
|
||||
SELECT
|
||||
CASE
|
||||
--WHEN NEW.parent IS NOT NULL AND (SELECT filetype FROM store_file WHERE id = NEW.parent) != 'DIR'
|
||||
--THEN RAISE (ABORT, 'Parent is not listed as a directory')
|
||||
WHEN NEW.parent IS NOT NULL AND NEW.store != (SELECT store FROM store_file WHERE id = NEW.parent)
|
||||
THEN RAISE (ABORT, 'Parent directory resides in different store')
|
||||
END;
|
||||
END;
|
||||
CREATE TRIGGER update_store_file BEFORE UPDATE ON store_file
|
||||
BEGIN
|
||||
SELECT
|
||||
CASE
|
||||
--WHEN NEW.parent IS NOT NULL AND (SELECT filetype FROM store_file WHERE id = NEW.parent) != 'DIR'
|
||||
--THEN RAISE (ABORT, 'Parent is not listed as a directory')
|
||||
WHEN NEW.parent IS NOT NULL AND NEW.store != (SELECT store FROM store_file WHERE id = NEW.parent)
|
||||
THEN RAISE (ABORT, 'Parent directory resides in different store')
|
||||
END;
|
||||
END;
|
||||
|
||||
-- A computational environment which can execute "programs". Note that the
|
||||
-- python executable being used, and environment variables are recorded here.
|
||||
-- Other info is available in the parent "machine" table.
|
||||
CREATE TABLE environment (id INTEGER PRIMARY KEY NOT NULL,
|
||||
@ -111,30 +121,36 @@ CREATE TABLE environment (id INTEGER PRIMARY KEY NOT NULL,
|
||||
FOREIGN KEY (user) REFERENCES user (id) ON UPDATE CASCADE
|
||||
);
|
||||
|
||||
|
||||
-- A "program" is an execution of a single graph, starting from a number of roots
|
||||
-- It corresponds to a single host parent process. Note that if multiple calls to
|
||||
-- nancy.run() are provided within a single python script, multiple "programs"
|
||||
-- may be created. Also note that historical programs are automatically
|
||||
-- imported and merged when possible when loading a "datum" from disk.
|
||||
-- Note that if multiple calls to nancy.run() are provided within a single
|
||||
-- python script, multiple "programs" may be created. Also note that historical
|
||||
-- programs are automatically imported and merged when possible when loading a
|
||||
-- "datum" from disk.
|
||||
CREATE TABLE program (id INTEGER PRIMARY KEY NOT NULL,
|
||||
name TEXT, -- name of the program, usually written in code e.g. cnn_crossval
|
||||
name TEXT, -- name of the program, usually written lowercase by calling code e.g. cnn_crossval
|
||||
-- Names of built-in operations will be shown in upper case: e.g. 'FREEZE'
|
||||
|
||||
-- we use POSIX timestamps in UTC for time recording.
|
||||
-- e.g. datetime.datetime.now().timestamp()
|
||||
start_time REAL,
|
||||
end_time REAL,
|
||||
|
||||
process_id INTEGER, -- host PID of python process on host OS
|
||||
environment INTEGER NOT NULL,
|
||||
message TEXT, -- user-defined message to help distinguish similar runs
|
||||
FOREIGN KEY (environment) REFERENCES environment (id) ON UPDATE CASCADE
|
||||
);
|
||||
|
||||
|
||||
-- We try to track all python packages that impact execution by traversing a
|
||||
-- copy of sys.modules. This is done once before a "process" and once after in
|
||||
-- copy of sys.modules. This is done once before a "program" and once after in
|
||||
-- case some calling code winds up calling a previously-unloaded module.
|
||||
CREATE TABLE py_package (id INTEGER PRIMARY KEY NOT NULL,
|
||||
name TEXT NOT NULL,
|
||||
version TEXT,
|
||||
process INTEGER NOT NULL,
|
||||
FOREIGN KEY (process) REFERENCES process (id) ON UPDATE CASCADE
|
||||
program INTEGER NOT NULL,
|
||||
FOREIGN KEY (program) REFERENCES program (id) ON UPDATE CASCADE
|
||||
);
|
||||
-- A py_module describes any python module file containing decorated Functions.
|
||||
-- Modules are tracked since they impact the global scope of function calls.
|
||||
@ -183,20 +199,20 @@ CREATE TABLE func_output(id INTEGER PRIMARY KEY NOT NULL,
|
||||
|
||||
-- Tasks are executed calls to Functions: they correspond to a particular set of
|
||||
-- inputs which are themselves data (see datum table). A task is executed in
|
||||
-- the context of a "process". Within a process, tasks are typically evaluated
|
||||
-- the context of a "program". Within a program, tasks are typically evaluated
|
||||
-- in a serial manner.
|
||||
CREATE TABLE task(id INTEGER PRIMARY KEY NOT NULL,
|
||||
func INTEGER NOT NULL,
|
||||
process INTEGER NOT NULL,
|
||||
program INTEGER NOT NULL,
|
||||
FOREIGN KEY (func) REFERENCES func (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (process) REFERENCES process (id) ON UPDATE CASCADE
|
||||
FOREIGN KEY (program) REFERENCES program (id) ON UPDATE CASCADE
|
||||
);
|
||||
-- A datum is an object that is computed as the output of a task, given as a
|
||||
-- literal value in a config file, or loaded from a file.
|
||||
CREATE TABLE datum(id INTEGER PRIMARY KEY NOT NULL,
|
||||
-- provider_type describes where the datum came from. Choices are:
|
||||
-- - COMPUTED: output of a decorated Function
|
||||
-- - IMPORTED: imported output from a prior process
|
||||
-- - IMPORTED: imported output from a prior program
|
||||
-- - LITERAL: literal value provided in a config file
|
||||
-- Note that literals are implicitly imported (hash computed) at runtime
|
||||
provider_type TEXT,
|
||||
@ -231,8 +247,9 @@ CREATE TABLE task_input(id INTEGER PRIMARY KEY NOT NULL,
|
||||
func_input INTEGER NOT NULL,
|
||||
|
||||
datum INTEGER NOT NULL,
|
||||
-- Data have versions to facilitate tacking non-const operations. If a datum
|
||||
-- is passed to a non-const operation, a new datum is provided
|
||||
-- Data have versions to facilitate tracking non-const operations. If a datum
|
||||
-- is passed to a non-const operation, it must increment its internal
|
||||
-- version
|
||||
datum_version INTEGER NOT NULL,
|
||||
|
||||
FOREIGN KEY (task) REFERENCES task (id) ON UPDATE CASCADE,
|
||||
|
||||
@ -1,26 +1,14 @@
|
||||
"""Utilities for creating new stores and linking between them."""
|
||||
|
||||
from . import db
|
||||
from . import db, fs, machine
|
||||
|
||||
import importlib
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
import sqlite3
|
||||
import stat
|
||||
|
||||
|
||||
def remove_write_perms(path):
|
||||
"""Remove write permissions for all users while preserving other perms"""
|
||||
s = os.stat(path)
|
||||
orig_perm_string = stat.filemode(s)
|
||||
os.chmod(
|
||||
path,
|
||||
s.st_mode ^ stat.S_IWUSR ^ stat.S_IWGRP ^ stat.S_IWOTH,
|
||||
follow_symlinks=False,
|
||||
)
|
||||
return orig_perm_string
|
||||
|
||||
|
||||
class Store:
|
||||
"""Describes a data directory, holds active connection to nancy.db"""
|
||||
@ -82,63 +70,26 @@ class Store:
|
||||
|
||||
def record_machine_description(self):
|
||||
"""Record machine-specific information"""
|
||||
import platform, time
|
||||
cur = self.conn.cursor()
|
||||
fdor = ''
|
||||
try:
|
||||
fdor = json.dumps(platform.freedesktop_os_release())
|
||||
except AttributeError:
|
||||
# freedesktop_os_release only available for python >= 3.10
|
||||
fdor = ''
|
||||
machine_info = (
|
||||
platform.node(),
|
||||
platform.processor(),
|
||||
platform.system(),
|
||||
platform.release(),
|
||||
platform.machine(),
|
||||
time.tzname[time.daylight],
|
||||
fdor,
|
||||
json.dumps(platform.win32_ver()),
|
||||
json.dumps(platform.mac_ver()),
|
||||
)
|
||||
cur.execute('INSERT INTO machine VALUES(NULL,?,?,?,?,?,?,?,?,?)', machine_info)
|
||||
machine_info = machine.get_machine_info()
|
||||
cur.execute('INSERT INTO machine VALUES(NULL,?,?,?,?,?,?,?,?,?,?)', machine_info)
|
||||
self.machine_id = cur.lastrowid
|
||||
self.conn.commit()
|
||||
|
||||
def make_readonly(self):
|
||||
"""Make store directory read-only (except for nancy.db) and return file list"""
|
||||
dirpaths = {}
|
||||
regfilepaths = {}
|
||||
symlinkpaths = {}
|
||||
irregfilepaths = {} # any file that's not regular or symlink. e.g. inodes or sockets
|
||||
fs.make_readonly_recursive(self.path, excluded='./nancy.db')
|
||||
|
||||
# traversing bottom-up makes it easier to freeze perms on directories
|
||||
for root, dirs, files in os.walk(self.path, topdown=False):
|
||||
for f in files:
|
||||
p = os.path.join(root, f)
|
||||
if p == './nancy.db':
|
||||
continue
|
||||
|
||||
orig_perms = remove_write_perms(os.path.join(self.path, p))
|
||||
|
||||
if not os.path.isfile(p):
|
||||
# not a link or regular file
|
||||
irregfilepaths[p] = orig_perms
|
||||
|
||||
if os.path.islink(p):
|
||||
symlinkpaths[p] = orig_perms
|
||||
def fs_entries(self, shallow=False):
|
||||
"""Return recursive structure containing FSEntry objects from db"""
|
||||
# get the database id for the table entry in this store having name '.'
|
||||
cur = self.conn.cursor()
|
||||
cur.execute('SELECT id FROM store_file WHERE store=0 AND parent is NULL;')
|
||||
root_id = cur.fetchone()
|
||||
if root_id is None:
|
||||
return fs.FSEntry.empty_root()
|
||||
else:
|
||||
regfilepaths[p] = orig_perms
|
||||
|
||||
for d in dirs:
|
||||
p = os.path.join(root, d)
|
||||
|
||||
orig_perms = remove_write_perms(os.path.join(self.path, p))
|
||||
|
||||
dirpaths[p] = orig_perms
|
||||
return dirpaths, regfilepaths, symlinkpaths, irregfilepaths
|
||||
|
||||
def file_hashes(self, local_only=True):
|
||||
"""Get recorded hashes of all files currently tracked in this store"""
|
||||
return fs.FSEntry.from_db_index(cur, root_id=root_id)
|
||||
|
||||
|
||||
def freeze(self):
|
||||
@ -152,7 +103,7 @@ class Store:
|
||||
", ".join(irreg),
|
||||
)
|
||||
for p, perms in symlinks.items():
|
||||
target = os.path.readlink(p) # gives possibly relative path to tgt
|
||||
target = os.readlink(p) # gives possibly relative path to target
|
||||
resolved = os.path.realpath(p)
|
||||
if not resolved.startswith(os.path.abspath(self.path)):
|
||||
raise Exception(
|
||||
@ -160,10 +111,24 @@ class Store:
|
||||
f"{target} which is outside store path ({self.path})",
|
||||
)
|
||||
|
||||
# compute checksums on all files
|
||||
# get hashes of current directory (recursive)
|
||||
current = fs.FSEntry.from_path(self.path)
|
||||
|
||||
# extract hashes for all previously-cataloged files/dirs
|
||||
recorded = self.fs_entries(shallow=True)
|
||||
|
||||
d = current.diff(recorded)
|
||||
|
||||
# select all dirs
|
||||
|
||||
# detect new and deleted files, and those that have changed type
|
||||
|
||||
# create entries for all directories if they do not yet exist (top
|
||||
# down)
|
||||
|
||||
# insert files and symlinks into store_file, computing checksums on
|
||||
# each
|
||||
|
||||
# update versions in nancy.db as appropriate
|
||||
|
||||
# remove write permissions on nancy.db
|
||||
|
||||
174
tests/test_db.py
174
tests/test_db.py
@ -8,7 +8,7 @@ import sqlite3
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def db():
|
||||
def temp_db():
|
||||
"""Create an in-memory database that follow's the nancy schema"""
|
||||
conn = sqlite3.connect(':memory:')
|
||||
cur = conn.cursor()
|
||||
@ -23,13 +23,15 @@ def db():
|
||||
conn.close()
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def insert_machine(db):
|
||||
db.executemany(
|
||||
@pytest.fixture
|
||||
def insert_machine(temp_db):
|
||||
cur = temp_db
|
||||
cur.executemany(
|
||||
'INSERT INTO machine VALUES '
|
||||
'(?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
|
||||
'(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
|
||||
[(
|
||||
None,
|
||||
None, #id INTEGER PRIMARY KEY NOT NULL,
|
||||
'a5d97c08a15c4db69f5fded523a1bfe3', #machine_id TEXT, -- platform-dependent unique hardware id
|
||||
'lucky', #hostname TEXT, -- platform.node(): 'lucky'
|
||||
'', #processor TEXT, -- platform.processor():
|
||||
'Linux', #system TEXT, -- platform.system(): 'Linux'
|
||||
@ -40,7 +42,8 @@ def insert_machine(db):
|
||||
'', #win32_ver TEXT, -- platform.win32_ver() as JSON
|
||||
'', #mac_ver TEXT -- platform.mac_ver() as JSON
|
||||
), (
|
||||
None,
|
||||
None, #id INTEGER PRIMARY KEY NOT NULL,
|
||||
'afc9b06a23b74341b29d42b8312a4f8a',
|
||||
'a100', #hostname TEXT, -- platform.node(): 'lucky'
|
||||
'', #processor TEXT, -- platform.processor():
|
||||
'Linux', #system TEXT, -- platform.system(): 'Linux'
|
||||
@ -52,48 +55,55 @@ def insert_machine(db):
|
||||
'', #mac_ver TEXT -- platform.mac_ver() as JSON
|
||||
)],
|
||||
)
|
||||
def test_insert_machine(db):
|
||||
db.execute('SELECT * FROM machine')
|
||||
machines = db.fetchall()
|
||||
return cur
|
||||
|
||||
def test_insert_machine(insert_machine):
|
||||
cur = insert_machine
|
||||
cur.execute('SELECT * FROM machine')
|
||||
machines = cur.fetchall()
|
||||
assert len(machines) == 2
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def insert_user(db):
|
||||
db.executemany(
|
||||
@pytest.fixture
|
||||
def insert_user(insert_machine):
|
||||
cur = insert_machine
|
||||
cur.executemany(
|
||||
'INSERT INTO user VALUES '
|
||||
'(?, ?, ?, ?, ?)',
|
||||
[(
|
||||
None,
|
||||
None, #id INTEGER PRIMARY KEY NOT NULL,
|
||||
'jacob', #username TEXT NOT NULL,
|
||||
101, #userid INTEGER,
|
||||
'Jacob Hinkle', #fullname TEXT,
|
||||
1, #machine INTEGER NOT NULL,
|
||||
), (
|
||||
None,
|
||||
None, #id INTEGER PRIMARY KEY NOT NULL,
|
||||
'jacob', #username TEXT NOT NULL,
|
||||
10301, #userid INTEGER,
|
||||
'Jacob Hinkle', #fullname TEXT,
|
||||
2, #machine INTEGER NOT NULL,
|
||||
), (
|
||||
None,
|
||||
None, #id INTEGER PRIMARY KEY NOT NULL,
|
||||
'bob', #username TEXT NOT NULL,
|
||||
2035, #userid INTEGER,
|
||||
'Just Bob', #fullname TEXT,
|
||||
2, #machine INTEGER NOT NULL,
|
||||
)],
|
||||
)
|
||||
def test_insert_user(db):
|
||||
db.execute('SELECT * FROM user')
|
||||
users = db.fetchall()
|
||||
return cur
|
||||
def test_insert_user(insert_user):
|
||||
cur = insert_user
|
||||
cur.execute('SELECT * FROM user')
|
||||
users = cur.fetchall()
|
||||
assert len(users) == 3
|
||||
def test_invalid_user_machine(db):
|
||||
def test_invalid_user_machine(insert_user):
|
||||
cur = insert_user
|
||||
with pytest.raises(sqlite3.IntegrityError):
|
||||
# should fail foreign key constraint
|
||||
db.execute(
|
||||
cur.execute(
|
||||
'INSERT INTO user VALUES '
|
||||
'(?, ?, ?, ?, ?)',
|
||||
(
|
||||
None,
|
||||
None, #id INTEGER PRIMARY KEY NOT NULL,
|
||||
'bozo', #username TEXT NOT NULL,
|
||||
100, #userid INTEGER,
|
||||
'Bozo the Clown', #fullname TEXT,
|
||||
@ -102,11 +112,11 @@ def test_invalid_user_machine(db):
|
||||
)
|
||||
with pytest.raises(sqlite3.IntegrityError):
|
||||
# should fail uniqueness constraint
|
||||
db.execute(
|
||||
cur.execute(
|
||||
'INSERT INTO user VALUES '
|
||||
'(?, ?, ?, ?, ?)',
|
||||
(
|
||||
None,
|
||||
None, #id INTEGER PRIMARY KEY NOT NULL,
|
||||
'jacob', #username TEXT NOT NULL,
|
||||
101, #userid INTEGER,
|
||||
'Bozo the Clown', #fullname TEXT,
|
||||
@ -115,81 +125,118 @@ def test_invalid_user_machine(db):
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def insert_store(db):
|
||||
db.executemany(
|
||||
@pytest.fixture
|
||||
def insert_store(insert_machine):
|
||||
cur = insert_machine
|
||||
cur.executemany(
|
||||
'INSERT INTO store VALUES '
|
||||
'(?, ?, ?, ?)',
|
||||
[(
|
||||
None,
|
||||
None, #id INTEGER PRIMARY KEY NOT NULL,
|
||||
1, #machine INTEGER,
|
||||
'/path/to/first/store', #dbpath TEXT NOT NULL,
|
||||
False, #imported BOOL,
|
||||
), (
|
||||
None,
|
||||
None, #id INTEGER PRIMARY KEY NOT NULL,
|
||||
1, #machine INTEGER,
|
||||
'/path/to/dependencys/store', #dbpath TEXT NOT NULL,
|
||||
'/path/to/dependency/store', #dbpath TEXT NOT NULL,
|
||||
True, #imported BOOL,
|
||||
), (
|
||||
None,
|
||||
None, #id INTEGER PRIMARY KEY NOT NULL,
|
||||
2, #machine INTEGER,
|
||||
# same path but on a separate machine
|
||||
'/path/to/first/store', #dbpath TEXT NOT NULL,
|
||||
True, #imported BOOL,
|
||||
)],
|
||||
)
|
||||
@pytest.fixture(autouse=True)
|
||||
def insert_store_directory(db):
|
||||
db.executemany(
|
||||
'INSERT INTO store_directory VALUES '
|
||||
'(?, ?, ?, ?, ?)',
|
||||
return cur
|
||||
|
||||
@pytest.fixture
|
||||
def insert_directories(insert_store):
|
||||
cur = insert_store
|
||||
cur.executemany(
|
||||
'INSERT INTO store_file VALUES '
|
||||
'(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
|
||||
[(
|
||||
None,
|
||||
'.', #name TEXT,
|
||||
None, #id INTEGER PRIMARY KEY NOT NULL,
|
||||
1, #store INTEGER NOT NULL,
|
||||
None, #parent INTEGER, -- parent directory (should be) in same store
|
||||
'.', #filename TEXT, -- only a filename, not a path
|
||||
None, #parent INTEGER REFERENCES store_file ON UPDATE CASCADE,
|
||||
False, #frozen BOOL NOT NULL,
|
||||
'DIR', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
|
||||
'dr-xr-xr--', #unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
|
||||
None, #symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target
|
||||
'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855', #sha256 TEXT NOT NULL,
|
||||
None, #source_program INTEGER, -- Note that this is redundant since datum points to a program...
|
||||
None, #source_datum INTEGER,
|
||||
), (
|
||||
None,
|
||||
'foo', #name TEXT,
|
||||
None, #id INTEGER PRIMARY KEY NOT NULL,
|
||||
1, #store INTEGER NOT NULL,
|
||||
1, #parent INTEGER, -- parent directory (should be) in same store
|
||||
'foo', #filename TEXT, -- only a filename, not a path
|
||||
1, #parent INTEGER REFERENCES store_file ON UPDATE CASCADE,
|
||||
False, #frozen BOOL NOT NULL,
|
||||
'DIR', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
|
||||
'dr-xr-xr--', #unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
|
||||
None, #symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target
|
||||
'5ad4e9e3090de8de781e4a35ce6ba16ad8eaba7a5456cd230a36f77143885396', #sha256 TEXT NOT NULL,
|
||||
None, #source_program INTEGER, -- Note that this is redundant since datum points to a program...
|
||||
None, #source_datum INTEGER,
|
||||
), (
|
||||
None,
|
||||
'.', #name TEXT,
|
||||
None, #id INTEGER PRIMARY KEY NOT NULL,
|
||||
2, #store INTEGER NOT NULL,
|
||||
None, #parent INTEGER, -- parent directory (should be) in same store
|
||||
'.', #filename TEXT, -- only a filename, not a path
|
||||
None, #parent INTEGER REFERENCES store_file ON UPDATE CASCADE,
|
||||
False, #frozen BOOL NOT NULL,
|
||||
'DIR', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
|
||||
'dr-xr-xr--', #unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
|
||||
None, #symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target
|
||||
'15c5e8d80a48803c18e72cd274532d608b8026dcbc192afc490fe1c289ec6ff1', #sha256 TEXT NOT NULL,
|
||||
None, #source_program INTEGER, -- Note that this is redundant since datum points to a program...
|
||||
None, #source_datum INTEGER,
|
||||
)],
|
||||
)
|
||||
return cur
|
||||
|
||||
def test_crossstore_directory_insert(db):
|
||||
def test_crossstore_directory_insert(insert_directories):
|
||||
cur = insert_directories
|
||||
with pytest.raises(sqlite3.IntegrityError):
|
||||
# declaring directory as belonging to store 2, but parent's store is 1
|
||||
db.execute(
|
||||
'INSERT INTO store_directory VALUES '
|
||||
'(?, ?, ?, ?, ?)',
|
||||
(
|
||||
None,
|
||||
'.', #name TEXT,
|
||||
2, #store INTEGER NOT NULL,
|
||||
1, #parent INTEGER, -- parent directory (should be) in same store
|
||||
False, #frozen BOOL NOT NULL,
|
||||
))
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def insert_store_file(db):
|
||||
db.executemany(
|
||||
cur.execute(
|
||||
'INSERT INTO store_file VALUES '
|
||||
'(?, ?, ?, ?, ?, ?)',
|
||||
'(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
|
||||
(
|
||||
None, #id INTEGER PRIMARY KEY NOT NULL,
|
||||
2, #store INTEGER NOT NULL,
|
||||
'xstore_file', #filename TEXT, -- only a filename, not a path
|
||||
1, #parent INTEGER REFERENCES store_file ON UPDATE CASCADE,
|
||||
False, #frozen BOOL NOT NULL,
|
||||
'DIR', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
|
||||
'dr-xr-xr--', #unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
|
||||
None, #symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target
|
||||
'15c5e8d80a48803c18e72cd274532d608b8026dcbc192afc490fe1c289ec6ff1', #sha256 TEXT NOT NULL,
|
||||
None, #source_program INTEGER, -- Note that this is redundant since datum points to a program...
|
||||
None, #source_datum INTEGER,
|
||||
))
|
||||
for row in cur.connection.iterdump():
|
||||
print(row)
|
||||
cur.execute('SELECT * FROM store_file')
|
||||
print(cur.fetchall())
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def insert_files(insert_directories):
|
||||
cur = insert_directories
|
||||
cur.executemany(
|
||||
'INSERT INTO store_file VALUES '
|
||||
'(?, ?, ?, ?, ?, ?, ?, ?)',
|
||||
[(
|
||||
None,
|
||||
1, #directory INTEGER NOT NULL
|
||||
'example.csv', #filename TEXT
|
||||
1, #frozen BOOL NOT NULL
|
||||
'-rw-rw-r--', #unfrozen_perms TEXT
|
||||
'9aafde8f9dbec34c694b86333f746f58958c44247c474904e06d1f07f94292b4', #sha256 TEXT NOT NULL,
|
||||
None, #source_program INTEGER,
|
||||
None, #source_datum INTEGER,
|
||||
), (
|
||||
None,
|
||||
@ -197,7 +244,10 @@ def insert_store_file(db):
|
||||
'plots.png', #filename TEXT
|
||||
1, #frozen BOOL NOT NULL
|
||||
'-rw-r--r--', #unfrozen_perms TEXT
|
||||
'9add10cc3a6f0e4618dfed005ddfbeafdf268c58b773ba0021963c856d00235b', #sha256 TEXT NOT NULL,
|
||||
None, #source_program INTEGER,
|
||||
None, #source_datum INTEGER,
|
||||
)]
|
||||
)
|
||||
return cur
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user