Update schema putting more into filedir_version

This commit is contained in:
Jacob Hinkle 2022-09-27 11:26:05 -04:00
parent cb53db3bcd
commit 45c2046fd6
8 changed files with 419 additions and 203 deletions

View File

@ -1,7 +1,8 @@
import click import click
from loguru import logger from loguru import logger
from .. import db, fs, store from .. import db, fs
from ..store import find_store, Store
import os import os
import sys import sys
@ -32,7 +33,7 @@ def print_diff(ABdiff: fs.FSDiff, indent=2, indent_level=0, use_color=True,
hashcolor = Fore.MAGENTA if use_color else '' hashcolor = Fore.MAGENTA if use_color else ''
def _print_row(tag, entry, level): def _print_row(tag, entry, level):
relpath = entry.relpath[2:] relpath = entry.relpath
# Format relpath using filetype-based colors # Format relpath using filetype-based colors
dname, fname = os.path.split(relpath) dname, fname = os.path.split(relpath)
@ -68,19 +69,7 @@ def print_diff(ABdiff: fs.FSDiff, indent=2, indent_level=0, use_color=True,
_print_row('MOD', d.B, l) _print_row('MOD', d.B, l)
def diff(store, filedir_path, show_hashes=False, use_color=True):
"""Unwrapped diff command that prints a diff"""
if not os.path.exists(filedir_path):
raise FileNotFoundError(f"Cannot diff non-existent file or directory {filedir_path}")
# get the diff object
fsdiff = store.diff(filedir_path)
print_diff(fsdiff, show_hashes=show_hashes, use_color=use_color)
@click.command() @click.command()
@click.argument("path", default='.')
@click.option( @click.option(
'-H', "--show-hashes", '-H', "--show-hashes",
is_flag=True, is_flag=True,
@ -91,8 +80,16 @@ def diff(store, filedir_path, show_hashes=False, use_color=True):
is_flag=True, is_flag=True,
help='If given, do not print any color output.', help='If given, do not print any color output.',
) )
def diff_cli(path, show_hashes, no_color): @click.option(
"-s", "--store",
type=str,
default=None,
help='Top-level of store. If omitted, use closest common parent directory '
'of given paths. If given the path to a non-store directory, a new '
'store is initialized there.',
)
@logger.catch @logger.catch
def diff_cli(show_hashes, no_color, store):
"""Detect and describe changes to PATH """Detect and describe changes to PATH
PATH is a path to a file or directory inside an existing nancy store PATH is a path to a file or directory inside an existing nancy store
@ -101,22 +98,28 @@ def diff_cli(path, show_hashes, no_color):
However, the printed diff information pertains to the given value of However, the printed diff information pertains to the given value of
PATH. PATH.
""" """
storepath = store.find_store(path) if store is None:
cwd = os.getcwd()
storepath = find_store(cwd)
if storepath is None: if storepath is None:
print("Could not find nancy.db in any directory containing " logger.error(
+ str(os.path.realpath(path)), file=sys.stderr) "Could not find nancy.db in any directory containing {}",
sys.exit(1) os.path.realpath(cwd),
)
sys.exit(1)
print("Paths relative to store path:", storepath) print("Paths relative to store path:", store)
# connect to store # connect to store
s = store.Store(storepath) s = Store(store)
try: d = s.diff()
diff(store=s, filedir_path=path, show_hashes=show_hashes, use_color=not no_color)
except FileNotFoundError as e:
print(str(e), file=sys.stderr)
sys.exit(1)
logger.success("Computed diff") logger.success("Computed diff")
print_diff(
d,
show_hashes=show_hashes,
use_color=not no_color,
)

View File

@ -1,7 +1,7 @@
import click import click
from loguru import logger from loguru import logger
from .. import store from .. import fs, store
from .common import confirm from .common import confirm
from .diff import print_diff from .diff import print_diff
@ -9,21 +9,27 @@ from .diff import print_diff
import os import os
import sys import sys
def record(directory, message, show_diff=True, show_hashes=False, use_color=True,
@logger.catch @logger.catch
def record(message, store_path=None, show_diff=True, show_hashes=False, use_color=True,
skip_confirm=False): skip_confirm=False):
"""Unwrapped record command""" """Unwrapped record command"""
if not os.path.isdir(directory):
raise ValueError(f"Cannot record non-existent directory {directory}")
existing_store = store.find_store(directory) if store_path is None:
if existing_store is None: # this is a new store curdir = os.path.realpath(os.getcwd())
s = store.Store.init(directory) logger.info("Looking for store in", curdir)
store_path = store.find_store(curdir)
if store_path is None: # If no store found, assume we're creating here
store_path = curdir
if not os.path.exists(os.path.join(store_path, 'nancy.db')):
# this is a new store
logger.info(f"Initializing new store in {store_path}...")
s = store.Store.init(store_path)
else: # this is an existing store else: # this is an existing store
s = store.Store(directory) s = store.Store(store_path)
fsdiff = s.diff(directory) fsdiff = s.diff()
if show_diff: if show_diff:
print_diff(fsdiff, show_hashes=show_hashes, use_color=use_color) print_diff(fsdiff, show_hashes=show_hashes, use_color=use_color)
@ -38,7 +44,6 @@ def record(directory, message, show_diff=True, show_hashes=False, use_color=True
@click.command() @click.command()
@click.argument("directory", default='.')
@click.option( @click.option(
'-H', "--show-hashes", '-H', "--show-hashes",
is_flag=True, is_flag=True,
@ -54,15 +59,19 @@ def record(directory, message, show_diff=True, show_hashes=False, use_color=True
type=str, type=str,
required=True, required=True,
help='A user-defined descriptive message for this recording operation.', help='A user-defined descriptive message for this recording operation.',
def record_cli(directory, show_hashes, no_color, message): )
@click.option(
"-s", "--store",
type=str,
default=None,
help='Top-level of store. If omitted, use closest common parent directory '
'of given paths. If given the path to a non-store directory, a new '
'store is initialized there.',
)
def record_cli(show_hashes, no_color, message, store):
""" """
Initialize tracking or record changes to a tracked directory. Initialize tracking or record changes to a tracked directory.
If DIRECTORY is not already part of an existing nancy store, then a new
'nancy.db' file is created in that directory. On the other hand, if the
directory is part of an existing store, it will be updated and versions
of any files changes since the last recording will be incremented.
""" """
record(directory, message=message, show_hashes=show_hashes, use_color=not record(message=message, show_hashes=show_hashes, use_color=not
no_color) no_color, store_path=store)

View File

@ -1,10 +1,13 @@
"""Interaction with the filesystem and with file database entries""" """Interaction with the filesystem and with file database entries"""
from dataclasses import dataclass
from loguru import logger from loguru import logger
from dataclasses import asdict, dataclass
from datetime import datetime
import hashlib import hashlib
import operator import operator
import os import os
from pathlib import Path
import stat import stat
from typing import List from typing import List
import warnings import warnings
@ -64,15 +67,32 @@ def make_readonly_recursive(path, excluded=[]):
@dataclass @dataclass
class FSEntryVersion: class FSEntryVersion:
"""A hashed file or directory.""" """A version of a file or directory."""
id: int
filedir: 'FSEntry'
recorded_time: datetime #When was this version recorded?
filetype: str #One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
deleted: bool #set True when recording a deleted file
unfrozen_perms: str # stat.filemode(os.stat(path).st_mode): '-rw-rw-r--' unfrozen_perms: str # stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
symlink_target:str # if this is a symlink, this is the (read but not fully symlink_target:str # if this is a symlink, this is the (read but not fully
# resolved) target. I.e. this is the "content" of the symlink. # resolved) target. I.e. this is the "content" of the symlink.
sha256: str sha256: str
id: int = None # should not be null
version_counter: int = None # incremented whenever a new version of the file is recorded
source_task_id: int = None source_task_id: int = None
@classmethod
def from_row(cls, row, filedir=None):
    """Build a version object from one ``filedir_version`` row.

    Arguments:
        row: Sequence in the column order of the ``filedir_version`` table:
            id, filedir, recorded_time, filetype, deleted, unfrozen_perms,
            symlink_target, sha256, source_task.
        filedir: Object to store as the owning entry. When omitted, the raw
            ``filedir`` column value (``row[1]``) is kept instead.

    Returns:
        A new instance of ``cls`` built positionally from the row.
    """
    if filedir is None:
        filedir = row[1]
    recorded_time = row[2]
    sha256_hex = row[-2]
    return cls(
        row[0],
        filedir,
        # recorded_time is a nullable REAL column; keep NULL as None rather
        # than letting datetime.fromtimestamp(None) raise TypeError.
        None if recorded_time is None else datetime.fromtimestamp(recorded_time),
        # filetype, deleted, unfrozen_perms, symlink_target pass through as-is
        *row[3:-2],
        # The schema allows a NULL hash (deleted file or deferred hashing);
        # bytes.fromhex(None) would raise TypeError.
        None if sha256_hex is None else bytes.fromhex(sha256_hex),
        row[-1],
    )
@dataclass @dataclass
class FSEntry: class FSEntry:
@ -80,11 +100,12 @@ class FSEntry:
id: int # defaults to None id: int # defaults to None
filename: str # with parent directory stripped. None if this is the root filename: str # with parent directory stripped. None if this is the root
relpath: str # relative to some root directory relpath: str # relative to some root directory
parent: 'FSEntry' # upward link
# children for dirs only: non-recursive; files/dirs at this level only # children for dirs only: non-recursive; files/dirs at this level only
children: List['FSEntry'] children: List['FSEntry']
filetype: str # regular, symlink, special (block, char, pipe, or socket) filetype: str # regular, symlink, special (block, char, pipe, or socket)
deleted: bool deleted: bool
versions: [FSEntryVersion] = [] versions: List[FSEntryVersion] = None
# these will be filled from the version list automatically # these will be filled from the version list automatically
unfrozen_perms: str = None # stat.filemode(os.stat(path).st_mode): '-rw-rw-r--' unfrozen_perms: str = None # stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
@ -94,22 +115,32 @@ class FSEntry:
latest_version: FSEntryVersion = None latest_version: FSEntryVersion = None
def __post_init__(self): def __post_init__(self):
if len(self.versions) > 0: if self.versions is not None and len(self.versions) > 0:
self.latest_version = self.versions[-1] self.latest_version = self.versions[-1]
self.unfrozen_perms = self.latest_version.unfrozen_perms self.unfrozen_perms = self.latest_version.unfrozen_perms
self.symlink_target = self.latest_version.symlink_target self.symlink_target = self.latest_version.symlink_target
self.sha256 = self.latest_version.sha256 self.sha256 = self.latest_version.sha256
@classmethod @classmethod
def from_path(cls, root, relpath='.', filestat=None, exclude=['./nancy.db']): def from_path(cls, root, relpath=None, exclude=['nancy.db'], parent=None,
direntry=None):
""" """
Scan a path to instantiate (recursive). Scan a path to instantiate (recursive).
Arguments:
root (str or PathLike): The root directory of an existing or new store path
relpath (str or PathLike): Path of some directory under the store
path in which to find files and directories. Only these entries
and their children will be included.
""" """
m = hashlib.sha256() m = hashlib.sha256()
path = os.path.join(root, relpath) if relpath is None: # top-level invocation at root
path = root
else:
path = os.path.join(root, relpath)
filestat = os.stat(path) if filestat is None else filestat filestat = os.lstat(path) if direntry is None else direntry.stat(follow_symlinks=False)
s = filestat.st_mode s = filestat.st_mode
children = [] children = []
@ -124,7 +155,8 @@ class FSEntry:
# this prevents a directory's hash from colliding with a file hash # this prevents a directory's hash from colliding with a file hash
# in cases where it only holds a single file # in cases where it only holds a single file
m.update(bytes(relpath, 'utf-8')) if relpath is not None:
m.update(bytes(relpath, 'utf-8'))
# we use os.scandir which returns a DirEntry for each child # we use os.scandir which returns a DirEntry for each child
# excluding "." and "..". These variables hold a .stat which we can # excluding "." and "..". These variables hold a .stat which we can
@ -132,14 +164,17 @@ class FSEntry:
direntries = list(os.scandir(path)) direntries = list(os.scandir(path))
direntries.sort(key=operator.attrgetter('name')) direntries.sort(key=operator.attrgetter('name'))
childrenrelpaths = ((e.name if relpath is None else
os.path.join(relpath, e.name)) for e in direntries)
children = [ children = [
cls.from_path( cls.from_path(
root=root, root=root,
relpath=os.path.join(relpath, e.name), relpath=rp,
filestat=e.stat(), direntry=e,
) )
for e in direntries \ for rp, e in zip(childrenrelpaths, direntries)
if os.path.join(relpath, e.name) not in exclude if rp not in exclude
] ]
for c in children: # now hash concatenated sorted hashes for c in children: # now hash concatenated sorted hashes
# hash on perms+sha256 to enable recursively detecting perm # hash on perms+sha256 to enable recursively detecting perm
@ -170,21 +205,41 @@ class FSEntry:
sha256 = m.digest() sha256 = m.digest()
return cls( ob = cls(
id=None, id=None,
filename=os.path.basename(relpath), filename='.' if relpath is None else os.path.basename(relpath),
relpath=relpath, relpath='.' if relpath is None else relpath,
parent=parent,
children=children, children=children,
filetype=filetype, filetype=None,
deleted=False, deleted=None,
versions=[ versions=[
FSEntryVersion( FSEntryVersion(
unfrozen_perms=stat.filemode(filestat.st_mode), id=None,
filedir=None,
recorded_time=datetime.now().timestamp(),
filetype=filetype,
deleted=False,
unfrozen_perms=stat.filemode(s),
symlink_target=symlink_target, symlink_target=symlink_target,
sha256=sha256, sha256=sha256,
source_task_id=None,
) )
], ],
) )
# now change children's parents to point to this object
for v in ob.versions:
v.filedir = ob
if len(ob.versions) > 0:
last_ver = ob.versions[-1]
ob.filetype = last_ver.filetype
ob.deleted = last_ver.deleted
ob.unfrozen_perms = last_ver.unfrozen_perms
ob.symlink_target = last_ver.symlink_target
ob.sha256 = last_ver.sha256
for c in ob.children:
c.parent = ob
return ob
@classmethod @classmethod
def empty_root(cls): def empty_root(cls):
@ -193,49 +248,71 @@ class FSEntry:
id=None, id=None,
filename='.', filename='.',
relpath='.', relpath='.',
parent=None,
children=[], children=[],
filetype='DIR', filetype='DIR',
perms='----------', unfrozen_perms='----------',
sha256=hashlib.sha256().digest(), sha256=hashlib.sha256().digest(),
deleted=False, deleted=False,
) )
@classmethod @classmethod
def from_db_index(cls, cursor, root_id=None, root_row=None):
@logger.catch @logger.catch
def from_db_index(cls, cursor, root_id=None, root_row=None, parent=None):
"""Given id of an entry in filedir, recursively fill this object""" """Given id of an entry in filedir, recursively fill this object"""
fields = 'id, filename, filetype, frozen, deleted'
if root_row is None: if root_row is None:
assert root_id is not None assert root_id is not None
cursor.execute(f'SELECT {fields} FROM filedir WHERE id=?', root_id) logger.debug('root_id({})={}', type(root_id), root_id)
cursor.execute(
'SELECT id, name, frozen FROM filedir WHERE id=?',
(root_id,),
)
root_row = cursor.fetchone() root_row = cursor.fetchone()
root_id, filename, filetype, frozen, deleted = root_row root_id, filename, frozen = root_row
cursor.execute(f'SELECT {fields} FROM filedir WHERE parent=? ORDER BY name', root_id) relpath = filename if parent is None else os.path.join(parent.relpath, filename)
rows = cursor.fetchall()
children = [cls.from_db_index(cursor, root_row=r) for r in rows]
fields = 'id, version_counter, unfrozen_perms, symlink_target, sha256, source_task_id' # instantiate class before filling children
cursor.execute(f'SELECT {fields} FROM filedir_version WHERE filedir=? ORDER BY version', self.id) ob = cls(
matches = cursor.fetchall()
versions = [FSEntryVersion(*row) for row in matches]
return cls(
id=root_id, id=root_id,
filename=filename, filename=filename,
relpath=relpath, relpath=relpath,
children=children, parent=parent,
filetype=filetype, children=[],
perms=unfrozen_perms, filetype=None,
sha256=bytes.fromhex(sha256), unfrozen_perms=None,
deleted=deleted, sha256=None,
versions=versions, deleted=None,
versions=[],
) )
def versions(self, cursor): cursor.execute(f'''
"""Get a list from the database of FSEntryVersion objects.""" SELECT id, name, frozen
if self.id is None: FROM filedir
return None WHERE parent=?
''', (root_id,))
rows = cursor.fetchall()
ob.children = [cls.from_db_index(cursor, root_row=r, parent=ob) for r in rows]
# get all versions
fields = ('')
cursor.execute(f'''
SELECT * FROM filedir_version WHERE filedir=? ORDER BY recorded_time
''', (root_id,))
matches = cursor.fetchall()
versions = [FSEntryVersion.from_row(row, filedir=ob) for row in matches]
if len(versions) > 0:
last_ver = versions[-1]
ob.filetype = last_ver.filetype
ob.deleted = last_ver.deleted
ob.unfrozen_perms = last_ver.unfrozen_perms
ob.symlink_target = last_ver.symlink_target
ob.sha256 = last_ver.sha256
ob.last_version = last_ver
return ob
def flatten_tree(self, level=0): def flatten_tree(self, level=0):
"""Return list of all entries, with level, in pairs""" """Return list of all entries, with level, in pairs"""
@ -244,6 +321,32 @@ class FSEntry:
pairs.extend(c.flatten_tree(level=level + 1)) pairs.extend(c.flatten_tree(level=level + 1))
return pairs return pairs
def __str__(self):
    """Return the multi-line description produced by ``to_string``."""
    return self.to_string(level=0)

def to_string(self, level=0):
    """Describe this entry and, recursively, its children as text.

    Arguments:
        level (int): Nesting depth. Every output line is prefixed with
            ``level`` spaces. Child text is produced at ``level + 1`` and is
            prefixed again here because it is embedded before the prefix is
            applied.

    Returns:
        str: One ``key: value`` line per field, followed by the children
        section (``[]`` when there are no children).
    """
    if self.children:
        child_blocks = [c.to_string(level=level + 1) for c in self.children]
        separator = '\n\n'
        childsec = separator + separator.join(child_blocks)
    else:
        childsec = "[]"

    # sha256 is None on entries that have no recorded version yet; calling
    # .hex() on None would raise AttributeError while merely logging.
    sha_text = 'None' if self.sha256 is None else self.sha256.hex()
    parent_text = 'None' if self.parent is None else self.parent.relpath

    # TODO: list versions in str()
    body = '\n'.join([
        f"id: {self.id}",
        f"filename: {self.filename}",
        f"relpath: {self.relpath}",
        f"parent (relpath): {parent_text}",
        f"filetype: {self.filetype}",
        f"deleted: {self.deleted}",
        f"unfrozen_perms: {self.unfrozen_perms}",
        f"symlink_target: {self.symlink_target}",
        f"sha256: {sha_text}",
        f"children: {childsec}",
    ]) + '\n'
    prefix = ' ' * level
    return '\n'.join(prefix + line for line in body.splitlines())
def sort_diffs_filename(diffs): def sort_diffs_filename(diffs):
name_ent = {e.filename(): e for e in diffs} name_ent = {e.filename(): e for e in diffs}
@ -259,7 +362,7 @@ class FSDiff:
@staticmethod @staticmethod
def compare(A, B): def compare(A, B):
return A.sha256 == B.sha256 and \ return A.sha256 == B.sha256 and \
A.perms == B.perms and \ A.unfrozen_perms == B.unfrozen_perms and \
A.filetype == B.filetype and \ A.filetype == B.filetype and \
A.deleted == B.deleted A.deleted == B.deleted
@ -302,7 +405,7 @@ class FSDiff:
Alist = {c.filename: c for c in A.children} Alist = {c.filename: c for c in A.children}
Blist = {c.filename: c for c in B.children} Blist = {c.filename: c for c in B.children}
allnames = set(Alist.keys() + Blist.keys()) allnames = set(list(Alist.keys()) + list(Blist.keys()))
modified_children = [cls.compute( modified_children = [cls.compute(
Alist.get(n, None), Alist.get(n, None),

View File

@ -98,67 +98,55 @@ CREATE TABLE store (id INTEGER PRIMARY KEY NOT NULL,
FOREIGN KEY (machine) REFERENCES machine (id) ON UPDATE CASCADE FOREIGN KEY (machine) REFERENCES machine (id) ON UPDATE CASCADE
); );
-- The filedir table holds all files and directories that are tracked by the -- The filedir table holds all files and directories that are tracked by the
-- store. Files and directories are distinguished by the filetype column. This -- store. This table also holds tracked files and directories that have been
-- table also holds tracked files and directories that have been imported and -- imported and live outside the current store.
-- live outside the current store. -- We do not support renaming files. Once an entry is created here, it should
-- only be updated to reflect frozen/thawed status.
CREATE TABLE filedir (id INTEGER PRIMARY KEY NOT NULL, CREATE TABLE filedir (id INTEGER PRIMARY KEY NOT NULL,
store INTEGER NOT NULL, store INTEGER NOT NULL,
filename TEXT, -- only a filename, not a path name TEXT, -- only a filename, not a path
parent INTEGER REFERENCES filedir ON UPDATE CASCADE, parent INTEGER REFERENCES filedir ON UPDATE CASCADE,
filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
-- Note that changes in filetype are seen as a delete/replace instead of an
-- update. If such a change is noticed, the original filedir entry must be
-- marked deleted and a new one created with the new filetype.
frozen BOOL NOT NULL, frozen BOOL NOT NULL,
deleted BOOL NOT NULL, -- set True when recording a deleted file
-- NOTE: a deleted file should not be updated, other than through a cascade
UNIQUE(store, filename, parent), UNIQUE(store, name, parent)
FOREIGN KEY (store) REFERENCES store (id) ON UPDATE CASCADE
); );
-- Detect cross-store references
CREATE TRIGGER insert_filedir BEFORE INSERT ON filedir CREATE TRIGGER insert_filedir BEFORE INSERT ON filedir
BEGIN SELECT CASE BEGIN SELECT CASE
WHEN NEW.parent IS NOT NULL AND (SELECT filetype FROM filedir WHERE id = NEW.parent) != 'DIR'
THEN RAISE (ABORT, 'Parent is not listed as a directory')
WHEN NEW.parent IS NOT NULL AND NEW.store != (SELECT store FROM filedir WHERE id = NEW.parent) WHEN NEW.parent IS NOT NULL AND NEW.store != (SELECT store FROM filedir WHERE id = NEW.parent)
THEN RAISE (ABORT, 'Parent directory resides in different store') THEN RAISE (ABORT, 'Parent resides in different store')
END; END; END; END;
CREATE TRIGGER update_filedir BEFORE UPDATE ON filedir CREATE TRIGGER update_filedir BEFORE UPDATE ON filedir
BEGIN SELECT CASE BEGIN SELECT CASE
WHEN NEW.parent IS NOT NULL AND (SELECT filetype FROM filedir WHERE id = NEW.parent) != 'DIR' WHEN (NEW.id != OLD.id OR NEW.store != OLD.store OR NEW.parent != OLD.parent)
THEN RAISE (ABORT, 'Parent is not listed as a directory') THEN RAISE (ABORT, 'The only updates to filedir allowed are to the frozen column')
WHEN NEW.parent IS NOT NULL AND NEW.store != (SELECT store FROM filedir WHERE id = NEW.parent)
THEN RAISE (ABORT, 'Parent directory resides in different store')
END; END;
-- Files that are recorded to be deleted have the deleted column set to True.
-- These files should no longer be used. In particular, we should never rename
-- or update them since their lifetime is over.
INSERT INTO triggers VALUES('update_deleted_filedir', TRUE);
CREATE TRIGGER update_deleted_filedir BEFORE UPDATE ON filedir
BEGIN SELECT CASE
WHEN OLD.deleted AND (SELECT enabled FROM triggers WHERE name = 'update_deleted_filedir')
THEN RAISE (ABORT, 'Cannot modify filedir entry for deleted file')
END; END; END; END;
-- This table holds _versions_ of files. This table holds information that is -- This table holds _versions_ of files and directories. This table holds
-- independent of the location within the filesystem and merely indicates a -- information that is independent of the location within the filesystem and
-- version of the content in a format that is nearly independent of filetype -- merely indicates a version of the content in a format that is nearly
-- (though the computation of content hashes is of course dependent on -- independent of filetype (though the computation of content hashes is of
-- filetype). Each version has a number, and was provided by some program (and -- course dependent on filetype). Each version has a number, and was provided by
-- potentially a datum). -- some program (and potentially a datum).
CREATE TABLE filedir_version (id INTEGER PRIMARY KEY NOT NULL, CREATE TABLE filedir_version (id INTEGER PRIMARY KEY NOT NULL,
filedir INTEGER REFERENCES filedir ON UPDATE CASCADE, -- parent filedir entry filedir INTEGER NOT NULL
version_counter INTEGER NOT NULL, -- incremented whenever a new version of the file is recorded REFERENCES filedir ON UPDATE CASCADE, -- parent filedir entry
recorded_time REAL, -- When was this version recorded?
-- Note that changing filetype (e.g. directory becomes file) or deleting a
-- file are simply just new versions of a filedir.
filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
deleted BOOL NOT NULL, -- set True when recording a deleted file
-- We record the permissions on each file, in a way that enables reloading -- We record the permissions on each file, in a way that enables reloading
-- permissions properly when thawing after a freeze operation. -- permissions properly when thawing after a freeze operation.
unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--' unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target. I.e. this is the "content" of the symlink. symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target. i.e. this is the "content" of the symlink.
-- The following hash can be NULL if the file was deleted. It could also be -- The following hash can be NULL if the file was deleted. It could also be
-- null if hashing is deferred. Hash deferring would be preferred in cases -- null if hashing is deferred. Hash deferring would be preferred in cases
@ -170,26 +158,19 @@ CREATE TABLE filedir_version (id INTEGER PRIMARY KEY NOT NULL,
-- can defer by only hashing files and directories at the end of a program. -- can defer by only hashing files and directories at the end of a program.
sha256 TEXT, sha256 TEXT,
source_task INTEGER, source_task INTEGER REFERENCES task (id) ON UPDATE CASCADE
UNIQUE(filedir, version),
FOREIGN KEY (source_task) REFERENCES task (id) ON UPDATE CASCADE
); );
-- Disallow inserting or deleting versions for deleted files. (the version -- Disallow UPDATING filedir_version. Instead, new version should be created.
-- deleting the file should be added first, then the deleted field set on the -- One exception is during importing, in which case we can disable the trigger
-- filedir row). The update trigger is toggleable to enable importing. INSERT INTO triggers VALUES('update_filedir_version', TRUE);
CREATE TRIGGER insert_filedir_version_parent_deleted BEFORE INSERT ON filedir_version CREATE TRIGGER update_filedir_version BEFORE UPDATE ON filedir_version
BEGIN SELECT CASE BEGIN SELECT CASE
WHEN (SELECT deleted FROM filedir WHERE id = NEW.parent) WHEN (SELECT enabled FROM triggers WHERE name = 'update_filedir_version')
THEN RAISE (ABORT, 'Attempted to insert filedir_version whose filedir was deleted') THEN RAISE (ABORT,
END; END; 'Updating filedir versions is prohibited, other than for id changes')
INSERT INTO triggers VALUES('update_filedir_version_parent_deleted', TRUE);
CREATE TRIGGER update_filedir_version_parent_deleted BEFORE UPDATE ON filedir_version
BEGIN SELECT CASE
WHEN (SELECT deleted FROM filedir WHERE id = NEW.parent)
AND (SELECT enabled FROM triggers WHERE name = 'update_filedir_version_parent_deleted')
THEN RAISE (ABORT, 'Attempted to update filedir_version whose parent was deleted')
END; END; END; END;
-- TODO: check for inserting inconsistent version info re. deleted flag/filetype
-- A computational environment which can execute "programs". Note that the -- A computational environment which can execute "programs". Note that the

View File

@ -46,12 +46,20 @@ class Program:
self.message, #message TEXT, -- user-defined message to help distinguish similar runs self.message, #message TEXT, -- user-defined message to help distinguish similar runs
)) ))
self.id = cur.lastrowid self.id = cur.lastrowid
cur.connection.commit()
self.set_start_time(datetime.datetime.now()) self.set_start_time(datetime.datetime.now())
return self return self
def new_task(self, name, py_function_id=None):
    """Insert a ``task`` row owned by this program and return its row id.

    Arguments:
        name: Accepted for the caller's convenience; this method does not
            store it.
        py_function_id: Value written to the third column of the new row
            (``None`` by default).

    Returns:
        The ``lastrowid`` reported by the cursor after the insert.
    """
    # First column is None so that the database assigns the id.
    task_row = (None, self.id, py_function_id)
    cursor = self.store.conn.cursor()
    cursor.execute("INSERT INTO task VALUES (?, ?, ?)", task_row)
    return cursor.lastrowid
def __exit__(self, exc_type, exc_value, exc_traceback): def __exit__(self, exc_type, exc_value, exc_traceback):
end_time = datetime.datetime.now() end_time = datetime.datetime.now()
# record start and end times in store # record start and end times in store
@ -142,7 +150,7 @@ class Store:
"""Get the database id for the table entry in this store having name '.'""" """Get the database id for the table entry in this store having name '.'"""
if cur is None: if cur is None:
cur = self.conn.cursor() cur = self.conn.cursor()
cur.execute('SELECT id FROM filedir WHERE store=0 AND parent is NULL;') cur.execute('SELECT id FROM filedir WHERE store=1 AND parent is NULL')
return cur.fetchone() return cur.fetchone()
def path_to_fsentry(self, path): def path_to_fsentry(self, path):
@ -177,52 +185,111 @@ class Store:
return fd_id return fd_id
return fs.FSEntry.from_db_index(cur, root_id=fd_id) return fs.FSEntry.from_db_index(cur, root_id=fd_id)
def recorded_status(self, filepath):
recorded = self.path_to_fsentry(filepath)
def fs_entries(self, shallow=False): def fs_entries(self, shallow=False):
"""Return recursive structure containing FSEntry objects from db""" """Return recursive structure containing FSEntry objects from db"""
root_id = self.filedir_root_index() root_id = self.filedir_root_index()
logger.debug('root_id={}', root_id) logger.debug('root_id={}', root_id)
if root_id is None: if root_id is None:
logger.trace("Empty root") logger.trace("Empty root")
return fs.FSEntry.empty_root() return fs.FSEntry.empty_root()
else: else:
return fs.FSEntry.from_db_index(cur, root_id=root_id) logger.trace("Non-empty root", root_id)
return fs.FSEntry.from_db_index(self.conn.cursor(), root_id=root_id)
def program(self, name, message=None): def program(self, name, message=None):
return Program(self, name, message) return Program(self, name, message)
def diff(self, filepath): def diff(self):
""" """
Diff a file or directory compared to its recorded version Find changes to files and directories compared to their recorded versions
""" """
# get info about current files at this location
current = fs.FSEntry.from_path(
self.path,
# send a relative path from self.path to filepath, which is user
# provided
os.path.relpath(os.path.realpath(filepath), os.path.realpath(self.path)),
)
logger.trace("DIFF") logger.trace("DIFF")
# get info about files currently at the given locations
current = fs.FSEntry.from_path(self.path)
recorded = self.path_to_fsentry(filepath)
logger.debug("CURRENT: \n{}", str(current)) logger.debug("CURRENT: \n{}", str(current))
# then find a listing covering all the expected paths
#recorded = self.recorded_status(self.path)
recorded = self.fs_entries(shallow=True)
logger.debug("RECORDED: \n{}", str(recorded)) logger.debug("RECORDED: \n{}", str(recorded))
return fs.FSDiff.compute(recorded, current) return fs.FSDiff.compute(recorded, current)
def record(self, diff, message=None):
def _record_file_version(self, cur, ob, filedir_id, source_task=None):
    """Insert a new, non-deleted ``filedir_version`` row describing ``ob``.

    Arguments:
        cur: Database cursor used for the insert.
        ob: Entry whose ``filetype``, ``unfrozen_perms``, ``symlink_target``
            and ``sha256`` are recorded.
        filedir_id: Id of the ``filedir`` row this version belongs to.
        source_task: Optional id of the task that produced this version.
    """
    # The sha256 column is nullable (hashing may be deferred), so a missing
    # digest is stored as NULL instead of crashing on None.hex().
    sha256_hex = None if ob.sha256 is None else ob.sha256.hex()
    cur.execute(
        'INSERT INTO filedir_version VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)',
        (
            None,  # id: assigned by the database
            filedir_id,
            datetime.datetime.now().timestamp(),  # recorded_time
            ob.filetype,
            False,  # deleted: this method only records live entries
            ob.unfrozen_perms,
            ob.symlink_target,
            sha256_hex,
            source_task,
        ),
    )
def _record_new_file_recursive(self, ob, cur, parent_id, source_task):
    """Record ``ob`` and everything beneath it as new versions.

    Reuses the existing ``filedir`` row with this name under ``parent_id``
    when there is one, otherwise inserts it, then records a version for
    ``ob`` and recurses into its children.

    Arguments:
        ob: Entry to record; ``filename`` and ``children`` are read here.
        cur: Database cursor used for all statements.
        parent_id: ``filedir`` id of the parent row, or ``None`` for the root.
        source_task: Task id forwarded to every recorded version.
    """
    # Look the row up by the parent's *database* id. Freshly scanned entries
    # carry id=None, so ob.parent.id cannot be used. ``IS`` (rather than
    # ``=``) is required so that a NULL parent matches the root row.
    cur.execute(
        "SELECT id FROM filedir WHERE store = 1 AND name = ? AND parent IS ? LIMIT 1",
        (ob.filename, parent_id),
    )
    found = cur.fetchone()
    if found is None:
        # create filedir entry and get its id
        cur.execute(
            'INSERT INTO filedir VALUES (?, ?, ?, ?, ?)',
            (
                None,  # id: assigned by the database
                1,  # store
                ob.filename,
                parent_id,
                False,  # frozen
            ),
        )
        thisid = cur.lastrowid
    else:
        (thisid,) = found

    self._record_file_version(cur, ob, thisid, source_task=source_task)

    # descend into children and record all of them anew as well
    for child in ob.children:
        self._record_new_file_recursive(child, cur, thisid, source_task)
def _record_recursive(self, diff, cur, parent_id=None, source_task=None):
    """Record this level of a diff.

    ``diff.A`` is the recorded side and ``diff.B`` the current side (the
    diff is built as ``FSDiff.compute(recorded, current)``).

    Arguments:
        diff: Diff node with ``A`` and ``B`` attributes; either may be None.
        cur: Database cursor passed on to the recording helpers.
        parent_id: ``filedir`` id of the parent row, or ``None`` at the root.
        source_task: Task id to associate with newly recorded versions.
    """
    if diff.A is None:
        # Nothing was recorded here before: record B and its whole subtree,
        # keeping the caller's task id instead of discarding it.
        self._record_new_file_recursive(
            diff.B, cur, parent_id, source_task=source_task
        )
    elif diff.B is None:
        # Entry disappeared: the recorded side (A) is the one to mark as
        # deleted; B is None in this branch.
        # NOTE(review): _record_deleted_file_recursive is not visible in this
        # view - confirm it exists and whether it should take source_task.
        self._record_deleted_file_recursive(diff.A, cur, parent_id)
    else:
        # TODO: modified entries are not recorded yet
        pass
    # TODO: descend into children
def record(self, diff, parent_id=None, message=None, cur=None):
    """Record the changes described by ``diff`` inside a RECORD program.

    Args:
        diff: diff tree handed to ``_record_recursive``.
        parent_id: ``filedir`` id under which the top of ``diff`` is recorded.
        message: message passed to ``self.program``.
        cur: DB cursor to use; a new one is taken from ``self.conn`` if omitted.
    """
    if cur is None:
        cur = self.conn.cursor()
    with self.program('RECORD', message) as p:
        # create a task for this operation
        task_id = p.new_task('Store._record_recursive')
        # create entries for all directories if they do not yet exist (top
        # down)
        # insert files and symlinks into filedir, computing checksums on
        # each
        # update versions in nancy.db as appropriate
        # descend the diff, tracking parent filedir IDs, creating them and
        # recording new versions of each, when necessary
        # parent_id is forwarded so a caller-supplied parent is not ignored.
        self._record_recursive(diff, cur, parent_id=parent_id, source_task=task_id)
#@contextmanager #@contextmanager
def run( def run(

View File

@ -3,6 +3,7 @@ Pure SQL tests that don't depend on nancy's Python code
""" """
import pytest import pytest
import datetime
import os import os
import sqlite3 import sqlite3
@ -156,31 +157,60 @@ def insert_directories(insert_store):
cur = insert_store cur = insert_store
cur.executemany( cur.executemany(
'INSERT INTO filedir VALUES ' 'INSERT INTO filedir VALUES '
'(?, ?, ?, ?, ?, ?, ?)', '(?, ?, ?, ?, ?)',
[( [(
None, #id INTEGER PRIMARY KEY NOT NULL, None, #id INTEGER PRIMARY KEY NOT NULL,
1, #store INTEGER NOT NULL, 1, #store INTEGER NOT NULL,
'.', #filename TEXT, -- only a filename, not a path '.', #filename TEXT, -- only a filename, not a path
None, #parent INTEGER REFERENCES filedir ON UPDATE CASCADE, None, #parent INTEGER REFERENCES filedir ON UPDATE CASCADE,
'DIR', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
False, #frozen BOOL NOT NULL, False, #frozen BOOL NOT NULL,
False, #deleted BOOL NOT NULL,
), ( ), (
None, #id INTEGER PRIMARY KEY NOT NULL, None, #id INTEGER PRIMARY KEY NOT NULL,
1, #store INTEGER NOT NULL, 1, #store INTEGER NOT NULL,
'foo', #filename TEXT, -- only a filename, not a path 'foo', #filename TEXT, -- only a filename, not a path
1, #parent INTEGER REFERENCES filedir ON UPDATE CASCADE, 1, #parent INTEGER REFERENCES filedir ON UPDATE CASCADE,
'DIR', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
False, #frozen BOOL NOT NULL, False, #frozen BOOL NOT NULL,
False, #deleted BOOL NOT NULL,
), ( ), (
None, #id INTEGER PRIMARY KEY NOT NULL, None, #id INTEGER PRIMARY KEY NOT NULL,
2, #store INTEGER NOT NULL, 2, #store INTEGER NOT NULL,
'.', #filename TEXT, -- only a filename, not a path '.', #filename TEXT, -- only a filename, not a path
None, #parent INTEGER REFERENCES filedir ON UPDATE CASCADE, None, #parent INTEGER REFERENCES filedir ON UPDATE CASCADE,
'DIR', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
False, #frozen BOOL NOT NULL, False, #frozen BOOL NOT NULL,
False, #deleted BOOL NOT NULL, )],
)
cur.executemany(
'INSERT INTO filedir_version VALUES '
'(?, ?, ?, ?, ?, ?, ?, ?, ?)',
[(
None, #id INTEGER PRIMARY KEY NOT NULL,
1, # INTEGER REFERENCES filedir ON UPDATE CASCADE, -- parent filedir entry
datetime.datetime.now().timestamp(),
'DIR', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
False, #deleted BOOL NOT NULL, -- set True when recording a deleted file
'drwxrwxr-x', #unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
None, #symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target. I.e. this is the "content" of the symlink.
'a84ed33864d06615a87bc8da5258d841163f1e7969367ecd07b041ae1a18febd', #sha256 TEXT,
None, #source_task INTEGER,
), (
None, #id INTEGER PRIMARY KEY NOT NULL,
1, # INTEGER REFERENCES filedir ON UPDATE CASCADE, -- parent filedir entry
datetime.datetime.now().timestamp(),
'DIR', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
False, #deleted BOOL NOT NULL, -- set True when recording a deleted file
'drwxrwxr-x', #unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
None, #symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target. I.e. this is the "content" of the symlink.
'a84ed33864d06615a87bc8da5258d841163f1e7969367ecd07b041ae1a18febd', #sha256 TEXT,
None, #source_task INTEGER,
), (
None, #id INTEGER PRIMARY KEY NOT NULL,
1, # INTEGER REFERENCES filedir ON UPDATE CASCADE, -- parent filedir entry
datetime.datetime.now().timestamp(),
'DIR', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
False, #deleted BOOL NOT NULL, -- set True when recording a deleted file
'drwxrwxr-x', #unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
None, #symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target. I.e. this is the "content" of the symlink.
'a84ed33864d06615a87bc8da5258d841163f1e7969367ecd07b041ae1a18febd', #sha256 TEXT,
None, #source_task INTEGER,
)], )],
) )
return cur return cur
@ -191,15 +221,13 @@ def test_crossstore_directory_insert(insert_directories):
# declaring directory as belonging to store 2, but parent's store is 1 # declaring directory as belonging to store 2, but parent's store is 1
cur.execute( cur.execute(
'INSERT INTO filedir VALUES ' 'INSERT INTO filedir VALUES '
'(?, ?, ?, ?, ?, ?, ?)', '(?, ?, ?, ?, ?)',
( (
None, #id INTEGER PRIMARY KEY NOT NULL, None, #id INTEGER PRIMARY KEY NOT NULL,
2, #store INTEGER NOT NULL, 2, #store INTEGER NOT NULL,
'some_dir', #filename TEXT, -- only a filename, not a path 'some_dir', #filename TEXT, -- only a filename, not a path
1, #parent INTEGER REFERENCES filedir ON UPDATE CASCADE, 1, #parent INTEGER REFERENCES filedir ON UPDATE CASCADE,
'DIR', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
False, #frozen BOOL NOT NULL, False, #frozen BOOL NOT NULL,
False, #deleted BOOL NOT NULL,
)) ))
for row in cur.connection.iterdump(): for row in cur.connection.iterdump():
print(row) print(row)
@ -210,55 +238,79 @@ def test_crossstore_directory_insert(insert_directories):
@pytest.fixture
def insert_files(insert_directories):
    """Add two files to store 1, plus version rows for them, and return the cursor.

    ``example.csv`` (parent 1) receives two versions and ``plots.png``
    (parent 2) receives one.
    """
    cur = insert_directories

    # The new filedir ids are derived from the current row count.
    # NOTE(review): this relies on existing ids being 1..COUNT(*) -- confirm.
    cur.execute('SELECT COUNT(*) FROM filedir')
    (nprev,) = cur.fetchone()

    # Columns: id, store, filename, parent, frozen
    filedir_rows = [
        (None, 1, 'example.csv', 1, False),
        (None, 1, 'plots.png', 2, False),
    ]
    cur.executemany(
        'INSERT INTO filedir VALUES '
        '(?, ?, ?, ?, ?)',
        filedir_rows,
    )

    def version_row(filedir_id, perms, sha256):
        # Columns: id, filedir, timestamp, filetype, deleted,
        # unfrozen_perms, symlink_target, sha256, source_task
        return (
            None,
            filedir_id,
            datetime.datetime.now().timestamp(),
            'REG',
            False,
            perms,
            None,
            sha256,
            None,
        )

    # NOTE(review): the permission strings start with 'd' although the
    # rows are typed 'REG' -- presumably placeholder data; confirm.
    version_rows = [
        version_row(
            nprev + 1,
            'drwxrwxr-x',
            'a84ed33864d06615a87bc8da5258d841163f1e7969367ecd07b041ae1a18febd',
        ),
        # second version of first file
        version_row(
            nprev + 1,
            'drwxr-xr-x',
            'a94ed33864d06615a87bc8da5258d841163f1e7969367ecd07b041ae1a18febd',
        ),
        version_row(
            nprev + 2,
            'drwxr-xr-x',
            'a94ed33864d06615a87bc8da5258d841163f1e7969367ecd07b041ae1a18febd',
        ),
    ]
    cur.executemany(
        'INSERT INTO filedir_version VALUES '
        '(?, ?, ?, ?, ?, ?, ?, ?, ?)',
        version_rows,
    )
    return cur
# TODO: This test is disabled until triggers are added to check for these types
# of constraints. These became much more complicated to check when I added
# filedir_version.
def disabled_test_nondir_parent_directory_insert(insert_files):
    """Inserting a filedir row whose parent is a regular file should fail.

    The ``disabled_`` prefix keeps pytest from collecting this function.
    """
    cur = insert_files
    # Columns: id, store, filename, parent, frozen
    child_of_file = (None, 1, 'some_filedir.txt', 5, False)
    with pytest.raises(sqlite3.IntegrityError):
        # declaring parent as 5, but 5 is a file (plots.png)
        cur.execute(
            'INSERT INTO filedir VALUES '
            '(?, ?, ?, ?, ?)',
            child_of_file,
        )
    # NOTE(review): the dump is placed after the raises-block so it runs
    # once the expected error has been caught -- confirm against the
    # original layout.
    for dumped in cur.connection.iterdump():
        print(dumped)
    cur.execute('SELECT * FROM filedir')
    print(cur.fetchall())
def test_update_deleted_file(insert_files):
    """Updating a filedir row already flagged as deleted must be rejected."""
    # NOTE(review): ``filedir.deleted`` is not among the five columns the
    # filedir inserts in this file supply -- this test appears to target
    # the earlier schema; confirm it is still meant to exist.
    cur = insert_files
    mark_deleted = 'UPDATE filedir SET deleted=True WHERE id=5'
    freeze = 'UPDATE filedir SET frozen=True WHERE id=5'
    # first we set a file to deleted
    cur.execute(mark_deleted)
    # Now we try and update it, which should fail due to trigger
    with pytest.raises(sqlite3.IntegrityError):
        cur.execute(freeze)

1
tests/test_fs.py Normal file
View File

@ -0,0 +1 @@
from nancy import fs

View File

@ -22,7 +22,7 @@ def filled_dir(bare_dir):
def test_record_untracked_dir(filled_dir):
    """Smoke test: ``record`` on the ``filled_dir`` fixture must not raise."""
    # Imported inside the test, as in the original.
    from nancy.cli.record import record

    record(
        filled_dir,
        message='test_record_untracked_dir',
    )
@pytest.fixture @pytest.fixture