Update schema putting more into filedir_version

This commit is contained in:
Jacob Hinkle 2022-09-27 11:26:05 -04:00
parent cb53db3bcd
commit 45c2046fd6
8 changed files with 419 additions and 203 deletions

View File

@ -1,7 +1,8 @@
import click
from loguru import logger
from .. import db, fs, store
from .. import db, fs
from ..store import find_store, Store
import os
import sys
@ -32,7 +33,7 @@ def print_diff(ABdiff: fs.FSDiff, indent=2, indent_level=0, use_color=True,
hashcolor = Fore.MAGENTA if use_color else ''
def _print_row(tag, entry, level):
relpath = entry.relpath[2:]
relpath = entry.relpath
# Format relpath using filetype-based colors
dname, fname = os.path.split(relpath)
@ -68,19 +69,7 @@ def print_diff(ABdiff: fs.FSDiff, indent=2, indent_level=0, use_color=True,
_print_row('MOD', d.B, l)
def diff(store, filedir_path, show_hashes=False, use_color=True):
"""Unwrapped diff command that prints a diff"""
if not os.path.exists(filedir_path):
raise FileNotFoundError(f"Cannot diff non-existent file or directory {filedir_path}")
# get the diff object
fsdiff = store.diff(filedir_path)
print_diff(fsdiff, show_hashes=show_hashes, use_color=use_color)
@click.command()
@click.argument("path", default='.')
@click.option(
'-H', "--show-hashes",
is_flag=True,
@ -91,8 +80,16 @@ def diff(store, filedir_path, show_hashes=False, use_color=True):
is_flag=True,
help='If given, do not print any color output.',
)
def diff_cli(path, show_hashes, no_color):
@click.option(
"-s", "--store",
type=str,
default=None,
help='Top-level of store. If omitted, use closest common parent directory '
'of given paths. If given the path to a non-store directory, a new '
'store is initialized there.',
)
@logger.catch
def diff_cli(show_hashes, no_color, store):
    """Detect and describe changes in a nancy store.

    The store is the directory given with -s/--store. When that option is
    omitted, the store containing the current working directory is used.
    All printed paths are relative to the top level of the store.
    """
    if store is None:
        # No explicit store: search upward from the current directory.
        cwd = os.getcwd()
        store = find_store(cwd)
        if store is None:
            logger.error(
                "Could not find nancy.db in any directory containing {}",
                os.path.realpath(cwd),
            )
            sys.exit(1)

    print("Paths relative to store path:", store)

    # connect to store (the discovered path, not the possibly-None option)
    s = Store(store)

    d = s.diff()
    logger.success("Computed diff")
    print_diff(
        d,
        show_hashes=show_hashes,
        use_color=not no_color,
    )

View File

@ -1,7 +1,7 @@
import click
from loguru import logger
from .. import store
from .. import fs, store
from .common import confirm
from .diff import print_diff
@ -9,21 +9,27 @@ from .diff import print_diff
import os
import sys
def record(directory, message, show_diff=True, show_hashes=False, use_color=True,
@logger.catch
def record(message, store_path=None, show_diff=True, show_hashes=False, use_color=True,
skip_confirm=False):
"""Unwrapped record command"""
if not os.path.isdir(directory):
raise ValueError(f"Cannot record non-existent directory {directory}")
existing_store = store.find_store(directory)
if existing_store is None: # this is a new store
s = store.Store.init(directory)
if store_path is None:
curdir = os.path.realpath(os.getcwd())
logger.info("Looking for store in", curdir)
store_path = store.find_store(curdir)
if store_path is None: # If no store found, assume we're creating here
store_path = curdir
if not os.path.exists(os.path.join(store_path, 'nancy.db')):
# this is a new store
logger.info(f"Initializing new store in {store_path}...")
s = store.Store.init(store_path)
else: # this is an existing store
s = store.Store(directory)
s = store.Store(store_path)
fsdiff = s.diff(directory)
fsdiff = s.diff()
if show_diff:
print_diff(fsdiff, show_hashes=show_hashes, use_color=use_color)
@ -38,7 +44,6 @@ def record(directory, message, show_diff=True, show_hashes=False, use_color=True
@click.command()
@click.argument("directory", default='.')
@click.option(
'-H', "--show-hashes",
is_flag=True,
@ -54,15 +59,19 @@ def record(directory, message, show_diff=True, show_hashes=False, use_color=True
type=str,
required=True,
help='A user-defined descriptive message for this recording operation.',
def record_cli(directory, show_hashes, no_color, message):
)
@click.option(
"-s", "--store",
type=str,
default=None,
help='Top-level of store. If omitted, use closest common parent directory '
'of given paths. If given the path to a non-store directory, a new '
'store is initialized there.',
)
def record_cli(show_hashes, no_color, message, store):
    """
    Initialize tracking or record changes to a tracked directory.

    The store is the directory given with -s/--store. When that option is
    omitted, the store containing the current directory is used, or the
    current directory itself if no store is found. If the chosen directory
    has no 'nancy.db' file yet, a new store is initialized there. Otherwise
    the existing store is updated with the changes found since the last
    recording.
    """
    record(
        message=message,
        store_path=store,
        show_hashes=show_hashes,
        use_color=not no_color,
    )

View File

@ -1,10 +1,13 @@
"""Interaction with the filesystem and with file database entries"""
from dataclasses import dataclass
from loguru import logger
from dataclasses import asdict, dataclass
from datetime import datetime
import hashlib
import operator
import os
from pathlib import Path
import stat
from typing import List
import warnings
@ -64,15 +67,32 @@ def make_readonly_recursive(path, excluded=[]):
@dataclass
class FSEntryVersion:
    """A version of a file or directory.

    The fields follow the column order of the ``filedir_version`` table so
    that ``from_row`` can pass a database row through positionally.
    """
    id: int
    filedir: 'FSEntry'
    recorded_time: datetime  # When was this version recorded?
    filetype: str  # One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
    deleted: bool  # set True when recording a deleted file
    unfrozen_perms: str  # stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
    symlink_target: str  # if this is a symlink, this is the (read but not fully
    # resolved) target. I.e. this is the "content" of the symlink.
    sha256: str  # from_row stores the decoded digest bytes here (None if NULL)
    source_task_id: int = None

    @classmethod
    def from_row(cls, row, filedir=None):
        """Build a version from a full ``filedir_version`` row.

        Arguments:
            row: Sequence of column values in table order: id, filedir,
                recorded_time, the per-version attributes, the hex-encoded
                sha256, and the source task.
            filedir: Object to store as the owning entry. When omitted, the
                raw ``filedir`` column value (``row[1]``) is used instead.

        NULL ``recorded_time`` and ``sha256`` columns are passed through as
        ``None`` instead of raising during conversion.
        """
        if filedir is None:
            filedir = row[1]
        recorded = row[2]
        digest = row[-2]
        return cls(
            row[0],
            filedir,
            None if recorded is None else datetime.fromtimestamp(recorded),
            *row[3:-2],
            None if digest is None else bytes.fromhex(digest),
            row[-1],
        )
@dataclass
class FSEntry:
@ -80,11 +100,12 @@ class FSEntry:
id: int # defaults to None
filename: str # with parent directory stripped. None if this is the root
relpath: str # relative to some root directory
parent: 'FSEntry' # upward link
# children for dirs only: non-recursive; files/dirs at this level only
children: List['FSEntry']
filetype: str # regular, symlink, special (block, char, pipe, or socket)
deleted: bool
versions: [FSEntryVersion] = []
versions: List[FSEntryVersion] = None
# these will be filled from the version list automatically
unfrozen_perms: str = None # stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
@ -94,22 +115,32 @@ class FSEntry:
latest_version: FSEntryVersion = None
def __post_init__(self):
    """Copy per-version attributes from the newest entry in ``versions``.

    Nothing is changed when ``versions`` is ``None`` or empty.
    """
    history = self.versions
    if history is None or len(history) == 0:
        return
    # The last element of the list is treated as the latest version.
    newest = history[-1]
    self.latest_version = newest
    self.unfrozen_perms = newest.unfrozen_perms
    self.symlink_target = newest.symlink_target
    self.sha256 = newest.sha256
@classmethod
def from_path(cls, root, relpath='.', filestat=None, exclude=['./nancy.db']):
def from_path(cls, root, relpath=None, exclude=['nancy.db'], parent=None,
direntry=None):
"""
Scan a path to instantiate (recursive).
Arguments:
root (str or PathLike): The root directory of an existing or new store path
relpath (str or PathLike): Path of some directory under the store
path in which to find files and directories. Only these entries
and their children will be included.
"""
m = hashlib.sha256()
path = os.path.join(root, relpath)
if relpath is None: # top-level invocation at root
path = root
else:
path = os.path.join(root, relpath)
filestat = os.stat(path) if filestat is None else filestat
filestat = os.lstat(path) if direntry is None else direntry.stat(follow_symlinks=False)
s = filestat.st_mode
children = []
@ -124,7 +155,8 @@ class FSEntry:
# this prevents a directory's hash from colliding with a file hash
# in cases where it only holds a single file
m.update(bytes(relpath, 'utf-8'))
if relpath is not None:
m.update(bytes(relpath, 'utf-8'))
# we use os.scandir which returns a DirEntry for each child
# excluding "." and "..". These variables hold a .stat which we can
@ -132,14 +164,17 @@ class FSEntry:
direntries = list(os.scandir(path))
direntries.sort(key=operator.attrgetter('name'))
childrenrelpaths = ((e.name if relpath is None else
os.path.join(relpath, e.name)) for e in direntries)
children = [
cls.from_path(
root=root,
relpath=os.path.join(relpath, e.name),
filestat=e.stat(),
relpath=rp,
direntry=e,
)
for e in direntries \
if os.path.join(relpath, e.name) not in exclude
for rp, e in zip(childrenrelpaths, direntries)
if rp not in exclude
]
for c in children: # now hash concatenated sorted hashes
# hash on perms+sha256 to enable recursively detecting perm
@ -170,21 +205,41 @@ class FSEntry:
sha256 = m.digest()
return cls(
ob = cls(
id=None,
filename=os.path.basename(relpath),
relpath=relpath,
filename='.' if relpath is None else os.path.basename(relpath),
relpath='.' if relpath is None else relpath,
parent=parent,
children=children,
filetype=filetype,
deleted=False,
filetype=None,
deleted=None,
versions=[
FSEntryVersion(
unfrozen_perms=stat.filemode(filestat.st_mode),
id=None,
filedir=None,
recorded_time=datetime.now().timestamp(),
filetype=filetype,
deleted=False,
unfrozen_perms=stat.filemode(s),
symlink_target=symlink_target,
sha256=sha256,
source_task_id=None,
)
],
)
# now change children's parents to point to this object
for v in ob.versions:
v.filedir = ob
if len(ob.versions) > 0:
last_ver = ob.versions[-1]
ob.filetype = last_ver.filetype
ob.deleted = last_ver.deleted
ob.unfrozen_perms = last_ver.unfrozen_perms
ob.symlink_target = last_ver.symlink_target
ob.sha256 = last_ver.sha256
for c in ob.children:
c.parent = ob
return ob
@classmethod
def empty_root(cls):
@ -193,49 +248,71 @@ class FSEntry:
id=None,
filename='.',
relpath='.',
parent=None,
children=[],
filetype='DIR',
perms='----------',
unfrozen_perms='----------',
sha256=hashlib.sha256().digest(),
deleted=False,
)
@classmethod
def from_db_index(cls, cursor, root_id=None, root_row=None):
@logger.catch
def from_db_index(cls, cursor, root_id=None, root_row=None, parent=None):
    """Given id of an entry in filedir, recursively fill this object.

    Arguments:
        cursor: Database cursor used for every query issued here.
        root_id: id of the ``filedir`` row to load. Required when
            ``root_row`` is not given.
        root_row: An already fetched ``(id, name, frozen)`` row. When given,
            the row is not queried again.
        parent: The already built parent entry, or None for the top level.
            It is used to derive ``relpath`` and stored as the upward link.

    Returns:
        The entry for the row, with ``children`` filled recursively and
        ``versions`` / ``latest_version`` filled from ``filedir_version``.
    """
    if root_row is None:
        assert root_id is not None
        logger.debug('root_id({})={}', type(root_id), root_id)
        cursor.execute(
            'SELECT id, name, frozen FROM filedir WHERE id=?',
            (root_id,),
        )
        root_row = cursor.fetchone()
    root_id, filename, _frozen = root_row

    relpath = filename if parent is None else os.path.join(parent.relpath, filename)

    # instantiate class before filling children, so that each child can be
    # handed this object as its parent
    ob = cls(
        id=root_id,
        filename=filename,
        relpath=relpath,
        parent=parent,
        children=[],
        filetype=None,
        unfrozen_perms=None,
        sha256=None,
        deleted=None,
        versions=[],
    )

    cursor.execute('''
        SELECT id, name, frozen
        FROM filedir
        WHERE parent=?
    ''', (root_id,))
    # fetch all rows first: the recursive calls below reuse the same cursor
    rows = cursor.fetchall()
    ob.children = [cls.from_db_index(cursor, root_row=r, parent=ob) for r in rows]

    # get all versions, oldest first
    cursor.execute('''
        SELECT * FROM filedir_version WHERE filedir=? ORDER BY recorded_time
    ''', (root_id,))
    matches = cursor.fetchall()
    versions = [FSEntryVersion.from_row(row, filedir=ob) for row in matches]
    # keep the history on the object instead of discarding it
    ob.versions = versions

    if len(versions) > 0:
        last_ver = versions[-1]
        ob.filetype = last_ver.filetype
        ob.deleted = last_ver.deleted
        ob.unfrozen_perms = last_ver.unfrozen_perms
        ob.symlink_target = last_ver.symlink_target
        ob.sha256 = last_ver.sha256
        # the dataclass field is named latest_version
        ob.latest_version = last_ver

    return ob
def flatten_tree(self, level=0):
"""Return list of all entries, with level, in pairs"""
@ -244,6 +321,32 @@ class FSEntry:
pairs.extend(c.flatten_tree(level=level + 1))
return pairs
def __str__(self):
return self.to_string(level=0)
def to_string(self, level=0):
    """Render this entry and, recursively, its children as indented text.

    Arguments:
        level: Nesting depth. Every output line is prefixed with ``level``
            spaces, and children are rendered at ``level + 1``.

    Returns:
        A multi-line string with one ``name: value`` line per attribute.
        An entry without a hash (``sha256`` is None) prints ``None`` for it.
    """
    if len(self.children) == 0:
        childsec = "[]"
    else:
        childstrs = [c.to_string(level=level + 1) for c in self.children]
        childsep = '\n\n'
        childsec = childsep + childsep.join(childstrs)

    parent_relpath = 'None' if self.parent is None else self.parent.relpath
    # entries loaded from the database without any version have no hash yet
    digest = 'None' if self.sha256 is None else self.sha256.hex()

    # TODO: list versions in str()
    text = '\n'.join((
        f"id: {self.id}",
        f"filename: {self.filename}",
        f"relpath: {self.relpath}",
        f"parent (relpath): {parent_relpath}",
        f"filetype: {self.filetype}",
        f"deleted: {self.deleted}",
        f"unfrozen_perms: {self.unfrozen_perms}",
        f"symlink_target: {self.symlink_target}",
        f"sha256: {digest}",
        f"children: {childsec}",
    ))
    indent = ' ' * level
    return '\n'.join(indent + line for line in text.splitlines())
def sort_diffs_filename(diffs):
name_ent = {e.filename(): e for e in diffs}
@ -259,7 +362,7 @@ class FSDiff:
@staticmethod
def compare(A, B):
    """Return True when A and B have equal hash, permissions, type and deleted flag.

    Permissions are compared through ``unfrozen_perms``. The former
    ``perms`` attribute is no longer consulted.
    """
    return (
        A.sha256 == B.sha256
        and A.unfrozen_perms == B.unfrozen_perms
        and A.filetype == B.filetype
        and A.deleted == B.deleted
    )
@ -302,7 +405,7 @@ class FSDiff:
Alist = {c.filename: c for c in A.children}
Blist = {c.filename: c for c in B.children}
allnames = set(Alist.keys() + Blist.keys())
allnames = set(list(Alist.keys()) + list(Blist.keys()))
modified_children = [cls.compute(
Alist.get(n, None),

View File

@ -98,67 +98,55 @@ CREATE TABLE store (id INTEGER PRIMARY KEY NOT NULL,
FOREIGN KEY (machine) REFERENCES machine (id) ON UPDATE CASCADE
);
-- The filedir table holds all files and directories that are tracked by the
-- store. Files and directories are distinguished by the filetype column. This
-- table also holds tracked files and directories that have been imported and
-- live outside the current store.
-- store. This table also holds tracked files and directories that have been
-- imported and live outside the current store.
-- We do not support renaming files. Once an entry is created here, it should
-- only be updated to reflect frozen/thawed status.
CREATE TABLE filedir (id INTEGER PRIMARY KEY NOT NULL,
store INTEGER NOT NULL,
filename TEXT, -- only a filename, not a path
name TEXT, -- only a filename, not a path
parent INTEGER REFERENCES filedir ON UPDATE CASCADE,
filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
-- Note that changes in filetype are seen as a delete/replace instead of an
-- update. If such a change is noticed, the original filedir entry must be
-- marked deleted and a new one created with the new filetype.
frozen BOOL NOT NULL,
deleted BOOL NOT NULL, -- set True when recording a deleted file
-- NOTE: a deleted file should not be updated, other than through a cascade
UNIQUE(store, filename, parent),
FOREIGN KEY (store) REFERENCES store (id) ON UPDATE CASCADE
UNIQUE(store, name, parent)
);
-- Detect cross-store references
CREATE TRIGGER insert_filedir BEFORE INSERT ON filedir
BEGIN SELECT CASE
WHEN NEW.parent IS NOT NULL AND (SELECT filetype FROM filedir WHERE id = NEW.parent) != 'DIR'
THEN RAISE (ABORT, 'Parent is not listed as a directory')
WHEN NEW.parent IS NOT NULL AND NEW.store != (SELECT store FROM filedir WHERE id = NEW.parent)
THEN RAISE (ABORT, 'Parent directory resides in different store')
THEN RAISE (ABORT, 'Parent resides in different store')
END; END;
CREATE TRIGGER update_filedir BEFORE UPDATE ON filedir
BEGIN SELECT CASE
WHEN NEW.parent IS NOT NULL AND (SELECT filetype FROM filedir WHERE id = NEW.parent) != 'DIR'
THEN RAISE (ABORT, 'Parent is not listed as a directory')
WHEN NEW.parent IS NOT NULL AND NEW.store != (SELECT store FROM filedir WHERE id = NEW.parent)
THEN RAISE (ABORT, 'Parent directory resides in different store')
END; END;
-- Files that are recorded to be deleted have the deleted column set to True.
-- These files should no longer be used. In particular, we should never rename
-- or update them since their lifetime is over.
INSERT INTO triggers VALUES('update_deleted_filedir', TRUE);
CREATE TRIGGER update_deleted_filedir BEFORE UPDATE ON filedir
BEGIN SELECT CASE
WHEN OLD.deleted AND (SELECT enabled FROM triggers WHERE name = 'update_deleted_filedir')
THEN RAISE (ABORT, 'Cannot modify filedir entry for deleted file')
WHEN (NEW.id != OLD.id OR NEW.store != OLD.store OR NEW.parent != OLD.parent)
THEN RAISE (ABORT, 'The only updates to filedir allowed are to the frozen column')
END; END;
-- This table holds _versions_ of files. This table holds information that is
-- independent of the location within the filesystem and merely indicates a
-- version of the content in a format that is nearly independent of filetype
-- (though the computation of content hashes is of course dependent on
-- filetype). Each version has a number, and was provided by some program (and
-- potentially a datum).
-- This table holds _versions_ of files and directories. This table holds
-- information that is independent of the location within the filesystem and
-- merely indicates a version of the content in a format that is nearly
-- independent of filetype (though the computation of content hashes is of
-- course dependent on filetype). Each version has a number, and was provided by
-- some program (and potentially a datum).
CREATE TABLE filedir_version (id INTEGER PRIMARY KEY NOT NULL,
filedir INTEGER REFERENCES filedir ON UPDATE CASCADE, -- parent filedir entry
version_counter INTEGER NOT NULL, -- incremented whenever a new version of the file is recorded
filedir INTEGER NOT NULL
REFERENCES filedir ON UPDATE CASCADE, -- parent filedir entry
recorded_time REAL, -- When was this version recorded?
-- Note that changing filetype (e.g. directory becomes file) or deleting a
-- file are simply just new versions of a filedir.
filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
deleted BOOL NOT NULL, -- set True when recording a deleted file
-- We record the permissions on each file, in a way that enables reloading
-- permissions properly when thawing after a freeze operation.
unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target. I.e. this is the "content" of the symlink.
symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target. i.e. this is the "content" of the symlink.
-- The following hash can be NULL if the file was deleted. It could also be
-- null if hashing is deferred. Hash deferring would be preferred in cases
@ -170,26 +158,19 @@ CREATE TABLE filedir_version (id INTEGER PRIMARY KEY NOT NULL,
-- can defer by only hashing files and directories at the end of a program.
sha256 TEXT,
source_task INTEGER,
UNIQUE(filedir, version),
FOREIGN KEY (source_task) REFERENCES task (id) ON UPDATE CASCADE
source_task INTEGER REFERENCES task (id) ON UPDATE CASCADE
);
-- Disallow inserting or deleting versions for deleted files. (the version
-- deleting the file should be added first, then the deleted field set on the
-- filedir row). The update trigger is toggleable to enable importing.
CREATE TRIGGER insert_filedir_version_parent_deleted BEFORE INSERT ON filedir_version
-- Disallow updating filedir_version rows. Instead, a new version should be created.
-- One exception is during importing, in which case we can disable the trigger
INSERT INTO triggers VALUES('update_filedir_version', TRUE);
CREATE TRIGGER update_filedir_version BEFORE UPDATE ON filedir_version
BEGIN SELECT CASE
WHEN (SELECT deleted FROM filedir WHERE id = NEW.parent)
THEN RAISE (ABORT, 'Attempted to insert filedir_version whose filedir was deleted')
END; END;
INSERT INTO triggers VALUES('update_filedir_version_parent_deleted', TRUE);
CREATE TRIGGER update_filedir_version_parent_deleted BEFORE UPDATE ON filedir_version
BEGIN SELECT CASE
WHEN (SELECT deleted FROM filedir WHERE id = NEW.parent)
AND (SELECT enabled FROM triggers WHERE name = 'update_filedir_version_parent_deleted')
THEN RAISE (ABORT, 'Attempted to update filedir_version whose parent was deleted')
WHEN (SELECT enabled FROM triggers WHERE name = 'update_filedir_version')
THEN RAISE (ABORT,
'Updating filedir versions is prohibited, other than for id changes')
END; END;
-- TODO: check for inserting inconsistent version info re. deleted flag/filetype
-- A computational environment which can execute "programs". Note that the

View File

@ -46,12 +46,20 @@ class Program:
self.message, #message TEXT, -- user-defined message to help distinguish similar runs
))
self.id = cur.lastrowid
cur.connection.commit()
self.set_start_time(datetime.datetime.now())
return self
def new_task(self, name, py_function_id=None):
    """Create a new task and return its id.

    A row ``(None, self.id, py_function_id)`` is inserted into ``task`` and
    the cursor's ``lastrowid`` is returned. ``name`` is accepted but is not
    written by this insert.
    """
    cursor = self.store.conn.cursor()
    # first value is None, presumably so the id column is auto-assigned;
    # confirm against the task table definition
    task_row = (None, self.id, py_function_id)
    cursor.execute("INSERT INTO task VALUES (?, ?, ?)", task_row)
    new_id = cursor.lastrowid
    return new_id
def __exit__(self, exc_type, exc_value, exc_traceback):
end_time = datetime.datetime.now()
# record start and end times in store
@ -142,7 +150,7 @@ class Store:
"""Get the database id for the table entry in this store having name '.'"""
if cur is None:
cur = self.conn.cursor()
cur.execute('SELECT id FROM filedir WHERE store=0 AND parent is NULL;')
cur.execute('SELECT id FROM filedir WHERE store=1 AND parent is NULL')
return cur.fetchone()
def path_to_fsentry(self, path):
@ -177,52 +185,111 @@ class Store:
return fd_id
return fs.FSEntry.from_db_index(cur, root_id=fd_id)
def recorded_status(self, filepath):
recorded = self.path_to_fsentry(filepath)
def fs_entries(self, shallow=False):
    """Return recursive structure containing FSEntry objects from db.

    Arguments:
        shallow: Accepted for callers that pass it; not used by this
            implementation.

    Returns:
        ``fs.FSEntry.empty_root()`` when no root row exists, otherwise the
        tree built by ``fs.FSEntry.from_db_index`` from the root row.
    """
    root = self.filedir_root_index()
    logger.debug('root_id={}', root)
    if root is None:
        logger.trace("Empty root")
        return fs.FSEntry.empty_root()

    # filedir_root_index() returns the fetchone() row, i.e. a 1-tuple, while
    # from_db_index() wraps root_id into the query parameters itself. Pass
    # the bare id so that it is not wrapped twice.
    root_id = root if isinstance(root, int) else root[0]
    logger.trace("Non-empty root {}", root_id)
    return fs.FSEntry.from_db_index(self.conn.cursor(), root_id=root_id)
def program(self, name, message=None):
return Program(self, name, message)
def diff(self, filepath):
def diff(self):
    """
    Find changes to files and directories compared to their recorded versions

    Scans ``self.path`` on disk, loads the recorded tree through
    ``fs_entries`` and returns ``fs.FSDiff.compute(recorded, current)``.
    """
    logger.trace("DIFF")

    # state of the files currently on disk under the store path
    on_disk = fs.FSEntry.from_path(self.path)
    logger.debug("CURRENT: \n{}", str(on_disk))

    # state last recorded in the database
    in_db = self.fs_entries(shallow=True)
    logger.debug("RECORDED: \n{}", str(in_db))

    return fs.FSDiff.compute(in_db, on_disk)
def record(self, diff, message=None):
def _record_file_version(self, cur, ob, filedir_id, source_task=None):
    """Insert one ``filedir_version`` row describing ``ob``.

    Arguments:
        cur: Database cursor used for the insert.
        ob: Entry providing ``filetype``, ``unfrozen_perms``,
            ``symlink_target`` and ``sha256``.
        filedir_id: id of the ``filedir`` row this version belongs to.
        source_task: id of the task that produced this version, if any.

    The row is stamped with the current time and written with
    ``deleted=False``. A missing hash (``ob.sha256`` is None) is stored as
    NULL rather than raising.
    """
    digest_hex = None if ob.sha256 is None else ob.sha256.hex()
    cur.execute(
        'INSERT INTO filedir_version VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)',
        (
            None,
            filedir_id,
            datetime.datetime.now().timestamp(),
            ob.filetype,
            False,
            ob.unfrozen_perms,
            ob.symlink_target,
            digest_hex,
            source_task,
        )
    )
def _record_new_file_recursive(self, ob, cur, parent_id, source_task):
    """Record ``ob`` and all of its descendants as new versions.

    Arguments:
        ob: Entry to record. ``ob.filename`` and ``ob.children`` are read.
        cur: Database cursor used for all statements.
        parent_id: id of the parent ``filedir`` row, or None at the top.
        source_task: Task id stored with every version written.

    The ``filedir`` row for ``(name, parent_id)`` is reused when it already
    exists and created otherwise. A version row is then written, and the
    children are recorded with this row's id as their parent.
    """
    # Find an entry with this name and parent. "IS" is used instead of "="
    # so that a NULL parent (top level) matches as well. The lookup uses
    # parent_id because freshly scanned entries have no database id yet.
    cur.execute(
        "SELECT id FROM filedir WHERE store = 1 AND name = ? AND parent IS ? LIMIT 1",
        (ob.filename, parent_id),
    )
    found = cur.fetchone()
    if found is None:
        # create filedir entry and get its id
        cur.execute(
            'INSERT INTO filedir VALUES (?, ?, ?, ?, ?)',
            (
                None,
                1,
                ob.filename,
                parent_id,
                False,
            ))
        thisid = cur.lastrowid
    else:
        thisid, = found

    self._record_file_version(cur, ob, thisid, source_task=source_task)

    # descend into children and record all of them anew as well
    for c in ob.children:
        self._record_new_file_recursive(c, cur, thisid, source_task)
def _record_recursive(self, diff, cur, parent_id=None, source_task=None):
    """Record this level of a diff.

    Arguments:
        diff: Diff node with ``A`` (recorded side) and ``B`` (current side).
        cur: Database cursor handed on to the recording helpers.
        parent_id: id of the parent ``filedir`` row, or None at the top.
        source_task: Task id attached to newly recorded versions.

    A node with no ``A`` side is recorded as new from ``B``. A node with no
    ``B`` side is recorded as deleted from ``A``. Modified nodes are not
    handled yet.
    """
    if diff.A is None:
        self._record_new_file_recursive(diff.B, cur, parent_id, source_task=source_task)
    elif diff.B is None:
        # B is None here, so the deleted entry is described by the A side
        self._record_deleted_file_recursive(diff.A, cur, parent_id)
    else:
        # modified
        pass
        # TODO: descend into children
def record(self, diff, parent_id=None, message=None, cur=None):
    """Record the changes described by ``diff`` in the store.

    Arguments:
        diff: Root of the diff to record.
        parent_id: id of the ``filedir`` row under which the diff root is
            recorded, or None for the top level.
        message: User-defined message passed to the 'RECORD' program.
        cur: Database cursor to use. A new one is taken from ``self.conn``
            when omitted.
    """
    if cur is None:
        cur = self.conn.cursor()

    with self.program('RECORD', message) as p:
        # create a task for this operation
        task_id = p.new_task('Store._record_recursive')
        # descend the diff, tracking parent filedir IDs, creating them and
        # recording new versions of each, when necessary
        self._record_recursive(diff, cur, parent_id=parent_id, source_task=task_id)
#@contextmanager
def run(

View File

@ -3,6 +3,7 @@ Pure SQL tests that don't depend on nancy's Python code
"""
import pytest
import datetime
import os
import sqlite3
@ -156,31 +157,60 @@ def insert_directories(insert_store):
cur = insert_store
cur.executemany(
'INSERT INTO filedir VALUES '
'(?, ?, ?, ?, ?, ?, ?)',
'(?, ?, ?, ?, ?)',
[(
None, #id INTEGER PRIMARY KEY NOT NULL,
1, #store INTEGER NOT NULL,
'.', #filename TEXT, -- only a filename, not a path
None, #parent INTEGER REFERENCES filedir ON UPDATE CASCADE,
'DIR', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
False, #frozen BOOL NOT NULL,
False, #deleted BOOL NOT NULL,
), (
None, #id INTEGER PRIMARY KEY NOT NULL,
1, #store INTEGER NOT NULL,
'foo', #filename TEXT, -- only a filename, not a path
1, #parent INTEGER REFERENCES filedir ON UPDATE CASCADE,
'DIR', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
False, #frozen BOOL NOT NULL,
False, #deleted BOOL NOT NULL,
), (
None, #id INTEGER PRIMARY KEY NOT NULL,
2, #store INTEGER NOT NULL,
'.', #filename TEXT, -- only a filename, not a path
None, #parent INTEGER REFERENCES filedir ON UPDATE CASCADE,
'DIR', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
False, #frozen BOOL NOT NULL,
False, #deleted BOOL NOT NULL,
)],
)
cur.executemany(
'INSERT INTO filedir_version VALUES '
'(?, ?, ?, ?, ?, ?, ?, ?, ?)',
[(
None, #id INTEGER PRIMARY KEY NOT NULL,
1, # INTEGER REFERENCES filedir ON UPDATE CASCADE, -- parent filedir entry
datetime.datetime.now().timestamp(),
'DIR', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
False, #deleted BOOL NOT NULL, -- set True when recording a deleted file
'drwxrwxr-x', #unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
None, #symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target. I.e. this is the "content" of the symlink.
'a84ed33864d06615a87bc8da5258d841163f1e7969367ecd07b041ae1a18febd', #sha256 TEXT,
None, #source_task INTEGER,
), (
None, #id INTEGER PRIMARY KEY NOT NULL,
1, # INTEGER REFERENCES filedir ON UPDATE CASCADE, -- parent filedir entry
datetime.datetime.now().timestamp(),
'DIR', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
False, #deleted BOOL NOT NULL, -- set True when recording a deleted file
'drwxrwxr-x', #unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
None, #symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target. I.e. this is the "content" of the symlink.
'a84ed33864d06615a87bc8da5258d841163f1e7969367ecd07b041ae1a18febd', #sha256 TEXT,
None, #source_task INTEGER,
), (
None, #id INTEGER PRIMARY KEY NOT NULL,
1, # INTEGER REFERENCES filedir ON UPDATE CASCADE, -- parent filedir entry
datetime.datetime.now().timestamp(),
'DIR', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
False, #deleted BOOL NOT NULL, -- set True when recording a deleted file
'drwxrwxr-x', #unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
None, #symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target. I.e. this is the "content" of the symlink.
'a84ed33864d06615a87bc8da5258d841163f1e7969367ecd07b041ae1a18febd', #sha256 TEXT,
None, #source_task INTEGER,
)],
)
return cur
@ -191,15 +221,13 @@ def test_crossstore_directory_insert(insert_directories):
# declaring directory as belonging to store 2, but parent's store is 1
cur.execute(
'INSERT INTO filedir VALUES '
'(?, ?, ?, ?, ?, ?, ?)',
'(?, ?, ?, ?, ?)',
(
None, #id INTEGER PRIMARY KEY NOT NULL,
2, #store INTEGER NOT NULL,
'some_dir', #filename TEXT, -- only a filename, not a path
1, #parent INTEGER REFERENCES filedir ON UPDATE CASCADE,
'DIR', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
False, #frozen BOOL NOT NULL,
False, #deleted BOOL NOT NULL,
))
for row in cur.connection.iterdump():
print(row)
@ -210,55 +238,79 @@ def test_crossstore_directory_insert(insert_directories):
@pytest.fixture
def insert_files(insert_directories):
cur = insert_directories
cur.execute('SELECT COUNT(*) FROM filedir')
nprev, = cur.fetchone()
cur.executemany(
'INSERT INTO filedir VALUES '
'(?, ?, ?, ?, ?, ?, ?)',
'(?, ?, ?, ?, ?)',
[(
None, #id INTEGER PRIMARY KEY NOT NULL,
1, #store INTEGER NOT NULL,
'example.csv', #filename TEXT, -- only a filename, not a path
1, #parent INTEGER REFERENCES filedir ON UPDATE CASCADE,
'REG', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
False, #frozen BOOL NOT NULL,
False, #deleted BOOL NOT NULL,
), (
None, #id INTEGER PRIMARY KEY NOT NULL,
1, #store INTEGER NOT NULL,
'plots.png', #filename TEXT, -- only a filename, not a path
2, #parent INTEGER REFERENCES filedir ON UPDATE CASCADE,
'REG', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
False, #frozen BOOL NOT NULL,
False, #deleted BOOL NOT NULL,
)]
)
cur.executemany(
'INSERT INTO filedir_version VALUES '
'(?, ?, ?, ?, ?, ?, ?, ?, ?)',
[(
None, #id INTEGER PRIMARY KEY NOT NULL,
nprev + 1, # INTEGER REFERENCES filedir ON UPDATE CASCADE, -- parent filedir entry
datetime.datetime.now().timestamp(),
'REG', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
False, #deleted BOOL NOT NULL, -- set True when recording a deleted file
'drwxrwxr-x', #unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
None, #symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target. I.e. this is the "content" of the symlink.
'a84ed33864d06615a87bc8da5258d841163f1e7969367ecd07b041ae1a18febd', #sha256 TEXT,
None, #source_task INTEGER,
), ( # second version of first file
None, #id INTEGER PRIMARY KEY NOT NULL,
nprev + 1, # INTEGER REFERENCES filedir ON UPDATE CASCADE, -- parent filedir entry
datetime.datetime.now().timestamp(),
'REG', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
False, #deleted BOOL NOT NULL, -- set True when recording a deleted file
'drwxr-xr-x', #unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
None, #symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target. I.e. this is the "content" of the symlink.
'a94ed33864d06615a87bc8da5258d841163f1e7969367ecd07b041ae1a18febd', #sha256 TEXT,
None, #source_task INTEGER,
), (
None, #id INTEGER PRIMARY KEY NOT NULL,
nprev + 2, # INTEGER REFERENCES filedir ON UPDATE CASCADE, -- parent filedir entry
datetime.datetime.now().timestamp(),
'REG', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
False, #deleted BOOL NOT NULL, -- set True when recording a deleted file
'drwxr-xr-x', #unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--'
None, #symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target. I.e. this is the "content" of the symlink.
'a94ed33864d06615a87bc8da5258d841163f1e7969367ecd07b041ae1a18febd', #sha256 TEXT,
None, #source_task INTEGER,
)])
return cur
def test_nondir_parent_directory_insert(insert_files):
# TODO: This test is disabled until triggers are added to check for these types
# of constraints. These became much more complicated to check when I added
# filedir_version.
def disabled_test_nondir_parent_directory_insert(insert_files):
cur = insert_files
with pytest.raises(sqlite3.IntegrityError):
# declaring parent as 5, but 5 is a file (plots.png)
cur.execute(
'INSERT INTO filedir VALUES '
'(?, ?, ?, ?, ?, ?, ?)',
'(?, ?, ?, ?, ?)',
(
None, #id INTEGER PRIMARY KEY NOT NULL,
2, #store INTEGER NOT NULL,
1, #store INTEGER NOT NULL,
'some_filedir.txt', #filename TEXT, -- only a filename, not a path
5, #parent INTEGER REFERENCES filedir ON UPDATE CASCADE,
'DIR', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details
False, #frozen BOOL NOT NULL,
False, #deleted BOOL NOT NULL,
))
for row in cur.connection.iterdump():
print(row)
cur.execute('SELECT * FROM filedir')
print(cur.fetchall())
def test_update_deleted_file(insert_files):
cur = insert_files
# first we set a file to deleted
cur.execute('UPDATE filedir SET deleted=True WHERE id=5')
with pytest.raises(sqlite3.IntegrityError):
# Now we try and update it, which should fail due to trigger
cur.execute('UPDATE filedir SET frozen=True WHERE id=5')

1
tests/test_fs.py Normal file
View File

@ -0,0 +1 @@
from nancy import fs

View File

@ -22,7 +22,7 @@ def filled_dir(bare_dir):
def test_record_untracked_dir(filled_dir):
    """Recording a directory that has no store yet should work."""
    from nancy.cli.record import record

    # record() takes the message as its first parameter and the store
    # location as store_path. Passing the directory positionally would
    # collide with the message keyword.
    record(message='test_record_untracked_dir', store_path=filled_dir)
@pytest.fixture