diff --git a/src/nancy/cli/diff.py b/src/nancy/cli/diff.py index 3516d99..8553574 100644 --- a/src/nancy/cli/diff.py +++ b/src/nancy/cli/diff.py @@ -1,7 +1,8 @@ import click from loguru import logger -from .. import db, fs, store +from .. import db, fs +from ..store import find_store, Store import os import sys @@ -32,7 +33,7 @@ def print_diff(ABdiff: fs.FSDiff, indent=2, indent_level=0, use_color=True, hashcolor = Fore.MAGENTA if use_color else '' def _print_row(tag, entry, level): - relpath = entry.relpath[2:] + relpath = entry.relpath # Format relpath using filetype-based colors dname, fname = os.path.split(relpath) @@ -68,19 +69,7 @@ def print_diff(ABdiff: fs.FSDiff, indent=2, indent_level=0, use_color=True, _print_row('MOD', d.B, l) -def diff(store, filedir_path, show_hashes=False, use_color=True): - """Unwrapped diff command that prints a diff""" - if not os.path.exists(filedir_path): - raise FileNotFoundError(f"Cannot diff non-existent file or directory {filedir_path}") - - # get the diff object - fsdiff = store.diff(filedir_path) - - print_diff(fsdiff, show_hashes=show_hashes, use_color=use_color) - - @click.command() -@click.argument("path", default='.') @click.option( '-H', "--show-hashes", is_flag=True, @@ -91,8 +80,16 @@ def diff(store, filedir_path, show_hashes=False, use_color=True): is_flag=True, help='If given, do not print any color output.', ) -def diff_cli(path, show_hashes, no_color): +@click.option( + "-s", "--store", + type=str, + default=None, + help='Top-level of store. If omitted, use closest common parent directory ' + 'of given paths. If given the path to a non-store directory, a new ' + 'store is initialized there.', +) @logger.catch +def diff_cli(show_hashes, no_color, store): """Detect and describe changes to PATH PATH is a path to a file or directory inside an existing nancy store @@ -101,22 +98,28 @@ def diff_cli(path, show_hashes, no_color): However, the printed diff information pertains to the given value of PATH. """ - storepath = store.find_store(path) + if store is None: + cwd = os.getcwd() + storepath = find_store(cwd) - if storepath is None: - print("Could not find nancy.db in any directory containing " - + str(os.path.realpath(path)), file=sys.stderr) - sys.exit(1) + if storepath is None: + logger.error( + "Could not find nancy.db in any directory containing {}", + os.path.realpath(cwd), + ) + sys.exit(1) - print("Paths relative to store path:", storepath) + print("Paths relative to store path:", store) # connect to store - s = store.Store(storepath) + s = Store(store) - try: - diff(store=s, filedir_path=path, show_hashes=show_hashes, use_color=not no_color) - except FileNotFoundError as e: - print(str(e), file=sys.stderr) - sys.exit(1) + d = s.diff() logger.success("Computed diff") + print_diff( + d, + show_hashes=show_hashes, + use_color=not no_color, + ) + diff --git a/src/nancy/cli/record.py b/src/nancy/cli/record.py index d93061e..1e21779 100644 --- a/src/nancy/cli/record.py +++ b/src/nancy/cli/record.py @@ -1,7 +1,7 @@ import click from loguru import logger -from .. import store +from .. import fs, store from .common import confirm from .diff import print_diff @@ -9,21 +9,27 @@ from .diff import print_diff import os import sys - -def record(directory, message, show_diff=True, show_hashes=False, use_color=True, @logger.catch +def record(message, store_path=None, show_diff=True, show_hashes=False, use_color=True, skip_confirm=False): """Unwrapped record command""" - if not os.path.isdir(directory): - raise ValueError(f"Cannot record non-existent directory {directory}") - existing_store = store.find_store(directory) - if existing_store is None: # this is a new store - s = store.Store.init(directory) + if store_path is None: + curdir = os.path.realpath(os.getcwd()) + logger.info("Looking for store in", curdir) + store_path = store.find_store(curdir) + if store_path is None: # If no store found, assume we're creating here + store_path = curdir + + + if not os.path.exists(os.path.join(store_path, 'nancy.db')): + # this is a new store + logger.info(f"Initializing new store in {store_path}...") + s = store.Store.init(store_path) else: # this is an existing store - s = store.Store(directory) + s = store.Store(store_path) - fsdiff = s.diff(directory) + fsdiff = s.diff() if show_diff: print_diff(fsdiff, show_hashes=show_hashes, use_color=use_color) @@ -38,7 +44,6 @@ def record(directory, message, show_diff=True, show_hashes=False, use_color=True @click.command() -@click.argument("directory", default='.') @click.option( '-H', "--show-hashes", is_flag=True, @@ -54,15 +59,19 @@ def record(directory, message, show_diff=True, show_hashes=False, use_color=True type=str, required=True, help='A user-defined descriptive message for this recording operation.', -def record_cli(directory, show_hashes, no_color, message): +) +@click.option( + "-s", "--store", + type=str, + default=None, + help='Top-level of store. If omitted, use closest common parent directory ' + 'of given paths. If given the path to a non-store directory, a new ' + 'store is initialized there.', +) +def record_cli(show_hashes, no_color, message, store): """ Initialize tracking or record changes to a tracked directory. - - If DIRECTORY is not already part of an existing nancy store, then a new - 'nancy.db' file is created in that directory. On the other hand, if the - directory is part of an existing store, it will be updated and versions - of any files changes since the last recording will be incremented. """ - record(directory, message=message, show_hashes=show_hashes, use_color=not - no_color) + record(message=message, show_hashes=show_hashes, use_color=not + no_color, store_path=store) diff --git a/src/nancy/fs.py b/src/nancy/fs.py index 460716b..88fd8a1 100644 --- a/src/nancy/fs.py +++ b/src/nancy/fs.py @@ -1,10 +1,13 @@ """Interaction with the filesystem and with file database entries""" -from dataclasses import dataclass from loguru import logger + +from dataclasses import asdict, dataclass +from datetime import datetime import hashlib import operator import os +from pathlib import Path import stat from typing import List import warnings @@ -64,15 +67,32 @@ def make_readonly_recursive(path, excluded=[]): @dataclass class FSEntryVersion: - """A hashed file or directory.""" + """A version of a file or directory.""" + id: int + filedir: 'FSEntry' + recorded_time: datetime #When was this version recorded? + filetype: str #One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details + deleted: bool #set True when recording a deleted file + unfrozen_perms: str # stat.filemode(os.stat(path).st_mode): '-rw-rw-r--' symlink_target:str # if this is a symlink, this is the (read but not fully # resolved) target. I.e. this is the "content" of the symlink. sha256: str - id: int = None # should not be null - version_counter: int = None # incremented whenever a new version of the file is recorded source_task_id: int = None + @classmethod + def from_row(cls, row, filedir=None): + if filedir is None: + filedir = row[1] + return cls( + row[0], + filedir, + datetime.fromtimestamp(row[2]), + *row[3:-2], + bytes.fromhex(row[-2]), + row[-1] + ) + @dataclass class FSEntry: @@ -80,11 +100,12 @@ class FSEntry: id: int # defaults to None filename: str # with parent directory stripped. None if this is the root relpath: str # relative to some root directory + parent: 'FSEntry' # upward link # children for dirs only: non-recursive; files/dirs at this level only children: List['FSEntry'] filetype: str # regular, symlink, special (block, char, pipe, or socket) deleted: bool - versions: [FSEntryVersion] = [] + versions: List[FSEntryVersion] = None # these will be filled from the version list automatically unfrozen_perms: str = None # stat.filemode(os.stat(path).st_mode): '-rw-rw-r--' @@ -94,22 +115,32 @@ class FSEntry: latest_version: FSEntryVersion = None def __post_init__(self): - if len(self.versions) > 0: + if self.versions is not None and len(self.versions) > 0: self.latest_version = self.versions[-1] self.unfrozen_perms = self.latest_version.unfrozen_perms self.symlink_target = self.latest_version.symlink_target self.sha256 = self.latest_version.sha256 @classmethod - def from_path(cls, root, relpath='.', filestat=None, exclude=['./nancy.db']): + def from_path(cls, root, relpath=None, exclude=['nancy.db'], parent=None, + direntry=None): """ Scan a path to instantiate (recursive). + + Arguments: + root (str or PathLike): The root directory of an existing or new store path + relpath (str or PathLike): Path of some directory under the store + path in which to find files and directories. Only these entries + and their children will be included. """ m = hashlib.sha256() - path = os.path.join(root, relpath) + if relpath is None: # top-level invocation at root + path = root + else: + path = os.path.join(root, relpath) - filestat = os.stat(path) if filestat is None else filestat + filestat = os.lstat(path) if direntry is None else direntry.stat(follow_symlinks=False) s = filestat.st_mode children = [] @@ -124,7 +155,8 @@ class FSEntry: # this prevents a directory's hash from colliding with a file hash # in cases where it only holds a single file - m.update(bytes(relpath, 'utf-8')) + if relpath is not None: + m.update(bytes(relpath, 'utf-8')) # we use os.scandir which returns a DirEntry for each child # excluding "." and "..". These variables hold a .stat which we can @@ -132,14 +164,17 @@ class FSEntry: direntries = list(os.scandir(path)) direntries.sort(key=operator.attrgetter('name')) + childrenrelpaths = ((e.name if relpath is None else + os.path.join(relpath, e.name)) for e in direntries) + children = [ cls.from_path( root=root, - relpath=os.path.join(relpath, e.name), - filestat=e.stat(), + relpath=rp, + direntry=e, ) - for e in direntries \ - if os.path.join(relpath, e.name) not in exclude + for rp, e in zip(childrenrelpaths, direntries) + if rp not in exclude ] for c in children: # now hash concatenated sorted hashes # hash on perms+sha256 to enable recursively detecting perm @@ -170,21 +205,41 @@ class FSEntry: sha256 = m.digest() - return cls( + ob = cls( id=None, - filename=os.path.basename(relpath), - relpath=relpath, + filename='.' if relpath is None else os.path.basename(relpath), + relpath='.' if relpath is None else relpath, + parent=parent, children=children, - filetype=filetype, - deleted=False, + filetype=None, + deleted=None, versions=[ FSEntryVersion( - unfrozen_perms=stat.filemode(filestat.st_mode), + id=None, + filedir=None, + recorded_time=datetime.now().timestamp(), + filetype=filetype, + deleted=False, + unfrozen_perms=stat.filemode(s), symlink_target=symlink_target, sha256=sha256, + source_task_id=None, ) ], ) + # now change children's parents to point to this object + for v in ob.versions: + v.filedir = ob + if len(ob.versions) > 0: + last_ver = ob.versions[-1] + ob.filetype = last_ver.filetype + ob.deleted = last_ver.deleted + ob.unfrozen_perms = last_ver.unfrozen_perms + ob.symlink_target = last_ver.symlink_target + ob.sha256 = last_ver.sha256 + for c in ob.children: + c.parent = ob + return ob @classmethod def empty_root(cls): @@ -193,49 +248,71 @@ class FSEntry: id=None, filename='.', relpath='.', + parent=None, children=[], filetype='DIR', - perms='----------', + unfrozen_perms='----------', sha256=hashlib.sha256().digest(), deleted=False, ) @classmethod - def from_db_index(cls, cursor, root_id=None, root_row=None): @logger.catch + def from_db_index(cls, cursor, root_id=None, root_row=None, parent=None): """Given id of an entry in filedir, recursively fill this object""" - fields = 'id, filename, filetype, frozen, deleted' if root_row is None: assert root_id is not None - cursor.execute(f'SELECT {fields} FROM filedir WHERE id=?', root_id) + logger.debug('root_id({})={}', type(root_id), root_id) + cursor.execute( + 'SELECT id, name, frozen FROM filedir WHERE id=?', + (root_id,), + ) root_row = cursor.fetchone() - root_id, filename, filetype, frozen, deleted = root_row + root_id, filename, frozen = root_row - cursor.execute(f'SELECT {fields} FROM filedir WHERE parent=? ORDER BY name', root_id) - rows = cursor.fetchall() - children = [cls.from_db_index(cursor, root_row=r) for r in rows] + relpath = filename if parent is None else os.path.join(parent.relpath, filename) - fields = 'id, version_counter, unfrozen_perms, symlink_target, sha256, source_task_id' - cursor.execute(f'SELECT {fields} FROM filedir_version WHERE filedir=? ORDER BY version', self.id) - matches = cursor.fetchall() - versions = [FSEntryVersion(*row) for row in matches] - - return cls( + # instantiate class before filling children + ob = cls( id=root_id, filename=filename, relpath=relpath, - children=children, - filetype=filetype, - perms=unfrozen_perms, - sha256=bytes.fromhex(sha256), - deleted=deleted, - versions=versions, + parent=parent, + children=[], + filetype=None, + unfrozen_perms=None, + sha256=None, + deleted=None, + versions=[], ) - def versions(self, cursor): - """Get a list from the database of FSEntryVersion objects.""" - if self.id is None: - return None + cursor.execute(f''' + SELECT id, name, frozen + FROM filedir + WHERE parent=? + ''', (root_id,)) + rows = cursor.fetchall() + ob.children = [cls.from_db_index(cursor, root_row=r, parent=ob) for r in rows] + + # get all versions + fields = ('') + cursor.execute(f''' + SELECT * FROM filedir_version WHERE filedir=? ORDER BY recorded_time + ''', (root_id,)) + matches = cursor.fetchall() + versions = [FSEntryVersion.from_row(row, filedir=ob) for row in matches] + + if len(versions) > 0: + last_ver = versions[-1] + ob.filetype = last_ver.filetype + ob.deleted = last_ver.deleted + ob.unfrozen_perms = last_ver.unfrozen_perms + ob.symlink_target = last_ver.symlink_target + ob.sha256 = last_ver.sha256 + ob.last_version = last_ver + + return ob + def flatten_tree(self, level=0): """Return list of all entries, with level, in pairs""" @@ -244,6 +321,32 @@ class FSEntry: pairs.extend(c.flatten_tree(level=level + 1)) return pairs + def __str__(self): + return self.to_string(level=0) + + def to_string(self, level=0): + if len(self.children) == 0: + childsec = "[]" + else: + childstrs = [c.to_string(level=level + 1) for c in self.children] + childsep = '\n\n' + childsec = childsep + childsep.join(c for c in childstrs) + + # TODO: list versions in str() + #versions: [FSEntryVersion] = [] + + return '\n'.join((' ' * level) + l for l in f"""id: {self.id} +filename: {self.filename} +relpath: {self.relpath} +parent (relpath): {'None' if self.parent is None else self.parent.relpath} +filetype: {self.filetype} +deleted: {self.deleted} +unfrozen_perms: {self.unfrozen_perms} +symlink_target: {self.symlink_target} +sha256: {self.sha256.hex()} +children: {childsec} +""".splitlines()) + def sort_diffs_filename(diffs): name_ent = {e.filename(): e for e in diffs} @@ -259,7 +362,7 @@ class FSDiff: @staticmethod def compare(A, B): return A.sha256 == B.sha256 and \ - A.perms == B.perms and \ + A.unfrozen_perms == B.unfrozen_perms and \ A.filetype == B.filetype and \ A.deleted == B.deleted @@ -302,7 +405,7 @@ class FSDiff: Alist = {c.filename: c for c in A.children} Blist = {c.filename: c for c in B.children} - allnames = set(Alist.keys() + Blist.keys()) + allnames = set(list(Alist.keys()) + list(Blist.keys())) modified_children = [cls.compute( Alist.get(n, None), diff --git a/src/nancy/schema/version0.sql b/src/nancy/schema/version0.sql index 38d2e97..6c8951a 100644 --- a/src/nancy/schema/version0.sql +++ b/src/nancy/schema/version0.sql @@ -98,67 +98,55 @@ CREATE TABLE store (id INTEGER PRIMARY KEY NOT NULL, FOREIGN KEY (machine) REFERENCES machine (id) ON UPDATE CASCADE ); + -- The filedir table holds all files and directories that are tracked by the --- store. Files and directories are distinguished by the filetype column. This --- table also holds tracked files and directories that have been imported and --- live outside the current store. +-- store. This table also holds tracked files and directories that have been +-- imported and live outside the current store. +-- We do not support renaming files. Once an entry is created here, it should +-- only be updated to reflect frozen/thawed status. CREATE TABLE filedir (id INTEGER PRIMARY KEY NOT NULL, store INTEGER NOT NULL, - filename TEXT, -- only a filename, not a path + name TEXT, -- only a filename, not a path parent INTEGER REFERENCES filedir ON UPDATE CASCADE, - - filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details - -- Note that changes in filetype are seen as a delete/replace instead of an - -- update. If such a change is noticed, the original filedir entry must be - -- marked deleted and a new one created with the new filetype. - frozen BOOL NOT NULL, - deleted BOOL NOT NULL, -- set True when recording a deleted file - -- NOTE: a deleted file should not be updated, other than through a cascade - UNIQUE(store, filename, parent), - FOREIGN KEY (store) REFERENCES store (id) ON UPDATE CASCADE + UNIQUE(store, name, parent) ); +-- Detect cross-store references CREATE TRIGGER insert_filedir BEFORE INSERT ON filedir BEGIN SELECT CASE - WHEN NEW.parent IS NOT NULL AND (SELECT filetype FROM filedir WHERE id = NEW.parent) != 'DIR' - THEN RAISE (ABORT, 'Parent is not listed as a directory') WHEN NEW.parent IS NOT NULL AND NEW.store != (SELECT store FROM filedir WHERE id = NEW.parent) - THEN RAISE (ABORT, 'Parent directory resides in different store') + THEN RAISE (ABORT, 'Parent resides in different store') END; END; CREATE TRIGGER update_filedir BEFORE UPDATE ON filedir BEGIN SELECT CASE - WHEN NEW.parent IS NOT NULL AND (SELECT filetype FROM filedir WHERE id = NEW.parent) != 'DIR' - THEN RAISE (ABORT, 'Parent is not listed as a directory') - WHEN NEW.parent IS NOT NULL AND NEW.store != (SELECT store FROM filedir WHERE id = NEW.parent) - THEN RAISE (ABORT, 'Parent directory resides in different store') -END; END; --- Files that are recorded to be deleted have the deleted column set to True. --- These files should no longer be used. In particular, we should never rename --- or update them since their lifetime is over. -INSERT INTO triggers VALUES('update_deleted_filedir', TRUE); -CREATE TRIGGER update_deleted_filedir BEFORE UPDATE ON filedir -BEGIN SELECT CASE - WHEN OLD.deleted AND (SELECT enabled FROM triggers WHERE name = 'update_deleted_filedir') - THEN RAISE (ABORT, 'Cannot modify filedir entry for deleted file') + WHEN (NEW.id != OLD.id OR NEW.store != OLD.store OR NEW.parent != OLD.parent) + THEN RAISE (ABORT, 'The only updates to filedir allowed are to the frozen column') END; END; --- This table holds _versions_ of files. This table holds information that is --- independent of the location within the filesystem and merely indicates a --- version of the content in a format that is nearly independent of filetype --- (though the computation of content hashes is of course dependent on --- filetype). Each version has a number, and was provided by some program (and --- potentially a datum). +-- This table holds _versions_ of files and directories. This table holds +-- information that is independent of the location within the filesystem and +-- merely indicates a version of the content in a format that is nearly +-- independent of filetype (though the computation of content hashes is of +-- course dependent on filetype). Each version has a number, and was provided by +-- some program (and potentially a datum). CREATE TABLE filedir_version (id INTEGER PRIMARY KEY NOT NULL, - filedir INTEGER REFERENCES filedir ON UPDATE CASCADE, -- parent filedir entry - version_counter INTEGER NOT NULL, -- incremented whenever a new version of the file is recorded + filedir INTEGER NOT NULL + REFERENCES filedir ON UPDATE CASCADE, -- parent filedir entry + + recorded_time REAL, -- When was this version recorded? + + -- Note that changing filetype (e.g. directory becomes file) or deleting a + -- file are simply just new versions of a filedir. + filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details + deleted BOOL NOT NULL, -- set True when recording a deleted file -- We record the permissions on each file, in a way that enables reloading -- permissions properly when thawing after a freeze operation. unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--' - symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target. I.e. this is the "content" of the symlink. + symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target. i.e. this is the "content" of the symlink. -- The following hash can be NULL if the file was deleted. It could also be -- null if hashing is deferred. Hash deferring would be preferred in cases @@ -170,26 +158,19 @@ CREATE TABLE filedir_version (id INTEGER PRIMARY KEY NOT NULL, -- can defer by only hashing files and directories at the end of a program. sha256 TEXT, - source_task INTEGER, - - UNIQUE(filedir, version), - FOREIGN KEY (source_task) REFERENCES task (id) ON UPDATE CASCADE + source_task INTEGER REFERENCES task (id) ON UPDATE CASCADE ); --- Disallow inserting or deleting versions for deleted files. (the version --- deleting the file should be added first, then the deleted field set on the --- filedir row). The update trigger is toggleable to enable importing. -CREATE TRIGGER insert_filedir_version_parent_deleted BEFORE INSERT ON filedir_version +-- Disallow UPDATING filedir_version. Instead, new version should be created. +-- One exception is during importing, in which case we can disable the trigger +INSERT INTO triggers VALUES('update_filedir_version', TRUE); +CREATE TRIGGER update_filedir_version BEFORE UPDATE ON filedir_version BEGIN SELECT CASE - WHEN (SELECT deleted FROM filedir WHERE id = NEW.parent) - THEN RAISE (ABORT, 'Attempted to insert filedir_version whose filedir was deleted') -END; END; -INSERT INTO triggers VALUES('update_filedir_version_parent_deleted', TRUE); -CREATE TRIGGER update_filedir_version_parent_deleted BEFORE UPDATE ON filedir_version -BEGIN SELECT CASE - WHEN (SELECT deleted FROM filedir WHERE id = NEW.parent) - AND (SELECT enabled FROM triggers WHERE name = 'update_filedir_version_parent_deleted') - THEN RAISE (ABORT, 'Attempted to update filedir_version whose parent was deleted') + WHEN (SELECT enabled FROM triggers WHERE name = 'update_filedir_version') + THEN RAISE (ABORT, + 'Updating filedir versions is prohibited, other than for id changes') END; END; +-- TODO: check for inserting inconsistent version info re. deleted flag/filetype + -- A computational environment which can execute "programs". Note that the diff --git a/src/nancy/store.py b/src/nancy/store.py index 5297d24..90706d9 100644 --- a/src/nancy/store.py +++ b/src/nancy/store.py @@ -46,12 +46,20 @@ class Program: self.message, #message TEXT, -- user-defined message to help distinguish similar runs )) self.id = cur.lastrowid - cur.connection.commit() self.set_start_time(datetime.datetime.now()) return self + def new_task(self, name, py_function_id=None): + """Create a new task and return its id""" + cur = self.store.conn.cursor() + cur.execute( + "INSERT INTO task VALUES (?, ?, ?)", + (None, self.id, py_function_id), + ) + return cur.lastrowid + def __exit__(self, exc_type, exc_value, exc_traceback): end_time = datetime.datetime.now() # record start and end times in store @@ -142,7 +150,7 @@ class Store: """Get the database id for the table entry in this store having name '.'""" if cur is None: cur = self.conn.cursor() - cur.execute('SELECT id FROM filedir WHERE store=0 AND parent is NULL;') + cur.execute('SELECT id FROM filedir WHERE store=1 AND parent is NULL') return cur.fetchone() def path_to_fsentry(self, path): @@ -177,52 +185,111 @@ class Store: return fd_id return fs.FSEntry.from_db_index(cur, root_id=fd_id) + def recorded_status(self, filepath): + recorded = self.path_to_fsentry(filepath) def fs_entries(self, shallow=False): """Return recursive structure containing FSEntry objects from db""" - root_id = self.filedir_root_index() + root_id = self.filedir_root_index() logger.debug('root_id={}', root_id) if root_id is None: logger.trace("Empty root") return fs.FSEntry.empty_root() else: - return fs.FSEntry.from_db_index(cur, root_id=root_id) + logger.trace("Non-empty root", root_id) + return fs.FSEntry.from_db_index(self.conn.cursor(), root_id=root_id) def program(self, name, message=None): return Program(self, name, message) - def diff(self, filepath): + def diff(self): """ - Diff a file or directory compared to its recorded version + Find changes to files and directories compared to their recorded versions """ - # get info about current files at this location - current = fs.FSEntry.from_path( - self.path, - # send a relative path from self.path to filepath, which is user - # provided - os.path.relpath(os.path.realpath(filepath), os.path.realpath(self.path)), - ) logger.trace("DIFF") + # get info about files currently at the given locations + current = fs.FSEntry.from_path(self.path) - recorded = self.path_to_fsentry(filepath) logger.debug("CURRENT: \n{}", str(current)) + # then find a listing covering all the expected paths + #recorded = self.recorded_status(self.path) + recorded = self.fs_entries(shallow=True) + logger.debug("RECORDED: \n{}", str(recorded)) + return fs.FSDiff.compute(recorded, current) - def record(self, diff, message=None): + + def _record_file_version(self, cur, ob, filedir_id, source_task=None): + cur.execute( + 'INSERT INTO filedir_version VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)', + ( + None, + filedir_id, + datetime.datetime.now().timestamp(), + ob.filetype, + False, + ob.unfrozen_perms, + ob.symlink_target, + ob.sha256.hex(), + source_task, + ) + ) + + def _record_new_file_recursive(self, ob, cur, parent_id, source_task): + # Find entries with this name and parent + cur.execute( + "SELECT id FROM filedir WHERE store = 1 AND name = ? AND parent = ? LIMIT 1", + (ob.filename, None if ob.parent is None else ob.parent.id), + ) + res = cur.fetchall() + if len(res) == 0: + # create filedir entry and get its id + cur.execute( + 'INSERT INTO filedir VALUES (?, ?, ?, ?, ?)', + ( + None, + 1, + ob.filename, + parent_id, + False, + )) + thisid = cur.lastrowid + else: + thisid, = res[0] + + self._record_file_version(cur, ob, thisid, source_task=source_task) + + # descend into children and record all of them anew as well + for c in ob.children: + self._record_new_file_recursive(c, cur, thisid, source_task) + + + def _record_recursive(self, diff, cur, parent_id=None, source_task=None): + """Record this level of a diff.""" + if diff.A is None: + self._record_new_file_recursive(diff.B, cur, parent_id, source_task=None) + elif diff.B is None: + self._record_deleted_file_recursive(diff.B, cur, parent_id) + else: + # modified + pass + + # descend into children + + + def record(self, diff, parent_id=None, message=None, cur=None): + if cur is None: + cur = self.conn.cursor() + with self.program('RECORD', message) as p: - for _, d in diff.flatten_tree(): - pass - - # create entries for all directories if they do not yet exist (top - # down) - - # insert files and symlinks into filedir, computing checksums on - # each - - # update versions in nancy.db as appropriate + # create a task for this operation + task_id = p.new_task('Store._record_recursive') + # descend the diff, tracking parent filedir IDs, creating them and + # recording new versions of each, when necessary + self._record_recursive(diff, cur, source_task=task_id) #@contextmanager def run( diff --git a/tests/test_db.py b/tests/test_db.py index 7ad04f2..2088780 100644 --- a/tests/test_db.py +++ b/tests/test_db.py @@ -3,6 +3,7 @@ Pure SQL tests that don't depend on nancy's Python code """ import pytest +import datetime import os import sqlite3 @@ -156,31 +157,60 @@ def insert_directories(insert_store): cur = insert_store cur.executemany( 'INSERT INTO filedir VALUES ' - '(?, ?, ?, ?, ?, ?, ?)', + '(?, ?, ?, ?, ?)', [( None, #id INTEGER PRIMARY KEY NOT NULL, 1, #store INTEGER NOT NULL, '.', #filename TEXT, -- only a filename, not a path None, #parent INTEGER REFERENCES filedir ON UPDATE CASCADE, - 'DIR', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details False, #frozen BOOL NOT NULL, - False, #deleted BOOL NOT NULL, ), ( None, #id INTEGER PRIMARY KEY NOT NULL, 1, #store INTEGER NOT NULL, 'foo', #filename TEXT, -- only a filename, not a path 1, #parent INTEGER REFERENCES filedir ON UPDATE CASCADE, - 'DIR', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details False, #frozen BOOL NOT NULL, - False, #deleted BOOL NOT NULL, ), ( None, #id INTEGER PRIMARY KEY NOT NULL, 2, #store INTEGER NOT NULL, '.', #filename TEXT, -- only a filename, not a path None, #parent INTEGER REFERENCES filedir ON UPDATE CASCADE, - 'DIR', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details False, #frozen BOOL NOT NULL, - False, #deleted BOOL NOT NULL, + )], + ) + cur.executemany( + 'INSERT INTO filedir_version VALUES ' + '(?, ?, ?, ?, ?, ?, ?, ?, ?)', + [( + None, #id INTEGER PRIMARY KEY NOT NULL, + 1, # INTEGER REFERENCES filedir ON UPDATE CASCADE, -- parent filedir entry + datetime.datetime.now().timestamp(), + 'DIR', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details + False, #deleted BOOL NOT NULL, -- set True when recording a deleted file + 'drwxrwxr-x', #unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--' + None, #symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target. I.e. this is the "content" of the symlink. + 'a84ed33864d06615a87bc8da5258d841163f1e7969367ecd07b041ae1a18febd', #sha256 TEXT, + None, #source_task INTEGER, + ), ( + None, #id INTEGER PRIMARY KEY NOT NULL, + 1, # INTEGER REFERENCES filedir ON UPDATE CASCADE, -- parent filedir entry + datetime.datetime.now().timestamp(), + 'DIR', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details + False, #deleted BOOL NOT NULL, -- set True when recording a deleted file + 'drwxrwxr-x', #unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--' + None, #symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target. I.e. this is the "content" of the symlink. + 'a84ed33864d06615a87bc8da5258d841163f1e7969367ecd07b041ae1a18febd', #sha256 TEXT, + None, #source_task INTEGER, + ), ( + None, #id INTEGER PRIMARY KEY NOT NULL, + 1, # INTEGER REFERENCES filedir ON UPDATE CASCADE, -- parent filedir entry + datetime.datetime.now().timestamp(), + 'DIR', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details + False, #deleted BOOL NOT NULL, -- set True when recording a deleted file + 'drwxrwxr-x', #unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--' + None, #symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target. I.e. this is the "content" of the symlink. + 'a84ed33864d06615a87bc8da5258d841163f1e7969367ecd07b041ae1a18febd', #sha256 TEXT, + None, #source_task INTEGER, )], ) return cur @@ -191,15 +221,13 @@ def test_crossstore_directory_insert(insert_directories): # declaring directory as belonging to store 2, but parent's store is 1 cur.execute( 'INSERT INTO filedir VALUES ' - '(?, ?, ?, ?, ?, ?, ?)', + '(?, ?, ?, ?, ?)', ( None, #id INTEGER PRIMARY KEY NOT NULL, 2, #store INTEGER NOT NULL, 'some_dir', #filename TEXT, -- only a filename, not a path 1, #parent INTEGER REFERENCES filedir ON UPDATE CASCADE, - 'DIR', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details False, #frozen BOOL NOT NULL, - False, #deleted BOOL NOT NULL, )) for row in cur.connection.iterdump(): print(row) @@ -210,55 +238,79 @@ def test_crossstore_directory_insert(insert_directories): @pytest.fixture def insert_files(insert_directories): cur = insert_directories + cur.execute('SELECT COUNT(*) FROM filedir') + nprev, = cur.fetchone() cur.executemany( 'INSERT INTO filedir VALUES ' - '(?, ?, ?, ?, ?, ?, ?)', + '(?, ?, ?, ?, ?)', [( None, #id INTEGER PRIMARY KEY NOT NULL, 1, #store INTEGER NOT NULL, 'example.csv', #filename TEXT, -- only a filename, not a path 1, #parent INTEGER REFERENCES filedir ON UPDATE CASCADE, - 'REG', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details False, #frozen BOOL NOT NULL, - False, #deleted BOOL NOT NULL, ), ( None, #id INTEGER PRIMARY KEY NOT NULL, 1, #store INTEGER NOT NULL, 'plots.png', #filename TEXT, -- only a filename, not a path 2, #parent INTEGER REFERENCES filedir ON UPDATE CASCADE, - 'REG', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details False, #frozen BOOL NOT NULL, - False, #deleted BOOL NOT NULL, )] ) + cur.executemany( + 'INSERT INTO filedir_version VALUES ' + '(?, ?, ?, ?, ?, ?, ?, ?, ?)', + [( + None, #id INTEGER PRIMARY KEY NOT NULL, + nprev + 1, # INTEGER REFERENCES filedir ON UPDATE CASCADE, -- parent filedir entry + datetime.datetime.now().timestamp(), + 'REG', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details + False, #deleted BOOL NOT NULL, -- set True when recording a deleted file + 'drwxrwxr-x', #unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--' + None, #symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target. I.e. this is the "content" of the symlink. + 'a84ed33864d06615a87bc8da5258d841163f1e7969367ecd07b041ae1a18febd', #sha256 TEXT, + None, #source_task INTEGER, + ), ( # second version of first file + None, #id INTEGER PRIMARY KEY NOT NULL, + nprev + 1, # INTEGER REFERENCES filedir ON UPDATE CASCADE, -- parent filedir entry + datetime.datetime.now().timestamp(), + 'REG', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details + False, #deleted BOOL NOT NULL, -- set True when recording a deleted file + 'drwxr-xr-x', #unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--' + None, #symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target. I.e. this is the "content" of the symlink. + 'a94ed33864d06615a87bc8da5258d841163f1e7969367ecd07b041ae1a18febd', #sha256 TEXT, + None, #source_task INTEGER, + ), ( + None, #id INTEGER PRIMARY KEY NOT NULL, + nprev + 2, # INTEGER REFERENCES filedir ON UPDATE CASCADE, -- parent filedir entry + datetime.datetime.now().timestamp(), + 'REG', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details + False, #deleted BOOL NOT NULL, -- set True when recording a deleted file + 'drwxr-xr-x', #unfrozen_perms TEXT, -- stat.filemode(os.stat(path).st_mode): '-rw-rw-r--' + None, #symlink_target TEXT, -- if this is a symlink, this is the (read but not fully resolved) target. I.e. this is the "content" of the symlink. + 'a94ed33864d06615a87bc8da5258d841163f1e7969367ecd07b041ae1a18febd', #sha256 TEXT, + None, #source_task INTEGER, + )]) return cur -def test_nondir_parent_directory_insert(insert_files): +# TODO: This test is disabled until triggers are added to check for these types +# of constraints. These became much more complicated to check when I added +# filedir_version. +def disabled_test_nondir_parent_directory_insert(insert_files): cur = insert_files with pytest.raises(sqlite3.IntegrityError): # declaring parent as 5, but 5 is a file (plots.png) cur.execute( 'INSERT INTO filedir VALUES ' - '(?, ?, ?, ?, ?, ?, ?)', + '(?, ?, ?, ?, ?)', ( None, #id INTEGER PRIMARY KEY NOT NULL, - 2, #store INTEGER NOT NULL, + 1, #store INTEGER NOT NULL, 'some_filedir.txt', #filename TEXT, -- only a filename, not a path 5, #parent INTEGER REFERENCES filedir ON UPDATE CASCADE, - 'DIR', #filetype TEXT, -- One of 'LNK', 'DIR', 'REG', etc. See store.FSEntry.from_path for details False, #frozen BOOL NOT NULL, - False, #deleted BOOL NOT NULL, )) for row in cur.connection.iterdump(): print(row) cur.execute('SELECT * FROM filedir') print(cur.fetchall()) - - -def test_update_deleted_file(insert_files): - cur = insert_files - # first we set a file to deleted - cur.execute('UPDATE filedir SET deleted=True WHERE id=5') - with pytest.raises(sqlite3.IntegrityError): - # Now we try and update it, which should fail due to trigger - cur.execute('UPDATE filedir SET frozen=True WHERE id=5') diff --git a/tests/test_fs.py b/tests/test_fs.py new file mode 100644 index 0000000..0b5ee88 --- /dev/null +++ b/tests/test_fs.py @@ -0,0 +1 @@ +from nancy import fs diff --git a/tests/test_store.py b/tests/test_store.py index 6a0fb48..fba5aa6 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -22,7 +22,7 @@ def filled_dir(bare_dir): def test_record_untracked_dir(filled_dir): from nancy.cli.record import record - record(filled_dir) + record(filled_dir, message='test_record_untracked_dir') @pytest.fixture