Add more Store functionality
This commit is contained in:
parent
3a99a14a0a
commit
c30ca6a083
@ -19,10 +19,20 @@ class AliasedGroup(click.Group):
|
|||||||
ctx.fail("Too many matches: %s" % ", ".join(sorted(matches)))
|
ctx.fail("Too many matches: %s" % ", ".join(sorted(matches)))
|
||||||
|
|
||||||
|
|
||||||
@click.group(f"nancy v{__version__}", cls=AliasedGroup)
|
@click.command()
def version():
    """Print version information."""
    # Build the banner first, then emit it on stdout.
    banner = f"nancy v{__version__}"
    print(banner)
|
||||||
|
|
||||||
|
|
||||||
|
@click.group(
    cls=AliasedGroup,
    # Plain string: the original used an f-string with no placeholders.
    help="Composable provenance tracking for scientific data",
)
def main():
    """Top-level ``nancy`` command group.

    The group does no work itself; it only dispatches to the subcommands
    registered on it. The text shown by ``--help`` comes from the ``help=``
    argument above, not from this docstring.
    """


# Register the subcommands on the top-level group.
main.add_command(freeze)
main.add_command(thaw)
main.add_command(version)
|
||||||
|
|||||||
@ -2,27 +2,43 @@ import click
|
|||||||
|
|
||||||
from .. import store
|
from .. import store
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
@click.command()
@click.argument("directory")
def freeze(directory):
    """
    Initialize tracking in a directory or freeze a tracked directory.

    If DIRECTORY is not already part of an existing nancy store, then a new
    'nancy.db' file is created in that directory. On the other hand, if the
    directory is part of an existing store, it will be re-frozen and versions
    of any files changed since thawing will be incremented.
    """
    # Fail early with a clear message instead of letting the store code
    # stumble over a missing directory.
    if not os.path.isdir(directory):
        raise ValueError(f"Cannot freeze non-existent directory {directory}")

    existing_store = store.find_store(directory)
    if existing_store is None:  # this is a new store
        s = store.Store.init(directory)
    else:  # this is an existing store
        # NOTE(review): the store is opened at DIRECTORY, not at the location
        # returned by find_store -- confirm this is intended when DIRECTORY
        # is a sub-directory of an existing store.
        s = store.Store(directory)

    # All of the actual freezing work lives on the Store object.
    s.freeze()
|
||||||
|
|
||||||
# compute checksums on all files, update versions in nancy.db as appropriate
|
|
||||||
|
|
||||||
# remove write permissions on nancy.db
|
|
||||||
|
|
||||||
|
|
||||||
@click.command()
@click.argument("files", nargs=-1)  # , help="Files or directories to thaw.")
def thaw(files):
    """
    Enable manual alteration of files within a tracked directory.

    This command is meant to be used in conjunction with the 'freeze'
    subcommand. After thawing, changes may be made in the current directory,
    after which `nancy freeze` should be run. At that point, changes will be
    recorded: new files will be detected and modified files will have their
    version numbers incremented.
    """
    # Placeholder: the command is registered but performs no action yet.
    pass
|
||||||
|
|||||||
@ -1,6 +1,206 @@
|
|||||||
"""Utilities for creating new stores and linking between them."""
|
"""Utilities for creating new stores and linking between them."""
|
||||||
|
|
||||||
|
from . import db
|
||||||
|
|
||||||
|
import importlib
|
||||||
|
import json
|
||||||
import os
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
import sqlite3
|
||||||
|
import stat
|
||||||
|
|
||||||
|
|
||||||
|
def remove_write_perms(path):
    """Remove write permissions for all users while preserving other perms.

    Arguments:
        path: File, directory or symlink whose write bits should be cleared.
            Symlinks are never followed: the link itself is inspected, and
            it is only modified on platforms that can chmod a link.

    Returns:
        str: The ``ls -l`` style mode string (e.g. ``-rw-r--r--``) that
        ``path`` had before the change.
    """
    write_bits = stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH

    # lstat so that the mode we report and modify belongs to the same object
    # (the link itself, not its target).
    s = os.lstat(path)
    # stat.filemode() wants the integer mode, not the whole stat_result.
    orig_perm_string = stat.filemode(s.st_mode)

    # Clear the write bits with AND-NOT. XOR would *toggle* them and hand
    # write permission back to a file that is already read-only.
    new_mode = stat.S_IMODE(s.st_mode) & ~write_bits

    if not stat.S_ISLNK(s.st_mode):
        os.chmod(path, new_mode)
    elif os.chmod in os.supports_follow_symlinks:
        os.chmod(path, new_mode, follow_symlinks=False)
    # else: this platform cannot change a symlink's own mode; asking for
    # follow_symlinks=False would raise NotImplementedError, so leave the
    # link untouched rather than modify its target.

    return orig_perm_string
|
||||||
|
|
||||||
|
|
||||||
|
class Store:
    """Describes a data directory, holds active connection to nancy.db"""

    def __init__(self, directory=None, conn=None):
        """
        Arguments:
            directory (str): Location of existing store directory. If omitted
                or None, initialize a store in memory, with no associated
                directory.
            conn (sqlite3.Connection): Already-open connection to use. If
                omitted or None, a new connection is opened with connect().
        """
        if directory is None:
            self.path = None
            self.db_path = ':memory:'
        else:
            self.path = Path(directory)
            self.db_path = self.path / "nancy.db"
        if conn is None:
            self.connect()
        else:
            self.conn = conn

    def copy(self, store_path):
        """Copy this store to a new store path

        The database is written to ``<store_path>/nancy.db`` using the
        connection's backup facility, and a new instance of this class opened
        on ``store_path`` is returned.
        """
        dst_db_path = os.path.join(store_path, "nancy.db")
        dst_conn = sqlite3.connect(dst_db_path)
        try:
            self.conn.backup(dst_conn)
        finally:
            # Actually call close(); a bare `dst_conn.close` is a no-op and
            # leaks the connection.
            dst_conn.close()
        return self.__class__(store_path)

    def connect(self):
        """Open a connection to ``self.db_path``, enable foreign keys, return it."""
        self.conn = sqlite3.connect(self.db_path)
        self.conn.cursor().execute("PRAGMA foreign_keys = ON;")
        return self.conn

    @classmethod
    def init(cls, directory=None):
        """Create a brand-new store and return it.

        Arguments:
            directory (str): Existing directory in which to create
                'nancy.db'. If omitted or None, the store is created in
                memory.

        Raises:
            FileNotFoundError: ``directory`` does not exist.
            FileExistsError: ``directory`` already contains a 'nancy.db'.
        """
        # `import importlib` alone does not guarantee that the `resources`
        # submodule is loaded, so import it explicitly.
        import importlib.resources

        if directory is None:  # initialize an in-memory store
            db_path = ':memory:'
        else:
            if not os.path.isdir(directory):
                raise FileNotFoundError(
                    f"Directory {directory} must exist before initializing a store there.",
                )
            db_path = os.path.join(directory, 'nancy.db')
            if os.path.isfile(db_path):
                raise FileExistsError(
                    f"File {db_path} exists. Refusing to re-initialize",
                )

        # Read the schema before touching the database so that a missing
        # schema does not leave an empty nancy.db behind.
        with importlib.resources.open_text("nancy", "schema.sql") as schema_file:
            schema = schema_file.read()

        # initialize a database in the target directory
        conn = sqlite3.connect(db_path)
        conn.cursor().executescript(schema)
        new_store = cls(directory, conn)

        new_store.record_machine_description()

        return new_store

    def record_machine_description(self):
        """Record machine-specific information

        Inserts one row into the ``machine`` table and commits.
        """
        import platform
        import time

        try:
            fdor = json.dumps(platform.freedesktop_os_release())
        except (AttributeError, OSError):
            # AttributeError: freedesktop_os_release only available for
            # python >= 3.10. OSError: no os-release file on this machine.
            fdor = ''

        machine_info = (
            platform.node(),
            platform.processor(),
            platform.system(),
            platform.release(),
            platform.machine(),
            time.tzname[time.daylight],
            fdor,
            json.dumps(platform.win32_ver()),
            json.dumps(platform.mac_ver()),
        )
        cur = self.conn.cursor()
        cur.execute('INSERT INTO machine VALUES(NULL,?,?,?,?,?,?,?,?,?)', machine_info)
        self.conn.commit()

    def make_readonly(self):
        """Make store directory read-only (except for nancy.db) and return file list

        Returns:
            tuple: ``(dirpaths, regfilepaths, symlinkpaths, irregfilepaths)``.
            Each is a dict mapping a path (as produced by joining the
            os.walk root with the entry name) to the value returned by
            remove_write_perms() for that path. Every path appears in
            exactly one dict; symlinks -- including symlinks to directories
            -- are always reported in ``symlinkpaths``.
        """
        dirpaths = {}
        regfilepaths = {}
        symlinkpaths = {}
        irregfilepaths = {}  # any file that's not regular or symlink. e.g. inodes or sockets

        # os.walk yields roots that already start with self.path, so this is
        # exactly the string the top-level database entry will produce.
        db_file = os.path.join(self.path, "nancy.db")

        # traversing bottom-up makes it easier to freeze perms on directories
        for root, dirs, files in os.walk(self.path, topdown=False):
            for f in files:
                p = os.path.join(root, f)
                if p == db_file:
                    continue

                # Classify before changing anything. islink is tested first
                # because isfile follows links and would misreport a
                # dangling symlink as an irregular file.
                is_link = os.path.islink(p)
                is_regular = os.path.isfile(p)

                orig_perms = remove_write_perms(p)

                if is_link:
                    symlinkpaths[p] = orig_perms
                elif is_regular:
                    regfilepaths[p] = orig_perms
                else:
                    # not a link or regular file
                    irregfilepaths[p] = orig_perms

            for d in dirs:
                p = os.path.join(root, d)
                is_link = os.path.islink(p)

                orig_perms = remove_write_perms(p)

                if is_link:
                    # os.walk lists symlinks to directories under `dirs`
                    symlinkpaths[p] = orig_perms
                else:
                    dirpaths[p] = orig_perms
        return dirpaths, regfilepaths, symlinkpaths, irregfilepaths

    def file_hashes(self, local_only=True):
        """Get recorded hashes of all files currently tracked in this store

        Not implemented yet: the method has no body and returns None.
        """

    @staticmethod
    def _is_within(root, candidate):
        """Return True if ``candidate`` is ``root`` or lies underneath it.

        Both arguments are expected to be normalized absolute paths. The
        comparison is done on whole path components, so ``/data/store2`` is
        not considered to be inside ``/data/store``.
        """
        try:
            return os.path.commonpath([root, candidate]) == root
        except ValueError:
            # e.g. paths on different drives: cannot share a root
            return False

    def freeze(self):
        """Make the store read-only and validate its contents.

        Raises:
            Exception: the directory contains irregular files, or a symlink
                resolves to a location outside the store path.
        """
        # Remove write permissions on all files except nancy.db
        # This also catalogs all files, directories, and symlinks
        # Note that ./nancy.db is NOT listed in files and is not altered
        dirs, files, symlinks, irreg = self.make_readonly()
        if irreg:
            raise Exception(
                "Refusing to freeze directory containing irregular files: "
                + ", ".join(irreg),
            )

        # Resolve the store root the same way the link targets are resolved,
        # so symlinked components in the store path do not cause spurious
        # failures.
        store_root = os.path.realpath(self.path)
        for p in symlinks:
            target = os.readlink(p)  # gives possibly relative path to tgt
            resolved = os.path.realpath(p)
            if not self._is_within(store_root, resolved):
                raise Exception(
                    f"Freezing {p} failed since it points to location "
                    f"{target} which is outside store path ({self.path})",
                )

        # compute checksums on all files

        # detect new and deleted files, and those that have changed type

        # update versions in nancy.db as appropriate

        # remove write permissions on nancy.db
        remove_write_perms(os.path.join(self.path, 'nancy.db'))

    # NOTE: meant to become a @contextmanager; currently an unimplemented stub.
    def run(
        self,
        name=None,
        message=None,
    ):
        """
        Create a context manager that encapsulates a procedure that can save files.

        Note that this does NOT spawn any new OS processes or threads.

        Example:

            s = nancy.store.Store.init(target_directory)
            with s.run("sum_dataframe") as f:
                x = PandasDataframe()
                y = Sum(x)
                f.save('stats/xsum.csv', y)
        """
        pass
|
||||||
|
|
||||||
|
|
||||||
|
class StoreFile:
    """Describes a file that is recorded in the store."""

    def __init__(self, store, rel_path):
        """Remember the owning store and the path given for the file."""
        self.rel_path = rel_path
        self.store = store

    def save(self):
        """Placeholder for dispatching to the appropriate save method."""
        # call the appropriate save method
        return None
|
||||||
|
|
||||||
|
|
||||||
def find_store(path):
|
def find_store(path):
|
||||||
@ -14,7 +214,4 @@ def find_store(path):
|
|||||||
newd = os.path.dirname(path)
|
newd = os.path.dirname(path)
|
||||||
if newd == d: # indicates this is a filesystem root like C:\ or /
|
if newd == d: # indicates this is a filesystem root like C:\ or /
|
||||||
return None
|
return None
|
||||||
|
d = newd
|
||||||
|
|
||||||
def connect(path):
|
|
||||||
"""Given directory, connect to its store"""
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user