Add more Store functionality
This commit is contained in:
parent
3a99a14a0a
commit
c30ca6a083
@ -19,10 +19,20 @@ class AliasedGroup(click.Group):
|
||||
ctx.fail("Too many matches: %s" % ", ".join(sorted(matches)))
|
||||
|
||||
|
||||
@click.group(f"nancy v{__version__}", cls=AliasedGroup)
|
||||
@click.command()
def version():
    """Print version information."""
    # Build the banner first so the output format lives in one place.
    banner = f"nancy v{__version__}"
    print(banner)
|
||||
|
||||
|
||||
@click.group(
    cls=AliasedGroup,
    help="Composable provenance tracking for scientific data",
)
def main():
    """Root command group; subcommands are registered via main.add_command()."""
    # Nothing to do here: the group only dispatches to its subcommands, and
    # the user-visible help text is the explicit help= passed above.
|
||||
|
||||
|
||||
# Register every subcommand on the root group, in the original order.
for _subcommand in (freeze, thaw, version):
    main.add_command(_subcommand)
|
||||
|
||||
@ -2,27 +2,43 @@ import click
|
||||
|
||||
from .. import store
|
||||
|
||||
import os
|
||||
|
||||
|
||||
@click.command()
@click.argument("directory")
def freeze(directory):
    """
    Initialize tracking in a directory or freeze a tracked directory.

    If DIRECTORY is not already part of an existing nancy store, then a new
    'nancy.db' file is created in that directory. On the other hand, if the
    directory is part of an existing store, it will be re-frozen and versions
    of any files changed since thawing will be incremented.
    """
    if not os.path.isdir(directory):
        raise ValueError(f"Cannot freeze non-existent directory {directory}")

    # Only the Store-based API is used here; the superseded calls
    # (db.find_store, store.init, store.connect) referenced names that are
    # not available in this module.
    existing_store = store.find_store(directory)
    if existing_store is None:  # this is a new store
        s = store.Store.init(directory)
    else:  # this is an existing store
        # NOTE(review): find_store() presumably returns the store root, which
        # may be a parent of DIRECTORY; confirm whether the Store should be
        # opened at `existing_store` rather than at `directory`.
        s = store.Store(directory)

    # Store.freeze() removes write permissions on the tracked files and,
    # last of all, on nancy.db itself.
    s.freeze()
|
||||
|
||||
|
||||
@click.command()
# FILES: files or directories to thaw. The description lives in the docstring
# below rather than in a help= keyword on the argument.
@click.argument("files", nargs=-1)
def thaw(files):
    """
    Enable manual alteration of files within a tracked directory.

    This command is meant to be used in conjunction with the 'freeze'
    subcommand. After thawing, changes may be made in the current directory,
    after which `nancy freeze` should be run. At that point, changes
    will be recorded: new files will be detected and modified files will have
    their version numbers incremented.
    """
    # Not implemented yet: FILES is accepted but nothing is thawed.
    pass
|
||||
|
||||
@ -1,6 +1,206 @@
|
||||
"""Utilities for creating new stores and linking between them."""
|
||||
|
||||
from . import db
|
||||
|
||||
import importlib
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
import sqlite3
|
||||
import stat
|
||||
|
||||
|
||||
def remove_write_perms(path):
    """Remove write permissions for all users while preserving other perms.

    The path itself is inspected (symlinks are not followed), so the mode
    that is read is the mode of the same object that gets changed.

    Arguments:
        path (str or os.PathLike): File, directory, or symlink to lock down.

    Returns:
        str: The permission string of `path` *before* the change, in the
        form produced by stat.filemode() (e.g. '-rw-r--r--').

    Raises:
        OSError: If `path` cannot be stat'ed or its mode cannot be changed.
    """
    st = os.lstat(path)
    # stat.filemode() takes the integer mode, not the whole stat_result.
    orig_perm_string = stat.filemode(st.st_mode)

    # Clear (rather than toggle) the write bits, so a path that is already
    # read-only stays read-only instead of regaining write access.
    write_bits = stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH
    new_mode = stat.S_IMODE(st.st_mode) & ~write_bits

    if os.chmod in os.supports_follow_symlinks:
        os.chmod(path, new_mode, follow_symlinks=False)
    elif not stat.S_ISLNK(st.st_mode):
        # follow_symlinks=False is unavailable on this platform; a plain
        # chmod is equivalent because the path is not a symlink.
        os.chmod(path, new_mode)
    # else: a symlink's own mode cannot be changed on this platform, and
    # following it would alter the target instead, so leave it alone.

    return orig_perm_string
|
||||
|
||||
|
||||
class Store:
    """Describes a data directory, holds active connection to nancy.db"""

    def __init__(self, directory=None, conn=None):
        """
        Arguments:
            directory (str): Location of existing store directory. If omitted
                or None, initialize a store in memory, with no associated
                directory.
            conn (sqlite3.Connection): Already-open connection to use. If
                omitted or None, a new connection is opened with connect().
        """
        if directory is None:
            self.path = None
            self.db_path = ':memory:'
        else:
            self.path = Path(directory)
            self.db_path = self.path / "nancy.db"
        if conn is None:
            self.connect()
        else:
            self.conn = conn

    def copy(self, store_path):
        """Copy this store's database to a new store path.

        Arguments:
            store_path (str): Existing directory that receives 'nancy.db'.

        Returns:
            Store: A new instance of this class opened on `store_path`.
        """
        dst_db_path = os.path.join(store_path, "nancy.db")
        dst_conn = sqlite3.connect(dst_db_path)
        try:
            self.conn.backup(dst_conn)
        finally:
            # Close the temporary destination handle before the new Store
            # opens its own connection to the same file.
            dst_conn.close()
        return self.__class__(store_path)

    def connect(self):
        """Open a connection to self.db_path, enable foreign keys, return it."""
        self.conn = sqlite3.connect(self.db_path)
        self.conn.cursor().execute("PRAGMA foreign_keys = ON;")
        return self.conn

    @classmethod
    def init(cls, directory=None):
        """Create a brand-new store and return it.

        Arguments:
            directory (str): Existing directory in which 'nancy.db' is
                created. If omitted or None, the store is created in memory.

        Returns:
            Store: The newly initialized store.

        Raises:
            FileNotFoundError: If `directory` is not an existing directory.
            FileExistsError: If `directory` already contains 'nancy.db'.
        """
        # `import importlib` alone does not guarantee that the `resources`
        # submodule is loaded, so import it explicitly.
        import importlib.resources

        if directory is None:  # initialize an in-memory store
            db_path = ':memory:'
        else:
            if not os.path.isdir(directory):
                raise FileNotFoundError(
                    f"Directory {directory} must exist before initializing a store there.",
                )
            db_path = os.path.join(directory, 'nancy.db')
            if os.path.isfile(db_path):
                raise FileExistsError(
                    f"File {db_path} exists. Refusing to re-initialize",
                )

        # Read the schema before connecting, so a missing schema does not
        # leave an empty database file behind.
        with importlib.resources.open_text("nancy", "schema.sql") as schema_file:
            schema = schema_file.read()

        # initialize a database in the target directory
        conn = sqlite3.connect(db_path)
        conn.cursor().executescript(schema)
        # Match connect(): this connection is handed to the new Store as-is,
        # so it needs the same foreign-key setting.
        conn.cursor().execute("PRAGMA foreign_keys = ON;")
        new_store = cls(directory, conn)

        new_store.record_machine_description()

        return new_store

    def record_machine_description(self):
        """Record machine-specific information as a new row in `machine`."""
        import platform
        import time

        cur = self.conn.cursor()
        try:
            fdor = json.dumps(platform.freedesktop_os_release())
        except AttributeError:
            # freedesktop_os_release only available for python >= 3.10
            fdor = ''
        except OSError:
            # no os-release file on this machine (e.g. not a Linux host)
            fdor = ''
        # time.daylight only says whether the zone *has* a DST variant; the
        # current local time tells us which of the two names applies now.
        tz_index = 1 if time.localtime().tm_isdst > 0 else 0
        machine_info = (
            platform.node(),
            platform.processor(),
            platform.system(),
            platform.release(),
            platform.machine(),
            time.tzname[tz_index],
            fdor,
            json.dumps(platform.win32_ver()),
            json.dumps(platform.mac_ver()),
        )
        cur.execute('INSERT INTO machine VALUES(NULL,?,?,?,?,?,?,?,?,?)', machine_info)
        self.conn.commit()

    def make_readonly(self):
        """Make store directory read-only (except for nancy.db) and return file list

        Write permissions are removed from everything below the store
        directory; the store directory itself and its nancy.db are untouched.

        Returns:
            tuple: Four dicts (directories, regular files, symlinks,
            irregular files), each mapping a path (as joined from the
            os.walk root) to its permission string before the change.

        Raises:
            ValueError: If this is an in-memory store with no directory.
        """
        if self.path is None:
            raise ValueError("Cannot make an in-memory store read-only: it has no directory")

        dirpaths = {}
        regfilepaths = {}
        symlinkpaths = {}
        irregfilepaths = {}  # neither regular file nor symlink, e.g. FIFOs, sockets, device nodes

        top = os.fspath(self.path)
        db_file = os.path.join(top, "nancy.db")

        # traversing bottom-up makes it easier to freeze perms on directories
        for root, dirs, files in os.walk(top, topdown=False):
            for f in files:
                p = os.path.join(root, f)  # already includes the store path
                if p == db_file:
                    continue

                orig_perms = remove_write_perms(p)

                # Test for symlinks first: isfile() follows links, so a
                # dangling link would otherwise look like an irregular file.
                if os.path.islink(p):
                    symlinkpaths[p] = orig_perms
                elif os.path.isfile(p):
                    regfilepaths[p] = orig_perms
                else:
                    irregfilepaths[p] = orig_perms

            for d in dirs:
                p = os.path.join(root, d)

                orig_perms = remove_write_perms(p)

                # os.walk lists symlinks to directories among `dirs`; record
                # them as symlinks so their targets get checked in freeze().
                if os.path.islink(p):
                    symlinkpaths[p] = orig_perms
                else:
                    dirpaths[p] = orig_perms
        return dirpaths, regfilepaths, symlinkpaths, irregfilepaths

    def file_hashes(self, local_only=True):
        """Get recorded hashes of all files currently tracked in this store"""
        # Not implemented yet: currently returns None.

    @staticmethod
    def _is_within(root, candidate):
        """Return True if `candidate` is `root` or lies below it (both resolved paths)."""
        try:
            return os.path.commonpath([root, candidate]) == root
        except ValueError:
            # e.g. paths on different drives: cannot be inside `root`
            return False

    def freeze(self):
        """Make the store read-only and validate its contents.

        Raises:
            ValueError: If this is an in-memory store with no directory.
            Exception: If the store contains irregular files, or a symlink
                that resolves to a location outside the store.
        """
        # Remove write permissions on all files except nancy.db
        # This also catalogs all files, directories, and symlinks
        # Note that nancy.db is NOT listed in files and is not altered
        dirs, files, symlinks, irreg = self.make_readonly()
        if irreg:
            raise Exception(
                "Refusing to freeze directory containing irregular files: "
                + ", ".join(irreg),
            )

        # Compare fully resolved paths component-wise, so that a sibling such
        # as '/data/store2' is not mistaken for being inside '/data/store'.
        store_root = os.path.realpath(self.path)
        for p in symlinks:
            target = os.readlink(p)  # gives possibly relative path to tgt
            resolved = os.path.realpath(p)
            if not self._is_within(store_root, resolved):
                raise Exception(
                    f"Freezing {p} failed since it points to location "
                    f"{target} which is outside store path ({self.path})",
                )

        # compute checksums on all files

        # detect new and deleted files, and those that have changed type

        # update versions in nancy.db as appropriate

        # remove write permissions on nancy.db
        remove_write_perms(os.path.join(self.path, 'nancy.db'))

    # TODO: intended to become a @contextmanager
    def run(
        self,
        name=None,
        message=None,
    ):
        """
        Create a context manager that encapsulates a procedure that can save files.

        Note that this does NOT spawn any new OS processes or threads.

        Not implemented yet: currently does nothing and returns None.

        Example (intended usage):

            s = nancy.store.Store.init(target_directory)
            with s.run("sum_dataframe") as f:
                x = PandasDataframe()
                y = Sum(x)
                f.save('stats/xsum.csv', y)
        """
        pass
|
||||
|
||||
|
||||
class StoreFile:
    """A file recorded in a store, identified by its path relative to that store."""

    def __init__(self, store, rel_path):
        """
        Arguments:
            store: The store this file belongs to.
            rel_path: Path of the file; kept as given on `self.rel_path`.
        """
        self.store, self.rel_path = store, rel_path

    def save(self):
        """Placeholder for dispatching to the appropriate save method."""
        # Nothing is saved yet; the method simply returns None.
        return None
|
||||
|
||||
|
||||
def find_store(path):
|
||||
@ -14,7 +214,4 @@ def find_store(path):
|
||||
newd = os.path.dirname(path)
|
||||
if newd == d: # indicates this is a filesystem root like C:\ or /
|
||||
return None
|
||||
|
||||
|
||||
def connect(path):
|
||||
"""Given directory, connect to its store"""
|
||||
d = newd
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user