nancy/nancy/__init__.py
2022-09-17 12:57:15 -04:00

65 lines
1.8 KiB
Python

import sqlite3
from . import db
# Package version; interpolated into the default CLI description of save_data().
__version__ = "0.0.1"
# Calling code will build up a large DAG and then, at the very last step, call
# nancy.save_data(). Only at that point do we determine where to save the
# data, by asking the user for an output dir. However, we need a database
# initialized in order to build up the DAG in the first place, so here we
# initialize an in-memory database to use until we have an output directory.
# save_data() later backs this connection up to a file inside the output
# directory, closes it, and rebinds _conn to the on-disk connection.
_conn = db.init(":memory:")
def save_data(
destination_mapping,
desc=f"Provenance-tracking with nancy v{__version__}",
):
"""
A command-line interface to compute and save a set of outputs.
Ex:
.. code::
save_data({
"scores.csv": scores,
"checkpoints": checkpoint_dir,
"plots/learning_curve.pdf": learning_curve_plot,
"plots/test_roc.pdf": test_auc_plot,
})
"""
global _conn
import argparse
import os
import sys
parser = argparse.ArgumentParser(description=desc)
parser.add_argument(
"--out_dir",
"-o",
required=True,
help="Directory (will be created) in which to save all outputs",
)
args = parser.parse_args()
if os.path.exists(args.out_dir):
print(
f"Output directory {args.out_dir} exists. Refusing to overwrite.",
file=sys.stderr,
)
sys.exit(1)
# Create the output directory, back up the in-memory db to this location, and
# switch to it.
os.makedirs(args.out_dir, exist_ok=True)
dst_conn = sqlite3.connect(os.path.join(args.out_dir, "prov.db"))
_conn.backup(dst_conn)
_conn.close()
_conn = dst_conn
# Now we save the provided roots, and record their locations in _conn.data