166 lines
6.4 KiB
Rust
166 lines
6.4 KiB
Rust
use clap::{Parser, Subcommand};
|
|
use rusqlite::{Connection, OpenFlags};
|
|
|
|
use std::path::PathBuf;
|
|
use std::process;
|
|
|
|
// Composable provenance tracking for scientific data analysis
|
|
#[derive(Parser)]
|
|
#[command(author, version, about, long_about = None, arg_required_else_help=true)]
|
|
struct Cli {
|
|
#[command(subcommand)]
|
|
command: Option<Commands>,
|
|
}
|
|
|
|
#[derive(Subcommand)]
|
|
enum Commands {
|
|
/// Record changes to files/directories within a dataset (or create a new dataset)
|
|
#[command()]
|
|
Record {
|
|
/// If not within an existing dataset, create one
|
|
#[arg(short, long)]
|
|
initialize: bool,
|
|
/// A short descriptive message for this recording, i.e. "Re-run with lr=1e-3"
|
|
#[arg(short, long)]
|
|
message: String,
|
|
#[arg()]
|
|
record_paths: Vec<PathBuf>,
|
|
},
|
|
/// Check for changes in dataset and print basic statistics
|
|
Status {},
|
|
/// Just say hello
|
|
Hello {
|
|
//#[arg(short, long)]
|
|
//fromlib: bool,
|
|
},
|
|
}
|
|
|
|
fn main() {
|
|
env_logger::init();
|
|
let args = Cli::parse();
|
|
|
|
match &args.command {
|
|
Some(Commands::Hello {}) => {
|
|
println!("Hello from nancy (binary)!");
|
|
match nancy::print_uuid() {
|
|
Ok(_) => {
|
|
println!("OK");
|
|
}
|
|
Err(e) => {
|
|
println!("SQLite error: {}", e);
|
|
}
|
|
};
|
|
}
|
|
Some(Commands::Record {
|
|
initialize,
|
|
message,
|
|
record_paths,
|
|
}) => {
|
|
// If no paths are given, use ["."] for the following steps.
|
|
|
|
// Determine dataset dir (ds_dir)
|
|
let mut conn = match nancy::db::find_dataset_dir(record_paths) {
|
|
Err(nancy::db::FindDatasetError::NoDataset(path)) => {
|
|
log::info!("No dataset at or above nearest ancestor path: {:?}", path);
|
|
if !initialize {
|
|
log::error!("Refusing to initialize a new dataset at {path:?}. \
|
|
Pass the -i or --initialize flag to request initialization.");
|
|
process::exit(1);
|
|
}
|
|
let dbpath = &path.join("nancy.db");
|
|
log::info!("Initializing new database at {:?}", dbpath);
|
|
let mut c = match Connection::open(dbpath) {
|
|
Err(e) => {
|
|
log::error!(
|
|
"Could not open new SQLite database at {dbpath:?}: {:?}",
|
|
e
|
|
);
|
|
process::exit(1);
|
|
}
|
|
Ok(cc) => cc,
|
|
};
|
|
c.pragma_update(None, "foreign_keys", &"ON").unwrap();
|
|
match nancy::db::init(&mut c) {
|
|
Err(e) => {
|
|
log::error!(
|
|
"Encountered error in initializing schema: {:?}",
|
|
e
|
|
);
|
|
process::exit(1);
|
|
}
|
|
Ok(dataset_uuid) => {
|
|
log::trace!("Init OK");
|
|
log::info!("Dataset UUID is {dataset_uuid}");
|
|
// Run an empty program so that the dataset log reflects when it was
|
|
// initialized
|
|
nancy::program::with_program(
|
|
&mut c,
|
|
"INIT",
|
|
"Initialize dataset",
|
|
|prog| {
|
|
let _ = prog.perform_task(&[], |task| {
|
|
log::debug!("INIT task UUID is {}", task.uuid);
|
|
});
|
|
let okres: Result<(), ()> = Ok(());
|
|
okres
|
|
},
|
|
)
|
|
.expect("Empty program should not throw error");
|
|
}
|
|
}
|
|
c
|
|
},
|
|
Err(e) => {
|
|
log::error!("Could not determine dataset directory: {:?}", e);
|
|
process::exit(1);
|
|
},
|
|
Ok(path) => {
|
|
// existing
|
|
log::info!("Found existing dataset at path: {:?}", path);
|
|
let dbpath = &path.join("nancy.db");
|
|
// open with flags to prevent creating when we believe the db exists
|
|
let mut c = match Connection::open_with_flags(
|
|
dbpath,
|
|
OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_NO_MUTEX,
|
|
) {
|
|
Err(e) => {
|
|
log::error!(
|
|
"Could not open existing SQLite database at {dbpath:?}: {e:?}"
|
|
);
|
|
process::exit(1);
|
|
}
|
|
Ok(cc) => cc,
|
|
};
|
|
c.pragma_update(None, "foreign_keys", &"ON").unwrap();
|
|
match nancy::db::ensure_schema(&mut c) {
|
|
Err(e) => {
|
|
log::error!("Error ensuring schema: {}", e);
|
|
process::exit(1);
|
|
}
|
|
Ok(update_result) => {
|
|
log::debug!("Schema ensured: {:?}", update_result);
|
|
}
|
|
}
|
|
c
|
|
},
|
|
};
|
|
|
|
if let Err(e) = nancy::program::with_program(&mut conn, "RECORD", message, |prog| {
|
|
prog.perform_task(&[], |_task| {
|
|
let dataset_path = PathBuf::from(".");
|
|
// Note that this may fail, in which case we should roll back only this program
|
|
// but keep the dataset initialized.
|
|
nancy::fs::record(&dataset_path, message)
|
|
})
|
|
}) {
|
|
log::error!("Encountered error in RECORD program: {:?}", e);
|
|
process::exit(1);
|
|
};
|
|
}
|
|
Some(Commands::Status {}) => {
|
|
println!("status not yet implemented");
|
|
}
|
|
None => {}
|
|
}
|
|
}
|