diff --git a/src/commands/init.rs b/src/commands/init.rs
new file mode 100644
index 0000000..392c7b5
--- /dev/null
+++ b/src/commands/init.rs
@@ -0,0 +1,60 @@
+use anyhow::{anyhow, bail, Context, Result};
+use rusqlite::Connection;
+use std::path::Path;
+use uuid::Uuid;
+
+/// Initialize the dataset schema and log an empty "INIT" program.
+///
+/// Returns the value produced by `nancy::db::init`, which is logged as the
+/// dataset UUID.
+fn init_schema(conn: &mut Connection, name: &str) -> Result<Uuid> {
+    let dataset_uuid =
+        nancy::db::init(conn, name).map_err(|e| anyhow!("failed to initialize schema: {}", e))?;
+
+    log::trace!("Init OK");
+    log::info!("Dataset UUID is {dataset_uuid}");
+    // Run an empty program so that the dataset log reflects when it was
+    // initialized
+    nancy::program::with_program(conn, "INIT", "Initialize dataset", |prog| {
+        // NOTE(review): the result of perform_task is discarded here; confirm
+        // that ignoring a failure of the empty task is intentional.
+        let _ = prog.perform_task(&[], |task| {
+            log::debug!("INIT task UUID is {}", task.key);
+            Ok::<(), ()>(())
+        });
+        let okres: Result<()> = Ok(());
+        okres
+    })
+    .context("Could not run empty program during init_schema")??;
+
+    Ok(dataset_uuid)
+}
+
+/// Run the `init` subcommand: open a new `nancy.db` in `dataset_path` and
+/// initialize its schema.
+///
+/// Fails if `dataset_path` is not an existing directory, or if `nancy.db`
+/// already exists there (an existing database is never overwritten). On
+/// success, returns the value produced by `init_schema`.
+pub fn init_cmd(name: &str, dataset_path: &Path) -> Result<Uuid> {
+    if !dataset_path.is_dir() {
+        bail!(
+            "Path {:?} does not point to an existing directory",
+            dataset_path
+        );
+    }
+    let dbpath = &dataset_path.join("nancy.db");
+    if dbpath.exists() {
+        bail!(
+            "Database {:?} exists, indicating this dataset is already \
+            initialized. Refusing to overwrite.",
+            dbpath
+        );
+    }
+    log::info!("Initializing new database at {:?}", dbpath);
+    let mut conn = Connection::open(dbpath)
+        .with_context(|| format!("Could not open new SQLite database at {dbpath:?}"))?;
+    conn.pragma_update(None, "foreign_keys", &"ON")
+        .context("Could not set foreign_keys pragma")?;
+    init_schema(&mut conn, name)
+}
diff --git a/src/commands/mod.rs b/src/commands/mod.rs
new file mode 100644
index 0000000..3a68d2a
--- /dev/null
+++ b/src/commands/mod.rs
@@ -0,0 +1,55 @@
+use anyhow::{Context, Result};
+use std::path::PathBuf;
+use clap::Subcommand;
+
+pub mod init;
+pub mod record;
+pub mod status;
+
+#[derive(Subcommand)]
+pub enum Command {
+    /// Initialize a new dataset
+    #[command()]
+    Init {
+        /// A short descriptive name for the dataset
+        #[arg(short, long)]
+        name: String,
+        /// The top level directory of the dataset (must be a directory)
+        #[arg(default_value = ".")]
+        dataset_path: PathBuf,
+    },
+    /// Record changes to files/directories within a dataset (or create a new dataset)
+    #[command()]
+    Record {
+        /// A short descriptive message for this recording, i.e. "Re-run with lr=1e-3"
+        #[arg(short, long)]
+        message: String,
+        /// Paths to record. Defaults to current dataset or current directory if not in a dataset.
+        #[arg()]
+        record_paths: Vec<PathBuf>,
+    },
+    /// Check for changes in dataset and print basic statistics
+    Status {
+        /// Paths to display current status. Defaults to dataset containing current directory.
+        #[arg()]
+        status_paths: Vec<PathBuf>,
+    },
+}
+
+/// Dispatch a parsed `Command` to the matching subcommand implementation.
+///
+/// The value returned by `init::init_cmd` is discarded; any subcommand error
+/// is propagated to the caller.
+pub fn run(command: &Command) -> Result<()> {
+    match command {
+        Command::Init { name, dataset_path } => {
+            init::init_cmd(name, dataset_path)?;
+        }
+        Command::Record {
+            message,
+            record_paths,
+        } => record::record_cmd(message, record_paths)?,
+        Command::Status { status_paths } => status::status_cmd(status_paths)?,
+    }
+    Ok(())
+}
diff --git a/src/commands/record.rs b/src/commands/record.rs
new file mode 100644
index 0000000..1f5bfae
--- /dev/null
+++ b/src/commands/record.rs
@@ -0,0 +1,50 @@
+use anyhow::{anyhow, Context, Result};
+use rusqlite::{Connection, OpenFlags};
+use std::path::PathBuf;
+
+use nancy::{fs, program};
+
+/// Run the `record` subcommand for `record_paths` with the given `message`.
+///
+/// Locates the dataset directory via `nancy::db::find_dataset_dir`, opens its
+/// existing `nancy.db` (the database is never created here), and runs
+/// `fs::record` as a single task of a "RECORD" program.
+pub fn record_cmd(message: &str, record_paths: &[PathBuf]) -> Result<()> {
+    // TODO: if no paths are given, use ["."] for the following steps (not yet
+    // implemented; `record_paths` is currently used as given).
+
+    // Determine dataset dir (ds_dir)
+    let dataset_path = nancy::db::find_dataset_dir(record_paths)
+        .with_context(|| "Could not determine dataset directory")?;
+    log::info!("Found existing dataset at path: {:?}", dataset_path);
+    let dbpath = &dataset_path.join("nancy.db");
+
+    // open with flags to prevent creating when we believe the db exists
+    let mut conn = Connection::open_with_flags(
+        dbpath,
+        OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_NO_MUTEX,
+    )
+    // `context` takes its message verbatim, so the path must be formatted
+    // explicitly; the underlying SQLite error is kept as the error source.
+    .with_context(|| format!("Could not open existing SQLite database at {dbpath:?}"))?;
+
+    conn.pragma_update(None, "foreign_keys", &"ON")
+        .context("Could not set foreign_keys pragma")?;
+
+    nancy::db::ensure_schema(&mut conn)?;
+
+    // Note that recording may fail, in which case we should roll back only this program but keep
+    // the dataset initialized.
+    program::with_program(&mut conn, "RECORD", message, |prog| {
+        prog.perform_task(&[], |task| {
+            fs::record(
+                prog.transaction,
+                record_paths,
+                &dataset_path,
+                message,
+                task.key,
+            )
+        })
+    })??
+    .map_err(|e| anyhow!("Record program failed: {}", e))
+}
diff --git a/src/commands/status.rs b/src/commands/status.rs
new file mode 100644
index 0000000..d35ebca
--- /dev/null
+++ b/src/commands/status.rs
@@ -0,0 +1,31 @@
+use anyhow::{Context, Result};
+use std::path::PathBuf;
+
+/// Run the `status` subcommand (currently a placeholder).
+///
+/// Defaults to the canonicalized current directory when `status_paths` is
+/// empty, locates the dataset directory via `nancy::db::find_dataset_dir`, and
+/// prints the paths; the status computation itself is not implemented yet.
+pub fn status_cmd(status_paths: &[PathBuf]) -> Result<()> {
+    let status_paths = if status_paths.is_empty() {
+        vec![PathBuf::from(".").canonicalize()?]
+    } else {
+        status_paths.to_vec()
+    };
+
+    // Determine dataset dir (ds_dir)
+    let dataset_path = nancy::db::find_dataset_dir(&status_paths).with_context(|| {
+        format!(
+            "Could not find nancy.db in any directory containing given paths: {:?}",
+            status_paths
+        )
+    })?;
+    log::info!("Found existing dataset at path: {:?}", dataset_path);
+    // Not read yet; kept for the upcoming status implementation.
+    let _dbpath = &dataset_path.join("nancy.db");
+
+    println!("Computing status for paths: {:?}", status_paths);
+
+    log::error!("status_cmd not yet implemented");
+    Ok(())
+}
diff --git a/src/main.rs b/src/main.rs
index cda6cd3..6e72159 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,143 +1,19 @@
-use anyhow::{anyhow, bail, Context, Result};
-use clap::{Parser, Subcommand};
-use rusqlite::{Connection, OpenFlags};
-use uuid::Uuid;
+use anyhow::Result;
+use clap::Parser;
 
-use std::path::{Path, PathBuf};
+mod commands;
 
-use nancy::{fs, program};
-
-// Composable provenance tracking for scientific data analysis
+/// Composable provenance tracking for scientific data analysis
 #[derive(Parser)]
 #[command(author, version, about, long_about = None, arg_required_else_help=true)]
 struct Cli {
     #[command(subcommand)]
-    command: Command,
-}
-
-#[derive(Subcommand)]
-enum Command {
-    /// Initialize a new dataset
-    #[command()]
-    Init {
-        /// A short descriptive name for the dataset
-        #[arg(short, long)]
-        name: String,
-        /// The top level directory of the dataset (must be a directory)
-        #[arg(default_value = ".")]
-        dataset_path: PathBuf,
-    },
-    /// Record changes to files/directories within a dataset (or create a new dataset)
-    #[command()]
-    Record {
-        /// A short descriptive message for this recording, i.e. "Re-run with lr=1e-3"
-        #[arg(short, long)]
-        message: String,
-        #[arg()]
-        record_paths: Vec<PathBuf>,
-    },
-    /// Check for changes in dataset and print basic statistics
-    Status {},
-}
-
-fn init_schema(conn: &mut Connection, name: &str) -> Result<Uuid> {
-    let dataset_uuid =
-        nancy::db::init(conn, name).map_err(|e| anyhow!("failed to initialize schema: {}", e))?;
-
-    log::trace!("Init OK");
-    log::info!("Dataset UUID is {dataset_uuid}");
-    // Run an empty program so that the dataset log reflects when it was
-    // initialized
-    nancy::program::with_program(conn, "INIT", "Initialize dataset", |prog| {
-        let _ = prog.perform_task(&[], |task| {
-            log::debug!("INIT task UUID is {}", task.key);
-            Ok::<(), ()>(())
-        });
-        let okres: Result<()> = Ok(());
-        okres
-    })
-    .context("Could not run empty program during init_schema")??;
-
-    Ok(dataset_uuid)
-}
-
-/// Run init subcommand and return the return code
-fn init_cmd(name: &str, dataset_path: &Path) -> Result<Uuid> {
-    if !dataset_path.is_dir() {
-        bail!(
-            "Path {:?} does not point to an existing directory",
-            dataset_path
-        );
-    }
-    let dbpath = &dataset_path.join("nancy.db");
-    if dbpath.exists() {
-        bail!(
-            "Database {:?} exists, indicating this dataset is already \
-            initialized. Refusing to overwrite.",
-            dbpath
-        );
-    }
-    log::info!("Initializing new database at {:?}", dbpath);
-    let mut conn = Connection::open(dbpath)
-        .with_context(|| format!("Could not open new SQLite database at {dbpath:?}"))?;
-    conn.pragma_update(None, "foreign_keys", &"ON")
-        .context("Could not set foreign_keys pragma")?;
-    let u = init_schema(&mut conn, name)?;
-
-    Ok(u)
-}
-
-fn record_cmd(message: &str, record_paths: &Vec<PathBuf>) -> Result<()> {
-    // If no paths are given, use ["."] for the following steps.
-
-    // Determine dataset dir (ds_dir)
-    let dataset_path = nancy::db::find_dataset_dir(record_paths)
-        .with_context(|| "Could not determine dataset directory")?;
-    log::info!("Found existing dataset at path: {:?}", dataset_path);
-    let dbpath = &dataset_path.join("nancy.db");
-
-    // open with flags to prevent creating when we believe the db exists
-    let mut conn = Connection::open_with_flags(
-        dbpath,
-        OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_NO_MUTEX,
-    )
-    .context("Could not open existing SQLite database at {dbpath:?}: {e:?}")?;
-
-    conn.pragma_update(None, "foreign_keys", &"ON")?;
-
-    nancy::db::ensure_schema(&mut conn)?;
-
-    // Note that recording may fail, in which case we should roll back only this program but keep
-    // the dataset initialized.
-    program::with_program(&mut conn, "RECORD", message, |prog| {
-        prog.perform_task(&[], |task| {
-            fs::record(
-                prog.transaction,
-                record_paths.as_slice(),
-                &dataset_path,
-                message,
-                task.key,
-            )
-        })
-    })??
-    .map_err(|e| anyhow!("Record program failed: {}", e))
+    command: commands::Command,
 }
 
 fn main() -> Result<()> {
     env_logger::init();
 
     let args = Cli::parse();
-    match &args.command {
-        Command::Init { name, dataset_path } => {
-            init_cmd(name, dataset_path)?;
-        }
-        Command::Record {
-            message,
-            record_paths,
-        } => record_cmd(message, record_paths)?,
-        Command::Status {} => {
-            println!("status not yet implemented");
-        }
-    }
-    Ok(())
+    commands::run(&args.command)
 }