diff --git a/src/db.rs b/src/db.rs index e59f50a..40f342b 100644 --- a/src/db.rs +++ b/src/db.rs @@ -6,14 +6,16 @@ extern crate derive_more; use derive_more::{Display, Error, From}; +use log; use once_cell::sync::Lazy; use rusqlite; use rusqlite::Connection; use rusqlite_migration::{Error as RMError, Migrations, SchemaVersion, M}; use uuid; +use std::collections::{HashSet}; use std::num::NonZeroUsize; // for describing schema versions -use std::path::{Path, PathBuf}; +use std::path::{Components, Path, PathBuf}; static MIGRATIONS: Lazy> = Lazy::new(|| { Migrations::new(vec![M::up(include_str!( @@ -106,43 +108,99 @@ pub fn local_uuid(conn: &Connection) -> Result { } #[derive(Debug)] -pub enum FindDatasetResult { - /// All searched paths belong to an existing dataset at this directory - ExistingDataset(PathBuf), - /// Searched paths belong to multiple existing datasets (or some may belong to None) - MultipleDatasets(Vec>), +pub enum FindDatasetError { + /// An empty list of paths was provided + NoPathsProvided, + /// Something went wrong when parsing paths + PathError(std::io::Error), /// None of the search paths belongs to an existing dataset. Returned path is the nearest /// common ancestor of all searched paths (on same filesystem) NoDataset(PathBuf), + /// Some, but not all, search paths do not reside in an existing dataset + SomeNotInDataset, + /// Searched paths belong to multiple existing datasets (or some may belong to None) + MultipleDatasets { + datasets: Vec, + some_paths_not_in_dataset: bool, + }, + // TODO: REMOVE THIS + NotImplemented, +} +impl From for FindDatasetError { + fn from(e: std::io::Error) -> FindDatasetError { FindDatasetError::PathError(e) } } -#[derive(Debug)] -pub enum FindDatasetError { - /// The provided paths lie on different filesystems, so no common ancestor can be defined - DifferentFilesystems, - /// An empty list of paths was provided - NoPathsProvided, -} +/// Given a collection of paths, find a common directory containing them. +/// +/// Returns `Ok(path)` if `path` is the only dataset found and it contains all given search paths. +pub fn find_dataset_dir(paths: &[PathBuf]) -> Result { + let mut ds_dirs: HashSet = HashSet::new(); -/// Given a collection of paths, find a common directory containing them -pub fn find_dataset_dir(paths: &[PathBuf]) -> Result { - match paths.get(0) { - None => Err(FindDatasetError::NoPathsProvided), - Some(p) => { - let mut ds_dirs: Vec = Vec::new(); - let mut ancestor = p; + let first_path = paths.iter().next().ok_or(FindDatasetError::NoPathsProvided)?.canonicalize()?; + log::debug!("First path is {:?}", first_path); - // for each canonicalized path from paths - // look at parent directories until either: - // - parent dir found in ds_dirs - // - nancy.db found - // - reached root of filesystem (no dataset found) - // also, until we've found a common ancestor, take parent of ancestor. - for p in paths { - let c = p.canonicalize(); + let mut common_path = first_path.to_path_buf(); + log::debug!("First path as PathBuf is {:?}", common_path); + let mut found_common_path = false; + + let mut some_not_in_ds = false; // whether any paths are not in any dataset + + for p in paths.iter() { + let pcanon = p.canonicalize()?; + log::debug!("Processing search path: {:?}", pcanon); + + // Look for nancy.db in all ancestors + // NOTE: .ancestors() returns self first + let mut found_nancydb = false; + for pan in pcanon.ancestors() { + log::debug!("Looking for nancy.db for {:?} in {:?}", p, pan); + if pan.join("nancy.db").exists() { + log::debug!("Found nancy.db for {:?} in ancestor directory {:?}", p, pan); + found_nancydb = true; + ds_dirs.insert(pan.to_owned()); + break; } - - Ok(FindDatasetResult::ExistingDataset(PathBuf::from("."))) } + if !found_nancydb { + some_not_in_ds = true; + } + + // determine common path + let mut this_common_path = PathBuf::new(); + for (l, r) in common_path.components().zip(pcanon.components()) { + log::debug!("Comparing components {:?} and {:?}", l, r); + if l == r { + this_common_path.push(l.as_os_str()); + found_common_path = true; + } else { + break; + } + } + common_path = this_common_path; + } + log::debug!("Found ds_dirs: {:?}", ds_dirs); + + if found_common_path { + log::debug!("Found common search path: {:?}", common_path); + } else { + log::debug!("Did not find a common path"); + } + + if ds_dirs.len() == 0 { + Err(FindDatasetError::NoDataset(common_path)) + } else if ds_dirs.len() == 1 { + let d = ds_dirs.iter().next().expect("ds_dirs has exactly one value"); + if some_not_in_ds { + Err(FindDatasetError::SomeNotInDataset) + } else { + Ok(d.to_path_buf()) + } + + } else { // ds_dirs.len() > 1 + Err(FindDatasetError::MultipleDatasets { + datasets: ds_dirs.into_iter().collect(), + some_paths_not_in_dataset: some_not_in_ds, + }) + // Note that multiple datasets might exist _and_ some paths may not be in them... } } diff --git a/src/main.rs b/src/main.rs index 7c63bb9..7845081 100644 --- a/src/main.rs +++ b/src/main.rs @@ -56,99 +56,85 @@ fn main() { message, record_paths, }) => { + // If no paths are given, use ["."] for the following steps. + // Determine dataset dir (ds_dir) let mut conn = match nancy::db::find_dataset_dir(record_paths) { + Err(nancy::db::FindDatasetError::NoDataset(path)) => { + // initialize + log::info!("No dataset at or above nearest ancestor path: {:?}", path); + let dbpath = &path.join("nancy.db"); + let mut c = match Connection::open(dbpath) { + Err(e) => { + log::error!( + "Could not open new SQLite database at {dbpath:?}." + ); + process::exit(1); + } + Ok(cc) => cc, + }; + c.pragma_update(None, "foreign_keys", &"ON").unwrap(); + match nancy::db::init(&mut c) { + Err(e) => { + log::error!( + "Encountered error in initializing schema: {:?}", + e + ); + process::exit(1); + } + Ok(dataset_uuid) => { + log::trace!("Init OK"); + log::info!("Dataset UUID is {dataset_uuid}"); + // Run an empty program so that the dataset log reflects when it was + // initialized + nancy::program::with_program( + &mut c, + "INIT", + "Initialize dataset", + |_| { + let okres: Result<(), ()> = Ok(()); + okres + }, + ) + .expect("Empty program should not throw error"); + } + } + c + }, Err(e) => { log::error!("Could not determine dataset directory: {:?}", e); process::exit(1); - } - Ok(res) => { - match res { - nancy::db::FindDatasetResult::MultipleDatasets(_) => { - log::error!("Provided paths belong to multiple datasets."); + }, + Ok(path) => { + // existing + log::info!("Found existing dataset at path: {:?}", path); + let dbpath = &path.join("nancy.db"); + // open with flags to prevent creating when we believe the db exists + let mut c = match Connection::open_with_flags( + dbpath, + OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_NO_MUTEX, + ) { + Err(e) => { + log::error!( + "Could not open existing SQLite database at {dbpath:?}: {e:?}" + ); process::exit(1); } - nancy::db::FindDatasetResult::NoDataset(path) => { - // initialize - log::info!("No dataset at or above nearest ancestor path: {:?}", path); - let dbpath = &path.join("nancy.db"); - let mut c = match Connection::open(dbpath) { - Err(e) => { - log::error!( - "Could not open new SQLite database at {dbpath:?}." - ); - process::exit(1); - } - Ok(cc) => cc, - }; - c.pragma_update(None, "foreign_keys", &"ON").unwrap(); - match nancy::db::init(&mut c) { - Err(e) => { - log::error!( - "Encountered error in initializing schema: {:?}", - e - ); - process::exit(1); - } - Ok(dataset_uuid) => { - log::trace!("Init OK"); - log::info!("Dataset UUID is {dataset_uuid}"); - // Run an empty program so that the dataset log reflects when it was - // initialized - nancy::program::with_program( - &mut c, - "INIT", - "Initialize dataset", - |_| { - let okres: Result<(), ()> = Ok(()); - okres - }, - ) - .expect("Empty program should not throw error"); - } - } - c + Ok(cc) => cc, + }; + c.pragma_update(None, "foreign_keys", &"ON").unwrap(); + match nancy::db::ensure_schema(&mut c, true) { + Err(e) => { + log::error!("Error ensuring schema: {}", e); + process::exit(1); } - nancy::db::FindDatasetResult::ExistingDataset(path) => { - // existing - log::info!("Found existing dataset at path: {:?}", path); - let dbpath = &path.join("nancy.db"); - // open with flags to prevent creating when we believe the db exists - let mut c = match Connection::open_with_flags( - dbpath, - OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_NO_MUTEX, - ) { - Err(e) => { - log::error!( - "Could not open existing SQLite database at {dbpath:?}." - ); - process::exit(1); - } - Ok(cc) => cc, - }; - c.pragma_update(None, "foreign_keys", &"ON").unwrap(); - match nancy::db::ensure_schema(&mut c, true) { - Err(e) => { - log::error!("Error ensuring schema: {}", e); - process::exit(1); - } - Ok(update_result) => { - log::debug!("Schema ensured: {:?}", update_result); - } - } - c + Ok(update_result) => { + log::debug!("Schema ensured: {:?}", update_result); } } - } + c + }, }; - // If no paths given, use ["."] for the following steps. - // First, for each requested record_path, find whether it exists within a dataset dir - // Ensure that all paths are in the same dataset dir, or none belong to a dataset dir - // If no dataset dirs found, get top directory containing them all and set this as ds_dir - let mut conn = Connection::open_in_memory().expect("Could not create in-memory db"); - - // If initializing: - // If not initializing, ensure the schema is up to date if let Err(e) = nancy::program::with_program(&mut conn, "RECORD", message, |prog| { let dataset_path = PathBuf::from(".");