Fix error handling and common path search in find_dataset_dir

This commit is contained in:
Jacob Hinkle 2022-10-27 10:36:47 -04:00
parent a38bc78093
commit 88dd2bc220
2 changed files with 156 additions and 112 deletions

116
src/db.rs
View File

@ -6,14 +6,16 @@
extern crate derive_more; extern crate derive_more;
use derive_more::{Display, Error, From}; use derive_more::{Display, Error, From};
use log;
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use rusqlite; use rusqlite;
use rusqlite::Connection; use rusqlite::Connection;
use rusqlite_migration::{Error as RMError, Migrations, SchemaVersion, M}; use rusqlite_migration::{Error as RMError, Migrations, SchemaVersion, M};
use uuid; use uuid;
use std::collections::{HashSet};
use std::num::NonZeroUsize; // for describing schema versions use std::num::NonZeroUsize; // for describing schema versions
use std::path::{Path, PathBuf}; use std::path::{Components, Path, PathBuf};
static MIGRATIONS: Lazy<Migrations<'static>> = Lazy::new(|| { static MIGRATIONS: Lazy<Migrations<'static>> = Lazy::new(|| {
Migrations::new(vec![M::up(include_str!( Migrations::new(vec![M::up(include_str!(
@ -106,43 +108,99 @@ pub fn local_uuid(conn: &Connection) -> Result<uuid::Uuid, LocalUuidError> {
} }
#[derive(Debug)] #[derive(Debug)]
pub enum FindDatasetResult { pub enum FindDatasetError {
/// All searched paths belong to an existing dataset at this directory /// An empty list of paths was provided
ExistingDataset(PathBuf), NoPathsProvided,
/// Searched paths belong to multiple existing datasets (or some may belong to None) /// Something went wrong when parsing paths
MultipleDatasets(Vec<Option<PathBuf>>), PathError(std::io::Error),
/// None of the search paths belongs to an existing dataset. Returned path is the nearest /// None of the search paths belongs to an existing dataset. Returned path is the nearest
/// common ancestor of all searched paths (on same filesystem) /// common ancestor of all searched paths (on same filesystem)
NoDataset(PathBuf), NoDataset(PathBuf),
/// Some, but not all, search paths do not reside in an existing dataset
SomeNotInDataset,
/// Searched paths belong to multiple existing datasets (or some may belong to None)
MultipleDatasets {
datasets: Vec<PathBuf>,
some_paths_not_in_dataset: bool,
},
// TODO: REMOVE THIS
NotImplemented,
}
impl From<std::io::Error> for FindDatasetError {
fn from(e: std::io::Error) -> FindDatasetError { FindDatasetError::PathError(e) }
} }
#[derive(Debug)] /// Given a collection of paths, find a common directory containing them.
pub enum FindDatasetError { ///
/// The provided paths lie on different filesystems, so no common ancestor can be defined /// Returns `Ok(path)` if `path` is the only dataset found and it contains all given search paths.
DifferentFilesystems, pub fn find_dataset_dir(paths: &[PathBuf]) -> Result<PathBuf, FindDatasetError> {
/// An empty list of paths was provided let mut ds_dirs: HashSet<PathBuf> = HashSet::new();
NoPathsProvided,
let first_path = paths.iter().next().ok_or(FindDatasetError::NoPathsProvided)?.canonicalize()?;
log::debug!("First path is {:?}", first_path);
let mut common_path = first_path.to_path_buf();
log::debug!("First path as PathBuf is {:?}", common_path);
let mut found_common_path = false;
let mut some_not_in_ds = false; // whether any paths are not in any dataset
for p in paths.iter() {
let pcanon = p.canonicalize()?;
log::debug!("Processing search path: {:?}", pcanon);
// Look for nancy.db in all ancestors
// NOTE: .ancestors() returns self first
let mut found_nancydb = false;
for pan in pcanon.ancestors() {
log::debug!("Looking for nancy.db for {:?} in {:?}", p, pan);
if pan.join("nancy.db").exists() {
log::debug!("Found nancy.db for {:?} in ancestor directory {:?}", p, pan);
found_nancydb = true;
ds_dirs.insert(pan.to_owned());
break;
}
}
if !found_nancydb {
some_not_in_ds = true;
} }
/// Given a collection of paths, find a common directory containing them // determine common path
pub fn find_dataset_dir(paths: &[PathBuf]) -> Result<FindDatasetResult, FindDatasetError> { let mut this_common_path = PathBuf::new();
match paths.get(0) { for (l, r) in common_path.components().zip(pcanon.components()) {
None => Err(FindDatasetError::NoPathsProvided), log::debug!("Comparing components {:?} and {:?}", l, r);
Some(p) => { if l == r {
let mut ds_dirs: Vec<PathBuf> = Vec::new(); this_common_path.push(l.as_os_str());
let mut ancestor = p; found_common_path = true;
} else {
break;
}
}
common_path = this_common_path;
}
log::debug!("Found ds_dirs: {:?}", ds_dirs);
// for each canonicalized path from paths if found_common_path {
// look at parent directories until either: log::debug!("Found common search path: {:?}", common_path);
// - parent dir found in ds_dirs } else {
// - nancy.db found log::debug!("Did not find a common path");
// - reached root of filesystem (no dataset found)
// also, until we've found a common ancestor, take parent of ancestor.
for p in paths {
let c = p.canonicalize();
} }
Ok(FindDatasetResult::ExistingDataset(PathBuf::from("."))) if ds_dirs.len() == 0 {
} Err(FindDatasetError::NoDataset(common_path))
} else if ds_dirs.len() == 1 {
let d = ds_dirs.iter().next().expect("ds_dirs has exactly one value");
if some_not_in_ds {
Err(FindDatasetError::SomeNotInDataset)
} else {
Ok(d.to_path_buf())
}
} else { // ds_dirs.len() > 1
Err(FindDatasetError::MultipleDatasets {
datasets: ds_dirs.into_iter().collect(),
some_paths_not_in_dataset: some_not_in_ds,
})
// Note that multiple datasets might exist _and_ some paths may not be in them...
} }
} }

View File

@ -56,19 +56,11 @@ fn main() {
message, message,
record_paths, record_paths,
}) => { }) => {
// If no paths are given, use ["."] for the following steps.
// Determine dataset dir (ds_dir) // Determine dataset dir (ds_dir)
let mut conn = match nancy::db::find_dataset_dir(record_paths) { let mut conn = match nancy::db::find_dataset_dir(record_paths) {
Err(e) => { Err(nancy::db::FindDatasetError::NoDataset(path)) => {
log::error!("Could not determine dataset directory: {:?}", e);
process::exit(1);
}
Ok(res) => {
match res {
nancy::db::FindDatasetResult::MultipleDatasets(_) => {
log::error!("Provided paths belong to multiple datasets.");
process::exit(1);
}
nancy::db::FindDatasetResult::NoDataset(path) => {
// initialize // initialize
log::info!("No dataset at or above nearest ancestor path: {:?}", path); log::info!("No dataset at or above nearest ancestor path: {:?}", path);
let dbpath = &path.join("nancy.db"); let dbpath = &path.join("nancy.db");
@ -108,8 +100,12 @@ fn main() {
} }
} }
c c
} },
nancy::db::FindDatasetResult::ExistingDataset(path) => { Err(e) => {
log::error!("Could not determine dataset directory: {:?}", e);
process::exit(1);
},
Ok(path) => {
// existing // existing
log::info!("Found existing dataset at path: {:?}", path); log::info!("Found existing dataset at path: {:?}", path);
let dbpath = &path.join("nancy.db"); let dbpath = &path.join("nancy.db");
@ -120,7 +116,7 @@ fn main() {
) { ) {
Err(e) => { Err(e) => {
log::error!( log::error!(
"Could not open existing SQLite database at {dbpath:?}." "Could not open existing SQLite database at {dbpath:?}: {e:?}"
); );
process::exit(1); process::exit(1);
} }
@ -137,18 +133,8 @@ fn main() {
} }
} }
c c
} },
}
}
}; };
// If no paths given, use ["."] for the following steps.
// First, for each requested record_path, find whether it exists within a dataset dir
// Ensure that all paths are in the same dataset dir, or none belong to a dataset dir
// If no dataset dirs found, get top directory containing them all and set this as ds_dir
let mut conn = Connection::open_in_memory().expect("Could not create in-memory db");
// If initializing:
// If not initializing, ensure the schema is up to date
if let Err(e) = nancy::program::with_program(&mut conn, "RECORD", message, |prog| { if let Err(e) = nancy::program::with_program(&mut conn, "RECORD", message, |prog| {
let dataset_path = PathBuf::from("."); let dataset_path = PathBuf::from(".");