Fix error handling and common path search in find_dataset_dir
This commit is contained in:
parent
a38bc78093
commit
88dd2bc220
118
src/db.rs
118
src/db.rs
@ -6,14 +6,16 @@
|
|||||||
extern crate derive_more;
|
extern crate derive_more;
|
||||||
use derive_more::{Display, Error, From};
|
use derive_more::{Display, Error, From};
|
||||||
|
|
||||||
|
use log;
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use rusqlite;
|
use rusqlite;
|
||||||
use rusqlite::Connection;
|
use rusqlite::Connection;
|
||||||
use rusqlite_migration::{Error as RMError, Migrations, SchemaVersion, M};
|
use rusqlite_migration::{Error as RMError, Migrations, SchemaVersion, M};
|
||||||
use uuid;
|
use uuid;
|
||||||
|
|
||||||
|
use std::collections::{HashSet};
|
||||||
use std::num::NonZeroUsize; // for describing schema versions
|
use std::num::NonZeroUsize; // for describing schema versions
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Components, Path, PathBuf};
|
||||||
|
|
||||||
static MIGRATIONS: Lazy<Migrations<'static>> = Lazy::new(|| {
|
static MIGRATIONS: Lazy<Migrations<'static>> = Lazy::new(|| {
|
||||||
Migrations::new(vec![M::up(include_str!(
|
Migrations::new(vec![M::up(include_str!(
|
||||||
@ -106,43 +108,99 @@ pub fn local_uuid(conn: &Connection) -> Result<uuid::Uuid, LocalUuidError> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub enum FindDatasetResult {
|
pub enum FindDatasetError {
|
||||||
/// All searched paths belong to an existing dataset at this directory
|
/// An empty list of paths was provided
|
||||||
ExistingDataset(PathBuf),
|
NoPathsProvided,
|
||||||
/// Searched paths belong to multiple existing datasets (or some may belong to None)
|
/// An I/O error occurred while canonicalizing one of the search paths
|
||||||
MultipleDatasets(Vec<Option<PathBuf>>),
|
PathError(std::io::Error),
|
||||||
/// None of the search paths belongs to an existing dataset. Returned path is the nearest
|
/// None of the search paths belongs to an existing dataset. Returned path is the nearest
|
||||||
/// common ancestor of all searched paths (on same filesystem)
|
/// common ancestor of all searched paths (on same filesystem)
|
||||||
NoDataset(PathBuf),
|
NoDataset(PathBuf),
|
||||||
|
/// Exactly one dataset was found, but at least one search path is not inside any dataset
|
||||||
|
SomeNotInDataset,
|
||||||
|
/// Search paths belong to more than one existing dataset; `some_paths_not_in_dataset` is set when at least one path is in no dataset at all
|
||||||
|
MultipleDatasets {
|
||||||
|
datasets: Vec<PathBuf>,
|
||||||
|
some_paths_not_in_dataset: bool,
|
||||||
|
},
|
||||||
|
// TODO: REMOVE THIS
|
||||||
|
NotImplemented,
|
||||||
|
}
|
||||||
|
impl From<std::io::Error> for FindDatasetError {
|
||||||
|
fn from(e: std::io::Error) -> FindDatasetError { FindDatasetError::PathError(e) }
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
/// Given a collection of paths, find the single dataset directory (an ancestor containing `nancy.db`) that all of them belong to.
|
||||||
pub enum FindDatasetError {
|
///
|
||||||
/// The provided paths lie on different filesystems, so no common ancestor can be defined
|
/// Returns `Ok(path)` if `path` is the only dataset found and it contains all given search paths.
|
||||||
DifferentFilesystems,
|
pub fn find_dataset_dir(paths: &[PathBuf]) -> Result<PathBuf, FindDatasetError> {
|
||||||
/// An empty list of paths was provided
|
let mut ds_dirs: HashSet<PathBuf> = HashSet::new();
|
||||||
NoPathsProvided,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Given a collection of paths, find a common directory containing them
|
let first_path = paths.iter().next().ok_or(FindDatasetError::NoPathsProvided)?.canonicalize()?;
|
||||||
pub fn find_dataset_dir(paths: &[PathBuf]) -> Result<FindDatasetResult, FindDatasetError> {
|
log::debug!("First path is {:?}", first_path);
|
||||||
match paths.get(0) {
|
|
||||||
None => Err(FindDatasetError::NoPathsProvided),
|
|
||||||
Some(p) => {
|
|
||||||
let mut ds_dirs: Vec<PathBuf> = Vec::new();
|
|
||||||
let mut ancestor = p;
|
|
||||||
|
|
||||||
// for each canonicalized path from paths
|
let mut common_path = first_path.to_path_buf();
|
||||||
// look at parent directories until either:
|
log::debug!("First path as PathBuf is {:?}", common_path);
|
||||||
// - parent dir found in ds_dirs
|
let mut found_common_path = false;
|
||||||
// - nancy.db found
|
|
||||||
// - reached root of filesystem (no dataset found)
|
let mut some_not_in_ds = false; // whether any paths are not in any dataset
|
||||||
// also, until we've found a common ancestor, take parent of ancestor.
|
|
||||||
for p in paths {
|
for p in paths.iter() {
|
||||||
let c = p.canonicalize();
|
let pcanon = p.canonicalize()?;
|
||||||
|
log::debug!("Processing search path: {:?}", pcanon);
|
||||||
|
|
||||||
|
// Look for nancy.db in all ancestors
|
||||||
|
// NOTE: .ancestors() returns self first
|
||||||
|
let mut found_nancydb = false;
|
||||||
|
for pan in pcanon.ancestors() {
|
||||||
|
log::debug!("Looking for nancy.db for {:?} in {:?}", p, pan);
|
||||||
|
if pan.join("nancy.db").exists() {
|
||||||
|
log::debug!("Found nancy.db for {:?} in ancestor directory {:?}", p, pan);
|
||||||
|
found_nancydb = true;
|
||||||
|
ds_dirs.insert(pan.to_owned());
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(FindDatasetResult::ExistingDataset(PathBuf::from(".")))
|
|
||||||
}
|
}
|
||||||
|
if !found_nancydb {
|
||||||
|
some_not_in_ds = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// determine common path
|
||||||
|
let mut this_common_path = PathBuf::new();
|
||||||
|
for (l, r) in common_path.components().zip(pcanon.components()) {
|
||||||
|
log::debug!("Comparing components {:?} and {:?}", l, r);
|
||||||
|
if l == r {
|
||||||
|
this_common_path.push(l.as_os_str());
|
||||||
|
found_common_path = true;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
common_path = this_common_path;
|
||||||
|
}
|
||||||
|
log::debug!("Found ds_dirs: {:?}", ds_dirs);
|
||||||
|
|
||||||
|
if found_common_path {
|
||||||
|
log::debug!("Found common search path: {:?}", common_path);
|
||||||
|
} else {
|
||||||
|
log::debug!("Did not find a common path");
|
||||||
|
}
|
||||||
|
|
||||||
|
if ds_dirs.len() == 0 {
|
||||||
|
Err(FindDatasetError::NoDataset(common_path))
|
||||||
|
} else if ds_dirs.len() == 1 {
|
||||||
|
let d = ds_dirs.iter().next().expect("ds_dirs has exactly one value");
|
||||||
|
if some_not_in_ds {
|
||||||
|
Err(FindDatasetError::SomeNotInDataset)
|
||||||
|
} else {
|
||||||
|
Ok(d.to_path_buf())
|
||||||
|
}
|
||||||
|
|
||||||
|
} else { // ds_dirs.len() > 1
|
||||||
|
Err(FindDatasetError::MultipleDatasets {
|
||||||
|
datasets: ds_dirs.into_iter().collect(),
|
||||||
|
some_paths_not_in_dataset: some_not_in_ds,
|
||||||
|
})
|
||||||
|
// Note that multiple datasets might exist _and_ some paths may not be in them...
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
150
src/main.rs
150
src/main.rs
@ -56,99 +56,85 @@ fn main() {
|
|||||||
message,
|
message,
|
||||||
record_paths,
|
record_paths,
|
||||||
}) => {
|
}) => {
|
||||||
|
// If no paths are given, use ["."] for the following steps.
|
||||||
|
|
||||||
// Determine dataset dir (ds_dir)
|
// Determine dataset dir (ds_dir)
|
||||||
let mut conn = match nancy::db::find_dataset_dir(record_paths) {
|
let mut conn = match nancy::db::find_dataset_dir(record_paths) {
|
||||||
|
Err(nancy::db::FindDatasetError::NoDataset(path)) => {
|
||||||
|
// initialize
|
||||||
|
log::info!("No dataset at or above nearest ancestor path: {:?}", path);
|
||||||
|
let dbpath = &path.join("nancy.db");
|
||||||
|
let mut c = match Connection::open(dbpath) {
|
||||||
|
Err(e) => {
|
||||||
|
log::error!(
|
||||||
|
"Could not open new SQLite database at {dbpath:?}."
|
||||||
|
);
|
||||||
|
process::exit(1);
|
||||||
|
}
|
||||||
|
Ok(cc) => cc,
|
||||||
|
};
|
||||||
|
c.pragma_update(None, "foreign_keys", &"ON").unwrap();
|
||||||
|
match nancy::db::init(&mut c) {
|
||||||
|
Err(e) => {
|
||||||
|
log::error!(
|
||||||
|
"Encountered error in initializing schema: {:?}",
|
||||||
|
e
|
||||||
|
);
|
||||||
|
process::exit(1);
|
||||||
|
}
|
||||||
|
Ok(dataset_uuid) => {
|
||||||
|
log::trace!("Init OK");
|
||||||
|
log::info!("Dataset UUID is {dataset_uuid}");
|
||||||
|
// Run an empty program so that the dataset log reflects when it was
|
||||||
|
// initialized
|
||||||
|
nancy::program::with_program(
|
||||||
|
&mut c,
|
||||||
|
"INIT",
|
||||||
|
"Initialize dataset",
|
||||||
|
|_| {
|
||||||
|
let okres: Result<(), ()> = Ok(());
|
||||||
|
okres
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.expect("Empty program should not throw error");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
c
|
||||||
|
},
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
log::error!("Could not determine dataset directory: {:?}", e);
|
log::error!("Could not determine dataset directory: {:?}", e);
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
}
|
},
|
||||||
Ok(res) => {
|
Ok(path) => {
|
||||||
match res {
|
// existing
|
||||||
nancy::db::FindDatasetResult::MultipleDatasets(_) => {
|
log::info!("Found existing dataset at path: {:?}", path);
|
||||||
log::error!("Provided paths belong to multiple datasets.");
|
let dbpath = &path.join("nancy.db");
|
||||||
|
// open with flags to prevent creating when we believe the db exists
|
||||||
|
let mut c = match Connection::open_with_flags(
|
||||||
|
dbpath,
|
||||||
|
OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_NO_MUTEX,
|
||||||
|
) {
|
||||||
|
Err(e) => {
|
||||||
|
log::error!(
|
||||||
|
"Could not open existing SQLite database at {dbpath:?}: {e:?}"
|
||||||
|
);
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
}
|
}
|
||||||
nancy::db::FindDatasetResult::NoDataset(path) => {
|
Ok(cc) => cc,
|
||||||
// initialize
|
};
|
||||||
log::info!("No dataset at or above nearest ancestor path: {:?}", path);
|
c.pragma_update(None, "foreign_keys", &"ON").unwrap();
|
||||||
let dbpath = &path.join("nancy.db");
|
match nancy::db::ensure_schema(&mut c, true) {
|
||||||
let mut c = match Connection::open(dbpath) {
|
Err(e) => {
|
||||||
Err(e) => {
|
log::error!("Error ensuring schema: {}", e);
|
||||||
log::error!(
|
process::exit(1);
|
||||||
"Could not open new SQLite database at {dbpath:?}."
|
|
||||||
);
|
|
||||||
process::exit(1);
|
|
||||||
}
|
|
||||||
Ok(cc) => cc,
|
|
||||||
};
|
|
||||||
c.pragma_update(None, "foreign_keys", &"ON").unwrap();
|
|
||||||
match nancy::db::init(&mut c) {
|
|
||||||
Err(e) => {
|
|
||||||
log::error!(
|
|
||||||
"Encountered error in initializing schema: {:?}",
|
|
||||||
e
|
|
||||||
);
|
|
||||||
process::exit(1);
|
|
||||||
}
|
|
||||||
Ok(dataset_uuid) => {
|
|
||||||
log::trace!("Init OK");
|
|
||||||
log::info!("Dataset UUID is {dataset_uuid}");
|
|
||||||
// Run an empty program so that the dataset log reflects when it was
|
|
||||||
// initialized
|
|
||||||
nancy::program::with_program(
|
|
||||||
&mut c,
|
|
||||||
"INIT",
|
|
||||||
"Initialize dataset",
|
|
||||||
|_| {
|
|
||||||
let okres: Result<(), ()> = Ok(());
|
|
||||||
okres
|
|
||||||
},
|
|
||||||
)
|
|
||||||
.expect("Empty program should not throw error");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
c
|
|
||||||
}
|
}
|
||||||
nancy::db::FindDatasetResult::ExistingDataset(path) => {
|
Ok(update_result) => {
|
||||||
// existing
|
log::debug!("Schema ensured: {:?}", update_result);
|
||||||
log::info!("Found existing dataset at path: {:?}", path);
|
|
||||||
let dbpath = &path.join("nancy.db");
|
|
||||||
// open with flags to prevent creating when we believe the db exists
|
|
||||||
let mut c = match Connection::open_with_flags(
|
|
||||||
dbpath,
|
|
||||||
OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_NO_MUTEX,
|
|
||||||
) {
|
|
||||||
Err(e) => {
|
|
||||||
log::error!(
|
|
||||||
"Could not open existing SQLite database at {dbpath:?}."
|
|
||||||
);
|
|
||||||
process::exit(1);
|
|
||||||
}
|
|
||||||
Ok(cc) => cc,
|
|
||||||
};
|
|
||||||
c.pragma_update(None, "foreign_keys", &"ON").unwrap();
|
|
||||||
match nancy::db::ensure_schema(&mut c, true) {
|
|
||||||
Err(e) => {
|
|
||||||
log::error!("Error ensuring schema: {}", e);
|
|
||||||
process::exit(1);
|
|
||||||
}
|
|
||||||
Ok(update_result) => {
|
|
||||||
log::debug!("Schema ensured: {:?}", update_result);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
c
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
c
|
||||||
|
},
|
||||||
};
|
};
|
||||||
// If no paths given, use ["."] for the following steps.
|
|
||||||
// First, for each requested record_path, find whether it exists within a dataset dir
|
|
||||||
// Ensure that all paths are in the same dataset dir, or none belong to a dataset dir
|
|
||||||
// If no dataset dirs found, get top directory containing them all and set this as ds_dir
|
|
||||||
let mut conn = Connection::open_in_memory().expect("Could not create in-memory db");
|
|
||||||
|
|
||||||
// If initializing:
|
|
||||||
// If not initializing, ensure the schema is up to date
|
|
||||||
|
|
||||||
if let Err(e) = nancy::program::with_program(&mut conn, "RECORD", message, |prog| {
|
if let Err(e) = nancy::program::with_program(&mut conn, "RECORD", message, |prog| {
|
||||||
let dataset_path = PathBuf::from(".");
|
let dataset_path = PathBuf::from(".");
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user