//! # Database housekeeping methods //! //! This module is mostly for managing migrations. This does not encapsulate all database accesses. //! extern crate derive_more; use derive_more::{Display, Error, From}; use log; use once_cell::sync::Lazy; use rusqlite; use rusqlite::Connection; use rusqlite_migration::{Error as RMError, Migrations, SchemaVersion, M}; use uuid; use std::collections::HashSet; use std::num::NonZeroUsize; // for describing schema versions use std::path::PathBuf; static MIGRATIONS: Lazy> = Lazy::new(|| { Migrations::new(vec![M::up(include_str!( "migrations/20221024_initial_schema.sql" ))]) }); /// The current schema version as it will appear in the SQLite user_version pub static CURRENT_SCHEMA_VERSION: usize = 1; /// Error type for checking schema version of a Connection #[derive(Debug, Display)] pub enum SchemaError { CurrentVersionError(RMError), NoSchemaVersionSet, OldSchema(NonZeroUsize), OutsideSchema(NonZeroUsize), } impl From for SchemaError { fn from(cve: RMError) -> Self { Self::CurrentVersionError(cve) } } /// Get schema version in connected database pub fn schema_version(conn: &Connection) -> Result { match MIGRATIONS.current_version(conn)? { SchemaVersion::NoneSet => Err(SchemaError::NoSchemaVersionSet), SchemaVersion::Inside(v) => // A known version is set. Check if it is current { if v.get() == CURRENT_SCHEMA_VERSION { Ok(v) } else { Err(SchemaError::OldSchema(v)) } } SchemaVersion::Outside(v) => Err(SchemaError::OutsideSchema(v)), } } /// Initialize the database starting with an empty schema. /// /// This function initializes a database to the latest schema, and also generates a new random UUID /// and associates that with this dataset. The generated uuid is returned. pub fn init(conn: &mut Connection, name: &str) -> Result { MIGRATIONS.to_latest(conn)?; let u = uuid::Uuid::new_v4(); conn.execute( "INSERT INTO local_metadata (key, value) VALUES ('dataset_uuid', ?)", [u.to_string()], )?; conn.execute( "INSERT INTO dataset (key, name) VALUES (?1, ?2)", (u.as_bytes(), name), )?; Ok(u) } #[derive(Debug)] pub struct SchemaUpdateResult { pub old_version: Option, pub current_version: NonZeroUsize, pub updated: bool, } /// Ensure that the schema in conn is current. pub fn ensure_schema(conn: &mut Connection) -> Result { let old_version = schema_version(conn)?; let current_version = unsafe { NonZeroUsize::new_unchecked(CURRENT_SCHEMA_VERSION) }; Ok(SchemaUpdateResult { old_version: None, current_version: current_version, updated: old_version == current_version, }) } #[derive(Debug, Display, Error, From)] pub enum LocalUuidError { StatementPrepareError(rusqlite::Error), UuidError(uuid::Error), } /// Find the UUID of the dataset representing the directory containing the connected 'nancy.db' pub fn local_uuid(conn: &Connection) -> Result { let uuid = conn .prepare("SELECT value FROM local_metadata WHERE key = 'dataset_uuid' LIMIT 1")? .query_row([], |row| row.get(0))?; Ok(uuid) } #[derive(Debug)] pub enum FindDatasetError { /// An empty list of paths was provided NoPathsProvided, /// Something went wrong when parsing paths PathError(std::io::Error), /// None of the search paths belongs to an existing dataset. Returned path is the nearest /// common ancestor of all searched paths (on same filesystem) NoDataset(PathBuf), /// Some, but not all, search paths do not reside in an existing dataset SomeNotInDataset, /// Searched paths belong to multiple existing datasets (or some may belong to None) MultipleDatasets { datasets: Vec, some_paths_not_in_dataset: bool, }, // TODO: REMOVE THIS NotImplemented, } impl From for FindDatasetError { fn from(e: std::io::Error) -> FindDatasetError { FindDatasetError::PathError(e) } } /// Given a collection of paths, find a common directory containing them. /// /// Returns `Ok(path)` if `path` is the only dataset found and it contains all given search paths. pub fn find_dataset_dir(paths: &[PathBuf]) -> Result { let mut ds_dirs: HashSet = HashSet::new(); let first_path = paths .iter() .next() .ok_or(FindDatasetError::NoPathsProvided)? .canonicalize()?; log::debug!("First path is {:?}", first_path); let mut common_path = first_path.to_path_buf(); log::debug!("First path as PathBuf is {:?}", common_path); let mut found_common_path = false; let mut some_not_in_ds = false; // whether any paths are not in any dataset for p in paths.iter() { let pcanon = p.canonicalize()?; log::debug!("Processing search path: {:?}", pcanon); // Look for nancy.db in all ancestors // NOTE: .ancestors() returns self first let mut found_nancydb = false; for pan in pcanon.ancestors() { log::debug!("Looking for nancy.db for {:?} in {:?}", p, pan); if pan.join("nancy.db").exists() { log::debug!("Found nancy.db for {:?} in ancestor directory {:?}", p, pan); found_nancydb = true; ds_dirs.insert(pan.to_owned()); break; } } if !found_nancydb { some_not_in_ds = true; } // determine common path let mut this_common_path = PathBuf::new(); for (l, r) in common_path.components().zip(pcanon.components()) { log::debug!("Comparing components {:?} and {:?}", l, r); if l == r { this_common_path.push(l.as_os_str()); found_common_path = true; } else { break; } } common_path = this_common_path; } log::debug!("Found ds_dirs: {:?}", ds_dirs); if found_common_path { log::debug!("Found common search path: {:?}", common_path); } else { log::debug!("Did not find a common path"); } if ds_dirs.len() == 0 { Err(FindDatasetError::NoDataset(common_path)) } else if ds_dirs.len() == 1 { let d = ds_dirs .iter() .next() .expect("ds_dirs has exactly one value"); if some_not_in_ds { Err(FindDatasetError::SomeNotInDataset) } else { Ok(d.to_path_buf()) } } else { // ds_dirs.len() > 1 Err(FindDatasetError::MultipleDatasets { datasets: ds_dirs.into_iter().collect(), some_paths_not_in_dataset: some_not_in_ds, }) // Note that multiple datasets might exist _and_ some paths may not be in them... } }