nancyrs/src/db.rs

//! # Database housekeeping methods
//!
//! This module mainly manages schema migrations; it does not encapsulate all database access.
//!

extern crate derive_more;
use derive_more::{Display, Error, From};

use log;
use once_cell::sync::Lazy;
use rusqlite;
use rusqlite::Connection;
use rusqlite_migration::{Error as RMError, Migrations, SchemaVersion, M};
use uuid;

use std::collections::HashSet;
use std::num::NonZeroUsize; // for describing schema versions
use std::path::PathBuf;

/// Ordered list of schema migrations, built lazily on first access.
///
/// Currently holds a single "up" migration whose SQL is embedded at compile time.
static MIGRATIONS: Lazy<Migrations<'static>> = Lazy::new(|| {
    let initial_schema = M::up(include_str!("migrations/20221024_initial_schema.sql"));
    Migrations::new(vec![initial_schema])
});
/// The current schema version as it will appear in the SQLite user_version
///
/// `schema_version` only accepts a database whose recorded version equals this value.
/// NOTE(review): presumably this has to stay in step with the number of entries in
/// `MIGRATIONS` — confirm whenever a migration is added.
pub static CURRENT_SCHEMA_VERSION: usize = 1;

/// Error type for checking schema version of a Connection
#[derive(Debug, Display)]
pub enum SchemaError {
    /// The migration library reported an error (for example while reading the current
    /// version); wraps the underlying `rusqlite_migration` error.
    CurrentVersionError(RMError),
    /// The database has no schema version recorded (`SchemaVersion::NoneSet`).
    NoSchemaVersionSet,
    /// The database is at a version known to the migration list, but that version differs
    /// from `CURRENT_SCHEMA_VERSION`. Carries the version that was found.
    OldSchema(NonZeroUsize),
    /// The database is at a version outside the known migration list
    /// (`SchemaVersion::Outside`). Carries the version that was found.
    OutsideSchema(NonZeroUsize),
}
impl From<RMError> for SchemaError {
    fn from(cve: RMError) -> Self {
        Self::CurrentVersionError(cve)
    }
}

/// Read the schema version of the connected database and check that it is current.
///
/// Returns `Ok(version)` only when the recorded version is known to the migration list and
/// equals `CURRENT_SCHEMA_VERSION`.
///
/// # Errors
///
/// * `SchemaError::NoSchemaVersionSet` when no version is recorded.
/// * `SchemaError::OldSchema` when a known version other than the current one is recorded.
/// * `SchemaError::OutsideSchema` when the recorded version is outside the migration list.
/// * `SchemaError::CurrentVersionError` when the version could not be read.
pub fn schema_version(conn: &Connection) -> Result<NonZeroUsize, SchemaError> {
    let recorded = MIGRATIONS.current_version(conn)?;
    match recorded {
        // A known version is set and it is the current one.
        SchemaVersion::Inside(version) if version.get() == CURRENT_SCHEMA_VERSION => Ok(version),
        // A known version is set, but it is not the current one.
        SchemaVersion::Inside(stale) => Err(SchemaError::OldSchema(stale)),
        SchemaVersion::Outside(unknown) => Err(SchemaError::OutsideSchema(unknown)),
        SchemaVersion::NoneSet => Err(SchemaError::NoSchemaVersionSet),
    }
}

/// Bring an empty database up to the latest schema and tag it with a fresh dataset UUID.
///
/// All migrations are applied first. A newly generated random (v4) UUID is then stored, in its
/// string form, in `local_metadata` under the `dataset_uuid` key. The generated UUID is
/// returned.
///
/// # Errors
///
/// Returns the migration library's error type if applying the migrations or inserting the
/// UUID row fails.
pub fn init(conn: &mut Connection) -> Result<uuid::Uuid, RMError> {
    MIGRATIONS.to_latest(conn)?;

    let dataset_uuid = uuid::Uuid::new_v4();
    let uuid_text = dataset_uuid.to_string();
    conn.execute(
        "INSERT INTO local_metadata (key, value) VALUES ('dataset_uuid', ?)",
        [uuid_text],
    )?;

    Ok(dataset_uuid)
}

/// Outcome reported by `ensure_schema`.
#[derive(Debug)]
pub struct SchemaUpdateResult {
    /// Schema version the database had before the check, when one is reported.
    /// NOTE(review): meaning inferred from the field name — confirm against `ensure_schema`.
    pub old_version: Option<NonZeroUsize>,
    /// Schema version this build expects (built from `CURRENT_SCHEMA_VERSION`).
    pub current_version: NonZeroUsize,
    /// NOTE(review): presumably whether the call changed the schema — confirm.
    pub updated: bool,
}
/// Ensure that the schema in conn is current.
///
/// The recorded version is checked with [`schema_version`]. On success the result reports the
/// version that was found, the version this build expects, and whether the two differ.
///
/// No migration is performed here. A database that is not already current is reported through
/// the error path, so on success `updated` is `false`.
///
/// # Errors
///
/// Propagates every [`SchemaError`] returned by [`schema_version`]: missing version, old
/// version, version outside the known migrations, or a failure to read the version.
///
/// # Panics
///
/// Panics if `CURRENT_SCHEMA_VERSION` is zero, which would be a programming error.
pub fn ensure_schema(conn: &mut Connection) -> Result<SchemaUpdateResult, SchemaError> {
    let old_version = schema_version(conn)?;
    // Checked construction: no `unsafe` is needed to build the expected version.
    let current_version = NonZeroUsize::new(CURRENT_SCHEMA_VERSION)
        .expect("CURRENT_SCHEMA_VERSION must be non-zero");
    Ok(SchemaUpdateResult {
        // Report the version that was actually found instead of discarding it.
        old_version: Some(old_version),
        current_version,
        // An update happened only if the version found differs from the expected one.
        updated: old_version != current_version,
    })
}

/// Error type for looking up the dataset UUID stored in the database.
#[derive(Debug, Display, Error, From)]
pub enum LocalUuidError {
    /// A `rusqlite` error. Despite the name, `local_uuid` converts errors from both preparing
    /// the statement and running the query into this variant.
    StatementPrepareError(rusqlite::Error),
    /// An error reported by the `uuid` crate.
    /// NOTE(review): presumably meant for a stored value that is not a valid UUID — confirm.
    UuidError(uuid::Error),
}
/// Find the UUID of the dataset representing the directory containing the connected 'nancy.db'
///
/// The UUID is read from the `dataset_uuid` row of `local_metadata`. [`init`] writes that value
/// in its textual form, so it is read back as text and parsed, rather than decoded as a
/// binary UUID column.
///
/// # Errors
///
/// * [`LocalUuidError::StatementPrepareError`] if the query cannot be prepared or run, or if
///   no `dataset_uuid` row exists.
/// * [`LocalUuidError::UuidError`] if the stored text is not a valid UUID.
pub fn local_uuid(conn: &Connection) -> Result<uuid::Uuid, LocalUuidError> {
    let stored: String = conn
        .prepare("SELECT value FROM local_metadata WHERE key = 'dataset_uuid' LIMIT 1")?
        .query_row([], |row| row.get(0))?;
    // The value was stored via `Uuid::to_string()`, so parse the text form.
    let parsed = uuid::Uuid::parse_str(&stored)?;
    Ok(parsed)
}

/// Reasons why `find_dataset_dir` could not settle on exactly one dataset directory.
#[derive(Debug)]
pub enum FindDatasetError {
    /// An empty list of paths was provided
    NoPathsProvided,
    /// An I/O error occurred while canonicalizing one of the search paths
    PathError(std::io::Error),
    /// None of the search paths belongs to an existing dataset. Returned path is the nearest
    /// common ancestor of all searched paths (on same filesystem)
    NoDataset(PathBuf),
    /// Exactly one dataset was found, but at least one search path does not reside in any
    /// existing dataset
    SomeNotInDataset,
    /// Searched paths belong to multiple existing datasets (or some may belong to None)
    MultipleDatasets {
        /// The dataset directories that were found
        datasets: Vec<PathBuf>,
        /// Whether at least one search path was in no dataset at all
        some_paths_not_in_dataset: bool,
    },
    // TODO: REMOVE THIS
    /// Placeholder variant; `find_dataset_dir` never returns it.
    NotImplemented,
}
impl From<std::io::Error> for FindDatasetError {
    fn from(e: std::io::Error) -> FindDatasetError {
        FindDatasetError::PathError(e)
    }
}

/// Locate the single dataset directory that contains every one of the given search paths.
///
/// Each path is canonicalized, then its ancestors (starting with the path itself) are searched
/// for a `nancy.db` file. The nearest one holding such a file is that path's dataset directory.
///
/// Returns `Ok(path)` if `path` is the only dataset found and it contains all given search paths.
///
/// # Errors
///
/// * [`FindDatasetError::NoPathsProvided`] if `paths` is empty.
/// * [`FindDatasetError::PathError`] if a search path cannot be canonicalized.
/// * [`FindDatasetError::NoDataset`] if no search path lies in a dataset. It carries the longest
///   component-wise prefix shared by all canonicalized search paths (empty if they share none).
/// * [`FindDatasetError::SomeNotInDataset`] if exactly one dataset was found but at least one
///   search path lies in no dataset.
/// * [`FindDatasetError::MultipleDatasets`] if more than one dataset was found. The list is
///   sorted so the result is deterministic.
pub fn find_dataset_dir(paths: &[PathBuf]) -> Result<PathBuf, FindDatasetError> {
    if paths.is_empty() {
        return Err(FindDatasetError::NoPathsProvided);
    }

    let mut ds_dirs: HashSet<PathBuf> = HashSet::new();
    let mut some_not_in_ds = false; // whether any paths are not in any dataset
    let mut common_path: Option<PathBuf> = None;

    for p in paths {
        // Each path is canonicalized exactly once.
        let pcanon = p.canonicalize()?;
        log::debug!("Processing search path: {:?}", pcanon);

        match nearest_dataset_dir(&pcanon) {
            Some(dir) => {
                log::debug!("Found nancy.db for {:?} in ancestor directory {:?}", p, dir);
                ds_dirs.insert(dir.to_path_buf());
            }
            None => some_not_in_ds = true,
        }

        // The first path seeds the common prefix; later paths can only shorten it.
        common_path = Some(match common_path.take() {
            Some(prefix) => common_ancestor(&prefix, &pcanon),
            None => pcanon,
        });
    }
    // `paths` is non-empty, so the prefix has been seeded; the default is never used.
    let common_path = common_path.unwrap_or_default();
    log::debug!("Found ds_dirs: {:?}", ds_dirs);

    // An empty prefix means the paths share not even a root component.
    if common_path.as_os_str().is_empty() {
        log::debug!("Did not find a common path");
    } else {
        log::debug!("Found common search path: {:?}", common_path);
    }

    let mut datasets: Vec<PathBuf> = ds_dirs.into_iter().collect();
    if datasets.len() > 1 {
        // Note that multiple datasets might exist _and_ some paths may not be in them...
        datasets.sort();
        return Err(FindDatasetError::MultipleDatasets {
            datasets,
            some_paths_not_in_dataset: some_not_in_ds,
        });
    }

    match datasets.pop() {
        None => Err(FindDatasetError::NoDataset(common_path)),
        Some(_) if some_not_in_ds => Err(FindDatasetError::SomeNotInDataset),
        Some(dir) => Ok(dir),
    }
}

/// Return the nearest ancestor of `path` (`path` itself included) that contains a `nancy.db`.
fn nearest_dataset_dir(path: &std::path::Path) -> Option<&std::path::Path> {
    // NOTE: .ancestors() returns self first
    path.ancestors().find(|dir| {
        log::debug!("Looking for nancy.db for {:?} in {:?}", path, dir);
        dir.join("nancy.db").exists()
    })
}

/// Return the longest leading run of path components shared by `a` and `b`.
fn common_ancestor(a: &std::path::Path, b: &std::path::Path) -> PathBuf {
    a.components()
        .zip(b.components())
        .take_while(|(l, r)| l == r)
        .map(|(l, _)| l)
        .collect()
}