Start on find_dataset_dir. Format with cargo fmt
This commit is contained in:
parent
3556590f7b
commit
a38bc78093
@ -35,6 +35,7 @@
|
||||
inputsFrom = builtins.attrValues self.packages.${system};
|
||||
buildInputs = [
|
||||
cargo
|
||||
clippy
|
||||
rust-analyzer clippy rustfmt # linting
|
||||
sqlite
|
||||
openssl pkgconfig # for openssl-sys dep that gets pulled in
|
||||
|
||||
78
src/db.rs
78
src/db.rs
@ -9,19 +9,17 @@ use derive_more::{Display,Error,From};
|
||||
use once_cell::sync::Lazy;
|
||||
use rusqlite;
|
||||
use rusqlite::Connection;
|
||||
use rusqlite_migration::{Error as RMError, Migrations, M, SchemaVersion};
|
||||
use rusqlite_migration::{Error as RMError, Migrations, SchemaVersion, M};
|
||||
use uuid;
|
||||
|
||||
use std::num::{NonZeroUsize}; // for describing schema versions
|
||||
use std::num::NonZeroUsize; // for describing schema versions
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
// NOTE: apply pragmas outside of migrations, using e.g.
|
||||
// conn.pragma_update(None, "foreign_keys", &"ON").unwrap();
|
||||
static MIGRATIONS: Lazy<Migrations<'static>> = Lazy::new(||
|
||||
Migrations::new(
|
||||
vec![
|
||||
M::up(include_str!("migrations/20221024_initial_schema.sql")),
|
||||
]
|
||||
));
|
||||
static MIGRATIONS: Lazy<Migrations<'static>> = Lazy::new(|| {
|
||||
Migrations::new(vec![M::up(include_str!(
|
||||
"migrations/20221024_initial_schema.sql"
|
||||
))])
|
||||
});
|
||||
/// The current schema version as it will appear in the SQLite user_version
|
||||
pub static CURRENT_SCHEMA_VERSION: usize = 1;
|
||||
|
||||
@ -43,17 +41,19 @@ impl From<RMError> for SchemaError {
|
||||
pub fn schema_version(conn: &Connection) -> Result<NonZeroUsize, SchemaError> {
|
||||
match MIGRATIONS.current_version(conn)? {
|
||||
SchemaVersion::NoneSet => Err(SchemaError::NoSchemaVersionSet),
|
||||
SchemaVersion::Inside(v) => // A known version is set. Check if it is current
|
||||
SchemaVersion::Inside(v) =>
|
||||
// A known version is set. Check if it is current
|
||||
{
|
||||
if v.get() == CURRENT_SCHEMA_VERSION {
|
||||
Ok(v)
|
||||
} else {
|
||||
Err(SchemaError::OldSchema(v))
|
||||
},
|
||||
}
|
||||
}
|
||||
SchemaVersion::Outside(v) => Err(SchemaError::OutsideSchema(v)),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Initialize the database starting with an empty schema.
|
||||
///
|
||||
/// This function initializes a database to the latest schema, and also generates a new random UUID
|
||||
@ -68,7 +68,6 @@ pub fn init(conn: &mut Connection) -> Result<uuid::Uuid, RMError> {
|
||||
Ok(u)
|
||||
}
|
||||
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct SchemaUpdateResult {
|
||||
pub old_version: Option<NonZeroUsize>,
|
||||
@ -80,8 +79,10 @@ pub struct SchemaUpdateResult {
|
||||
/// If migrate is true, migrate the schema and return.
|
||||
///
|
||||
/// If migrate is false, throw an Error if schema is not current.
|
||||
pub fn ensure_schema(conn: &mut Connection, migrate: bool)
|
||||
-> Result<SchemaUpdateResult, SchemaError> {
|
||||
pub fn ensure_schema(
|
||||
conn: &mut Connection,
|
||||
migrate: bool,
|
||||
) -> Result<SchemaUpdateResult, SchemaError> {
|
||||
let old_version = schema_version(conn)?;
|
||||
let current_version = unsafe { NonZeroUsize::new_unchecked(CURRENT_SCHEMA_VERSION) };
|
||||
Ok(SchemaUpdateResult {
|
||||
@ -98,7 +99,50 @@ pub enum LocalUuidError {
|
||||
}
|
||||
/// Find the UUID of the dataset representing the directory containing the connected 'nancy.db'
|
||||
pub fn local_uuid(conn: &Connection) -> Result<uuid::Uuid, LocalUuidError> {
|
||||
let uuid = conn.prepare("SELECT value FROM local_metadata WHERE key = 'dataset_uuid' LIMIT 1")?
|
||||
let uuid = conn
|
||||
.prepare("SELECT value FROM local_metadata WHERE key = 'dataset_uuid' LIMIT 1")?
|
||||
.query_row([], |row| row.get(0))?;
|
||||
Ok(uuid)
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum FindDatasetResult {
|
||||
/// All searched paths belong to an existing dataset at this directory
|
||||
ExistingDataset(PathBuf),
|
||||
/// Searched paths belong to multiple existing datasets (or some may belong to None)
|
||||
MultipleDatasets(Vec<Option<PathBuf>>),
|
||||
/// None of the search paths belongs to an existing dataset. Returned path is the nearest
|
||||
/// common ancestor of all searched paths (on same filesystem)
|
||||
NoDataset(PathBuf),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum FindDatasetError {
|
||||
/// The provided paths lie on different filesystems, so no common ancestor can be defined
|
||||
DifferentFilesystems,
|
||||
/// An empty list of paths was provided
|
||||
NoPathsProvided,
|
||||
}
|
||||
|
||||
/// Given a collection of paths, find a common directory containing them
|
||||
pub fn find_dataset_dir(paths: &[PathBuf]) -> Result<FindDatasetResult, FindDatasetError> {
|
||||
match paths.get(0) {
|
||||
None => Err(FindDatasetError::NoPathsProvided),
|
||||
Some(p) => {
|
||||
let mut ds_dirs: Vec<PathBuf> = Vec::new();
|
||||
let mut ancestor = p;
|
||||
|
||||
// for each canonicalized path from paths
|
||||
// look at parent directories until either:
|
||||
// - parent dir found in ds_dirs
|
||||
// - nancy.db found
|
||||
// - reached root of filesystem (no dataset found)
|
||||
// also, until we've found a common ancestor, take parent of ancestor.
|
||||
for p in paths {
|
||||
let c = p.canonicalize();
|
||||
}
|
||||
|
||||
Ok(FindDatasetResult::ExistingDataset(PathBuf::from(".")))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,8 +1,7 @@
|
||||
//use jwalk;
|
||||
use log;
|
||||
use sha2::{Sha256, Digest, digest::FixedOutput};
|
||||
use std::path::{Path};
|
||||
|
||||
use sha2::{digest::FixedOutput, Digest, Sha256};
|
||||
use std::path::Path;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum RecordError {
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
use std::io;
|
||||
|
||||
use rusqlite::{Connection};
|
||||
use uuid::{Uuid};
|
||||
use rusqlite::Connection;
|
||||
use uuid::Uuid;
|
||||
|
||||
//pub mod data;
|
||||
pub mod db;
|
||||
|
||||
128
src/main.rs
128
src/main.rs
@ -1,12 +1,8 @@
|
||||
use clap::{Parser, Subcommand};
|
||||
use env_logger;
|
||||
use log;
|
||||
use rusqlite::{Connection};
|
||||
use rusqlite::{Connection, OpenFlags};
|
||||
|
||||
use nancy;
|
||||
|
||||
use std::{process};
|
||||
use std::path::{PathBuf};
|
||||
use std::path::PathBuf;
|
||||
use std::process;
|
||||
|
||||
// Composable provenance tracking for scientific data analysis
|
||||
#[derive(Parser)]
|
||||
@ -24,7 +20,7 @@ enum Commands {
|
||||
/// If not within an existing dataset, create one
|
||||
#[arg(short, long)]
|
||||
initialize: bool,
|
||||
/// Use
|
||||
/// A short descriptive message for this recording, i.e. "Re-run with lr=1e-3"
|
||||
#[arg(short, long)]
|
||||
message: String,
|
||||
#[arg()]
|
||||
@ -49,54 +45,120 @@ fn main() {
|
||||
match nancy::print_uuid() {
|
||||
Ok(_) => {
|
||||
println!("OK");
|
||||
},
|
||||
}
|
||||
Err(e) => {
|
||||
println!("SQLite error: {}", e);
|
||||
},
|
||||
}
|
||||
};
|
||||
}
|
||||
Some(Commands::Record { initialize, message, record_paths }) => {
|
||||
Some(Commands::Record {
|
||||
initialize,
|
||||
message,
|
||||
record_paths,
|
||||
}) => {
|
||||
// Determine dataset dir (ds_dir)
|
||||
// If no paths given, use ["."] for the following steps.
|
||||
// First, for each requested record_path, find whether it exists within a dataset dir
|
||||
// Ensure that all paths are in the same dataset dir, or none belong to a dataset dir
|
||||
// If no dataset dirs found, get top directory containing them all and set this as ds_dir
|
||||
let mut conn = Connection::open_in_memory().expect("Could not create in-memory db");
|
||||
// If initializing:
|
||||
match nancy::db::init(&mut conn) {
|
||||
let mut conn = match nancy::db::find_dataset_dir(record_paths) {
|
||||
Err(e) => {
|
||||
log::error!("Encountered error in initializing schema: {:?}", e);
|
||||
log::error!("Could not determine dataset directory: {:?}", e);
|
||||
process::exit(1);
|
||||
}
|
||||
Ok(res) => {
|
||||
match res {
|
||||
nancy::db::FindDatasetResult::MultipleDatasets(_) => {
|
||||
log::error!("Provided paths belong to multiple datasets.");
|
||||
process::exit(1);
|
||||
}
|
||||
nancy::db::FindDatasetResult::NoDataset(path) => {
|
||||
// initialize
|
||||
log::info!("No dataset at or above nearest ancestor path: {:?}", path);
|
||||
let dbpath = &path.join("nancy.db");
|
||||
let mut c = match Connection::open(dbpath) {
|
||||
Err(e) => {
|
||||
log::error!(
|
||||
"Could not open new SQLite database at {dbpath:?}."
|
||||
);
|
||||
process::exit(1);
|
||||
}
|
||||
Ok(cc) => cc,
|
||||
};
|
||||
c.pragma_update(None, "foreign_keys", &"ON").unwrap();
|
||||
match nancy::db::init(&mut c) {
|
||||
Err(e) => {
|
||||
log::error!(
|
||||
"Encountered error in initializing schema: {:?}",
|
||||
e
|
||||
);
|
||||
process::exit(1);
|
||||
}
|
||||
Ok(dataset_uuid) => {
|
||||
log::trace!("Init OK");
|
||||
log::info!("Dataset UUID is {dataset_uuid}");
|
||||
// Run an empty program so that the dataset log reflects when it was
|
||||
// initialized
|
||||
nancy::program::with_program(
|
||||
&mut conn,
|
||||
&mut c,
|
||||
"INIT",
|
||||
"Initialize dataset",
|
||||
|prog| {
|
||||
|_| {
|
||||
let okres: Result<(), ()> = Ok(());
|
||||
okres
|
||||
}
|
||||
).expect("Empty program should not throw error");
|
||||
},
|
||||
)
|
||||
.expect("Empty program should not throw error");
|
||||
}
|
||||
}
|
||||
match nancy::program::with_program(&mut conn, "RECORD", message, |prog| {
|
||||
c
|
||||
}
|
||||
nancy::db::FindDatasetResult::ExistingDataset(path) => {
|
||||
// existing
|
||||
log::info!("Found existing dataset at path: {:?}", path);
|
||||
let dbpath = &path.join("nancy.db");
|
||||
// open with flags to prevent creating when we believe the db exists
|
||||
let mut c = match Connection::open_with_flags(
|
||||
dbpath,
|
||||
OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_NO_MUTEX,
|
||||
) {
|
||||
Err(e) => {
|
||||
log::error!(
|
||||
"Could not open existing SQLite database at {dbpath:?}."
|
||||
);
|
||||
process::exit(1);
|
||||
}
|
||||
Ok(cc) => cc,
|
||||
};
|
||||
c.pragma_update(None, "foreign_keys", &"ON").unwrap();
|
||||
match nancy::db::ensure_schema(&mut c, true) {
|
||||
Err(e) => {
|
||||
log::error!("Error ensuring schema: {}", e);
|
||||
process::exit(1);
|
||||
}
|
||||
Ok(update_result) => {
|
||||
log::debug!("Schema ensured: {:?}", update_result);
|
||||
}
|
||||
}
|
||||
c
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
// If no paths given, use ["."] for the following steps.
|
||||
// First, for each requested record_path, find whether it exists within a dataset dir
|
||||
// Ensure that all paths are in the same dataset dir, or none belong to a dataset dir
|
||||
// If no dataset dirs found, get top directory containing them all and set this as ds_dir
|
||||
let mut conn = Connection::open_in_memory().expect("Could not create in-memory db");
|
||||
|
||||
// If initializing:
|
||||
// If not initializing, ensure the schema is up to date
|
||||
|
||||
if let Err(e) = nancy::program::with_program(&mut conn, "RECORD", message, |prog| {
|
||||
let dataset_path = PathBuf::from(".");
|
||||
// Note that this may fail, in which case we should roll back only this program
|
||||
// but keep the dataset initialized.
|
||||
nancy::fs::record(
|
||||
&dataset_path,
|
||||
message,
|
||||
)
|
||||
nancy::fs::record(&dataset_path, message)
|
||||
}) {
|
||||
Err(e) => {
|
||||
log::error!("Encountered error in RECORD program: {:?}", e);
|
||||
process::exit(3);
|
||||
},
|
||||
Ok(()) => (),
|
||||
}
|
||||
process::exit(1);
|
||||
};
|
||||
}
|
||||
Some(Commands::Status {}) => {
|
||||
println!("status not yet implemented");
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
use log;
|
||||
use rusqlite::{Connection, Error as RSQLError};
|
||||
extern crate derive_more;
|
||||
use derive_more::{From};
|
||||
use derive_more::From;
|
||||
|
||||
use std::time::{Instant, SystemTime};
|
||||
|
||||
@ -18,8 +18,7 @@ pub struct TaskInput<T> {
|
||||
datum: T,
|
||||
}
|
||||
|
||||
pub struct Program {
|
||||
}
|
||||
pub struct Program {}
|
||||
impl Program {
|
||||
//fn new(conn: &Connection) -> Self {
|
||||
//Program {
|
||||
@ -36,7 +35,9 @@ pub enum ProgramError<E> {
|
||||
CommitFailed(RSQLError),
|
||||
}
|
||||
impl<E> From<E> for ProgramError<E> {
|
||||
fn from(e: E) -> ProgramError<E> { ProgramError::ProgramFailed(e) }
|
||||
fn from(e: E) -> ProgramError<E> {
|
||||
ProgramError::ProgramFailed(e)
|
||||
}
|
||||
}
|
||||
|
||||
/// Run a closure as a program, within a database transaction
|
||||
@ -62,10 +63,7 @@ pub fn with_program<E, F : FnOnce(Program) -> Result<(), E>>(
|
||||
log::debug!(target: log_target, "Starting timers");
|
||||
let start_st = SystemTime::now();
|
||||
let start = Instant::now();
|
||||
let start_stamp = timing::persistent_stamp(
|
||||
Instant::now(),
|
||||
start,
|
||||
start_st);
|
||||
let start_stamp = timing::persistent_stamp(Instant::now(), start, start_st);
|
||||
|
||||
// Instantiate Program
|
||||
// (record name and message for new program, get program ID)
|
||||
@ -77,14 +75,23 @@ pub fn with_program<E, F : FnOnce(Program) -> Result<(), E>>(
|
||||
// end timer
|
||||
// Commit scope for RECORD program
|
||||
let end = Instant::now();
|
||||
log::debug!(target: log_target, "Start time: {:?} f64 timestamp={:#?}", start_st, start_stamp);
|
||||
log::debug!(target: log_target, "Elapsed: {} seconds", (end - start).as_secs_f64());
|
||||
log::debug!(
|
||||
target: log_target,
|
||||
"Start time: {:?} f64 timestamp={:#?}",
|
||||
start_st,
|
||||
start_stamp
|
||||
);
|
||||
log::debug!(
|
||||
target: log_target,
|
||||
"Elapsed: {} seconds",
|
||||
(end - start).as_secs_f64()
|
||||
);
|
||||
// record end time
|
||||
// commit transaction
|
||||
match tx.commit() {
|
||||
Err(e) => Err(ProgramError::CommitFailed(e)),
|
||||
Ok(_) => Ok(()),
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -3,7 +3,8 @@ use std::time::{Instant, SystemTime, UNIX_EPOCH};
|
||||
/// This converts an Instant to a double-precision float, which as of October 2022 has roughly 300
|
||||
/// ns of resolution.
|
||||
pub fn persistent_stamp(instant: Instant, base_instant: Instant, base_systime: SystemTime) -> f64 {
|
||||
base_systime.checked_add(instant - base_instant)
|
||||
base_systime
|
||||
.checked_add(instant - base_instant)
|
||||
.expect("Bounds error in std::time::SystemTime::checked_add")
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.expect("Clock may have gone backward")
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user