Refactor record() a bit to clearly state steps, with stubs
This commit is contained in:
parent
3ead31d56d
commit
5151703dcc
96
src/fs.rs
96
src/fs.rs
@ -284,12 +284,73 @@ fn insert_file_content_hashes(tx: &Transaction) -> Result<(), RecordError> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Find latest entries in filedir_version that are not deleted, and that do not appear in
|
||||||
|
/// current_files, but whose parents _do_ appear. Create new versions marking these as deleted.
|
||||||
|
fn find_deleted(tx: &Transaction, p: &Path, path_key: Hash256) -> Result<(), RecordError> {
|
||||||
|
// Do a CTE from filedir to get key and relpath for all files below p (inclusive)
|
||||||
|
|
||||||
|
// For files in filedir but not in current_files, insert entries into temp table deleted_files
|
||||||
|
// marking them as deleted.
|
||||||
|
|
||||||
|
log::warn!("find_deleted not yet implemented");
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Remove rows from deleted_files which were previously deleted in the database
|
||||||
|
fn prune_deleted(tx: &Transaction) -> Result<(), RecordError> {
|
||||||
|
// Join filedir_version to deleted_files to get latest deleted status, then drop
|
||||||
|
|
||||||
|
log::warn!("prune_deleted not yet implemented");
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Compute directory hashes in the current_files table
|
||||||
|
fn compute_current_directory_hashes(tx: &Transaction) -> Result<(), RecordError> {
|
||||||
|
// Extract files with a CTE on current_files, in alphabetical order. For each row, if it's not
|
||||||
|
// a directory compare parent column to see whether we've changed to a new parent, in which
|
||||||
|
// case we finalize this parent dir by recording its size and hash. Once a directory is
|
||||||
|
// finalized, we can update its row in current_files. The recorded time of a directory should
|
||||||
|
// be the max of all children recorded times.
|
||||||
|
let mut dirstack: Vec<(Hash256, usize, Context, f64)> = Vec::new();
|
||||||
|
|
||||||
|
// Find all entries in current_files that declare parents that are not None and are not in
|
||||||
|
// current_files. Each of these parents must be added with a new version. The content_hash must
|
||||||
|
// be derived from the existing and new file versions, which should be represented in
|
||||||
|
// filedir_version at this point. Add these in depth-first order.
|
||||||
|
|
||||||
|
log::warn!("update_current_directories not yet implemented");
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Transfer rows in current_files and deleted_files into new rows in filedir_version
|
||||||
|
fn persist_temp_tables(tx: &Transaction, task_uuid: Uuid, dataset_uuid: Uuid) -> Result<(), RecordError> {
|
||||||
|
tx.execute("
|
||||||
|
INSERT OR IGNORE INTO
|
||||||
|
filedir
|
||||||
|
SELECT
|
||||||
|
sha256, ?, name, parent
|
||||||
|
FROM current_files
|
||||||
|
",
|
||||||
|
(dataset_uuid.as_bytes(),),
|
||||||
|
)?;
|
||||||
|
tx.execute("
|
||||||
|
INSERT INTO
|
||||||
|
filedir_version
|
||||||
|
SELECT
|
||||||
|
version_uuid, sha256, recorded_time, filetype, FALSE, symlink_target, content_sha256, ?
|
||||||
|
FROM current_files
|
||||||
|
",
|
||||||
|
(task_uuid.as_bytes(),),
|
||||||
|
)?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
pub fn record(
|
pub fn record(
|
||||||
tx: &Transaction,
|
tx: &Transaction,
|
||||||
paths: &[PathBuf],
|
paths: &[PathBuf],
|
||||||
ds_root: &Path,
|
ds_root: &Path,
|
||||||
message: &str,
|
message: &str,
|
||||||
//task_uuid: Uuid,
|
task_uuid: Uuid,
|
||||||
) -> Result<(), RecordError> {
|
) -> Result<(), RecordError> {
|
||||||
log::info!(
|
log::info!(
|
||||||
"Recording path {:?} for dataset at {:?} with user-provided message \"{}\"",
|
"Recording path {:?} for dataset at {:?} with user-provided message \"{}\"",
|
||||||
@ -319,9 +380,7 @@ pub fn record(
|
|||||||
|
|
||||||
// This schema is like filedir joined with filedir_version
|
// This schema is like filedir joined with filedir_version
|
||||||
// TODO: revert this to a TEMP TABLE after it's debugged
|
// TODO: revert this to a TEMP TABLE after it's debugged
|
||||||
tx.execute("CREATE
|
tx.execute("CREATE TEMP TABLE current_files (
|
||||||
-- TEMP
|
|
||||||
TABLE current_files (
|
|
||||||
id INTEGER PRIMARY KEY NOT NULL, -- only used in this table
|
id INTEGER PRIMARY KEY NOT NULL, -- only used in this table
|
||||||
sha256 BLOB NOT NULL, -- will become the primary key on filedir
|
sha256 BLOB NOT NULL, -- will become the primary key on filedir
|
||||||
name TEXT NOT NULL, -- filename without path
|
name TEXT NOT NULL, -- filename without path
|
||||||
@ -366,30 +425,25 @@ pub fn record(
|
|||||||
prev_key = Hash256(hashbuf);
|
prev_key = Hash256(hashbuf);
|
||||||
}
|
}
|
||||||
|
|
||||||
walk_and_insert(tx, &p, prev_key)?; // SINGLE-THREADED
|
walk_and_insert(tx, &p, prev_key)?;
|
||||||
|
|
||||||
|
// Find deleted files ONLY under the paths we are given
|
||||||
|
find_deleted(tx, &p, prev_key)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
insert_file_content_hashes(tx)?; // MULTI-THREADED
|
// MULTI-THREADED hash computation of all found files
|
||||||
|
insert_file_content_hashes(tx)?;
|
||||||
|
|
||||||
// SINGLE-THREADED
|
// after we've inserted all paths, prune deleted files to prevent double-recording deletions
|
||||||
// Extract files from current_files, in insertion order. For each row, if it's not a
|
prune_deleted(tx)?;
|
||||||
// directory compare parent column to see whether we've changed to a new parent, in which
|
|
||||||
// case we finalize this parent dir by recording its size and hash. Once a directory is
|
|
||||||
// finalized, we can update its row in current_files. The recorded time of a directory
|
|
||||||
// should be the max of all children recorded times.
|
|
||||||
let mut dirstack: Vec<(Hash256, usize, Context, f64)> = Vec::new();
|
|
||||||
|
|
||||||
// Find latest entries in filedir_version that are not deleted, and that do not appear in
|
compute_current_directory_hashes(tx)?;
|
||||||
// current_files, but whose parents _do_ appear. Create new versions marking these as deleted.
|
|
||||||
|
|
||||||
// For each entry in current_files, create a new filedir_version.
|
persist_temp_tables(tx, task_uuid, u)?;
|
||||||
|
|
||||||
// Find all entries in current_files that declare parents that are not None and are not in
|
|
||||||
// current_files. Each of these parents must be added with a new version. The content_hash must
|
|
||||||
// be derived from the existing and new file versions, which should be represented in
|
|
||||||
// filedir_version at this point. Add these in depth-first order.
|
|
||||||
|
|
||||||
// Drop the temporary current_files table
|
// Drop the temporary current_files table
|
||||||
|
log::debug!("Dropping temp table current_files");
|
||||||
|
tx.execute("DROP TABLE current_files", ())?;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|||||||
@ -77,10 +77,10 @@ fn init_schema(conn: &mut Connection, name: &str) -> Uuid {
|
|||||||
|
|
||||||
fn do_record(conn: &mut Connection, message: &str, paths: &[PathBuf], dataset_path: &Path) -> () {
|
fn do_record(conn: &mut Connection, message: &str, paths: &[PathBuf], dataset_path: &Path) -> () {
|
||||||
if let Err(e) = nancy::program::with_program(conn, "RECORD", message, |prog| {
|
if let Err(e) = nancy::program::with_program(conn, "RECORD", message, |prog| {
|
||||||
prog.perform_task(&[], |_task| {
|
prog.perform_task(&[], |task| {
|
||||||
// Note that this may fail, in which case we should roll back only this program
|
// Note that this may fail, in which case we should roll back only this program
|
||||||
// but keep the dataset initialized.
|
// but keep the dataset initialized.
|
||||||
nancy::fs::record(prog.transaction, paths, &dataset_path, message)
|
nancy::fs::record(prog.transaction, paths, &dataset_path, message, task.key)
|
||||||
})
|
})
|
||||||
}) {
|
}) {
|
||||||
log::error!("Encountered error in RECORD program: {:?}", e);
|
log::error!("Encountered error in RECORD program: {:?}", e);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user