2025-06-04 13:25:00 +00:00
|
|
|
mod checked;
|
2025-06-10 23:39:08 +00:00
|
|
|
mod chunk;
|
2025-06-04 13:25:00 +00:00
|
|
|
mod uploading;
|
2025-05-27 00:42:43 +00:00
|
|
|
|
2025-06-26 09:56:29 +00:00
|
|
|
use std::{ffi::OsStr, fs, io::Read, path::Path};
|
2025-06-24 19:34:11 +00:00
|
|
|
|
2025-07-03 16:20:39 +00:00
|
|
|
use base64::prelude::{BASE64_STANDARD_NO_PAD as BASE64, Engine};
|
2025-06-24 19:34:11 +00:00
|
|
|
use blake2b_simd::Params as Blake2b;
|
2025-06-06 23:48:10 +00:00
|
|
|
|
2025-06-10 18:20:52 +00:00
|
|
|
pub use checked::Checked;
|
2025-06-10 23:39:08 +00:00
|
|
|
pub use chunk::Chunk;
|
2025-06-26 09:56:29 +00:00
|
|
|
use log::{debug, warn};
|
2025-06-10 18:20:52 +00:00
|
|
|
pub use uploading::Uploading;
|
2025-05-27 00:42:43 +00:00
|
|
|
|
2025-07-03 18:27:41 +00:00
|
|
|
/// size of the read buffer used while hashing, i.e. the maximum number of bytes hashed per
/// step: 4 MiB (4 * 2^20 bytes)
const HASH_CHUNK_SIZE: usize = 4 << 20;
|
|
|
|
|
|
|
|
|
|
/// compute hash for a file given its path.
|
2025-07-03 19:13:54 +00:00
|
|
|
/// Hash function: `BLAKE2b`, 512 bit
|
2025-07-03 18:27:41 +00:00
|
|
|
///
|
|
|
|
|
/// # Params
|
|
|
|
|
///
|
|
|
|
|
/// - `path` to the file to hash
|
|
|
|
|
/// - `size` of that file
|
|
|
|
|
/// - `on_progress` will be called for each processed chunk (max. `HASH_CHUNK_SIZE`)
|
|
|
|
|
///
|
|
|
|
|
/// # Errors
|
|
|
|
|
///
|
2025-07-03 22:46:19 +00:00
|
|
|
/// - from `fs::File::{open, read}`
|
2025-07-03 18:27:41 +00:00
|
|
|
/// - Mismatch if given `size` does not match the file's size
|
2025-07-03 15:39:29 +00:00
|
|
|
fn compute_hash(path: &Path, size: u64, mut on_progress: impl FnMut(u64)) -> crate::Result<String> {
|
2025-06-24 19:34:11 +00:00
|
|
|
let mut file = fs::File::open(path)?;
|
2025-07-03 15:39:29 +00:00
|
|
|
|
|
|
|
|
// Blake2b-512 hasher (64 * 8 bit)
|
2025-06-24 19:34:11 +00:00
|
|
|
let mut hasher = Blake2b::new().hash_length(64).to_state();
|
|
|
|
|
|
2025-07-03 18:27:41 +00:00
|
|
|
// buffer
|
|
|
|
|
let mut buffer = vec![0; HASH_CHUNK_SIZE];
|
2025-06-24 19:34:11 +00:00
|
|
|
let mut bytes_read = 0;
|
|
|
|
|
|
|
|
|
|
loop {
|
2025-07-03 18:27:41 +00:00
|
|
|
let n = file.read(&mut buffer)?;
|
2025-06-24 19:34:11 +00:00
|
|
|
if n == 0 {
|
|
|
|
|
break;
|
|
|
|
|
}
|
2025-07-03 18:27:41 +00:00
|
|
|
hasher.update(&buffer[..n]);
|
2025-06-24 19:34:11 +00:00
|
|
|
|
2025-07-03 15:39:29 +00:00
|
|
|
// `buf` size must be < 2 EiB
|
2025-06-24 19:34:11 +00:00
|
|
|
bytes_read += n as u64;
|
|
|
|
|
on_progress(n as u64);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if bytes_read != size {
|
2025-07-06 00:01:46 +00:00
|
|
|
return crate::Error::mismatch(size, bytes_read);
|
2025-06-24 19:34:11 +00:00
|
|
|
}
|
|
|
|
|
|
2025-07-03 14:20:30 +00:00
|
|
|
let result = BASE64.encode(hasher.finalize());
|
2025-06-25 10:44:36 +00:00
|
|
|
debug!("hashed {:?}: {result:?}", path.display());
|
|
|
|
|
Ok(result)
|
2025-06-24 19:34:11 +00:00
|
|
|
}
|
|
|
|
|
|
2025-07-03 18:27:41 +00:00
|
|
|
/// check hash for a file given its path, return Ok(()) on success
|
|
|
|
|
///
|
|
|
|
|
/// # Params
|
|
|
|
|
///
|
2025-07-03 22:46:19 +00:00
|
|
|
/// - everything from `compute_hash`
|
|
|
|
|
/// - optionally, known `hash`
|
2025-07-03 18:27:41 +00:00
|
|
|
///
|
|
|
|
|
/// # Errors
|
|
|
|
|
///
|
|
|
|
|
/// - from `file::compute_hash`
|
|
|
|
|
/// - Mismatch if `hash` is `None`
|
|
|
|
|
/// - Mismatch if given `hash` does not match the computed hash
|
2025-07-03 15:39:29 +00:00
|
|
|
fn check_hash(
|
2025-06-25 10:44:36 +00:00
|
|
|
path: &Path,
|
2025-06-24 19:34:11 +00:00
|
|
|
size: u64,
|
2025-07-03 16:18:46 +00:00
|
|
|
hash: Option<&str>,
|
2025-07-03 15:39:29 +00:00
|
|
|
on_progress: impl FnMut(u64),
|
2025-06-27 01:55:43 +00:00
|
|
|
) -> crate::Result<()> {
|
2025-07-03 18:27:41 +00:00
|
|
|
// check if hash is None
|
2025-06-26 09:56:29 +00:00
|
|
|
let Some(expected) = hash else {
|
2025-07-06 00:01:46 +00:00
|
|
|
return crate::Error::mismatch("hash", path.display());
|
2025-06-25 10:44:36 +00:00
|
|
|
};
|
2025-06-24 19:34:11 +00:00
|
|
|
|
2025-07-03 18:27:41 +00:00
|
|
|
// compute and check new hash
|
2025-07-03 15:39:29 +00:00
|
|
|
let actual = &compute_hash(path, size, on_progress)?;
|
2025-06-26 09:56:29 +00:00
|
|
|
|
|
|
|
|
if expected == actual {
|
|
|
|
|
debug!("hash matches {expected:?}");
|
|
|
|
|
Ok(())
|
|
|
|
|
} else {
|
|
|
|
|
warn!("hash mismatch for file {:?}", path.display());
|
2025-07-06 00:01:46 +00:00
|
|
|
crate::Error::mismatch(expected, actual)
|
2025-06-26 09:56:29 +00:00
|
|
|
}
|
2025-06-24 19:34:11 +00:00
|
|
|
}
|
|
|
|
|
|
2025-07-03 18:35:50 +00:00
|
|
|
pub trait FileTrait {
|
2025-06-06 23:48:10 +00:00
|
|
|
/// extract the filename part of a `Path` reference
|
|
|
|
|
///
|
|
|
|
|
/// # Panics
|
|
|
|
|
///
|
|
|
|
|
/// Expects `path::Path::file_name` and `ffi::OsStr::to_str` to succeed on the given path
|
2025-07-03 18:35:50 +00:00
|
|
|
fn extract_file_name(p: &Path) -> &str {
|
2025-06-06 23:48:10 +00:00
|
|
|
p.file_name()
|
|
|
|
|
.and_then(OsStr::to_str)
|
|
|
|
|
.expect("bad file name")
|
|
|
|
|
}
|
|
|
|
|
|
2025-06-15 00:44:28 +00:00
|
|
|
/// get a reference to the file's name
|
2025-07-03 18:27:41 +00:00
|
|
|
///
|
|
|
|
|
/// Uses `file::FileTrait::extract_file_name`, which may **panic**!
|
2025-07-03 18:35:50 +00:00
|
|
|
fn get_name(&self) -> &str;
|
2025-06-06 23:48:10 +00:00
|
|
|
|
2025-06-15 00:44:28 +00:00
|
|
|
/// get the file's size
|
2025-06-06 23:48:10 +00:00
|
|
|
fn get_size(&self) -> u64;
|
2025-06-24 19:34:11 +00:00
|
|
|
|
2025-07-03 18:27:41 +00:00
|
|
|
/// check this file's hash, return Ok(()) on success
|
|
|
|
|
///
|
|
|
|
|
/// # Errors
|
|
|
|
|
///
|
|
|
|
|
/// - from `file::check_hash`
|
|
|
|
|
fn check_hash(&self, on_progress: impl FnMut(u64)) -> crate::Result<()>;
|
2025-06-06 23:48:10 +00:00
|
|
|
}
|
2025-07-03 15:39:29 +00:00
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_util::{
        create_file,
        data::{DATA_LENGTHS_BAD, HASHES_STD_BAD, HASHES_STD_GOOD, cases, cases_with},
    };

    /// `compute_hash` must produce the listed hash and report every byte via the callback
    #[test]
    fn compute_hash_as_expected() {
        for (content, size, expected_hash) in cases_with(HASHES_STD_GOOD) {
            let file = create_file(content);

            // sum of all progress updates sent by `compute_hash`
            let mut progress_sum = 0_u64;

            let hash = compute_hash(file.path(), size, |n| progress_sum += n)
                .expect("hash should succeed");

            assert_eq!(hash, expected_hash);
            assert_eq!(progress_sum, size);
        }
    }

    /// a wrong `size` must surface as a mismatch from both `compute_hash` and `check_hash`
    #[test]
    fn hash_size_mismatch() {
        for (content, good_size, bad_size) in cases_with(DATA_LENGTHS_BAD) {
            let file = create_file(content);

            // directly through `compute_hash`
            let compute_err = compute_hash(file.path(), bad_size, drop)
                .expect_err("compute_hash should report a mismatch");
            assert!(compute_err.is_mismatch(bad_size.to_string(), good_size.to_string()));

            // indirectly through `check_hash`
            let check_err = check_hash(file.path(), bad_size, Some("foobar"), drop)
                .expect_err("check_hash should report a mismatch");
            assert!(check_err.is_mismatch(bad_size.to_string(), good_size.to_string()));
        }
    }

    /// `check_hash` without a known hash must report a mismatch naming the file
    #[test]
    fn hash_value_none() {
        for (content, size) in cases() {
            let file = create_file(content);

            let err = check_hash(file.path(), size, None, drop)
                .expect_err("check_hash should report a mismatch");

            assert!(err.is_mismatch("hash", file.path().display().to_string()));
        }
    }

    /// `check_hash` with a wrong known hash must report expected vs. actual hash
    #[test]
    fn hash_value_mismatch() {
        let good_cases = cases_with(HASHES_STD_GOOD);

        for ((content, size, good_hash), bad_hash) in good_cases.zip(HASHES_STD_BAD) {
            let file = create_file(content);

            let err = check_hash(file.path(), size, Some(bad_hash), drop)
                .expect_err("check_hash should report a mismatch");

            assert!(err.is_mismatch(bad_hash, good_hash));
        }
    }
}
|