2025-06-04 13:25:00 +00:00
|
|
|
mod checked;
|
2025-06-10 23:39:08 +00:00
|
|
|
mod chunk;
|
2025-06-04 13:25:00 +00:00
|
|
|
mod uploading;
|
2025-05-27 00:42:43 +00:00
|
|
|
|
2025-06-26 09:56:29 +00:00
|
|
|
use std::{ffi::OsStr, fs, io::Read, path::Path};
|
2025-06-24 19:34:11 +00:00
|
|
|
|
2025-07-03 16:20:39 +00:00
|
|
|
use base64::prelude::{BASE64_STANDARD_NO_PAD as BASE64, Engine};
|
2025-06-24 19:34:11 +00:00
|
|
|
use blake2b_simd::Params as Blake2b;
|
2025-06-06 23:48:10 +00:00
|
|
|
|
2025-06-10 18:20:52 +00:00
|
|
|
pub use checked::Checked;
|
2025-06-10 23:39:08 +00:00
|
|
|
pub use chunk::Chunk;
|
2025-06-26 09:56:29 +00:00
|
|
|
use log::{debug, warn};
|
2025-06-10 18:20:52 +00:00
|
|
|
pub use uploading::Uploading;
|
2025-05-27 00:42:43 +00:00
|
|
|
|
2025-07-03 18:27:41 +00:00
|
|
|
/// Number of bytes hashed per step: 4 MiB.
const HASH_CHUNK_SIZE: usize = 4 << 20;
|
|
|
|
|
|
|
|
|
|
/// compute hash for a file given its path.
|
|
|
|
|
/// Hash function: BLAKE2b, 512 bit
|
|
|
|
|
///
|
|
|
|
|
/// # Params
|
|
|
|
|
///
|
|
|
|
|
/// - `path` to the file to hash
|
|
|
|
|
/// - `size` of that file
|
|
|
|
|
/// - `on_progress` will be called for each processed chunk (max. `HASH_CHUNK_SIZE`)
|
|
|
|
|
///
|
|
|
|
|
/// # Errors
|
|
|
|
|
///
|
|
|
|
|
/// - from `fs::File::open` and `fs::File::read`
|
|
|
|
|
/// - Mismatch if given `size` does not match the file's size
|
2025-07-03 15:39:29 +00:00
|
|
|
fn compute_hash(path: &Path, size: u64, mut on_progress: impl FnMut(u64)) -> crate::Result<String> {
|
2025-06-24 19:34:11 +00:00
|
|
|
let mut file = fs::File::open(path)?;
|
2025-07-03 15:39:29 +00:00
|
|
|
|
|
|
|
|
// Blake2b-512 hasher (64 * 8 bit)
|
2025-06-24 19:34:11 +00:00
|
|
|
let mut hasher = Blake2b::new().hash_length(64).to_state();
|
|
|
|
|
|
2025-07-03 18:27:41 +00:00
|
|
|
// buffer
|
|
|
|
|
let mut buffer = vec![0; HASH_CHUNK_SIZE];
|
2025-06-24 19:34:11 +00:00
|
|
|
let mut bytes_read = 0;
|
|
|
|
|
|
|
|
|
|
loop {
|
2025-07-03 18:27:41 +00:00
|
|
|
let n = file.read(&mut buffer)?;
|
2025-06-24 19:34:11 +00:00
|
|
|
if n == 0 {
|
|
|
|
|
break;
|
|
|
|
|
}
|
2025-07-03 18:27:41 +00:00
|
|
|
hasher.update(&buffer[..n]);
|
2025-06-24 19:34:11 +00:00
|
|
|
|
2025-07-03 15:39:29 +00:00
|
|
|
// `buf` size must be < 2 EiB
|
2025-06-24 19:34:11 +00:00
|
|
|
bytes_read += n as u64;
|
|
|
|
|
on_progress(n as u64);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if bytes_read != size {
|
2025-06-27 01:55:43 +00:00
|
|
|
return Err(crate::Error::mismatch(size, bytes_read));
|
2025-06-24 19:34:11 +00:00
|
|
|
}
|
|
|
|
|
|
2025-07-03 14:20:30 +00:00
|
|
|
let result = BASE64.encode(hasher.finalize());
|
2025-06-25 10:44:36 +00:00
|
|
|
debug!("hashed {:?}: {result:?}", path.display());
|
|
|
|
|
Ok(result)
|
2025-06-24 19:34:11 +00:00
|
|
|
}
|
|
|
|
|
|
2025-07-03 18:27:41 +00:00
|
|
|
/// check hash for a file given its path, return Ok(()) on success
|
|
|
|
|
///
|
|
|
|
|
/// # Params
|
|
|
|
|
///
|
|
|
|
|
/// - `path` to the file to hash
|
|
|
|
|
/// - `size` of that file
|
|
|
|
|
/// - optional known `hash`
|
|
|
|
|
/// - `on_progress` will be called for each processed chunk (max. `HASH_CHUNK_SIZE`)
|
|
|
|
|
///
|
|
|
|
|
/// # Errors
|
|
|
|
|
///
|
|
|
|
|
/// - from `file::compute_hash`
|
|
|
|
|
/// - Mismatch if `hash` is `None`
|
|
|
|
|
/// - Mismatch if given `hash` does not match the computed hash
|
2025-07-03 15:39:29 +00:00
|
|
|
fn check_hash(
|
2025-06-25 10:44:36 +00:00
|
|
|
path: &Path,
|
2025-06-24 19:34:11 +00:00
|
|
|
size: u64,
|
2025-07-03 16:18:46 +00:00
|
|
|
hash: Option<&str>,
|
2025-07-03 15:39:29 +00:00
|
|
|
on_progress: impl FnMut(u64),
|
2025-06-27 01:55:43 +00:00
|
|
|
) -> crate::Result<()> {
|
2025-07-03 18:27:41 +00:00
|
|
|
// check if hash is None
|
2025-06-26 09:56:29 +00:00
|
|
|
let Some(expected) = hash else {
|
2025-06-27 01:55:43 +00:00
|
|
|
return Err(crate::Error::mismatch("hash", path.display()));
|
2025-06-25 10:44:36 +00:00
|
|
|
};
|
2025-06-24 19:34:11 +00:00
|
|
|
|
2025-07-03 18:27:41 +00:00
|
|
|
// compute and check new hash
|
2025-07-03 15:39:29 +00:00
|
|
|
let actual = &compute_hash(path, size, on_progress)?;
|
2025-06-26 09:56:29 +00:00
|
|
|
|
|
|
|
|
if expected == actual {
|
|
|
|
|
debug!("hash matches {expected:?}");
|
|
|
|
|
Ok(())
|
|
|
|
|
} else {
|
|
|
|
|
warn!("hash mismatch for file {:?}", path.display());
|
2025-06-27 01:55:43 +00:00
|
|
|
Err(crate::Error::mismatch(expected, actual))
|
2025-06-26 09:56:29 +00:00
|
|
|
}
|
2025-06-24 19:34:11 +00:00
|
|
|
}
|
|
|
|
|
|
2025-07-03 18:35:50 +00:00
|
|
|
pub trait FileTrait {
|
2025-06-06 23:48:10 +00:00
|
|
|
/// extract the filename part of a `Path` reference
|
|
|
|
|
///
|
|
|
|
|
/// # Panics
|
|
|
|
|
///
|
|
|
|
|
/// Expects `path::Path::file_name` and `ffi::OsStr::to_str` to succeed on the given path
|
2025-07-03 18:35:50 +00:00
|
|
|
fn extract_file_name(p: &Path) -> &str {
|
2025-06-06 23:48:10 +00:00
|
|
|
p.file_name()
|
|
|
|
|
.and_then(OsStr::to_str)
|
|
|
|
|
.expect("bad file name")
|
|
|
|
|
}
|
|
|
|
|
|
2025-06-15 00:44:28 +00:00
|
|
|
/// get a reference to the file's name
|
2025-07-03 18:27:41 +00:00
|
|
|
///
|
|
|
|
|
/// Uses `file::FileTrait::extract_file_name`, which may **panic**!
|
2025-07-03 18:35:50 +00:00
|
|
|
fn get_name(&self) -> &str;
|
2025-06-06 23:48:10 +00:00
|
|
|
|
2025-06-15 00:44:28 +00:00
|
|
|
/// get the file's size
|
2025-06-06 23:48:10 +00:00
|
|
|
fn get_size(&self) -> u64;
|
2025-06-24 19:34:11 +00:00
|
|
|
|
2025-07-03 18:27:41 +00:00
|
|
|
/// check this file's hash, return Ok(()) on success
|
|
|
|
|
///
|
|
|
|
|
/// # Errors
|
|
|
|
|
///
|
|
|
|
|
/// - from `file::check_hash`
|
|
|
|
|
fn check_hash(&self, on_progress: impl FnMut(u64)) -> crate::Result<()>;
|
2025-06-06 23:48:10 +00:00
|
|
|
}
|
2025-07-03 15:39:29 +00:00
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use std::io::Write;

    use tempfile::NamedTempFile;

    use super::*;

    /// Helper to create a temp file from `data`
    fn create_file(data: &[u8]) -> NamedTempFile {
        let mut tmp = NamedTempFile::new().expect("creating temp file");
        tmp.write_all(data).expect("writing to tempfile");
        tmp
    }

    /// File contents paired with their byte length; `HASHES` is index-aligned with this table
    static CASES: [(&[u8], u64); 8] = [
        (b"The quick brown fox jumps over the lazy dog", 43), // common pangram
        (b"hello world", 11),                                 // simple greeting
        (b"", 0),                                             // empty slice
        (b"x", 1),                                            // single-byte
        (b"0123456789", 10),                                  // numeric ASCII
        (b"!@#$%^&*()_+-=[]{};':,.<>/?", 27),                 // punctuation
        (b"RustLang1337", 12),                                // mixed alphanumeric
        (b"foo\0bar\0baz", 11),                               // embedded nulls
    ];

    /// Expected `compute_hash` output for each entry of `CASES`
    static HASHES: [&str; 8] = [
        "qK3Uvd39k+SHfSdG5igXsRY2Sh+nvBSNlQkLxzM7NnP4JAHPeqLkyx7NkCluPxTLVBP47Xe+cwRbE5FM3NapGA", // common pangram
        "Ahzth5kpbOylV4MquUGlC0oR+DR4zxQfUfkz9lOrn7zAWgN83b7QbjCb8zSULE5YzfGkbiN5EczX/Pl4fLx/0A", // simple greeting
        "eGoC90IBWQPGxv2FJVLScpEvR0DhWEdhiobiF/cfVBnSXhAxr+5YUxOJZESTTrBLkDpoWxRIt1XVb3Aa/pvizg", // empty slice
        "CQk3etNREMr7KQnhhWcrfyco0fUJT4rWjW+sYnS/H0mUhagOo2TATtAG0pRZ6jy3xgAoDi+D4DJSmQb4iuMNCg", // single-byte
        "UqCSwAW2Ib1X5QGgrtlQp2/vuwDQeqQ9rdb1NALMJUE3SfDTxi6MoKfbrjRIQa3qUdU/i2HZaaFdSmMYtXa4rA", // numeric ASCII
        "Sr91qmX4R/Ly4HsJh5eiG3S1tuO81kwV0KPfRpn1j4jjrQoGL2I+SeKfcGvpXu3l/rfhGdJHF8ei775ZzdgK3Q", // punctuation
        "Ox+zobaUmB8Ps410/TGOtjjLIJKaMUCwG/iFLNXjwRShuJAmtvQcK9Ahc9+SfD4Ci67HyPPorl7NGjN6LRrmlQ", // mixed alphanumeric
        "a3rsGWE2kfvN6e2sVhioWP9NOmwLK9trzjc/GKXTPvvsiagiRSHMjlg5jy+bMepip68Pv69dY8TvTSFZES5Jzw", // embedded nulls
    ];

    #[test]
    fn compute_hash_as_expected() {
        for (&(content, size), expected_hash) in CASES.iter().zip(HASHES) {
            let file = create_file(content);

            // to capture progress updates from `compute_hash`
            let mut read_total = 0;
            let callback = |n| read_total += n;

            let hash = compute_hash(file.path(), size, callback).expect("hash should succeed");

            assert_eq!(hash, expected_hash);
            assert_eq!(read_total, size);
        }
    }

    #[test]
    fn hash_size_mismatch() {
        // the fixed length keeps this table in step with `CASES`; `zip` would otherwise
        // silently stop at the shorter of the two
        let bad_sizes: [u64; 8] = [
            36, // common pangram
            12, // simple greeting
            1,  // empty slice
            0,  // single-byte
            9,  // numeric ASCII
            24, // punctuation
            13, // mixed alphanumeric
            10, // embedded nulls
        ];

        for (&(content, good_size), bad_size) in CASES.iter().zip(bad_sizes) {
            let file = create_file(content);
            let callback = drop;

            {
                let err = compute_hash(file.path(), bad_size, callback)
                    .expect_err("compute_hash should report a mismatch");

                // check error
                assert!(err.is_mismatch(bad_size.to_string(), good_size.to_string()));
            }

            {
                // the size check fails before the bogus hash is ever compared
                let err = check_hash(file.path(), bad_size, Some("foobar"), callback)
                    .expect_err("check_hash should report a mismatch");

                // check error
                assert!(err.is_mismatch(bad_size.to_string(), good_size.to_string()));
            }
        }
    }

    #[test]
    fn hash_value_none() {
        for (content, size) in CASES {
            let file = create_file(content);
            let callback = drop;

            let err = check_hash(file.path(), size, None, callback)
                .expect_err("check_hash should report a mismatch");

            // check error
            assert!(err.is_mismatch("hash", file.path().display().to_string()));
        }
    }

    #[test]
    fn hash_value_mismatch() {
        // one wrong hash per entry of `CASES`; the fixed length guarantees that `zip`
        // below does not silently skip trailing cases
        let bad_hashes: [&str; 8] = [
            "invalid9k+SHfSdG5igXsRY2Sh+nvBSNlQkLxzM7NnP4JAHPeqLkyx7NkCluPxTLVBP47Xe+cwRbE5FM3NapGA", // common pangram
            "",                                                                                       // simple greeting
            "eGoC90IBWQPGxv2FJVLScpEvR0DhWEdhiobiG/cfVBnSXhAxr+5YUxOJZESTTrBLkDpoWxRIt1XVb3Aa/pvizg", // empty slice
            "Hash",                                                                                   // single-byte
            "0123456789", // numeric ASCII: the content itself instead of its hash
            "Sr91qmX4",   // punctuation: truncated prefix of the real hash
            "ox+zobaUmB8Ps410/TGOtjjLIJKaMUCwG/iFLNXjwRShuJAmtvQcK9Ahc9+SfD4Ci67HyPPorl7NGjN6LRrmlQ", // mixed alphanumeric: first letter lower-cased
            "a3rsGWE2kfvN6e2sVhioWP9NOmwLK9trzjc/GKXTPvvsiagiRSHMjlg5jy+bMepip68Pv69dY8TvTSFZES5Jzw==", // embedded nulls: padding appended
        ];

        for ((&(content, size), good_hash), bad_hash) in CASES.iter().zip(HASHES).zip(bad_hashes) {
            let file = create_file(content);
            let callback = drop;

            let err = check_hash(file.path(), size, Some(bad_hash), callback)
                .expect_err("check_hash should report a mismatch");

            // check error
            assert!(err.is_mismatch(bad_hash, good_hash));
        }
    }
}
|