shrupl/src/file/mod.rs

207 lines
7 KiB
Rust
Raw Normal View History

mod checked;
mod chunk;
mod uploading;
2025-05-27 00:42:43 +00:00
2025-06-26 09:56:29 +00:00
use std::{ffi::OsStr, fs, io::Read, path::Path};
use base64::{Engine, prelude::BASE64_STANDARD_NO_PAD as BASE64};
use blake2b_simd::Params as Blake2b;
pub use checked::Checked;
pub use chunk::Chunk;
2025-06-26 09:56:29 +00:00
use log::{debug, warn};
pub use uploading::Uploading;
2025-05-27 00:42:43 +00:00
fn compute_hash(path: &Path, size: u64, mut on_progress: impl FnMut(u64)) -> crate::Result<String> {
let mut file = fs::File::open(path)?;
// Blake2b-512 hasher (64 * 8 bit)
let mut hasher = Blake2b::new().hash_length(64).to_state();
// buffer (4 MiB)
let mut buf = vec![0; 4 * 1024 * 1024];
let mut bytes_read = 0;
loop {
let n = file.read(&mut buf)?;
if n == 0 {
break;
}
hasher.update(&buf[..n]);
// `buf` size must be < 2 EiB
bytes_read += n as u64;
on_progress(n as u64);
}
if bytes_read != size {
2025-06-27 01:55:43 +00:00
return Err(crate::Error::mismatch(size, bytes_read));
}
let result = BASE64.encode(hasher.finalize());
debug!("hashed {:?}: {result:?}", path.display());
Ok(result)
}
fn check_hash(
path: &Path,
size: u64,
hash: Option<&str>,
on_progress: impl FnMut(u64),
2025-06-27 01:55:43 +00:00
) -> crate::Result<()> {
2025-06-26 09:56:29 +00:00
let Some(expected) = hash else {
2025-06-27 01:55:43 +00:00
return Err(crate::Error::mismatch("hash", path.display()));
};
let actual = &compute_hash(path, size, on_progress)?;
2025-06-26 09:56:29 +00:00
if expected == actual {
debug!("hash matches {expected:?}");
Ok(())
} else {
warn!("hash mismatch for file {:?}", path.display());
2025-06-27 01:55:43 +00:00
Err(crate::Error::mismatch(expected, actual))
2025-06-26 09:56:29 +00:00
}
}
pub trait FileTrait<'t> {
/// extract the filename part of a `Path` reference
///
/// # Panics
///
/// Expects `path::Path::file_name` and `ffi::OsStr::to_str` to succeed on the given path
fn extract_file_name(p: &'t Path) -> &'t str {
p.file_name()
.and_then(OsStr::to_str)
.expect("bad file name")
}
/// get a reference to the file's name
fn get_name(&'t self) -> &'t str;
/// get the file's size
fn get_size(&self) -> u64;
2025-06-27 01:55:43 +00:00
fn check_hash(&self, on_progress: impl Fn(u64)) -> crate::Result<()>;
}
#[cfg(test)]
mod tests {
    use std::io::Write;

    use tempfile::NamedTempFile;

    use super::*;

    /// Helper to create a temp file from `data`
    fn create_file(data: &[u8]) -> NamedTempFile {
        let mut tmp = NamedTempFile::new().expect("creating temp file");
        tmp.write_all(data).expect("writing to tempfile");
        tmp
    }

    /// File contents paired with their length in bytes.
    static CASES: [(&[u8], u64); 8] = [
        (b"The quick brown fox jumps over the lazy dog", 43), // common pangram
        (b"hello world", 11),                                 // simple greeting
        (b"", 0),                                             // empty slice
        (b"x", 1),                                            // single-byte
        (b"0123456789", 10),                                  // numeric ASCII
        (b"!@#$%^&*()_+-=[]{};':,.<>/?", 27),                 // punctuation
        (b"RustLang1337", 12),                                // mixed alphanumeric
        (b"foo\0bar\0baz", 11),                               // embedded nulls
    ];

    /// Expected hashes (unpadded Base64) for `CASES`, in the same order.
    static HASHES: [&str; 8] = [
        "qK3Uvd39k+SHfSdG5igXsRY2Sh+nvBSNlQkLxzM7NnP4JAHPeqLkyx7NkCluPxTLVBP47Xe+cwRbE5FM3NapGA", // common pangram
        "Ahzth5kpbOylV4MquUGlC0oR+DR4zxQfUfkz9lOrn7zAWgN83b7QbjCb8zSULE5YzfGkbiN5EczX/Pl4fLx/0A", // simple greeting
        "eGoC90IBWQPGxv2FJVLScpEvR0DhWEdhiobiF/cfVBnSXhAxr+5YUxOJZESTTrBLkDpoWxRIt1XVb3Aa/pvizg", // empty slice
        "CQk3etNREMr7KQnhhWcrfyco0fUJT4rWjW+sYnS/H0mUhagOo2TATtAG0pRZ6jy3xgAoDi+D4DJSmQb4iuMNCg", // single-byte
        "UqCSwAW2Ib1X5QGgrtlQp2/vuwDQeqQ9rdb1NALMJUE3SfDTxi6MoKfbrjRIQa3qUdU/i2HZaaFdSmMYtXa4rA", // numeric ASCII
        "Sr91qmX4R/Ly4HsJh5eiG3S1tuO81kwV0KPfRpn1j4jjrQoGL2I+SeKfcGvpXu3l/rfhGdJHF8ei775ZzdgK3Q", // punctuation
        "Ox+zobaUmB8Ps410/TGOtjjLIJKaMUCwG/iFLNXjwRShuJAmtvQcK9Ahc9+SfD4Ci67HyPPorl7NGjN6LRrmlQ", // mixed alphanumeric
        "a3rsGWE2kfvN6e2sVhioWP9NOmwLK9trzjc/GKXTPvvsiagiRSHMjlg5jy+bMepip68Pv69dY8TvTSFZES5Jzw", // embedded nulls
    ];

    #[test]
    fn compute_hash_as_expected() {
        for (&(content, size), expected_hash) in CASES.iter().zip(HASHES) {
            let file = create_file(content);

            // to capture progress updates from `compute_hash`
            let mut read_total = 0;
            let callback = |n| read_total += n;

            let hash = compute_hash(file.path(), size, callback).expect("hash should succeed");

            assert_eq!(hash, expected_hash);
            // the progress increments must add up to the whole file
            assert_eq!(read_total, size);
        }
    }

    #[test]
    fn hash_size_mismatch() {
        let bad_sizes = [
            36, // common pangram
            12, // simple greeting
            1,  // empty slice
            0,  // single-byte
            9,  // numeric ASCII
            24, // punctuation
            13, // mixed alphanumeric
            10, // embedded nulls
        ];

        for (&(content, good_size), bad_size) in CASES.iter().zip(bad_sizes) {
            let file = create_file(content);
            let callback = drop;

            let err_callback = |err| {
                // check error
                match err {
                    crate::Error::Mismatch { expected, actual } => {
                        assert_eq!(expected, bad_size.to_string());
                        assert_eq!(actual, good_size.to_string());
                    }
                    _ => panic!(
                        "Expected Error::Mismatch for input size `{bad_size}` but got {err:?}"
                    ),
                };
            };

            compute_hash(file.path(), bad_size, callback)
                .map_err(err_callback)
                .expect_err("compute_hash should report a mismatch");

            // the size is verified while hashing, so the bogus hash is never compared
            check_hash(file.path(), bad_size, Some("foobar"), callback)
                .map_err(err_callback)
                .expect_err("check_hash should report a mismatch");
        }
    }

    #[test]
    fn hash_value_mismatch() {
        // NOTE: only four bad hashes are listed, so `zip` limits this test to the
        // first four entries of `CASES` / `HASHES`.
        let bad_hashes = [
            "invalid9k+SHfSdG5igXsRY2Sh+nvBSNlQkLxzM7NnP4JAHPeqLkyx7NkCluPxTLVBP47Xe+cwRbE5FM3NapGA", // common pangram
            "", // simple greeting
            "eGoC90IBWQPGxv2FJVLScpEvR0DhWEdhiobiG/cfVBnSXhAxr+5YUxOJZESTTrBLkDpoWxRIt1XVb3Aa/pvizg", // empty slice
            "Hash", // single-byte
        ];

        for ((&(content, size), good_hash), bad_hash) in CASES.iter().zip(HASHES).zip(bad_hashes) {
            let file = create_file(content);
            let callback = drop;

            let err = check_hash(file.path(), size, Some(bad_hash), callback)
                .expect_err("check_hash should report a mismatch");

            // check error
            match err {
                crate::Error::Mismatch { expected, actual } => {
                    assert_eq!(expected, bad_hash);
                    assert_eq!(actual, good_hash);
                }
                _ => panic!("Expected Error::Mismatch but got {err:?}"),
            }
        }
    }
}