shrupl/src/file/mod.rs
Jörn-Michael Miehe 53fdb6cc97 [wip] unit tests
- use `unwrap` while testing
2025-07-10 13:19:27 +00:00

191 lines
5.2 KiB
Rust

mod checked;
mod chunk;
mod uploading;
use std::{ffi::OsStr, fs, io::Read, path::Path};
use base64::prelude::{BASE64_STANDARD_NO_PAD as BASE64, Engine};
use blake2b_simd::Params as Blake2b;
pub use checked::Checked;
pub use chunk::Chunk;
use log::{debug, warn};
pub use uploading::Uploading;
/// how many bytes to hash at once (4 MiB); `compute_hash` uses this as its read buffer size
const HASH_CHUNK_SIZE: usize = 4 * 1024 * 1024;
/// compute hash for a file given its path.
/// Hash function: `BLAKE2b`, 512 bit; the digest is returned Base64 encoded
/// (standard alphabet, no padding)
///
/// # Params
///
/// - `path` to the file to hash
/// - `size` of that file, in bytes
/// - `on_progress` will be called for each processed chunk (max. `HASH_CHUNK_SIZE`)
///   with the number of bytes in that chunk
///
/// # Errors
///
/// - from `fs::File::{open, read}`; reads failing with `ErrorKind::Interrupted`
///   are retried instead of being reported
/// - Mismatch if given `size` does not match the number of bytes actually read
fn compute_hash(path: &Path, size: u64, mut on_progress: impl FnMut(u64)) -> crate::Result<String> {
    let mut file = fs::File::open(path)?;

    // Blake2b-512 hasher (64 * 8 bit)
    let mut hasher = Blake2b::new().hash_length(64).to_state();

    // read buffer, reused for every chunk
    let mut buffer = vec![0; HASH_CHUNK_SIZE];
    let mut bytes_read = 0;

    loop {
        let n = match file.read(&mut buffer) {
            // end of file
            Ok(0) => break,
            Ok(n) => n,
            // an interrupted read is non-fatal: just try again
            Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e.into()),
        };

        hasher.update(&buffer[..n]);

        // `n` never exceeds the buffer length (`HASH_CHUNK_SIZE`), so widening to `u64` is lossless
        bytes_read += n as u64;
        on_progress(n as u64);
    }

    // the whole file is read first, only then is the total compared to the expected size
    if bytes_read != size {
        return crate::Error::mismatch(size, bytes_read);
    }

    let result = BASE64.encode(hasher.finalize());
    debug!("hashed {:?}: {result:?}", path.display());

    Ok(result)
}
/// check hash for a file given its path, return Ok(()) on success
///
/// # Params
///
/// - everything from `compute_hash`
/// - optionally, known `hash`
///
/// # Errors
///
/// - from `file::compute_hash`
/// - Mismatch if `hash` is `None`
/// - Mismatch if given `hash` does not match the computed hash
fn check_hash(
path: &Path,
size: u64,
hash: Option<&str>,
on_progress: impl FnMut(u64),
) -> crate::Result<()> {
// check if hash is None
let Some(expected) = hash else {
return crate::Error::mismatch("hash", path.display());
};
// compute and check new hash
let actual = &compute_hash(path, size, on_progress)?;
if expected == actual {
debug!("hash matches {expected:?}");
Ok(())
} else {
warn!("hash mismatch for file {:?}", path.display());
crate::Error::mismatch(expected, actual)
}
}
pub trait FileTrait {
    /// return the final component of a `Path` reference as a string slice
    ///
    /// # Panics
    ///
    /// If `path::Path::file_name` yields nothing for the given path, or if
    /// `ffi::OsStr::to_str` cannot convert the result
    fn extract_file_name(p: &Path) -> &str {
        let name: Option<&str> = p.file_name().and_then(OsStr::to_str);

        name.expect("bad file name")
    }

    /// borrow the name of this file
    ///
    /// Uses `file::FileTrait::extract_file_name`, which may **panic**!
    fn get_name(&self) -> &str;

    /// size of this file
    fn get_size(&self) -> u64;

    /// verify the hash of this file, return Ok(()) on success
    ///
    /// # Errors
    ///
    /// - from `file::check_hash`
    fn check_hash(&self, on_progress: impl FnMut(u64)) -> crate::Result<()>;
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_util::{
        create_file,
        data::{DATA_LENGTHS_BAD, HASHES_STD_BAD, HASHES_STD_GOOD, cases, cases_with},
    };

    /// `compute_hash` returns the known hash and reports every byte through the callback
    #[test]
    fn compute_hash_as_expected() {
        for (content, size, expected_hash) in cases_with(HASHES_STD_GOOD) {
            let file = create_file(content);

            // sum up the progress updates emitted by `compute_hash`
            let mut progress_sum = 0;
            let hash = compute_hash(file.path(), size, |n| progress_sum += n).unwrap();

            assert_eq!(hash, expected_hash);
            assert_eq!(progress_sum, size);
        }
    }

    /// a wrong `size` is reported as a mismatch by both hashing functions
    #[test]
    fn hash_size_mismatch() {
        for (content, good_size, bad_size) in cases_with(DATA_LENGTHS_BAD) {
            let file = create_file(content);

            // `compute_hash` with bad size
            let compute_err = compute_hash(file.path(), bad_size, drop).unwrap_err();
            assert!(compute_err.is_mismatch(bad_size.to_string(), good_size.to_string()));

            // `check_hash` with bad size
            let check_err = check_hash(file.path(), bad_size, Some("foobar"), drop).unwrap_err();
            assert!(check_err.is_mismatch(bad_size.to_string(), good_size.to_string()));
        }
    }

    /// `check_hash` without a known hash is a mismatch naming the file
    #[test]
    fn hash_value_none() {
        for (content, size) in cases() {
            let file = create_file(content);

            let missing = check_hash(file.path(), size, None, drop).unwrap_err();
            assert!(missing.is_mismatch("hash", file.path().display().to_string()));
        }
    }

    /// `check_hash` with a wrong known hash is a mismatch of (given, computed)
    #[test]
    fn hash_value_mismatch() {
        let good_and_bad = cases_with(HASHES_STD_GOOD).zip(HASHES_STD_BAD);

        for ((content, size, good_hash), bad_hash) in good_and_bad {
            let file = create_file(content);

            let wrong = check_hash(file.path(), size, Some(bad_hash), drop).unwrap_err();
            assert!(wrong.is_mismatch(bad_hash, good_hash));
        }
    }
}