From cca35e1ae820d46c454636518e6bfec9f51f141d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rn-Michael=20Miehe?= <40151420+ldericher@users.noreply.github.com> Date: Thu, 3 Jul 2025 15:39:29 +0000 Subject: [PATCH] [wip] unit tests for `file` module - testing for `compute_hash` --- Cargo.lock | 91 ++++++++++++++++++++++++++++++++++++++-- Cargo.toml | 3 ++ src/file/checked.rs | 4 +- src/file/mod.rs | 98 ++++++++++++++++++++++++++++++++++++++++--- src/file/uploading.rs | 2 +- 5 files changed, 186 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3d39562..22a2c88 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -344,6 +344,22 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" +[[package]] +name = "errno" +version = "0.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad" +dependencies = [ + "libc", + "windows-sys 0.59.0", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + [[package]] name = "flate2" version = "1.1.1" @@ -377,7 +393,19 @@ checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ "cfg-if", "libc", - "wasi", + "wasi 0.11.0+wasi-snapshot-preview1", +] + +[[package]] +name = "getrandom" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasi 0.14.2+wasi-0.2.4", ] [[package]] @@ -600,6 +628,12 @@ dependencies = [ "libc", ] +[[package]] +name = "linux-raw-sys" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" + [[package]] name = "litemap" version = "0.8.0" @@ -723,13 +757,19 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + [[package]] name = "redox_users" version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" dependencies = [ - "getrandom", + "getrandom 0.2.16", "libredox", "thiserror 1.0.69", ] @@ -771,12 +811,25 @@ checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" dependencies = [ "cc", "cfg-if", - "getrandom", + "getrandom 0.2.16", "libc", "untrusted", "windows-sys 0.52.0", ] +[[package]] +name = "rustix" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.59.0", +] + [[package]] name = "rustls" version = "0.23.27" @@ -888,6 +941,7 @@ dependencies = [ "regex", "serde", "serde_json", + "tempfile", "thiserror 2.0.12", "ureq", ] @@ -938,6 +992,19 @@ dependencies = [ "syn", ] +[[package]] +name = "tempfile" +version = "3.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" +dependencies = [ + "fastrand", + "getrandom 0.3.3", + "once_cell", + "rustix", + "windows-sys 0.59.0", +] + [[package]] name = "thiserror" version = "1.0.69" @@ -1111,6 +1178,15 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "wasi" +version = "0.14.2+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +dependencies = [ + "wit-bindgen-rt", +] + [[package]] name = "wasm-bindgen" version = "0.2.100" @@ -1300,6 +1376,15 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "wit-bindgen-rt" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" +dependencies = [ + "bitflags", +] + [[package]] name = "writeable" version = "0.6.1" diff --git a/Cargo.toml b/Cargo.toml index 18b12cd..aa0f373 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,6 +21,9 @@ serde_json = "1.0.140" thiserror = "2.0.12" ureq = { version = "3.0.11", features = ["json"] } +[dev-dependencies] +tempfile = "3.20.0" + [profile.release] # Optimize for speed even more aggressively opt-level = "z" diff --git a/src/file/checked.rs b/src/file/checked.rs index 7aa4d74..56a4067 100644 --- a/src/file/checked.rs +++ b/src/file/checked.rs @@ -59,7 +59,7 @@ impl Checked { return Err(crate::Error::mismatch("unhashed file", self.path.display())); } - self.hash = Some(super::compute_file_hash(&self.path, self.size, f)?); + self.hash = Some(super::compute_hash(&self.path, self.size, f)?); Ok(()) } @@ -100,6 +100,6 @@ impl<'t> FileTrait<'t> for Checked { } fn check_hash(&self, on_progress: impl Fn(u64)) -> crate::Result<()> { - super::check_file_hash(&self.path, self.size, self.hash.as_ref(), on_progress) + super::check_hash(&self.path, self.size, self.hash.as_ref(), on_progress) } } diff --git a/src/file/mod.rs b/src/file/mod.rs index e7ec53a..fc61acf 100644 --- a/src/file/mod.rs +++ b/src/file/mod.rs @@ -12,12 +12,14 @@ pub use chunk::Chunk; use log::{debug, warn}; pub use uploading::Uploading; - -fn compute_file_hash(path: &Path, size: u64, on_progress: impl Fn(u64)) -> crate::Result { +fn compute_hash(path: &Path, size: u64, mut on_progress: impl FnMut(u64)) -> crate::Result { let mut file = fs::File::open(path)?; + + // Blake2b-512 hasher (64 * 8 bit) let mut hasher = Blake2b::new().hash_length(64).to_state(); - let mut buf = vec![0u8; 4 * 1024 * 1024]; + // buffer (4 MiB) + let mut buf = vec![0; 4 * 1024 * 1024]; let mut bytes_read = 0; loop { @@ -27,6 +29,7 @@ fn compute_file_hash(path: &Path, size: u64, on_progress: impl Fn(u64)) -> crate } hasher.update(&buf[..n]); + // `buf` size must be < 2 EiB bytes_read += n as u64; on_progress(n as u64); } @@ -40,17 +43,17 @@ fn compute_file_hash(path: &Path, size: u64, on_progress: impl Fn(u64)) -> crate Ok(result) } -fn check_file_hash( +fn check_hash( path: &Path, size: u64, hash: Option<&String>, - on_progress: impl Fn(u64), + on_progress: impl FnMut(u64), ) -> crate::Result<()> { let Some(expected) = hash else { return Err(crate::Error::mismatch("hash", path.display())); }; - let actual = &compute_file_hash(path, size, on_progress)?; + let actual = &compute_hash(path, size, on_progress)?; if expected == actual { debug!("hash matches {expected:?}"); @@ -81,3 +84,86 @@ pub trait FileTrait<'t> { fn check_hash(&self, on_progress: impl Fn(u64)) -> crate::Result<()>; } + +#[cfg(test)] +mod tests { + use std::io::Write; + + use tempfile::NamedTempFile; + + use super::*; + + /// Helper to create a temp file from `data` + fn create_file(data: &[u8]) -> NamedTempFile { + let mut tmp = NamedTempFile::new().expect("creating temp file"); + tmp.write_all(data).expect("writing to tempfile"); + tmp + } + + static CASES: [(&[u8], u64); 8] = [ + (b"The quick brown fox jumps over the lazy dog", 43), // common pangram + (b"hello world", 11), // simple greeting + (b"", 0), // empty slice + (b"x", 1), // single-byte + (b"0123456789", 10), // numeric ASCII + (b"!@#$%^&*()_+-=[]{};':,.<>/?", 27), // punctuation + (b"RustLang1337", 12), // mixed alphanumeric + (b"foo\0bar\0baz", 11), // embedded nulls + ]; + + static HASHES: [&str; 8] = [ + "qK3Uvd39k+SHfSdG5igXsRY2Sh+nvBSNlQkLxzM7NnP4JAHPeqLkyx7NkCluPxTLVBP47Xe+cwRbE5FM3NapGA", // common pangram + "Ahzth5kpbOylV4MquUGlC0oR+DR4zxQfUfkz9lOrn7zAWgN83b7QbjCb8zSULE5YzfGkbiN5EczX/Pl4fLx/0A", // simple greeting + "eGoC90IBWQPGxv2FJVLScpEvR0DhWEdhiobiF/cfVBnSXhAxr+5YUxOJZESTTrBLkDpoWxRIt1XVb3Aa/pvizg", // empty slice + "CQk3etNREMr7KQnhhWcrfyco0fUJT4rWjW+sYnS/H0mUhagOo2TATtAG0pRZ6jy3xgAoDi+D4DJSmQb4iuMNCg", // single-byte + "UqCSwAW2Ib1X5QGgrtlQp2/vuwDQeqQ9rdb1NALMJUE3SfDTxi6MoKfbrjRIQa3qUdU/i2HZaaFdSmMYtXa4rA", // numeric ASCII + "Sr91qmX4R/Ly4HsJh5eiG3S1tuO81kwV0KPfRpn1j4jjrQoGL2I+SeKfcGvpXu3l/rfhGdJHF8ei775ZzdgK3Q", // punctuation + "Ox+zobaUmB8Ps410/TGOtjjLIJKaMUCwG/iFLNXjwRShuJAmtvQcK9Ahc9+SfD4Ci67HyPPorl7NGjN6LRrmlQ", // mixed alphanumeric + "a3rsGWE2kfvN6e2sVhioWP9NOmwLK9trzjc/GKXTPvvsiagiRSHMjlg5jy+bMepip68Pv69dY8TvTSFZES5Jzw", // embedded nulls + ]; + + #[test] + fn compute_hash_as_expected() { + for (&(content, size), expected_hash) in CASES.iter().zip(HASHES) { + // to capture progress updates from `compute_hash` + let mut read_total = 0; + let callback = |n| read_total += n; + + let hash = compute_hash(create_file(content).path(), size, callback) + .expect("hash should succeed"); + + assert_eq!(hash, expected_hash); + assert_eq!(read_total, size); + } + } + + #[test] + fn compute_hash_size_mismatch() { + let bad_sizes = [ + 36, // common pangram + 12, // simple greeting + 1, // empty slice + 0, // single-byte + 9, // numeric ASCII + 24, // punctuation + 13, // mixed alphanumeric + 10, // embedded nulls + ]; + + for (&(content, good_size), bad_size) in CASES.iter().zip(bad_sizes) { + let callback = drop; + + let err = compute_hash(create_file(content).path(), bad_size, callback) + .expect_err("should get a size-mismatch error"); + + // make sure it's the Mismatch variant, and that it contains the original input + match err { + crate::Error::Mismatch { expected, actual } => { + assert_eq!(expected, bad_size.to_string()); + assert_eq!(actual, good_size.to_string()); + } + _ => panic!("Expected Error::Mismatch for input size `{bad_size}` but got {err:?}"), + } + } + } +} diff --git a/src/file/uploading.rs b/src/file/uploading.rs index 8ac6eea..3fe45b9 100644 --- a/src/file/uploading.rs +++ b/src/file/uploading.rs @@ -107,6 +107,6 @@ impl<'t> FileTrait<'t> for Uploading { } fn check_hash(&self, on_progress: impl Fn(u64)) -> crate::Result<()> { - super::check_file_hash(&self.path, self.size, self.hash.as_ref(), on_progress) + super::check_hash(&self.path, self.size, self.hash.as_ref(), on_progress) } }