From f988c80854e924db81847f667e4d9c27e682c4e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rn-Michael=20Miehe?= <40151420+ldericher@users.noreply.github.com> Date: Sat, 5 Jul 2025 01:24:53 +0000 Subject: [PATCH] [wip] unit tests for `file` module - move test data to `test_util::data` --- src/file/checked.rs | 12 ++--- src/file/chunk.rs | 11 ++++ src/file/mod.rs | 55 ++++---------------- src/test_util/data.rs | 118 ++++++++++++++++++++++++++++++++++++++++++ src/test_util/mod.rs | 2 + 5 files changed, 147 insertions(+), 51 deletions(-) create mode 100644 src/test_util/data.rs diff --git a/src/file/checked.rs b/src/file/checked.rs index 8bbe215..2adf26e 100644 --- a/src/file/checked.rs +++ b/src/file/checked.rs @@ -119,16 +119,16 @@ mod tests { use tempfile::TempDir; - use crate::{ - file::tests::{CASES, HASHES}, - test_util::create_file, + use crate::test_util::{ + create_file, + data::{HASHES_STD_GOOD, cases, data}, }; use super::*; #[test] fn new_on_existing_file_works() { - for (content, size) in CASES { + for (content, size) in cases() { let file = create_file(content); let chk = Checked::new(file.path()).expect("creating `Checked` should succeed"); @@ -180,7 +180,7 @@ mod tests { #[test] fn hashing_works() { - for (&(content, _), hash) in CASES.iter().zip(HASHES) { + for (content, hash) in data().zip(HASHES_STD_GOOD) { let file = create_file(content); let mut chk = Checked::new(file.path()).expect("creating `Checked` should succeed"); @@ -194,7 +194,7 @@ mod tests { #[test] fn hashing_again_errors() { - for (content, _) in CASES { + for content in data() { let file = create_file(content); let mut chk = Checked::new(file.path()).expect("creating `Checked` should succeed"); diff --git a/src/file/chunk.rs b/src/file/chunk.rs index c408cae..d3025b8 100644 --- a/src/file/chunk.rs +++ b/src/file/chunk.rs @@ -52,3 +52,14 @@ impl<'t> Chunk<'t> { self.offset + self.get_length() } } + +#[cfg(test)] +mod tests { + // use super::*; + + // #[test] + // fn basic_tests() { + // let mut foo = [0u8; 10]; + // let fid = sharry::FileID("fid".to_string()); + // } +} diff --git a/src/file/mod.rs b/src/file/mod.rs index 3d42b8a..246df07 100644 --- a/src/file/mod.rs +++ b/src/file/mod.rs @@ -124,35 +124,16 @@ pub trait FileTrait { #[cfg(test)] mod tests { - use crate::test_util::create_file; + use crate::test_util::{ + create_file, + data::{DATA_LENGTHS_BAD, HASHES_STD_BAD, HASHES_STD_GOOD, cases, cases_with}, + }; use super::*; - pub static CASES: [(&[u8], u64); 8] = [ - (b"The quick brown fox jumps over the lazy dog", 43), // common pangram - (b"hello world", 11), // simple greeting - (b"", 0), // empty slice - (b"x", 1), // single-byte - (b"0123456789", 10), // numeric ASCII - (b"!@#$%^&*()_+-=[]{};':,.<>/?", 27), // punctuation - (b"RustLang1337", 12), // mixed alphanumeric - (b"foo\0bar\0baz", 11), // embedded nulls - ]; - - pub static HASHES: [&str; 8] = [ - "qK3Uvd39k+SHfSdG5igXsRY2Sh+nvBSNlQkLxzM7NnP4JAHPeqLkyx7NkCluPxTLVBP47Xe+cwRbE5FM3NapGA", // common pangram - "Ahzth5kpbOylV4MquUGlC0oR+DR4zxQfUfkz9lOrn7zAWgN83b7QbjCb8zSULE5YzfGkbiN5EczX/Pl4fLx/0A", // simple greeting - "eGoC90IBWQPGxv2FJVLScpEvR0DhWEdhiobiF/cfVBnSXhAxr+5YUxOJZESTTrBLkDpoWxRIt1XVb3Aa/pvizg", // empty slice - "CQk3etNREMr7KQnhhWcrfyco0fUJT4rWjW+sYnS/H0mUhagOo2TATtAG0pRZ6jy3xgAoDi+D4DJSmQb4iuMNCg", // single-byte - "UqCSwAW2Ib1X5QGgrtlQp2/vuwDQeqQ9rdb1NALMJUE3SfDTxi6MoKfbrjRIQa3qUdU/i2HZaaFdSmMYtXa4rA", // numeric ASCII - "Sr91qmX4R/Ly4HsJh5eiG3S1tuO81kwV0KPfRpn1j4jjrQoGL2I+SeKfcGvpXu3l/rfhGdJHF8ei775ZzdgK3Q", // punctuation - "Ox+zobaUmB8Ps410/TGOtjjLIJKaMUCwG/iFLNXjwRShuJAmtvQcK9Ahc9+SfD4Ci67HyPPorl7NGjN6LRrmlQ", // mixed alphanumeric - "a3rsGWE2kfvN6e2sVhioWP9NOmwLK9trzjc/GKXTPvvsiagiRSHMjlg5jy+bMepip68Pv69dY8TvTSFZES5Jzw", // embedded nulls - ]; - #[test] fn compute_hash_as_expected() { - for (&(content, size), expected_hash) in CASES.iter().zip(HASHES) { + for (content, size, expected_hash) in cases_with(HASHES_STD_GOOD) { let file = create_file(content); // to capture progress updates from `compute_hash` @@ -168,18 +149,7 @@ mod tests { #[test] fn hash_size_mismatch() { - let bad_sizes = [ - 36, // common pangram - 12, // simple greeting - 1, // empty slice - 0, // single-byte - 9, // numeric ASCII - 24, // punctuation - 13, // mixed alphanumeric - 10, // embedded nulls - ]; - - for (&(content, good_size), bad_size) in CASES.iter().zip(bad_sizes) { + for (content, good_size, bad_size) in cases_with(DATA_LENGTHS_BAD) { let file = create_file(content); let callback = drop; @@ -203,7 +173,7 @@ mod tests { #[test] fn hash_value_none() { - for (content, size) in CASES { + for (content, size) in cases() { let file = create_file(content); let callback = drop; @@ -217,14 +187,9 @@ mod tests { #[test] fn hash_value_mismatch() { - let bad_hashes = [ - "invalid9k+SHfSdG5igXsRY2Sh+nvBSNlQkLxzM7NnP4JAHPeqLkyx7NkCluPxTLVBP47Xe+cwRbE5FM3NapGA", // common pangram - "", // simple greeting - "eGoC90IBWQPGxv2FJVLScpEvR0DhWEdhiobiG/cfVBnSXhAxr+5YUxOJZESTTrBLkDpoWxRIt1XVb3Aa/pvizg", // empty slice - "Hash", // single-byte - ]; - - for ((&(content, size), good_hash), bad_hash) in CASES.iter().zip(HASHES).zip(bad_hashes) { + for ((content, size, good_hash), bad_hash) in + cases_with(HASHES_STD_GOOD).zip(HASHES_STD_BAD) + { let file = create_file(content); let callback = drop; diff --git a/src/test_util/data.rs b/src/test_util/data.rs new file mode 100644 index 0000000..a5593ba --- /dev/null +++ b/src/test_util/data.rs @@ -0,0 +1,118 @@ +/// test dataset +const DATA: [&[u8]; 8] = [ + // empty slice + b"", + // single-byte + b"x", + // common ascii pangram + b"The quick brown fox jumps over the lazy dog", + // ascii with punctuation and digits + b"Rust v1.65.0 - Memory Safety, Speed, Concurrency!", + // simple unicode (utf-8) greeting + "こんにちは世界".as_bytes(), + // pseudo-random bytes with embedded nuls + &[ + 0x3C, 0xA7, 0x5D, 0xE1, 0x4F, 0x99, 0x00, 0x20, 0x7F, 0xB3, 0xCD, 0x8A, 0x10, 0x55, 0xAA, + 0xFF, 0x5E, 0xA3, 0x1F, 0xC8, 0x72, 0x4D, 0x99, 0x00, 0xB7, 0x3C, 0x8E, 0xAD, 0x26, 0xF1, + ], + // long run of identical bytes (1 kib of ascii 'a') + &[b'A'; 1024], + // very large slice (10 mib of zeroes) + &[0u8; 10 * 1024 * 1024], +]; + +/// lengths of the test dataset +const DATA_LENGTHS: [u64; 8] = [ + DATA[0].len() as u64, + DATA[1].len() as u64, + DATA[2].len() as u64, + DATA[3].len() as u64, + DATA[4].len() as u64, + DATA[5].len() as u64, + DATA[6].len() as u64, + DATA[7].len() as u64, +]; + +/// anything but the lengths of the test dataset +pub const DATA_LENGTHS_BAD: [u64; 8] = [36, 12, 1, 0, 9, 24, 13, 10]; + +/// known good hashes of the test dataset +/// +/// using BLAKE2b, 512 bit, with unpadded Base64 (standard variant) +pub const HASHES_STD_GOOD: [&str; 8] = [ + // empty slice + "eGoC90IBWQPGxv2FJVLScpEvR0DhWEdhiobiF/cfVBnSXhAxr+5YUxOJZESTTrBLkDpoWxRIt1XVb3Aa/pvizg", + // single-byte + "CQk3etNREMr7KQnhhWcrfyco0fUJT4rWjW+sYnS/H0mUhagOo2TATtAG0pRZ6jy3xgAoDi+D4DJSmQb4iuMNCg", + // common ascii pangram + "qK3Uvd39k+SHfSdG5igXsRY2Sh+nvBSNlQkLxzM7NnP4JAHPeqLkyx7NkCluPxTLVBP47Xe+cwRbE5FM3NapGA", + // ascii with punctuation and digits + "NOtceHp9LrSYpXvSP3ayPbgMUyX4hynBYt4KtHuwJDsv1ELco5QeUj9aJTYTqbw4KzRKY+RjsbR26N3smUeCmA", + // simple unicode (utf-8) greeting + "h3xQg25wr/XqaXgqXWJivbVgN89XQoZUN/JcSZB0jxOtkbVStY7hnO+pm3PnLv6yZ4ZDLrxzYpoBk05BR7Wo1A", + // pseudo-random bytes with embedded nuls + "kiUYjOegDM9n1ryWtZhukpTuZ8oZbhi2onpXNl6pg16R+JZj5ty4uJZs44YbCu0A9m35Xs3bi/mxfbSulbo5Rg", + // long run of identical bytes (1 kib of ascii 'a') + "xwGOG01h2kco4CgjJlD9T2v5bM8XVuCrYzKTM4D0s7rCnOH+HR1H2S2Tmg43M+ym1A+AEPTE4J7iGljgntTdZA", + // very large slice (10 mib of zeroes) + "xsHH9h63e1+254TSCQoWCl6L5eGOo0Zg+ubtQC8Inwj7dwW7oxg0kYCrnkuTRj+7bVYNjlRSDOa8OIdInp73wA", +]; + +/// known bad version of `HASHES_STD_GOOD` +pub const HASHES_STD_BAD: [&str; 8] = [ + // off by one character (last “z” -> “y”) + "eGoC90IBWQPGxv2FJVLScpEvR0DhWEdhiobiF/cfVBnSXhAxr+5YUxOJZESTTrBLkDpoWxRIt1XVb3Aa/pviyg", + // truncated by dropping the final 4 chars + "CQk3etNREMr7KQnhhWcrfyco0fUJT4rWjW+sYnS/H0mUhagOo2TATtAG0pRZ6jy3xgAoDi+D4DJSmQb4iu", + // contains a non‐Base64 character (“#”) + "qK3Uvd39k+SHfSdG5igXsRY2Sh+nvBSNlQkLxzM7NnP4JAHPeqLkyx7NkCluPxTLVBP47Xe+cwRbE5FM3NapG#", + // too long, extra “AA” at end + "NOtceHp9LrSYpXvSP3ayPbgMUyX4hynBYt4KtHuwJDsv1ELco5QeUj9aJTYTqbw4KzRKY+RjsbR26N3smUeCmAAA", + // one byte altered at the front (“h” -> “H”) + "H3xQg25wr/XqaXgqXWJivbVgN89XQoZUN/JcSZB0jxOtkbVStY7hnO+pm3PnLv6yZ4ZDLrxzYpoBk05BR7Wo1A", + // garbled mid‐section + "kiUYjOegDM9n1ryWtZhukpTuZ8oZbhi2onpXYZ6pg16R+JZj5ty4uJZs44YbCu0A9m35Xs3bi/mxfbSulbo5Rg", + // entirely different length (too short) + "xwGOG01h2kco4CgjJlD9T2v5bM8XVuCrYzKTM4D0s7rCnO", + // correct length, but all “A”s (obviously wrong) + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", +]; + +// /// known good hashes of the test dataset +// /// +// /// using BLAKE2b, 128 bit, with unpadded Base64 (url safe variant) +// const HASHES_URL_GOOD: [&str; 8] = [ +// // empty slice +// "########################################################", +// // single-byte +// "########################################################", +// // common ascii pangram +// "", +// // ascii with punctuation and digits +// "", +// // simple unicode (utf-8) greeting +// "", +// // pseudo-random bytes with embedded nuls +// "", +// // long run of identical bytes (1 kib of ascii 'a') +// "", +// // very large slice (10 mib of zeroes) +// "", +// ]; + +pub fn data() -> impl Iterator { + DATA.iter().map(|item| *item) +} + +pub fn cases() -> impl Iterator { + data().zip(DATA_LENGTHS) +} + +pub fn cases_with(addons: T) -> impl Iterator +where + T: IntoIterator, +{ + cases() + .zip(addons) + .map(|((data, len), addon)| (data, len, addon)) +} diff --git a/src/test_util/mod.rs b/src/test_util/mod.rs index 3687825..fbca94c 100644 --- a/src/test_util/mod.rs +++ b/src/test_util/mod.rs @@ -1,5 +1,7 @@ #![cfg(test)] +pub mod data; + use std::{fmt, io::Write}; use tempfile::NamedTempFile;