[wip] unit tests for file module

- move test data to `test_util::data`
This commit is contained in:
Jörn-Michael Miehe 2025-07-05 01:24:53 +00:00
parent 389e33e512
commit f988c80854
5 changed files with 147 additions and 51 deletions

View file

@ -119,16 +119,16 @@ mod tests {
use tempfile::TempDir; use tempfile::TempDir;
use crate::{ use crate::test_util::{
file::tests::{CASES, HASHES}, create_file,
test_util::create_file, data::{HASHES_STD_GOOD, cases, data},
}; };
use super::*; use super::*;
#[test] #[test]
fn new_on_existing_file_works() { fn new_on_existing_file_works() {
for (content, size) in CASES { for (content, size) in cases() {
let file = create_file(content); let file = create_file(content);
let chk = Checked::new(file.path()).expect("creating `Checked` should succeed"); let chk = Checked::new(file.path()).expect("creating `Checked` should succeed");
@ -180,7 +180,7 @@ mod tests {
#[test] #[test]
fn hashing_works() { fn hashing_works() {
for (&(content, _), hash) in CASES.iter().zip(HASHES) { for (content, hash) in data().zip(HASHES_STD_GOOD) {
let file = create_file(content); let file = create_file(content);
let mut chk = Checked::new(file.path()).expect("creating `Checked` should succeed"); let mut chk = Checked::new(file.path()).expect("creating `Checked` should succeed");
@ -194,7 +194,7 @@ mod tests {
#[test] #[test]
fn hashing_again_errors() { fn hashing_again_errors() {
for (content, _) in CASES { for content in data() {
let file = create_file(content); let file = create_file(content);
let mut chk = Checked::new(file.path()).expect("creating `Checked` should succeed"); let mut chk = Checked::new(file.path()).expect("creating `Checked` should succeed");

View file

@ -52,3 +52,14 @@ impl<'t> Chunk<'t> {
self.offset + self.get_length() self.offset + self.get_length()
} }
} }
#[cfg(test)]
mod tests {
// TODO: placeholder module - the draft test below is still commented out,
// so this module currently contains no runnable tests.
// use super::*;
// #[test]
// fn basic_tests() {
// let mut foo = [0u8; 10];
// let fid = sharry::FileID("fid".to_string());
// }
}

View file

@ -124,35 +124,16 @@ pub trait FileTrait {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use crate::test_util::create_file; use crate::test_util::{
create_file,
data::{DATA_LENGTHS_BAD, HASHES_STD_BAD, HASHES_STD_GOOD, cases, cases_with},
};
use super::*; use super::*;
pub static CASES: [(&[u8], u64); 8] = [
(b"The quick brown fox jumps over the lazy dog", 43), // common pangram
(b"hello world", 11), // simple greeting
(b"", 0), // empty slice
(b"x", 1), // single-byte
(b"0123456789", 10), // numeric ASCII
(b"!@#$%^&*()_+-=[]{};':,.<>/?", 27), // punctuation
(b"RustLang1337", 12), // mixed alphanumeric
(b"foo\0bar\0baz", 11), // embedded nulls
];
pub static HASHES: [&str; 8] = [
"qK3Uvd39k+SHfSdG5igXsRY2Sh+nvBSNlQkLxzM7NnP4JAHPeqLkyx7NkCluPxTLVBP47Xe+cwRbE5FM3NapGA", // common pangram
"Ahzth5kpbOylV4MquUGlC0oR+DR4zxQfUfkz9lOrn7zAWgN83b7QbjCb8zSULE5YzfGkbiN5EczX/Pl4fLx/0A", // simple greeting
"eGoC90IBWQPGxv2FJVLScpEvR0DhWEdhiobiF/cfVBnSXhAxr+5YUxOJZESTTrBLkDpoWxRIt1XVb3Aa/pvizg", // empty slice
"CQk3etNREMr7KQnhhWcrfyco0fUJT4rWjW+sYnS/H0mUhagOo2TATtAG0pRZ6jy3xgAoDi+D4DJSmQb4iuMNCg", // single-byte
"UqCSwAW2Ib1X5QGgrtlQp2/vuwDQeqQ9rdb1NALMJUE3SfDTxi6MoKfbrjRIQa3qUdU/i2HZaaFdSmMYtXa4rA", // numeric ASCII
"Sr91qmX4R/Ly4HsJh5eiG3S1tuO81kwV0KPfRpn1j4jjrQoGL2I+SeKfcGvpXu3l/rfhGdJHF8ei775ZzdgK3Q", // punctuation
"Ox+zobaUmB8Ps410/TGOtjjLIJKaMUCwG/iFLNXjwRShuJAmtvQcK9Ahc9+SfD4Ci67HyPPorl7NGjN6LRrmlQ", // mixed alphanumeric
"a3rsGWE2kfvN6e2sVhioWP9NOmwLK9trzjc/GKXTPvvsiagiRSHMjlg5jy+bMepip68Pv69dY8TvTSFZES5Jzw", // embedded nulls
];
#[test] #[test]
fn compute_hash_as_expected() { fn compute_hash_as_expected() {
for (&(content, size), expected_hash) in CASES.iter().zip(HASHES) { for (content, size, expected_hash) in cases_with(HASHES_STD_GOOD) {
let file = create_file(content); let file = create_file(content);
// to capture progress updates from `compute_hash` // to capture progress updates from `compute_hash`
@ -168,18 +149,7 @@ mod tests {
#[test] #[test]
fn hash_size_mismatch() { fn hash_size_mismatch() {
let bad_sizes = [ for (content, good_size, bad_size) in cases_with(DATA_LENGTHS_BAD) {
36, // common pangram
12, // simple greeting
1, // empty slice
0, // single-byte
9, // numeric ASCII
24, // punctuation
13, // mixed alphanumeric
10, // embedded nulls
];
for (&(content, good_size), bad_size) in CASES.iter().zip(bad_sizes) {
let file = create_file(content); let file = create_file(content);
let callback = drop; let callback = drop;
@ -203,7 +173,7 @@ mod tests {
#[test] #[test]
fn hash_value_none() { fn hash_value_none() {
for (content, size) in CASES { for (content, size) in cases() {
let file = create_file(content); let file = create_file(content);
let callback = drop; let callback = drop;
@ -217,14 +187,9 @@ mod tests {
#[test] #[test]
fn hash_value_mismatch() { fn hash_value_mismatch() {
let bad_hashes = [ for ((content, size, good_hash), bad_hash) in
"invalid9k+SHfSdG5igXsRY2Sh+nvBSNlQkLxzM7NnP4JAHPeqLkyx7NkCluPxTLVBP47Xe+cwRbE5FM3NapGA", // common pangram cases_with(HASHES_STD_GOOD).zip(HASHES_STD_BAD)
"", // simple greeting {
"eGoC90IBWQPGxv2FJVLScpEvR0DhWEdhiobiG/cfVBnSXhAxr+5YUxOJZESTTrBLkDpoWxRIt1XVb3Aa/pvizg", // empty slice
"Hash", // single-byte
];
for ((&(content, size), good_hash), bad_hash) in CASES.iter().zip(HASHES).zip(bad_hashes) {
let file = create_file(content); let file = create_file(content);
let callback = drop; let callback = drop;

118
src/test_util/data.rs Normal file
View file

@ -0,0 +1,118 @@
/// Test dataset: the raw byte slices used as file contents by the unit tests.
///
/// The sibling tables in this module (`DATA_LENGTHS`, `HASHES_STD_GOOD`, ...)
/// list their entries in the same order as this array.
const DATA: [&[u8]; 8] = [
// empty slice
b"",
// single-byte
b"x",
// common ASCII pangram
b"The quick brown fox jumps over the lazy dog",
// ASCII with punctuation and digits
b"Rust v1.65.0 - Memory Safety, Speed, Concurrency!",
// simple Unicode (UTF-8) greeting
"こんにちは世界".as_bytes(),
// pseudo-random bytes with embedded NUL bytes
&[
0x3C, 0xA7, 0x5D, 0xE1, 0x4F, 0x99, 0x00, 0x20, 0x7F, 0xB3, 0xCD, 0x8A, 0x10, 0x55, 0xAA,
0xFF, 0x5E, 0xA3, 0x1F, 0xC8, 0x72, 0x4D, 0x99, 0x00, 0xB7, 0x3C, 0x8E, 0xAD, 0x26, 0xF1,
],
// long run of identical bytes (1 KiB of ASCII 'A')
&[b'A'; 1024],
// very large slice (10 MiB of zeroes)
&[0u8; 10 * 1024 * 1024],
];
/// lengths of the test dataset
const DATA_LENGTHS: [u64; 8] = [
DATA[0].len() as u64,
DATA[1].len() as u64,
DATA[2].len() as u64,
DATA[3].len() as u64,
DATA[4].len() as u64,
DATA[5].len() as u64,
DATA[6].len() as u64,
DATA[7].len() as u64,
];
/// Deliberately wrong lengths for the test dataset.
///
/// Entry `i` differs from the real length of `DATA[i]`, so pairing a dataset
/// entry with the value at the same index always yields a size mismatch.
pub const DATA_LENGTHS_BAD: [u64; 8] = [36, 12, 1, 0, 9, 24, 13, 10];
/// Known good hashes of the test dataset, in the same order as `DATA`.
///
/// using BLAKE2b, 512 bit, with unpadded Base64 (standard variant)
pub const HASHES_STD_GOOD: [&str; 8] = [
// empty slice
"eGoC90IBWQPGxv2FJVLScpEvR0DhWEdhiobiF/cfVBnSXhAxr+5YUxOJZESTTrBLkDpoWxRIt1XVb3Aa/pvizg",
// single-byte
"CQk3etNREMr7KQnhhWcrfyco0fUJT4rWjW+sYnS/H0mUhagOo2TATtAG0pRZ6jy3xgAoDi+D4DJSmQb4iuMNCg",
// common ASCII pangram
"qK3Uvd39k+SHfSdG5igXsRY2Sh+nvBSNlQkLxzM7NnP4JAHPeqLkyx7NkCluPxTLVBP47Xe+cwRbE5FM3NapGA",
// ASCII with punctuation and digits
"NOtceHp9LrSYpXvSP3ayPbgMUyX4hynBYt4KtHuwJDsv1ELco5QeUj9aJTYTqbw4KzRKY+RjsbR26N3smUeCmA",
// simple Unicode (UTF-8) greeting
"h3xQg25wr/XqaXgqXWJivbVgN89XQoZUN/JcSZB0jxOtkbVStY7hnO+pm3PnLv6yZ4ZDLrxzYpoBk05BR7Wo1A",
// pseudo-random bytes with embedded NUL bytes
"kiUYjOegDM9n1ryWtZhukpTuZ8oZbhi2onpXNl6pg16R+JZj5ty4uJZs44YbCu0A9m35Xs3bi/mxfbSulbo5Rg",
// long run of identical bytes (1 KiB of ASCII 'A')
"xwGOG01h2kco4CgjJlD9T2v5bM8XVuCrYzKTM4D0s7rCnOH+HR1H2S2Tmg43M+ym1A+AEPTE4J7iGljgntTdZA",
// very large slice (10 MiB of zeroes)
"xsHH9h63e1+254TSCQoWCl6L5eGOo0Zg+ubtQC8Inwj7dwW7oxg0kYCrnkuTRj+7bVYNjlRSDOa8OIdInp73wA",
];
/// Deliberately corrupted counterparts of `HASHES_STD_GOOD`, one per entry.
///
/// Each value is derived from the good hash at the same index (or replaces it
/// outright) so that it must not match the real hash of that dataset entry.
pub const HASHES_STD_BAD: [&str; 8] = [
// off by one character (last `z` -> `y`)
"eGoC90IBWQPGxv2FJVLScpEvR0DhWEdhiobiF/cfVBnSXhAxr+5YUxOJZESTTrBLkDpoWxRIt1XVb3Aa/pviyg",
// truncated by dropping the final 4 chars
"CQk3etNREMr7KQnhhWcrfyco0fUJT4rWjW+sYnS/H0mUhagOo2TATtAG0pRZ6jy3xgAoDi+D4DJSmQb4iu",
// contains a non-Base64 character (`#`)
"qK3Uvd39k+SHfSdG5igXsRY2Sh+nvBSNlQkLxzM7NnP4JAHPeqLkyx7NkCluPxTLVBP47Xe+cwRbE5FM3NapG#",
// too long, extra `AA` at end
"NOtceHp9LrSYpXvSP3ayPbgMUyX4hynBYt4KtHuwJDsv1ELco5QeUj9aJTYTqbw4KzRKY+RjsbR26N3smUeCmAAA",
// one character altered at the front (`h` -> `H`)
"H3xQg25wr/XqaXgqXWJivbVgN89XQoZUN/JcSZB0jxOtkbVStY7hnO+pm3PnLv6yZ4ZDLrxzYpoBk05BR7Wo1A",
// garbled midsection
"kiUYjOegDM9n1ryWtZhukpTuZ8oZbhi2onpXYZ6pg16R+JZj5ty4uJZs44YbCu0A9m35Xs3bi/mxfbSulbo5Rg",
// entirely different length (too short)
"xwGOG01h2kco4CgjJlD9T2v5bM8XVuCrYzKTM4D0s7rCnO",
// correct length, but all `A`s (obviously wrong)
"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
];
// /// known good hashes of the test dataset
// ///
// /// using BLAKE2b, 128 bit, with unpadded Base64 (url safe variant)
// const HASHES_URL_GOOD: [&str; 8] = [
// // empty slice
// "########################################################",
// // single-byte
// "########################################################",
// // common ascii pangram
// "",
// // ascii with punctuation and digits
// "",
// // simple unicode (utf-8) greeting
// "",
// // pseudo-random bytes with embedded nuls
// "",
// // long run of identical bytes (1 kib of ascii 'a')
// "",
// // very large slice (10 mib of zeroes)
// "",
// ];
/// Iterates over the byte slices of the test dataset, in declaration order.
///
/// Yields each entry of `DATA` by value (the slice reference itself), so
/// callers get `&'static [u8]` items rather than references to references.
pub fn data() -> impl Iterator<Item = &'static [u8]> {
    // `copied` replaces the hand-written `map(|item| *item)` dereference
    DATA.iter().copied()
}
/// Pairs every dataset entry with its byte length.
///
/// Items are `(content, length)` tuples built position by position from
/// `data()` and `DATA_LENGTHS`.
pub fn cases() -> impl Iterator<Item = (&'static [u8], u64)> {
    std::iter::zip(data(), DATA_LENGTHS)
}
/// Extends every `(content, length)` case with one extra value from `addons`.
///
/// Items are flat `(content, length, addon)` triples. Iteration stops as soon
/// as either `cases()` or `addons` runs out, so a shorter `addons` silently
/// limits how many cases are produced.
pub fn cases_with<T>(addons: T) -> impl Iterator<Item = (&'static [u8], u64, T::Item)>
where
    T: IntoIterator,
{
    std::iter::zip(cases(), addons).map(|(case, extra)| {
        // flatten the nested pair into a single triple
        let (bytes, size) = case;
        (bytes, size, extra)
    })
}

View file

@ -1,5 +1,7 @@
#![cfg(test)] #![cfg(test)]
pub mod data;
use std::{fmt, io::Write}; use std::{fmt, io::Write};
use tempfile::NamedTempFile; use tempfile::NamedTempFile;