// shrupl/src/test_util/data.rs

/// Test dataset: eight byte slices of differing shape and size.
///
/// The order of the entries matters: `DATA_LENGTHS` indexes into this array position by
/// position, and the hash tables in this file label their entries with the same descriptions.
const DATA: [&[u8]; 8] = [
    // empty slice
    b"",
    // single-byte
    b"x",
    // common ascii pangram
    b"The quick brown fox jumps over the lazy dog",
    // ascii with punctuation and digits
    b"Rust v1.65.0 - Memory Safety, Speed, Concurrency!",
    // simple unicode (utf-8) greeting
    "こんにちは世界".as_bytes(),
    // pseudo-random bytes with embedded nuls
    &[
        0x3C, 0xA7, 0x5D, 0xE1, 0x4F, 0x99, 0x00, 0x20, 0x7F, 0xB3, 0xCD, 0x8A, 0x10, 0x55, 0xAA,
        0xFF, 0x5E, 0xA3, 0x1F, 0xC8, 0x72, 0x4D, 0x99, 0x00, 0xB7, 0x3C, 0x8E, 0xAD, 0x26, 0xF1,
    ],
    // long run of identical bytes (1 KiB of ascii 'A')
    &[b'A'; 1024],
    // very large slice (10 MiB of zeroes)
    &[0u8; 10 * 1024 * 1024],
];
/// lengths of the test dataset
const DATA_LENGTHS: [u64; 8] = [
DATA[0].len() as u64,
DATA[1].len() as u64,
DATA[2].len() as u64,
DATA[3].len() as u64,
DATA[4].len() as u64,
DATA[5].len() as u64,
DATA[6].len() as u64,
DATA[7].len() as u64,
];
/// Intentionally wrong lengths: anything but the real lengths of the test dataset.
///
/// One value per dataset entry, in dataset order.
pub const DATA_LENGTHS_BAD: [u64; 8] = [
    36, // paired with the empty slice
    12, // paired with the single byte
    1,  // paired with the pangram
    0,  // paired with the punctuation/digits string
    9,  // paired with the utf-8 greeting
    24, // paired with the pseudo-random bytes
    13, // paired with the 1 KiB run
    10, // paired with the 10 MiB buffer
];
/// Known good hashes of the test dataset, one per entry and in dataset order.
///
/// Using `BLAKE2b`, 512 bit, with unpadded Base64 (standard variant); every string here is
/// 86 characters long.
pub const HASHES_STD_GOOD: [&str; 8] = [
    // empty slice
    "eGoC90IBWQPGxv2FJVLScpEvR0DhWEdhiobiF/cfVBnSXhAxr+5YUxOJZESTTrBLkDpoWxRIt1XVb3Aa/pvizg",
    // single-byte
    "CQk3etNREMr7KQnhhWcrfyco0fUJT4rWjW+sYnS/H0mUhagOo2TATtAG0pRZ6jy3xgAoDi+D4DJSmQb4iuMNCg",
    // common ascii pangram
    "qK3Uvd39k+SHfSdG5igXsRY2Sh+nvBSNlQkLxzM7NnP4JAHPeqLkyx7NkCluPxTLVBP47Xe+cwRbE5FM3NapGA",
    // ascii with punctuation and digits
    "NOtceHp9LrSYpXvSP3ayPbgMUyX4hynBYt4KtHuwJDsv1ELco5QeUj9aJTYTqbw4KzRKY+RjsbR26N3smUeCmA",
    // simple unicode (utf-8) greeting
    "h3xQg25wr/XqaXgqXWJivbVgN89XQoZUN/JcSZB0jxOtkbVStY7hnO+pm3PnLv6yZ4ZDLrxzYpoBk05BR7Wo1A",
    // pseudo-random bytes with embedded nuls
    "kiUYjOegDM9n1ryWtZhukpTuZ8oZbhi2onpXNl6pg16R+JZj5ty4uJZs44YbCu0A9m35Xs3bi/mxfbSulbo5Rg",
    // long run of identical bytes (1 KiB of ascii 'A')
    "xwGOG01h2kco4CgjJlD9T2v5bM8XVuCrYzKTM4D0s7rCnOH+HR1H2S2Tmg43M+ym1A+AEPTE4J7iGljgntTdZA",
    // very large slice (10 MiB of zeroes)
    "xsHH9h63e1+254TSCQoWCl6L5eGOo0Zg+ubtQC8Inwj7dwW7oxg0kYCrnkuTRj+7bVYNjlRSDOa8OIdInp73wA",
];
/// Corrupted counterparts of `HASHES_STD_GOOD`, one per entry and in the same order.
///
/// Each string is damaged in a different way; the comment above it names the damage.
pub const HASHES_STD_BAD: [&str; 8] = [
    // a single character changed near the end (the final 'z' became 'y')
    "eGoC90IBWQPGxv2FJVLScpEvR0DhWEdhiobiF/cfVBnSXhAxr+5YUxOJZESTTrBLkDpoWxRIt1XVb3Aa/pviyg",
    // truncated: the final 4 characters are missing
    "CQk3etNREMr7KQnhhWcrfyco0fUJT4rWjW+sYnS/H0mUhagOo2TATtAG0pRZ6jy3xgAoDi+D4DJSmQb4iu",
    // ends in a non-Base64 character ('#')
    "qK3Uvd39k+SHfSdG5igXsRY2Sh+nvBSNlQkLxzM7NnP4JAHPeqLkyx7NkCluPxTLVBP47Xe+cwRbE5FM3NapG#",
    // too long: an extra "AA" appended
    "NOtceHp9LrSYpXvSP3ayPbgMUyX4hynBYt4KtHuwJDsv1ELco5QeUj9aJTYTqbw4KzRKY+RjsbR26N3smUeCmAAA",
    // first character altered ('h' became 'H')
    "H3xQg25wr/XqaXgqXWJivbVgN89XQoZUN/JcSZB0jxOtkbVStY7hnO+pm3PnLv6yZ4ZDLrxzYpoBk05BR7Wo1A",
    // garbled midsection
    "kiUYjOegDM9n1ryWtZhukpTuZ8oZbhi2onpXYZ6pg16R+JZj5ty4uJZs44YbCu0A9m35Xs3bi/mxfbSulbo5Rg",
    // far too short
    "xwGOG01h2kco4CgjJlD9T2v5bM8XVuCrYzKTM4D0s7rCnO",
    // nothing but 'A' characters (obviously wrong)
    "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
];
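// TODO: the URL-safe table below is an unfinished placeholder; its values have not been
// filled in yet.
//
// /// known good hashes of the test dataset
// ///
// /// using `BLAKE2b`, 128 bit, with unpadded Base64 (URL-safe variant)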
// const HASHES_URL_GOOD: [&str; 8] = [
// // empty slice
// "########################################################",
// // single-byte
// "########################################################",
// // common ascii pangram
// "",
// // ascii with punctuation and digits
// "",
// // simple unicode (utf-8) greeting
// "",
// // pseudo-random bytes with embedded nuls
// "",
// // long run of identical bytes (1 kib of ascii 'a')
// "",
// // very large slice (10 mib of zeroes)
// "",
// ];
pub fn data() -> impl Iterator<Item = &'static [u8]> {
2025-07-07 19:40:56 +00:00
DATA.iter().copied()
}
/// Iterates over `(bytes, length)` test cases.
///
/// Each item pairs one slice produced by `data()` with the value at the same position in
/// `DATA_LENGTHS`.
pub fn cases() -> impl Iterator<Item = (&'static [u8], u64)> {
    let payloads = data();
    let lengths = DATA_LENGTHS;
    std::iter::zip(payloads, lengths)
}
/// Iterates over test cases with one extra value attached to each.
///
/// Zips `cases()` with `addons` and flattens every pair into a `(bytes, length, addon)` triple.
/// As with any zip, iteration stops as soon as either side runs out.
pub fn cases_with<T>(addons: T) -> impl Iterator<Item = (&'static [u8], u64, T::Item)>
where
    T: IntoIterator,
{
    let paired = cases().zip(addons);
    paired.map(|(case, extra)| {
        let (bytes, length) = case;
        (bytes, length, extra)
    })
}