shrupl/src/test_util/data.rs
2025-07-07 19:40:56 +00:00

118 lines
4.5 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/// Raw byte inputs shared by the test helpers in this module.
///
/// The eight entries run from trivial to large. Index `i` here corresponds to
/// index `i` of the companion tables (`DATA_LENGTHS`, `HASHES_STD_GOOD`, ...).
const DATA: [&[u8]; 8] = [
    // [0] nothing at all
    b"",
    // [1] exactly one byte
    b"x",
    // [2] the classic English pangram, plain ASCII
    b"The quick brown fox jumps over the lazy dog",
    // [3] ASCII mixing letters, digits and punctuation
    b"Rust v1.65.0 - Memory Safety, Speed, Concurrency!",
    // [4] multi-byte UTF-8 text (a Japanese greeting)
    "こんにちは世界".as_bytes(),
    // [5] 30 arbitrary bytes, two of which are NUL
    &[
        0x3C, 0xA7, 0x5D, 0xE1, 0x4F, 0x99, 0x00, 0x20, 0x7F, 0xB3, 0xCD, 0x8A, 0x10, 0x55, 0xAA,
        0xFF, 0x5E, 0xA3, 0x1F, 0xC8, 0x72, 0x4D, 0x99, 0x00, 0xB7, 0x3C, 0x8E, 0xAD, 0x26, 0xF1,
    ],
    // [6] 1 KiB filled with ASCII 'A'
    &[b'A'; 1024],
    // [7] 10 MiB of zero bytes
    &[0u8; 10 * 1024 * 1024],
];
/// lengths of the test dataset
const DATA_LENGTHS: [u64; 8] = [
DATA[0].len() as u64,
DATA[1].len() as u64,
DATA[2].len() as u64,
DATA[3].len() as u64,
DATA[4].len() as u64,
DATA[5].len() as u64,
DATA[6].len() as u64,
DATA[7].len() as u64,
];
/// Deliberately wrong byte counts, one per `DATA` entry.
///
/// No value equals the real length of the slice at the same index, so each
/// one can be used as a mismatching length for that entry.
pub const DATA_LENGTHS_BAD: [u64; 8] = [
    36, // real length: 0
    12, // real length: 1
    1,  // real length: 43
    0,  // real length: 49
    9,  // real length: 21
    24, // real length: 30
    13, // real length: 1024
    10, // real length: 10 MiB
];
/// Reference digests for the test dataset, one per `DATA` entry.
///
/// Each string is a 512-bit `BLAKE2b` hash encoded as standard-alphabet
/// Base64 without padding, which always comes out at 86 characters.
pub const HASHES_STD_GOOD: [&str; 8] = [
    // [0] empty slice
    "eGoC90IBWQPGxv2FJVLScpEvR0DhWEdhiobiF/cfVBnSXhAxr+5YUxOJZESTTrBLkDpoWxRIt1XVb3Aa/pvizg",
    // [1] single byte
    "CQk3etNREMr7KQnhhWcrfyco0fUJT4rWjW+sYnS/H0mUhagOo2TATtAG0pRZ6jy3xgAoDi+D4DJSmQb4iuMNCg",
    // [2] ASCII pangram
    "qK3Uvd39k+SHfSdG5igXsRY2Sh+nvBSNlQkLxzM7NnP4JAHPeqLkyx7NkCluPxTLVBP47Xe+cwRbE5FM3NapGA",
    // [3] ASCII with punctuation and digits
    "NOtceHp9LrSYpXvSP3ayPbgMUyX4hynBYt4KtHuwJDsv1ELco5QeUj9aJTYTqbw4KzRKY+RjsbR26N3smUeCmA",
    // [4] UTF-8 greeting
    "h3xQg25wr/XqaXgqXWJivbVgN89XQoZUN/JcSZB0jxOtkbVStY7hnO+pm3PnLv6yZ4ZDLrxzYpoBk05BR7Wo1A",
    // [5] pseudo-random bytes with embedded NULs
    "kiUYjOegDM9n1ryWtZhukpTuZ8oZbhi2onpXNl6pg16R+JZj5ty4uJZs44YbCu0A9m35Xs3bi/mxfbSulbo5Rg",
    // [6] 1 KiB of ASCII 'A'
    "xwGOG01h2kco4CgjJlD9T2v5bM8XVuCrYzKTM4D0s7rCnOH+HR1H2S2Tmg43M+ym1A+AEPTE4J7iGljgntTdZA",
    // [7] 10 MiB of zeroes
    "xsHH9h63e1+254TSCQoWCl6L5eGOo0Zg+ubtQC8Inwj7dwW7oxg0kYCrnkuTRj+7bVYNjlRSDOa8OIdInp73wA",
];
/// Corrupted counterparts of `HASHES_STD_GOOD`, one per `DATA` entry.
///
/// Each string is broken in a different way relative to the good hash at the
/// same index, so none of them equals the digest it stands in for.
pub const HASHES_STD_BAD: [&str; 8] = [
    // [0] a single character differs: the final `z` became `y`
    "eGoC90IBWQPGxv2FJVLScpEvR0DhWEdhiobiF/cfVBnSXhAxr+5YUxOJZESTTrBLkDpoWxRIt1XVb3Aa/pviyg",
    // [1] the last four characters are missing
    "CQk3etNREMr7KQnhhWcrfyco0fUJT4rWjW+sYnS/H0mUhagOo2TATtAG0pRZ6jy3xgAoDi+D4DJSmQb4iu",
    // [2] ends in `#`, which is not part of the Base64 alphabet
    "qK3Uvd39k+SHfSdG5igXsRY2Sh+nvBSNlQkLxzM7NnP4JAHPeqLkyx7NkCluPxTLVBP47Xe+cwRbE5FM3NapG#",
    // [3] two surplus `A` characters appended
    "NOtceHp9LrSYpXvSP3ayPbgMUyX4hynBYt4KtHuwJDsv1ELco5QeUj9aJTYTqbw4KzRKY+RjsbR26N3smUeCmAAA",
    // [4] first character changed (`h` -> `H`)
    "H3xQg25wr/XqaXgqXWJivbVgN89XQoZUN/JcSZB0jxOtkbVStY7hnO+pm3PnLv6yZ4ZDLrxzYpoBk05BR7Wo1A",
    // [5] two characters in the middle replaced (`Nl` -> `YZ`)
    "kiUYjOegDM9n1ryWtZhukpTuZ8oZbhi2onpXYZ6pg16R+JZj5ty4uJZs44YbCu0A9m35Xs3bi/mxfbSulbo5Rg",
    // [6] far too short: only the first 46 characters survive
    "xwGOG01h2kco4CgjJlD9T2v5bM8XVuCrYzKTM4D0s7rCnO",
    // [7] as long as a real digest (86 characters) but nothing except `A`;
    //     written in chunks of ten so the length can be checked by eye
    concat!(
        "AAAAAAAAAA",
        "AAAAAAAAAA",
        "AAAAAAAAAA",
        "AAAAAAAAAA",
        "AAAAAAAAAA",
        "AAAAAAAAAA",
        "AAAAAAAAAA",
        "AAAAAAAAAA",
        "AAAAAA"
    ),
];
// /// known good hashes of the test dataset
// ///
// /// using BLAKE2b, 128 bit, with unpadded Base64 (url safe variant)
// const HASHES_URL_GOOD: [&str; 8] = [
// // empty slice
// "########################################################",
// // single-byte
// "########################################################",
// // common ascii pangram
// "",
// // ascii with punctuation and digits
// "",
// // simple unicode (utf-8) greeting
// "",
// // pseudo-random bytes with embedded nuls
// "",
// // long run of identical bytes (1 kib of ascii 'a')
// "",
// // very large slice (10 mib of zeroes)
// "",
// ];
/// Iterates over every slice of the test dataset, in table order.
///
/// The slices are `'static`, so the returned iterator borrows nothing from
/// the caller.
pub fn data() -> impl Iterator<Item = &'static [u8]> {
    // Fully-qualified call: consumes a copy of the table by value and yields
    // each `&'static [u8]` directly, regardless of edition.
    IntoIterator::into_iter(DATA)
}
/// Pairs each dataset slice with the length recorded for it in `DATA_LENGTHS`.
///
/// Yields `(bytes, length)` tuples in table order.
pub fn cases() -> impl Iterator<Item = (&'static [u8], u64)> {
    let expected_lengths = DATA_LENGTHS.iter().copied();
    data().zip(expected_lengths)
}
/// Extends every `cases()` tuple with one extra value taken from `addons`.
///
/// Yields `(bytes, length, addon)` triples. `addons` is consumed in step with
/// the cases, and iteration ends as soon as either side runs out.
pub fn cases_with<T>(addons: T) -> impl Iterator<Item = (&'static [u8], u64, T::Item)>
where
    T: IntoIterator,
{
    let paired = cases().zip(addons);
    // Flatten the nested `((bytes, len), extra)` produced by `zip` into a flat triple.
    paired.map(|((bytes, expected_len), extra)| (bytes, expected_len, extra))
}