mod checked; mod chunk; mod uploading; use std::{ffi::OsStr, fs, io::Read, path::Path}; use base64::prelude::{BASE64_STANDARD_NO_PAD as BASE64, Engine}; use blake2b_simd::Params as Blake2b; pub use checked::Checked; pub use chunk::Chunk; use log::{debug, warn}; pub use uploading::Uploading; /// how many bytes to hash at once (default: 4 MiB) /// /// size must never exceed 2 EiB const HASH_CHUNK_SIZE: usize = 4 * 1024 * 1024; /// compute hash for a file given its path. /// Hash function: `BLAKE2b`, 512 bit /// /// # Params /// /// - `path` to the file to hash /// - `size` of that file /// - `on_progress` will be called for each processed chunk (max. `HASH_CHUNK_SIZE`) /// /// # Errors /// /// - from `fs::File::{open, read}` /// - Mismatch if given `size` does not match the file's size fn compute_hash(path: &Path, size: u64, mut on_progress: impl FnMut(u64)) -> crate::Result { let mut file = fs::File::open(path)?; // Blake2b-512 hasher (64 * 8 bit) let mut hasher = Blake2b::new().hash_length(64).to_state(); // buffer let mut buffer = vec![0; HASH_CHUNK_SIZE]; let mut bytes_read = 0; loop { let n = file.read(&mut buffer)?; if n == 0 { break; } hasher.update(&buffer[..n]); bytes_read += n as u64; on_progress(n as u64); } if bytes_read != size { return crate::Error::mismatch(size, bytes_read); } let result = BASE64.encode(hasher.finalize()); debug!("hashed {:?}: {result:?}", path.display()); Ok(result) } /// check hash for a file given its path, return Ok(()) on success /// /// # Params /// /// - everything from `compute_hash` /// - optionally, known `hash` /// /// # Errors /// /// - from `file::compute_hash` /// - Mismatch if `hash` is `None` /// - Mismatch if given `hash` does not match the computed hash fn check_hash( path: &Path, size: u64, hash: Option<&str>, on_progress: impl FnMut(u64), ) -> crate::Result<()> { // check if hash is None let Some(expected) = hash else { return crate::Error::mismatch("hash", path.display()); }; // compute and check new hash let actual = &compute_hash(path, size, on_progress)?; if expected == actual { debug!("hash matches {expected:?}"); Ok(()) } else { warn!("hash mismatch for file {:?}", path.display()); crate::Error::mismatch(expected, actual) } } pub trait FileTrait { /// extract the filename part of a `Path` reference /// /// # Panics /// /// Expects `path::Path::file_name` and `ffi::OsStr::to_str` to succeed on the given path fn extract_file_name(p: &Path) -> &str { p.file_name() .and_then(OsStr::to_str) .expect("bad file name") } /// get a reference to the file's name /// /// Uses `file::FileTrait::extract_file_name`, which may **panic**! fn get_name(&self) -> &str; /// get the file's size fn get_size(&self) -> u64; /// check this file's hash, return Ok(()) on success /// /// # Errors /// /// - from `file::check_hash` fn check_hash(&self, on_progress: impl FnMut(u64)) -> crate::Result<()>; } #[cfg(test)] mod tests { use tempfile::TempDir; use crate::test_util::{ create_file, data::{DATA_LENGTHS_BAD, HASHES_STD_BAD, HASHES_STD_GOOD, cases, cases_with}, }; use super::*; #[test] fn compute_hash_as_expected() { for (content, size, expected_hash) in cases_with(HASHES_STD_GOOD) { let file = create_file(content); // to capture progress updates from `compute_hash` let mut read_total = 0; let callback = |n| read_total += n; let hash = compute_hash(file.path(), size, callback).unwrap(); assert_eq!(hash, expected_hash); assert_eq!(read_total, size); } } #[test] fn compute_hash_nonexistent_file() { let nex_path = { // this is deleted at the end of this block, so will stop to exist let nex_file = create_file(&[]); nex_file.path().to_owned() }; let err = compute_hash(&nex_path, 0, drop).unwrap_err(); assert!(matches!(err, crate::Error::StdIo(e) if e.kind() == std::io::ErrorKind::NotFound)); } #[test] fn compute_hash_directory() { let dir = TempDir::new().unwrap(); let err = compute_hash(dir.path(), 0, drop).unwrap_err(); assert!( matches!(err, crate::Error::StdIo(e) if e.kind() == std::io::ErrorKind::IsADirectory) ); } #[test] fn hash_size_mismatch() { for (content, good_size, bad_size) in cases_with(DATA_LENGTHS_BAD) { let file = create_file(content); { // `compute_hash` with bad size let err = compute_hash(file.path(), bad_size, drop).unwrap_err(); assert!(err.is_mismatch(bad_size.to_string(), good_size.to_string())); } { // `check_hash` with bad size let err = check_hash(file.path(), bad_size, Some("foobar"), drop).unwrap_err(); assert!(err.is_mismatch(bad_size.to_string(), good_size.to_string())); } } } #[test] fn hash_value_none() { for (content, size) in cases() { let file = create_file(content); // `check_hash` with no hash let err = check_hash(file.path(), size, None, drop).unwrap_err(); assert!(err.is_mismatch("hash", file.path().display().to_string())); } } #[test] fn hash_value_mismatch() { for ((content, size, good_hash), bad_hash) in cases_with(HASHES_STD_GOOD).zip(HASHES_STD_BAD) { let file = create_file(content); // `check_hash` with bad hash let err = check_hash(file.path(), size, Some(bad_hash), drop).unwrap_err(); assert!(err.is_mismatch(bad_hash, good_hash)); } } }