implement better hashing

- calling `file::Checked::hash` in `AppState::from_args` was correct after all: when `AppState::rebuild_share` is called, the hashes are already in a known state.
- call `check_hash` in `AppState::try_resume`
- minor renamings
This commit is contained in:
Jörn-Michael Miehe 2025-06-25 10:44:36 +00:00
parent 11a5106473
commit 0b8bebad6e
8 changed files with 142 additions and 81 deletions

View file

@ -1,7 +1,7 @@
use std::{fmt, io, time::Duration};
use indicatif::{ProgressBar, ProgressDrawTarget};
use log::{debug, warn};
use log::{debug, info, warn};
use crate::{
cachefile::CacheFile,
@ -41,20 +41,79 @@ impl AppState {
}
}
pub fn try_resume(args: &Cli) -> Option<Self> {
let inner = CacheFile::try_resume(args)
.inspect_err(|e| debug!("could not resume from hash {:?}: {e}", args.get_hash()))
.ok()?;
pub fn try_resume(args: &Cli) -> sharry::Result<Self> {
/// Verifies the stored hash of a single file while reporting progress on `bar`.
///
/// The bar message is set to the file's name, then `FileTrait::check_hash` is
/// run with a callback that advances the bar by each reported byte count.
///
/// # Errors
///
/// * Propagates any error from `check_hash`, converted into the `sharry`
///   error type.
/// * Returns `sharry::ClientError::unknown` with a "Hash mismatch" message
///   when `check_hash` reports `Ok(false)`.
fn check_hash<'a>(file: &'a impl FileTrait<'a>, bar: &ProgressBar) -> sharry::Result<()> {
    bar.set_message(format!("checking {:?}", file.get_name()));

    // `?` performs the same error conversion the explicit `Err(e.into())` arm did.
    if file.check_hash(|bytes| bar.inc(bytes))? {
        return Ok(());
    }

    Err(sharry::ClientError::unknown(format!(
        "Hash mismatch for file {:?}!",
        file.get_name()
    )))
}
Some(Self::new(new_http(args.get_timeout()), inner))
let inner = CacheFile::try_resume(args)?;
// TODO CLI switch begin
info!("Checking hashes for {inner:?}");
// BOOKMARK assumption: total file size < 2 EiB
let total_size = {
let upl_size = if let Some(upl) = inner.peek_uploading() {
upl.get_size()
} else {
0
};
upl_size + inner.queue().iter().map(|&f| f.get_size()).sum::<u64>()
};
let bar = new_progressbar();
bar.set_draw_target(ProgressDrawTarget::stderr());
bar.set_length(total_size);
bar.enable_steady_tick(Duration::from_millis(50));
if let Some(upl) = inner.peek_uploading() {
check_hash(upl, &bar)?;
}
for chk in inner.queue() {
check_hash(chk, &bar)?;
}
bar.finish_with_message("finished checking files");
// TODO CLI switch end
Ok(Self::new(new_http(args.get_timeout()), inner))
}
pub fn from_args(args: &Cli) -> sharry::Result<Self> {
// TODO CLI switch begin
let mut files = args.files.clone();
let bar = new_progressbar();
bar.set_draw_target(ProgressDrawTarget::stderr());
// BOOKMARK assumption: total file size < 2 EiB
bar.set_length(files.iter().map(FileTrait::get_size).sum());
bar.enable_steady_tick(Duration::from_millis(50));
for chk in &mut files {
bar.set_message(format!("hashing {:?}", chk.get_name()));
chk.hash(|bytes| bar.inc(bytes))?;
debug!("{chk:?}");
}
bar.finish_with_message("finished hashing files");
// TODO CLI switch end
let http = new_http(args.get_timeout());
let share_id = http.share_create(&args.get_uri(), &args.alias, args.get_share_request())?;
Ok(Self::new(http, CacheFile::from_args(args, share_id)?))
Ok(Self::new(http, CacheFile::from_args(args, share_id, files)))
}
fn with_progressbar(&mut self, f: impl FnOnce(&ProgressBar), drop_bar: bool) {
@ -123,7 +182,7 @@ impl AppState {
self.drop_progressbar(ProgressBar::finish);
}
Ok(self.inner.peek_uploading().is_none() && self.inner.queue_empty())
Ok(self.inner.peek_uploading().is_none() && self.inner.queue().is_empty())
}
pub fn rewind_chunk(mut self) -> Option<Self> {
@ -141,15 +200,19 @@ impl AppState {
let share_id =
self.http
.share_create(&args.get_uri(), &args.alias, args.get_share_request())?;
let files = args.files.clone();
Ok(Self::new(self.http, CacheFile::from_args(args, share_id)?))
Ok(Self::new(
self.http,
CacheFile::from_args(args, share_id, files),
))
}
/// Saves the wrapped state by delegating to `self.inner.save()`.
///
/// # Errors
///
/// Returns the `io::Error` reported by the inner `save` call unchanged.
pub fn save(&self) -> io::Result<()> {
self.inner.save()
}
pub fn clear(self) -> io::Result<()> {
self.inner.clear()
pub fn discard(self) -> io::Result<()> {
self.inner.discard()
}
}

View file

@ -2,18 +2,15 @@ use std::{
collections::VecDeque,
fs,
io::{self, Write},
path::PathBuf,
time::Duration,
path::{Path, PathBuf},
};
use indicatif::ProgressDrawTarget;
use log::{debug, trace};
use log::trace;
use serde::{Deserialize, Serialize};
use crate::{
cli::Cli,
file::{self, Chunk, FileTrait},
output::new_progressbar,
file::{self, Chunk},
sharry::{self, Client, Uri},
};
@ -59,39 +56,19 @@ impl CacheFile {
Ok(Self { file_name, ..state })
}
pub fn from_args(args: &Cli, share_id: String) -> io::Result<Self> {
// TODO CLI switch begin
let mut files = args.files.clone();
let bar = new_progressbar();
bar.set_draw_target(ProgressDrawTarget::stderr());
// BOOKMARK assumption: total file size < 2 EiB
bar.set_length(files.iter().map(|f| f.get_size()).sum());
bar.enable_steady_tick(Duration::from_millis(50));
for chk in &mut files {
bar.set_message(format!("hashing {:?}", chk.get_name()));
chk.hash(|bytes| bar.inc(bytes))?;
debug!("{chk:?}");
}
bar.finish_with_message("finished hashing files");
// TODO CLI switch end
Ok(Self {
pub fn from_args(args: &Cli, share_id: String, files: Vec<file::Checked>) -> Self {
Self {
file_name: Self::cache_file(args),
uri: args.get_uri(),
alias_id: args.alias.clone(),
share_id,
uploading: None,
files: files.into(),
})
}
}
pub fn queue_empty(&self) -> bool {
self.files.is_empty()
/// Returns references to every entry of `self.files`, in iteration order.
///
/// A new `Vec` is collected on every call, so callers that need the queue
/// more than once may want to keep the returned vector around.
pub fn queue(&self) -> Vec<&file::Checked> {
self.files.iter().collect()
}
pub fn get_uploading(
@ -173,10 +150,18 @@ impl CacheFile {
Ok(())
}
pub fn clear(self) -> io::Result<()> {
fs::remove_file(&self.file_name)?;
fn remove(path: &Path) -> io::Result<()> {
fs::remove_file(path)?;
trace!("removed {:?}", self.file_name.display());
trace!("removed {:?}", path.display());
Ok(())
}
/// Best-effort removal of the cache file located at `Self::cache_file(args)`.
///
/// The result of `Self::remove` is discarded, so a failure to delete the
/// file (for example because it does not exist) is silently ignored.
pub fn clear_any(args: &Cli) {
let _ = Self::remove(&Self::cache_file(args));
}
/// Consumes this `CacheFile` and removes the file at `self.file_name`.
///
/// # Errors
///
/// Returns the error from `Self::remove` if the file could not be deleted.
pub fn discard(self) -> io::Result<()> {
Self::remove(&self.file_name)
}
}

View file

@ -102,6 +102,6 @@ impl<'t> FileTrait<'t> for Checked {
}
fn check_hash(&self, on_progress: impl Fn(u64)) -> io::Result<bool> {
super::check_file_hash(&self.path, self.size, &self.hash, on_progress)
super::check_file_hash(&self.path, self.size, self.hash.as_ref(), on_progress)
}
}

View file

@ -14,12 +14,10 @@ use blake2b_simd::Params as Blake2b;
pub use checked::Checked;
pub use chunk::Chunk;
use log::debug;
pub use uploading::Uploading;
fn compute_file_hash<P>(path: P, size: u64, on_progress: impl Fn(u64)) -> io::Result<String>
where
P: AsRef<Path>,
{
fn compute_file_hash(path: &Path, size: u64, on_progress: impl Fn(u64)) -> io::Result<String> {
let mut file = fs::File::open(path)?;
let mut hasher = Blake2b::new().hash_length(64).to_state();
@ -39,23 +37,29 @@ where
if bytes_read != size {
return Err(io::Error::other(format!(
"Hashed {bytes_read:?} bytes, known file size {:?}!",
size
"Hashed {bytes_read:?} bytes, known file size {size:?}!"
)));
}
Ok(Base64::encode_string(hasher.finalize().as_bytes()))
let result = Base64::encode_string(hasher.finalize().as_bytes());
debug!("hashed {:?}: {result:?}", path.display());
Ok(result)
}
fn check_file_hash(
path: impl AsRef<Path>,
path: &Path,
size: u64,
hash: &Option<String>,
hash: Option<&String>,
on_progress: impl Fn(u64),
) -> io::Result<bool> {
let Some(hash) = hash else { return Ok(false) };
let Some(hash) = hash else {
debug!("no hash to check for {:?}!", path.display());
return Ok(false);
};
Ok(*hash == compute_file_hash(path, size, on_progress)?)
let result = *hash == compute_file_hash(path, size, on_progress)?;
debug!("matches {:?}: {result:?}", *hash);
Ok(result)
}
pub trait FileTrait<'t> {

View file

@ -102,6 +102,6 @@ impl<'t> FileTrait<'t> for Uploading {
}
fn check_hash(&self, on_progress: impl Fn(u64)) -> io::Result<bool> {
super::check_file_hash(&self.path, self.size, &self.hash, on_progress)
super::check_file_hash(&self.path, self.size, self.hash.as_ref(), on_progress)
}
}

View file

@ -35,7 +35,32 @@ fn main() {
println!("{} to {}!", style("Welcome").magenta().bold(), *SHRUPL);
let mut state = AppState::try_resume(&args)
let resumed = AppState::try_resume(&args);
let check_ctrlc = {
let stop = Arc::new(AtomicBool::new(false));
let stop_ctrlc = stop.clone();
ctrlc::set_handler(move || {
stop_ctrlc.store(true, Ordering::SeqCst);
eprintln!("{} stopping as soon as possible!", *SHRUPL);
})
.expect("Error setting Ctrl-C handler");
move || {
if stop.load(Ordering::SeqCst) {
process::exit(255);
}
}
};
let mut state = resumed
.inspect_err(|e| {
cachefile::CacheFile::clear_any(&args);
Log::handle(e);
info!("could not resume from hash {:?}: {e}", args.get_hash());
})
.ok()
.and_then(|state| output::prompt_continue().then_some(state))
.unwrap_or_else(|| match AppState::from_args(&args) {
Ok(state) => {
@ -50,27 +75,11 @@ fn main() {
}
});
let check_ctrlc = {
let stop = Arc::new(AtomicBool::new(false));
let stop_ctrlc = stop.clone();
ctrlc::set_handler(move || {
stop_ctrlc.store(true, Ordering::SeqCst);
info!("stopping as soon as possible ...");
})
.expect("Error setting Ctrl-C handler");
move || {
if stop.load(Ordering::SeqCst) {
process::exit(255);
}
}
};
check_ctrlc();
info!("continuing with state: {state:#?}");
let fns_magenta = output::style_all(&args.file_names(), StyledObject::magenta).join(", ");
println!("{} is uploading: {fns_magenta}", *SHRUPL);
let mut buffer = vec![0; args.chunk_size * 1024 * 1024];
@ -133,7 +142,7 @@ fn main() {
check_ctrlc();
}
state.clear().unwrap_or_else(|e| {
state.discard().unwrap_or_else(|e| {
Log::warning(format_args!("Failed to remove state: {e}"));
});

View file

@ -20,7 +20,7 @@ impl AsRef<[u8]> for Uri {
impl Uri {
pub fn new(protocol: impl fmt::Display, base_url: impl fmt::Display) -> Self {
Self(format!("{}://{}", protocol, base_url))
Self(format!("{protocol}://{base_url}"))
}
fn endpoint(&self, path: fmt::Arguments) -> String {

View file

@ -118,7 +118,7 @@ pub enum ClientError {
#[error("Invalid {0}")]
InvalidParameter(Parameter),
#[error("Unknown error: {0}")]
#[error("{0}")]
Unknown(String),
}