implement better hashing

- move hashing back into `cachefile` to correctly handle the `rebuild_share` case
- add `-n` / `--no-hash` CLI switch to explicitly skip hashing
Jörn-Michael Miehe 2025-06-25 22:47:55 +00:00
parent 1f9c247439
commit 465c857126
4 changed files with 101 additions and 95 deletions
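
Both construction paths now hand `CacheFile::from_args` a share-creation callback instead of a ready-made `share_id`, so the hashing gate inside `from_args` also covers `rebuild_share`. Below is a minimal, self-contained sketch of that callback shape; the `Cli`, `CacheFile`, and error types here are simplified stand-ins for illustration, not the crate's real definitions.

type Result<T> = std::result::Result<T, String>;

// Stand-in for the real clap-derived Cli.
struct Cli {
    files: Vec<String>,
    no_hash: bool,
}

// Stand-in for the on-disk cache state.
struct CacheFile {
    share_id: String,
    files: Vec<String>,
}

impl CacheFile {
    // Hashing lives here now, so every caller that builds a CacheFile
    // (the initial run *and* rebuild_share) goes through the same gate.
    fn from_args(args: &Cli, new_share: impl FnOnce(&Cli) -> Result<String>) -> Result<Self> {
        let files = args.files.clone();
        if !args.no_hash {
            // hash `files` here (elided in this sketch)
        }
        Ok(Self {
            // the share is only created once hashing has succeeded
            share_id: new_share(args)?,
            files,
        })
    }
}

// Stand-in for the HTTP call that creates a share on the server.
fn new_share(args: &Cli) -> Result<String> {
    Ok(format!("share-for-{}-files", args.files.len()))
}

fn main() -> Result<()> {
    let args = Cli { files: vec!["a.bin".into()], no_hash: false };
    let cache = CacheFile::from_args(&args, new_share)?;
    println!("created {} for {} file(s)", cache.share_id, cache.files.len());
    Ok(())
}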


@@ -49,8 +49,3 @@
 - "continue" and "new" flags to avoid user interaction
 - "quiet" flag to disable output entirely
 - some switch to change log to "pretty-print"
-- hashing
-- store file hashes with all `file::*` variants
-- check hashes on "continue"
-- CLI switch to skip hashing


@@ -1,7 +1,7 @@
 use std::{fmt, io, time::Duration};
 
 use indicatif::{ProgressBar, ProgressDrawTarget};
-use log::{debug, info, warn};
+use log::{debug, warn};
 
 use crate::{
     cachefile::CacheFile,
@@ -26,13 +26,17 @@ impl fmt::Debug for AppState {
     }
 }
 
-fn new_http(timeout: Option<Duration>) -> ureq::Agent {
+fn new_http(args: &Cli) -> ureq::Agent {
     ureq::Agent::config_builder()
-        .timeout_global(timeout)
+        .timeout_global(args.get_timeout())
         .build()
         .into()
 }
 
+fn new_share(args: &Cli) -> error::Result<String> {
+    new_http(args).share_create(&args.get_uri(), &args.alias, args.get_share_request())
+}
+
 impl AppState {
     fn new(http: ureq::Agent, inner: CacheFile) -> Self {
         Self {
@@ -43,78 +47,14 @@ impl AppState {
     }
 
     pub fn try_resume(args: &Cli) -> error::Result<Self> {
-        fn check_hash<'a>(file: &'a impl FileTrait<'a>, bar: &ProgressBar) -> error::Result<()> {
-            bar.set_message(format!("checking {:?}", file.get_name()));
-
-            match file.check_hash(|bytes| bar.inc(bytes)) {
-                Ok(true) => Ok(()),
-                Ok(false) => Err(error::Error::unknown(format!(
-                    "Hash mismatch for file {:?}!",
-                    file.get_name()
-                ))),
-                Err(e) => Err(e.into()),
-            }
-        }
-
-        let inner = CacheFile::try_resume(args)?;
-
-        // TODO CLI switch begin
-        info!("Checking hashes for {inner:?}");
-
-        // BOOKMARK assumption: total file size < 2 EiB
-        let total_size = {
-            let upl_size = if let Some(upl) = inner.peek_uploading() {
-                upl.get_size()
-            } else {
-                0
-            };
-
-            upl_size + inner.queue().iter().map(|&f| f.get_size()).sum::<u64>()
-        };
-
-        let bar = new_progressbar();
-        bar.set_draw_target(ProgressDrawTarget::stderr());
-        bar.set_length(total_size);
-        bar.enable_steady_tick(Duration::from_millis(50));
-
-        if let Some(upl) = inner.peek_uploading() {
-            check_hash(upl, &bar)?;
-        }
-
-        for chk in inner.queue() {
-            check_hash(chk, &bar)?;
-        }
-
-        bar.finish_with_message("finished checking files");
-        // TODO CLI switch end
-
-        Ok(Self::new(new_http(args.get_timeout()), inner))
+        Ok(Self::new(new_http(args), CacheFile::try_resume(args)?))
     }
 
     pub fn from_args(args: &Cli) -> error::Result<Self> {
-        // TODO CLI switch begin
-
-        let mut files = args.files.clone();
-
-        let bar = new_progressbar();
-        bar.set_draw_target(ProgressDrawTarget::stderr());
-        // BOOKMARK assumption: total file size < 2 EiB
-        bar.set_length(files.iter().map(FileTrait::get_size).sum());
-        bar.enable_steady_tick(Duration::from_millis(50));
-
-        for chk in &mut files {
-            bar.set_message(format!("hashing {:?}", chk.get_name()));
-            chk.hash(|bytes| bar.inc(bytes))?;
-            debug!("{chk:?}");
-        }
-
-        bar.finish_with_message("finished hashing files");
-        // TODO CLI switch end
-
-        let http = new_http(args.get_timeout());
-        let share_id = http.share_create(&args.get_uri(), &args.alias, args.get_share_request())?;
-
-        Ok(Self::new(http, CacheFile::from_args(args, share_id, files)))
+        Ok(Self::new(
+            new_http(args),
+            CacheFile::from_args(args, new_share)?,
+        ))
     }
 
     fn with_progressbar(&mut self, f: impl FnOnce(&ProgressBar), drop_bar: bool) {
@@ -199,15 +139,7 @@ impl AppState {
     }
 
     pub fn rebuild_share(self, args: &Cli) -> error::Result<Self> {
-        let share_id =
-            self.http
-                .share_create(&args.get_uri(), &args.alias, args.get_share_request())?;
-        let files = args.files.clone();
-
-        Ok(Self::new(
-            self.http,
-            CacheFile::from_args(args, share_id, files),
-        ))
+        Ok(Self::new(self.http, CacheFile::from_args(args, new_share)?))
     }
 
     pub fn save(&self) -> io::Result<()> {


@@ -3,15 +3,18 @@ use std::{
     fs,
     io::{self, Write},
     path::{Path, PathBuf},
+    time::Duration,
 };
 
-use log::trace;
+use indicatif::{ProgressBar, ProgressDrawTarget};
+use log::{info, trace};
 use serde::{Deserialize, Serialize};
 
 use crate::{
     cli::Cli,
     error,
-    file::{self, Chunk},
+    file::{self, Chunk, FileTrait},
+    output::new_progressbar,
     sharry::{Client, Uri},
 };
@@ -45,7 +48,7 @@ impl CacheFile {
         file_name
     }
 
-    pub fn try_resume(args: &Cli) -> io::Result<Self> {
+    pub fn try_resume(args: &Cli) -> error::Result<Self> {
         let file_name = Self::cache_file(args);
 
         let state: Self = {
@@ -54,18 +57,85 @@ impl CacheFile {
             serde_json::from_reader(reader).map_err(io::Error::other)?
         };
 
+        if args.should_hash() {
+            fn check_hash<'a>(
+                file: &'a impl FileTrait<'a>,
+                bar: &ProgressBar,
+            ) -> error::Result<()> {
+                bar.set_message(format!("checking {:?}", file.get_name()));
+
+                match file.check_hash(|bytes| bar.inc(bytes)) {
+                    Ok(true) => Ok(()),
+                    Ok(false) => Err(error::Error::unknown(format!(
+                        "Hash mismatch for file {:?}!",
+                        file.get_name()
+                    ))),
+                    Err(e) => Err(e.into()),
+                }
+            }
+
+            info!("checking files in {state:?}");
+
+            // BOOKMARK assumption: total file size < 2 EiB
+            let total_size = {
+                let upl_size = if let Some(upl) = state.peek_uploading() {
+                    upl.get_size()
+                } else {
+                    0
+                };
+
+                upl_size + state.queue().iter().map(|&f| f.get_size()).sum::<u64>()
+            };
+
+            let bar = new_progressbar();
+            bar.set_draw_target(ProgressDrawTarget::stderr());
+            bar.set_length(total_size);
+            bar.enable_steady_tick(Duration::from_millis(50));
+
+            if let Some(upl) = state.peek_uploading() {
+                check_hash(upl, &bar)?;
+            }
+
+            for chk in state.queue() {
+                check_hash(chk, &bar)?;
+            }
+
+            bar.finish_with_message("finished checking files");
+        }
+
         Ok(Self { file_name, ..state })
     }
 
-    pub fn from_args(args: &Cli, share_id: String, files: Vec<file::Checked>) -> Self {
-        Self {
+    pub fn from_args(
+        args: &Cli,
+        new_share: impl FnOnce(&Cli) -> error::Result<String>,
+    ) -> error::Result<Self> {
+        let mut files = args.files.clone();
+
+        if args.should_hash() {
+            info!("hashing files {files:?}");
+            let bar = new_progressbar();
+            bar.set_draw_target(ProgressDrawTarget::stderr());
+            // BOOKMARK assumption: total file size < 2 EiB
+            bar.set_length(files.iter().map(FileTrait::get_size).sum());
+            bar.enable_steady_tick(Duration::from_millis(50));
+
+            for chk in &mut files {
+                bar.set_message(format!("hashing {:?}", chk.get_name()));
+                chk.hash(|bytes| bar.inc(bytes))?;
+            }
+
+            bar.finish_with_message("finished hashing files");
+        }
+
+        Ok(Self {
            file_name: Self::cache_file(args),
            uri: args.get_uri(),
            alias_id: args.alias.clone(),
-            share_id,
+            share_id: new_share(args)?,
            uploading: None,
            files: files.into(),
-        }
+        })
     }
 
     pub fn queue(&self) -> Vec<&file::Checked> {
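
The `check_hash` helper above leans on only a handful of `FileTrait` methods. As a reading aid, here is a rough sketch of the surface those call sites imply; the names and signatures are inferred from this diff alone, so the crate's actual trait may well differ. It also shows how the byte-progress callback (fed to `bar.inc` in the real code) gets driven.

use std::io;

// Assumed shape of the hashing surface of the crate's `FileTrait`, inferred
// only from the call sites in this diff; the real trait may differ.
trait FileTrait<'a> {
    fn get_name(&'a self) -> &'a str;
    fn get_size(&'a self) -> u64;

    /// Compute and store the file's hash, reporting progress in bytes read.
    fn hash<F: FnMut(u64)>(&mut self, progress: F) -> io::Result<()>;

    /// Re-hash and compare against the stored hash; `Ok(false)` is a mismatch.
    fn check_hash<F: FnMut(u64)>(&'a self, progress: F) -> io::Result<bool>;
}

// Toy in-memory "file" showing how the byte-progress callback is driven.
struct MemFile {
    name: String,
    data: Vec<u8>,
    stored_hash: Option<u64>,
}

// Simple FNV-1a over the data, calling `progress` once per byte.
fn fnv1a(data: &[u8], mut progress: impl FnMut(u64)) -> u64 {
    let mut h: u64 = 0xcbf2_9ce4_8422_2325;
    for &b in data {
        h = (h ^ u64::from(b)).wrapping_mul(0x0000_0100_0000_01b3);
        progress(1);
    }
    h
}

impl<'a> FileTrait<'a> for MemFile {
    fn get_name(&'a self) -> &'a str {
        &self.name
    }

    fn get_size(&'a self) -> u64 {
        self.data.len() as u64
    }

    fn hash<F: FnMut(u64)>(&mut self, progress: F) -> io::Result<()> {
        self.stored_hash = Some(fnv1a(&self.data, progress));
        Ok(())
    }

    fn check_hash<F: FnMut(u64)>(&'a self, progress: F) -> io::Result<bool> {
        Ok(self.stored_hash == Some(fnv1a(&self.data, progress)))
    }
}

fn main() -> io::Result<()> {
    let mut file = MemFile { name: "a.bin".into(), data: vec![1, 2, 3], stored_hash: None };

    let mut hashed = 0;
    file.hash(|bytes| hashed += bytes)?;
    println!("hashed {} of {} bytes", hashed, file.get_size());
    println!("check_hash: {}", file.check_hash(|_| ())?);
    Ok(())
}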


@@ -39,7 +39,7 @@ pub struct Cli {
     /// Name of the new share
     #[arg(short, long, default_value = "ShrUpl Upload", value_name = "TEXT")]
-    name: String,
+    share_name: String,
 
     /// Description of the new share
     #[arg(short, long, value_name = "TEXT")]
@@ -57,6 +57,10 @@
     )]
     pub chunk_size: usize,
 
+    /// Don't hash files before uploading
+    #[arg(short, long)]
+    no_hash: bool,
+
     /// Increase output verbosity
     #[arg(short, long, action = clap::ArgAction::Count)]
     verbose: u8,
@@ -125,9 +129,14 @@
         }
     }
 
+    #[must_use]
+    pub fn should_hash(&self) -> bool {
+        !self.no_hash
+    }
+
     #[must_use]
     pub fn get_share_request(&self) -> NewShareRequest {
-        NewShareRequest::new(&self.name, self.description.as_ref(), self.max_views)
+        NewShareRequest::new(&self.share_name, self.description.as_ref(), self.max_views)
     }
 
     #[must_use]
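
For context, the new flag and its positive-sense accessor behave as in this minimal clap sketch. `Cli` is stripped down to only the hashing-related field, and the binary name passed to `parse_from` is just a placeholder. Storing the negative field (`no_hash`) keeps hashing on by default, while `should_hash()` keeps the call sites readable.

use clap::Parser;

// Stripped-down Cli showing only the new flag; mirrors the diff above.
#[derive(Parser, Debug)]
struct Cli {
    /// Don't hash files before uploading
    #[arg(short, long)]
    no_hash: bool,
}

impl Cli {
    /// Positive-sense helper so call sites read `if args.should_hash()`.
    #[must_use]
    fn should_hash(&self) -> bool {
        !self.no_hash
    }
}

fn main() {
    // `-n` and `--no-hash` both set the flag; omitting it keeps hashing enabled.
    let default = Cli::parse_from(["shrupl"]);
    let skipped = Cli::parse_from(["shrupl", "-n"]);

    assert!(default.should_hash());
    assert!(!skipped.should_hash());
}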