implement better hashing

- move hashing back into `cachefile` to correctly handle the `rebuild_share` case
- add `-n` / `--no-hash` CLI switch to explicitly skip hashing
This commit is contained in:
Jörn-Michael Miehe 2025-06-25 22:47:55 +00:00
parent 1f9c247439
commit 465c857126
4 changed files with 101 additions and 95 deletions

View file

@@ -49,8 +49,3 @@
- "continue" and "new" flags to avoid user interaction
- "quiet" flag to disable output entirely
- some switch to change log to "pretty-print"
- hashing
- store file hashes with all `file::*` variants
- check hashes on "continue"
- CLI switch to skip hashing

View file

@@ -1,7 +1,7 @@
use std::{fmt, io, time::Duration};
use indicatif::{ProgressBar, ProgressDrawTarget};
use log::{debug, info, warn};
use log::{debug, warn};
use crate::{
cachefile::CacheFile,
@@ -26,13 +26,17 @@ impl fmt::Debug for AppState {
}
}
fn new_http(timeout: Option<Duration>) -> ureq::Agent {
fn new_http(args: &Cli) -> ureq::Agent {
ureq::Agent::config_builder()
.timeout_global(timeout)
.timeout_global(args.get_timeout())
.build()
.into()
}
fn new_share(args: &Cli) -> error::Result<String> {
new_http(args).share_create(&args.get_uri(), &args.alias, args.get_share_request())
}
impl AppState {
fn new(http: ureq::Agent, inner: CacheFile) -> Self {
Self {
@@ -43,78 +47,14 @@ impl AppState {
}
pub fn try_resume(args: &Cli) -> error::Result<Self> {
fn check_hash<'a>(file: &'a impl FileTrait<'a>, bar: &ProgressBar) -> error::Result<()> {
bar.set_message(format!("checking {:?}", file.get_name()));
match file.check_hash(|bytes| bar.inc(bytes)) {
Ok(true) => Ok(()),
Ok(false) => Err(error::Error::unknown(format!(
"Hash mismatch for file {:?}!",
file.get_name()
))),
Err(e) => Err(e.into()),
}
}
let inner = CacheFile::try_resume(args)?;
// TODO CLI switch begin
info!("Checking hashes for {inner:?}");
// BOOKMARK assumption: total file size < 2 EiB
let total_size = {
let upl_size = if let Some(upl) = inner.peek_uploading() {
upl.get_size()
} else {
0
};
upl_size + inner.queue().iter().map(|&f| f.get_size()).sum::<u64>()
};
let bar = new_progressbar();
bar.set_draw_target(ProgressDrawTarget::stderr());
bar.set_length(total_size);
bar.enable_steady_tick(Duration::from_millis(50));
if let Some(upl) = inner.peek_uploading() {
check_hash(upl, &bar)?;
}
for chk in inner.queue() {
check_hash(chk, &bar)?;
}
bar.finish_with_message("finished checking files");
// TODO CLI switch end
Ok(Self::new(new_http(args.get_timeout()), inner))
Ok(Self::new(new_http(args), CacheFile::try_resume(args)?))
}
pub fn from_args(args: &Cli) -> error::Result<Self> {
// TODO CLI switch begin
let mut files = args.files.clone();
let bar = new_progressbar();
bar.set_draw_target(ProgressDrawTarget::stderr());
// BOOKMARK assumption: total file size < 2 EiB
bar.set_length(files.iter().map(FileTrait::get_size).sum());
bar.enable_steady_tick(Duration::from_millis(50));
for chk in &mut files {
bar.set_message(format!("hashing {:?}", chk.get_name()));
chk.hash(|bytes| bar.inc(bytes))?;
debug!("{chk:?}");
}
bar.finish_with_message("finished hashing files");
// TODO CLI switch end
let http = new_http(args.get_timeout());
let share_id = http.share_create(&args.get_uri(), &args.alias, args.get_share_request())?;
Ok(Self::new(http, CacheFile::from_args(args, share_id, files)))
Ok(Self::new(
new_http(args),
CacheFile::from_args(args, new_share)?,
))
}
fn with_progressbar(&mut self, f: impl FnOnce(&ProgressBar), drop_bar: bool) {
@@ -199,15 +139,7 @@ impl AppState {
}
pub fn rebuild_share(self, args: &Cli) -> error::Result<Self> {
let share_id =
self.http
.share_create(&args.get_uri(), &args.alias, args.get_share_request())?;
let files = args.files.clone();
Ok(Self::new(
self.http,
CacheFile::from_args(args, share_id, files),
))
Ok(Self::new(self.http, CacheFile::from_args(args, new_share)?))
}
pub fn save(&self) -> io::Result<()> {

View file

@@ -3,15 +3,18 @@ use std::{
fs,
io::{self, Write},
path::{Path, PathBuf},
time::Duration,
};
use log::trace;
use indicatif::{ProgressBar, ProgressDrawTarget};
use log::{info, trace};
use serde::{Deserialize, Serialize};
use crate::{
cli::Cli,
error,
file::{self, Chunk},
file::{self, Chunk, FileTrait},
output::new_progressbar,
sharry::{Client, Uri},
};
@@ -45,7 +48,7 @@ impl CacheFile {
file_name
}
pub fn try_resume(args: &Cli) -> io::Result<Self> {
pub fn try_resume(args: &Cli) -> error::Result<Self> {
let file_name = Self::cache_file(args);
let state: Self = {
@@ -54,18 +57,85 @@ impl CacheFile {
serde_json::from_reader(reader).map_err(io::Error::other)?
};
if args.should_hash() {
fn check_hash<'a>(
file: &'a impl FileTrait<'a>,
bar: &ProgressBar,
) -> error::Result<()> {
bar.set_message(format!("checking {:?}", file.get_name()));
match file.check_hash(|bytes| bar.inc(bytes)) {
Ok(true) => Ok(()),
Ok(false) => Err(error::Error::unknown(format!(
"Hash mismatch for file {:?}!",
file.get_name()
))),
Err(e) => Err(e.into()),
}
}
info!("checking files in {state:?}");
// BOOKMARK assumption: total file size < 2 EiB
let total_size = {
let upl_size = if let Some(upl) = state.peek_uploading() {
upl.get_size()
} else {
0
};
upl_size + state.queue().iter().map(|&f| f.get_size()).sum::<u64>()
};
let bar = new_progressbar();
bar.set_draw_target(ProgressDrawTarget::stderr());
bar.set_length(total_size);
bar.enable_steady_tick(Duration::from_millis(50));
if let Some(upl) = state.peek_uploading() {
check_hash(upl, &bar)?;
}
for chk in state.queue() {
check_hash(chk, &bar)?;
}
bar.finish_with_message("finished checking files");
}
Ok(Self { file_name, ..state })
}
pub fn from_args(args: &Cli, share_id: String, files: Vec<file::Checked>) -> Self {
Self {
pub fn from_args(
args: &Cli,
new_share: impl FnOnce(&Cli) -> error::Result<String>,
) -> error::Result<Self> {
let mut files = args.files.clone();
if args.should_hash() {
info!("hashing files {files:?}");
let bar = new_progressbar();
bar.set_draw_target(ProgressDrawTarget::stderr());
// BOOKMARK assumption: total file size < 2 EiB
bar.set_length(files.iter().map(FileTrait::get_size).sum());
bar.enable_steady_tick(Duration::from_millis(50));
for chk in &mut files {
bar.set_message(format!("hashing {:?}", chk.get_name()));
chk.hash(|bytes| bar.inc(bytes))?;
}
bar.finish_with_message("finished hashing files");
}
Ok(Self {
file_name: Self::cache_file(args),
uri: args.get_uri(),
alias_id: args.alias.clone(),
share_id,
share_id: new_share(args)?,
uploading: None,
files: files.into(),
}
})
}
pub fn queue(&self) -> Vec<&file::Checked> {

View file

@@ -39,7 +39,7 @@ pub struct Cli {
/// Name of the new share
#[arg(short, long, default_value = "ShrUpl Upload", value_name = "TEXT")]
name: String,
share_name: String,
/// Description of the new share
#[arg(short, long, value_name = "TEXT")]
@@ -57,6 +57,10 @@
)]
pub chunk_size: usize,
/// Don't hash files before uploading
#[arg(short, long)]
no_hash: bool,
/// Increase output verbosity
#[arg(short, long, action = clap::ArgAction::Count)]
verbose: u8,
@@ -125,9 +129,14 @@ impl Cli {
}
}
#[must_use]
pub fn should_hash(&self) -> bool {
!self.no_hash
}
#[must_use]
pub fn get_share_request(&self) -> NewShareRequest {
NewShareRequest::new(&self.name, self.description.as_ref(), self.max_views)
NewShareRequest::new(&self.share_name, self.description.as_ref(), self.max_views)
}
#[must_use]