implement better hashing

- call `file::Checked::hash` in `AppState::from_args`
This commit is contained in:
Jörn-Michael Miehe 2025-06-24 19:34:11 +00:00
parent 2bcbc0d71c
commit ea5ef1fa10
6 changed files with 157 additions and 47 deletions

View file

@ -32,6 +32,20 @@ fn new_http(timeout: Option<Duration>) -> ureq::Agent {
.into() .into()
} }
/// build a hidden progress bar using the byte-progress style
///
/// the template renders a 50 column bar, the current message, the bytes
/// done and the total bytes (separated by a magenta `/`) and the ETA
///
/// # Panics
///
/// panics if the style template is rejected by `ProgressStyle::with_template`
fn new_progressbar() -> ProgressBar {
    // the separator is styled on its own and spliced into the template
    let separator = style("/").magenta();
    let template = format!(
        concat!(
            "{{bar:50.cyan/blue}} {{msg:.magenta}}: ",
            "{{binary_bytes:.yellow}}{}{{binary_total_bytes:.yellow}} ",
            "({{eta}})",
        ),
        separator,
    );

    let bar_style = ProgressStyle::with_template(&template).expect("invalid style template");

    ProgressBar::hidden().with_style(bar_style)
}
impl AppState { impl AppState {
fn new(http: ureq::Agent, inner: CacheFile) -> Self { fn new(http: ureq::Agent, inner: CacheFile) -> Self {
Self { Self {
@ -50,27 +64,38 @@ impl AppState {
} }
pub fn from_args(args: &Cli) -> sharry::Result<Self> { pub fn from_args(args: &Cli) -> sharry::Result<Self> {
let mut files = args.files.clone();
// TODO CLI switch begin
let bar = new_progressbar();
bar.set_draw_target(ProgressDrawTarget::stderr());
// BOOKMARK assumption: total file size < 2 EiB
bar.set_length(files.iter().map(|f| f.get_size()).sum());
bar.enable_steady_tick(Duration::from_millis(50));
for chk in &mut files {
bar.set_message(format!("hashing {:?}", chk.get_name()));
chk.hash(|bytes| bar.inc(bytes))?;
debug!("{chk:?}");
}
bar.finish();
// TODO CLI switch end
let http = new_http(args.get_timeout()); let http = new_http(args.get_timeout());
let share_id = http.share_create(&args.get_uri(), &args.alias, args.get_share_request())?; let share_id = http.share_create(&args.get_uri(), &args.alias, args.get_share_request())?;
Ok(Self::new(http, CacheFile::from_args(args, share_id))) Ok(Self::new(
http,
CacheFile::from_args(args, share_id).replace_files(files),
))
} }
fn with_progressbar(&mut self, f: impl FnOnce(&ProgressBar), drop_bar: bool) { fn with_progressbar(&mut self, f: impl FnOnce(&ProgressBar), drop_bar: bool) {
let bar = &*self.progress.get_or_insert_with(|| { let bar = &*self.progress.get_or_insert_with(new_progressbar);
ProgressBar::hidden().with_style(
ProgressStyle::with_template(&format!(
concat!(
"{{bar:50.cyan/blue}} {{msg:.magenta}}: ",
"{{binary_bytes:.yellow}}{}{{binary_total_bytes:.yellow}} ",
"({{eta}})",
),
style("/").magenta(),
))
.expect("style template is not valid"),
)
});
if let Some(upl) = self.inner.peek_uploading() { if let Some(upl) = self.inner.peek_uploading() {
if bar.length().is_none() { if bar.length().is_none() {

View file

@ -67,6 +67,13 @@ impl CacheFile {
} }
} }
/// swap out the files held by this cache file
///
/// consumes `self` and returns it with `files` (converted via `Into`)
/// stored in the `files` field; every other field is kept as it was
pub fn replace_files(self, files: Vec<file::Checked>) -> Self {
    let mut replaced = self;
    replaced.files = files.into();
    replaced
}
pub fn queue_empty(&self) -> bool { pub fn queue_empty(&self) -> bool {
self.files.is_empty() self.files.is_empty()
} }

View file

@ -20,6 +20,8 @@ pub struct Checked {
pub(super) path: PathBuf, pub(super) path: PathBuf,
/// size of that file /// size of that file
pub(super) size: u64, pub(super) size: u64,
/// hash of that file
pub(super) hash: Option<String>,
} }
impl AsRef<[u8]> for Checked { impl AsRef<[u8]> for Checked {
@ -41,6 +43,7 @@ impl Checked {
Ok(Self { Ok(Self {
path: fs::canonicalize(&value)?, path: fs::canonicalize(&value)?,
size: meta.len(), size: meta.len(),
hash: None,
}) })
} else { } else {
Err(io::Error::new( Err(io::Error::new(
@ -50,6 +53,19 @@ impl Checked {
} }
} }
/// compute and store the hash of this file
///
/// - `f` is handed to `compute_file_hash` as its progress callback
/// - on success, the computed hash is stored in `self.hash`
///
/// # Errors
///
/// fails if a hash is already stored, or if `compute_file_hash` fails
pub fn hash(&mut self, f: impl Fn(u64)) -> io::Result<()> {
    match self.hash {
        // hashing twice is treated as an error instead of silently redoing the work
        Some(_) => Err(io::Error::other(format!(
            "file {:?} is already hashed!",
            self.path.display()
        ))),
        None => {
            let digest = super::compute_file_hash(&self.path, self.size, f)?;
            self.hash = Some(digest);
            Ok(())
        }
    }
}
/// start uploading this file /// start uploading this file
/// ///
/// - tries to create a new entry in a share /// - tries to create a new entry in a share
@ -68,7 +84,7 @@ impl Checked {
) -> sharry::Result<Uploading> { ) -> sharry::Result<Uploading> {
let file_id = client.file_create(uri, alias_id, share_id, &self)?; let file_id = client.file_create(uri, alias_id, share_id, &self)?;
Ok(Uploading::new(self.path, self.size, file_id)) Ok(Uploading::new(self.path, self.size, self.hash, file_id))
} }
} }
@ -84,4 +100,8 @@ impl<'t> FileTrait<'t> for Checked {
fn get_size(&self) -> u64 { fn get_size(&self) -> u64 {
self.size self.size
} }
fn check_hash(&self, on_progress: impl Fn(u64)) -> io::Result<bool> {
super::check_file_hash(&self.path, self.size, &self.hash, on_progress)
}
} }

View file

@ -2,12 +2,62 @@ mod checked;
mod chunk; mod chunk;
mod uploading; mod uploading;
use std::{ffi::OsStr, path::Path}; use std::{
ffi::OsStr,
fs,
io::{self, Read},
path::Path,
};
use base64ct::{Base64, Encoding};
use blake2b_simd::Params as Blake2b;
pub use checked::Checked; pub use checked::Checked;
pub use chunk::Chunk; pub use chunk::Chunk;
pub use uploading::Uploading; pub use uploading::Uploading;
/// compute the hash of the file at `path`
///
/// - reads the file in chunks of 4 MiB and feeds them into a BLAKE2b state
///   configured for a 64 byte digest
/// - calls `on_progress` after every chunk with the number of bytes just read
/// - returns the digest encoded with `Base64`
///
/// # Errors
///
/// fails if the file cannot be opened or read, or if the number of bytes
/// hashed differs from the expected `size`
fn compute_file_hash<P>(path: P, size: u64, on_progress: impl Fn(u64)) -> io::Result<String>
where
    P: AsRef<Path>,
{
    let mut file = fs::File::open(path)?;
    let mut hasher = Blake2b::new().hash_length(64).to_state();

    let mut buf = vec![0u8; 4 * 1024 * 1024];
    let mut bytes_read = 0u64;

    loop {
        let n = match file.read(&mut buf) {
            // a read of zero bytes into a non-empty buffer signals end of file
            Ok(0) => break,
            Ok(n) => n,
            // an interrupted read is non-fatal; retry instead of aborting the hash
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        hasher.update(&buf[..n]);

        // `n` is at most `buf.len()` (4 MiB), so widening to `u64` cannot truncate
        let n = n as u64;
        bytes_read += n;
        on_progress(n);
    }

    // the file was read to its end; a size mismatch means it differs from what was recorded
    if bytes_read != size {
        return Err(io::Error::other(format!(
            "Hashed {bytes_read:?} bytes, known file size {:?}!",
            size
        )));
    }

    Ok(Base64::encode_string(hasher.finalize().as_bytes()))
}
/// check whether the file at `path` still matches a previously stored hash
///
/// - yields `Ok(false)` without touching the file if no hash is stored
/// - otherwise recomputes the hash via `compute_file_hash` and compares
///
/// # Errors
///
/// fails if `compute_file_hash` fails
fn check_file_hash(
    path: impl AsRef<Path>,
    size: u64,
    hash: &Option<String>,
    on_progress: impl Fn(u64),
) -> io::Result<bool> {
    match hash {
        None => Ok(false),
        Some(expected) => {
            let actual = compute_file_hash(path, size, on_progress)?;
            Ok(*expected == actual)
        }
    }
}
pub trait FileTrait<'t> { pub trait FileTrait<'t> {
/// extract the filename part of a `Path` reference /// extract the filename part of a `Path` reference
/// ///
@ -25,4 +75,6 @@ pub trait FileTrait<'t> {
/// get the file's size /// get the file's size
fn get_size(&self) -> u64; fn get_size(&self) -> u64;
/// check the file against its stored hash
///
/// `on_progress` is a callback taking a `u64`; the implementors shown with
/// this trait forward it to `check_file_hash`
fn check_hash(&self, on_progress: impl Fn(u64)) -> io::Result<bool>;
} }

View file

@ -11,8 +11,12 @@ use super::{Checked, Chunk, FileTrait};
#[derive(Serialize, Deserialize, Debug)] #[derive(Serialize, Deserialize, Debug)]
pub struct Uploading { pub struct Uploading {
/// canonical path to a regular file
path: PathBuf, path: PathBuf,
/// size of that file
size: u64, size: u64,
/// hash of that file
hash: Option<String>,
file_id: String, file_id: String,
#[serde(skip)] #[serde(skip)]
last_offset: Option<u64>, last_offset: Option<u64>,
@ -20,10 +24,11 @@ pub struct Uploading {
} }
impl Uploading { impl Uploading {
pub(super) fn new(path: PathBuf, size: u64, file_id: String) -> Self { pub(super) fn new(path: PathBuf, size: u64, hash: Option<String>, file_id: String) -> Self {
Self { Self {
path, path,
size, size,
hash,
file_id, file_id,
last_offset: None, last_offset: None,
offset: 0, offset: 0,
@ -79,6 +84,7 @@ impl Uploading {
Checked { Checked {
path: self.path, path: self.path,
size: self.size, size: self.size,
hash: self.hash,
} }
} }
} }
@ -94,4 +100,8 @@ impl<'t> FileTrait<'t> for Uploading {
fn get_size(&self) -> u64 { fn get_size(&self) -> u64 {
self.size self.size
} }
fn check_hash(&self, on_progress: impl Fn(u64)) -> io::Result<bool> {
super::check_file_hash(&self.path, self.size, &self.hash, on_progress)
}
} }

View file

@ -24,6 +24,32 @@ use output::{Log, SHRUPL};
use sharry::{ClientError, Parameter}; use sharry::{ClientError, Parameter};
fn main() { fn main() {
let args = Cli::parse();
env_logger::Builder::new()
.filter_module("shrupl", args.get_level_filter())
.parse_default_env()
.init();
info!("args: {args:#?}");
println!("{} to {}!", style("Welcome").magenta().bold(), *SHRUPL);
let mut state = AppState::try_resume(&args)
.and_then(|state| output::prompt_continue().then_some(state))
.unwrap_or_else(|| match AppState::from_args(&args) {
Ok(state) => {
state.save().unwrap_or_else(|e| {
Log::warning(format_args!("Failed to save state: {e}"));
});
state
}
Err(e) => {
Log::handle(&e);
Log::error(format_args!("Failed to create state: {e}"));
}
});
let check_ctrlc = { let check_ctrlc = {
let stop = Arc::new(AtomicBool::new(false)); let stop = Arc::new(AtomicBool::new(false));
let stop_ctrlc = stop.clone(); let stop_ctrlc = stop.clone();
@ -41,36 +67,6 @@ fn main() {
} }
}; };
let args = Cli::parse();
env_logger::Builder::new()
.filter_module("shrupl", args.get_level_filter())
.parse_default_env()
.init();
info!("args: {args:#?}");
println!("{} to {}!", style("Welcome").magenta().bold(), *SHRUPL);
let mut state = AppState::try_resume(&args)
.and_then(|state| output::prompt_continue().then_some(state))
.unwrap_or_else(|| {
check_ctrlc();
match AppState::from_args(&args) {
Ok(state) => {
state.save().unwrap_or_else(|e| {
Log::warning(format_args!("Failed to save state: {e}"));
});
state
}
Err(e) => {
Log::handle(&e);
Log::error(format_args!("Failed to create state: {e}"));
}
}
});
info!("continuing with state: {state:#?}"); info!("continuing with state: {state:#?}");
let fns_magenta = output::style_all(&args.file_names(), StyledObject::magenta).join(", "); let fns_magenta = output::style_all(&args.file_names(), StyledObject::magenta).join(", ");