From 6e553cc185049a57b463674efca75216fae45151 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rn-Michael=20Miehe?= <40151420+ldericher@users.noreply.github.com> Date: Tue, 24 Jun 2025 16:29:38 +0000 Subject: [PATCH 1/6] implement better hashing - use Blake2b128 for the CLI arguments hash --- Cargo.lock | 63 +++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 2 ++ src/cli.rs | 40 ++++++++++++++++------------ src/file/checked.rs | 12 ++++++--- src/sharry/api.rs | 24 ++++++++--------- 5 files changed, 109 insertions(+), 32 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 57f40d6..434980f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -73,12 +73,36 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "base64ct" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55248b47b0caf0546f7988906588779981c43bb1bc9d0c44087278f80cdb44ba" + [[package]] name = "bitflags" version = "2.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" +[[package]] +name = "blake2" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" +dependencies = [ + "digest", +] + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "bumpalo" version = "3.17.0" @@ -209,6 +233,16 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + [[package]] name = "ctrlc" version = "3.4.7" @@ -239,6 +273,17 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", + "subtle", +] + [[package]] name = "dirs-next" version = "2.0.0" @@ -340,6 +385,16 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" version = "0.2.16" @@ -846,6 +901,8 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" name = "shrupl" version = "0.1.0-alpha" dependencies = [ + "base64ct", + "blake2", "clap", "console", "ctrlc", @@ -988,6 +1045,12 @@ dependencies = [ "zerovec", ] +[[package]] +name = "typenum" +version = "1.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" + [[package]] name = "unicode-ident" version = "1.0.18" diff --git a/Cargo.toml b/Cargo.toml index 623bc0c..d20c0ec 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,8 @@ edition = "2024" description = "ShrUpl is a tool to upload files to a Sharry Instance through a public Alias, leveraging the tus protocol" [dependencies] +base64ct = { version = "1.8.0", default-features = false, features = ["alloc"] } +blake2 = { version = "0.10.6", default-features = false } clap = { version = "4.5.38", features = ["derive"] } console = { version = "0.15.11", default-features = false } ctrlc = { version = "3.4.7", features = ["termination"] } diff --git a/src/cli.rs b/src/cli.rs index 546427b..2e2a947 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -1,11 +1,7 @@ -use std::{ - convert::Infallible, - fmt, - hash::{DefaultHasher, Hash, Hasher}, - io, - time::Duration, -}; +use std::{convert::Infallible, fmt, io, time::Duration}; +use base64ct::{Base64UrlUnpadded, Encoding}; +use blake2::{Blake2b, Digest, digest::consts::U16}; use clap::{ Parser, builder::{PossibleValuesParser, TypedValueParser}, @@ -18,7 +14,7 @@ use crate::{ sharry::{NewShareRequest, Uri}, }; -#[derive(Parser, Hash)] +#[derive(Parser)] #[command(version, about, long_about = None)] pub struct Cli { /// Timeout in seconds for HTTP actions (set 0 or invalid to disable) @@ -100,6 +96,18 @@ fn parse_sharry_file(data: &str) -> io::Result { Checked::new(data) } +type Blake2b128 = Blake2b; + +fn sorted(values: &[T]) -> Vec<&T> +where + T: Ord, +{ + let mut refs: Vec<_> = values.iter().collect(); + refs.sort_unstable(); + + refs +} + impl Cli { pub fn get_timeout(&self) -> Option { (!self.timeout.is_zero()).then_some(self.timeout) @@ -135,16 +143,14 @@ impl Cli { } pub fn get_hash(&self) -> String { - let file_refs = { - let mut refs: Vec<_> = self.files.iter().collect(); - refs.sort_unstable(); + let mut hasher = Blake2b128::new(); + hasher.update(self.get_uri()); + hasher.update(&self.alias); - refs - }; + for chk in sorted(&self.files) { + hasher.update(chk); + } - let mut hasher = DefaultHasher::new(); - (self.get_uri(), &self.alias, file_refs).hash(&mut hasher); - - format!("{:x}", hasher.finish()) + Base64UrlUnpadded::encode_string(&hasher.finalize()) } } diff --git a/src/file/checked.rs b/src/file/checked.rs index 58bdf79..45c1d90 100644 --- a/src/file/checked.rs +++ b/src/file/checked.rs @@ -11,10 +11,10 @@ use super::{FileTrait, Uploading}; /// Description of an existing, regular file /// -/// - impl Debug, Clone, Hash for `clap` compatibility +/// - impl Clone for `clap` compatibility /// - impl serde for appstate caching -/// - impl Ord to handle multiple files given -#[derive(Debug, Clone, Hash, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)] +/// - impl PartialEq..Ord to handle multiple files given +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)] pub struct Checked { /// canonical path to a regular file pub(super) path: PathBuf, @@ -22,6 +22,12 @@ pub struct Checked { pub(super) size: u64, } +impl AsRef<[u8]> for Checked { + fn as_ref(&self) -> &[u8] { + self.path.as_os_str().as_encoded_bytes() + } +} + impl Checked { /// create a new checked file from some path reference /// diff --git a/src/sharry/api.rs b/src/sharry/api.rs index b762e4e..3f11560 100644 --- a/src/sharry/api.rs +++ b/src/sharry/api.rs @@ -3,28 +3,28 @@ use std::fmt; use log::trace; use serde::{Deserialize, Serialize}; -#[derive(Serialize, Deserialize, Debug, Hash)] -pub struct Uri { - protocol: String, - base_url: String, -} +#[derive(Serialize, Deserialize, Debug)] +pub struct Uri(String); impl fmt::Display for Uri { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}://{}", self.protocol, self.base_url) + f.write_str(&self.0) + } +} + +impl AsRef<[u8]> for Uri { + fn as_ref(&self) -> &[u8] { + self.0.as_bytes() } } impl Uri { - pub fn new(protocol: impl Into, base_url: impl Into) -> Self { - Self { - protocol: protocol.into(), - base_url: base_url.into(), - } + pub fn new(protocol: impl fmt::Display, base_url: impl fmt::Display) -> Self { + Self(format!("{}://{}", protocol, base_url)) } fn endpoint(&self, path: fmt::Arguments) -> String { - let uri = format!("{}://{}/api/v2/{path}", self.protocol, self.base_url); + let uri = format!("{}/api/v2/{path}", self.0); trace!("endpoint: {uri:?}"); uri } From 2bcbc0d71c9780ca72b073084563a757c689ad26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rn-Michael=20Miehe?= <40151420+ldericher@users.noreply.github.com> Date: Tue, 24 Jun 2025 19:07:22 +0000 Subject: [PATCH 2/6] implement better hashing - use `blake2b_simd` crate and optimize --- .cargo/config.toml | 7 +++++ Cargo.lock | 76 +++++++++++++++------------------------------- Cargo.toml | 8 ++++- src/cli.rs | 15 +++++---- 4 files changed, 46 insertions(+), 60 deletions(-) diff --git a/.cargo/config.toml b/.cargo/config.toml index d67b66e..b0f43ce 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -1,2 +1,9 @@ [build] target = "x86_64-unknown-linux-musl" + +# rustflags = [ +# # emit instructions tuned to the current CPU +# "-C", "target-cpu=native", +# # assume CPU features +# "-C", "target-feature=+avx2,+sse4.1,+ssse3,+aes", +# ] \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 434980f..cb94d7e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -67,6 +67,18 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "arrayref" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" + +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + [[package]] name = "base64" version = "0.22.1" @@ -86,21 +98,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" [[package]] -name = "blake2" -version = "0.10.6" +name = "blake2b_simd" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" +checksum = "06e903a20b159e944f91ec8499fe1e55651480c541ea0a584f5d967c49ad9d99" dependencies = [ - "digest", -] - -[[package]] -name = "block-buffer" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" -dependencies = [ - "generic-array", + "arrayref", + "arrayvec", + "constant_time_eq", ] [[package]] @@ -195,6 +200,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "constant_time_eq" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" + [[package]] name = "cookie" version = "0.18.1" @@ -233,16 +244,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "crypto-common" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" -dependencies = [ - "generic-array", - "typenum", -] - [[package]] name = "ctrlc" version = "3.4.7" @@ -273,17 +274,6 @@ dependencies = [ "thiserror 1.0.69", ] -[[package]] -name = "digest" -version = "0.10.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" -dependencies = [ - "block-buffer", - "crypto-common", - "subtle", -] - [[package]] name = "dirs-next" version = "2.0.0" @@ -385,16 +375,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "generic-array" -version = "0.14.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" -dependencies = [ - "typenum", - "version_check", -] - [[package]] name = "getrandom" version = "0.2.16" @@ -902,7 +882,7 @@ name = "shrupl" version = "0.1.0-alpha" dependencies = [ "base64ct", - "blake2", + "blake2b_simd", "clap", "console", "ctrlc", @@ -1045,12 +1025,6 @@ dependencies = [ "zerovec", ] -[[package]] -name = "typenum" -version = "1.18.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" - [[package]] name = "unicode-ident" version = "1.0.18" diff --git a/Cargo.toml b/Cargo.toml index d20c0ec..9b87ac4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,7 +6,7 @@ description = "ShrUpl is a tool to upload files to a Sharry Instance through a p [dependencies] base64ct = { version = "1.8.0", default-features = false, features = ["alloc"] } -blake2 = { version = "0.10.6", default-features = false } +blake2b_simd = "1.0.3" clap = { version = "4.5.38", features = ["derive"] } console = { version = "0.15.11", default-features = false } ctrlc = { version = "3.4.7", features = ["termination"] } @@ -22,5 +22,11 @@ thiserror = "2.0.12" ureq = { version = "3.0.11", features = ["json"] } [profile.release] +# Optimize for speed even more aggressively +opt-level = "z" +# better inlining +codegen-units = 1 +# linker‐time optimization lto = true +debug = false panic = "abort" diff --git a/src/cli.rs b/src/cli.rs index 2e2a947..82d0bd0 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -1,7 +1,7 @@ use std::{convert::Infallible, fmt, io, time::Duration}; use base64ct::{Base64UrlUnpadded, Encoding}; -use blake2::{Blake2b, Digest, digest::consts::U16}; +use blake2b_simd::Params as Blake2b; use clap::{ Parser, builder::{PossibleValuesParser, TypedValueParser}, @@ -96,8 +96,6 @@ fn parse_sharry_file(data: &str) -> io::Result { Checked::new(data) } -type Blake2b128 = Blake2b; - fn sorted(values: &[T]) -> Vec<&T> where T: Ord, @@ -143,14 +141,15 @@ impl Cli { } pub fn get_hash(&self) -> String { - let mut hasher = Blake2b128::new(); - hasher.update(self.get_uri()); - hasher.update(&self.alias); + let mut hasher = Blake2b::new().hash_length(16).to_state(); + + hasher.update(self.get_uri().as_ref()); + hasher.update(self.alias.as_bytes()); for chk in sorted(&self.files) { - hasher.update(chk); + hasher.update(chk.as_ref()); } - Base64UrlUnpadded::encode_string(&hasher.finalize()) + Base64UrlUnpadded::encode_string(hasher.finalize().as_bytes()) } } From ea5ef1fa10c70b5862f550479f45503e53c2d09b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rn-Michael=20Miehe?= <40151420+ldericher@users.noreply.github.com> Date: Tue, 24 Jun 2025 19:34:11 +0000 Subject: [PATCH 3/6] implement better hashing - call `file::Checked::hash` in `AppState::from_args` --- src/appstate.rs | 53 +++++++++++++++++++++++++++++----------- src/cachefile.rs | 7 ++++++ src/file/checked.rs | 22 ++++++++++++++++- src/file/mod.rs | 54 ++++++++++++++++++++++++++++++++++++++++- src/file/uploading.rs | 12 +++++++++- src/main.rs | 56 ++++++++++++++++++++----------------------- 6 files changed, 157 insertions(+), 47 deletions(-) diff --git a/src/appstate.rs b/src/appstate.rs index 6ea5a25..19eb63e 100644 --- a/src/appstate.rs +++ b/src/appstate.rs @@ -32,6 +32,20 @@ fn new_http(timeout: Option) -> ureq::Agent { .into() } +fn new_progressbar() -> ProgressBar { + ProgressBar::hidden().with_style( + ProgressStyle::with_template(&format!( + concat!( + "{{bar:50.cyan/blue}} {{msg:.magenta}}: ", + "{{binary_bytes:.yellow}}{}{{binary_total_bytes:.yellow}} ", + "({{eta}})", + ), + style("/").magenta(), + )) + .expect("invalid style template"), + ) +} + impl AppState { fn new(http: ureq::Agent, inner: CacheFile) -> Self { Self { @@ -50,27 +64,38 @@ impl AppState { } pub fn from_args(args: &Cli) -> sharry::Result { + let mut files = args.files.clone(); + + // TODO CLI switch begin + + let bar = new_progressbar(); + bar.set_draw_target(ProgressDrawTarget::stderr()); + // BOOKMARK assumption: total file size < 2 EiB + bar.set_length(files.iter().map(|f| f.get_size()).sum()); + bar.enable_steady_tick(Duration::from_millis(50)); + + for chk in &mut files { + bar.set_message(format!("hashing {:?}", chk.get_name())); + chk.hash(|bytes| bar.inc(bytes))?; + debug!("{chk:?}"); + } + + bar.finish(); + + // TODO CLI switch end + let http = new_http(args.get_timeout()); let share_id = http.share_create(&args.get_uri(), &args.alias, args.get_share_request())?; - Ok(Self::new(http, CacheFile::from_args(args, share_id))) + Ok(Self::new( + http, + CacheFile::from_args(args, share_id).replace_files(files), + )) } fn with_progressbar(&mut self, f: impl FnOnce(&ProgressBar), drop_bar: bool) { - let bar = &*self.progress.get_or_insert_with(|| { - ProgressBar::hidden().with_style( - ProgressStyle::with_template(&format!( - concat!( - "{{bar:50.cyan/blue}} {{msg:.magenta}}: ", - "{{binary_bytes:.yellow}}{}{{binary_total_bytes:.yellow}} ", - "({{eta}})", - ), - style("/").magenta(), - )) - .expect("style template is not valid"), - ) - }); + let bar = &*self.progress.get_or_insert_with(new_progressbar); if let Some(upl) = self.inner.peek_uploading() { if bar.length().is_none() { diff --git a/src/cachefile.rs b/src/cachefile.rs index 8266f41..1aed7bb 100644 --- a/src/cachefile.rs +++ b/src/cachefile.rs @@ -67,6 +67,13 @@ impl CacheFile { } } + pub fn replace_files(self, files: Vec) -> Self { + Self { + files: files.into(), + ..self + } + } + pub fn queue_empty(&self) -> bool { self.files.is_empty() } diff --git a/src/file/checked.rs b/src/file/checked.rs index 45c1d90..6dbf7a4 100644 --- a/src/file/checked.rs +++ b/src/file/checked.rs @@ -20,6 +20,8 @@ pub struct Checked { pub(super) path: PathBuf, /// size of that file pub(super) size: u64, + /// hash of that file + pub(super) hash: Option, } impl AsRef<[u8]> for Checked { @@ -41,6 +43,7 @@ impl Checked { Ok(Self { path: fs::canonicalize(&value)?, size: meta.len(), + hash: None, }) } else { Err(io::Error::new( @@ -50,6 +53,19 @@ impl Checked { } } + pub fn hash(&mut self, f: impl Fn(u64)) -> io::Result<()> { + if self.hash.is_some() { + return Err(io::Error::other(format!( + "file {:?} is already hashed!", + self.path.display() + ))); + } + + self.hash = Some(super::compute_file_hash(&self.path, self.size, f)?); + + Ok(()) + } + /// start uploading this file /// /// - tries to create a new entry in a share @@ -68,7 +84,7 @@ impl Checked { ) -> sharry::Result { let file_id = client.file_create(uri, alias_id, share_id, &self)?; - Ok(Uploading::new(self.path, self.size, file_id)) + Ok(Uploading::new(self.path, self.size, self.hash, file_id)) } } @@ -84,4 +100,8 @@ impl<'t> FileTrait<'t> for Checked { fn get_size(&self) -> u64 { self.size } + + fn check_hash(&self, on_progress: impl Fn(u64)) -> io::Result { + super::check_file_hash(&self.path, self.size, &self.hash, on_progress) + } } diff --git a/src/file/mod.rs b/src/file/mod.rs index f37b74c..1e99682 100644 --- a/src/file/mod.rs +++ b/src/file/mod.rs @@ -2,12 +2,62 @@ mod checked; mod chunk; mod uploading; -use std::{ffi::OsStr, path::Path}; +use std::{ + ffi::OsStr, + fs, + io::{self, Read}, + path::Path, +}; + +use base64ct::{Base64, Encoding}; +use blake2b_simd::Params as Blake2b; pub use checked::Checked; pub use chunk::Chunk; pub use uploading::Uploading; +fn compute_file_hash

(path: P, size: u64, on_progress: impl Fn(u64)) -> io::Result +where + P: AsRef, +{ + let mut file = fs::File::open(path)?; + let mut hasher = Blake2b::new().hash_length(64).to_state(); + + let mut buf = vec![0u8; 4 * 1024 * 1024]; + let mut bytes_read = 0; + + loop { + let n = file.read(&mut buf)?; + if n == 0 { + break; + } + hasher.update(&buf[..n]); + + bytes_read += n as u64; + on_progress(n as u64); + } + + if bytes_read != size { + return Err(io::Error::other(format!( + "Hashed {bytes_read:?} bytes, known file size {:?}!", + size + ))); + } + + Ok(Base64::encode_string(hasher.finalize().as_bytes())) +} + +fn check_file_hash( + path: impl AsRef, + size: u64, + hash: &Option, + on_progress: impl Fn(u64), +) -> io::Result { + let Some(hash) = hash else { return Ok(false) }; + + Ok(*hash == compute_file_hash(path, size, on_progress)?) +} + pub trait FileTrait<'t> { /// extract the filename part of a `Path` reference /// @@ -25,4 +75,6 @@ pub trait FileTrait<'t> { /// get the file's size fn get_size(&self) -> u64; + + fn check_hash(&self, on_progress: impl Fn(u64)) -> io::Result; } diff --git a/src/file/uploading.rs b/src/file/uploading.rs index 6c924c2..cb91d07 100644 --- a/src/file/uploading.rs +++ b/src/file/uploading.rs @@ -11,8 +11,12 @@ use super::{Checked, Chunk, FileTrait}; #[derive(Serialize, Deserialize, Debug)] pub struct Uploading { + /// canonical path to a regular file path: PathBuf, + /// size of that file size: u64, + /// hash of that file + hash: Option, file_id: String, #[serde(skip)] last_offset: Option, @@ -20,10 +24,11 @@ pub struct Uploading { } impl Uploading { - pub(super) fn new(path: PathBuf, size: u64, file_id: String) -> Self { + pub(super) fn new(path: PathBuf, size: u64, hash: Option, file_id: String) -> Self { Self { path, size, + hash, file_id, last_offset: None, offset: 0, @@ -79,6 +84,7 @@ impl Uploading { Checked { path: self.path, size: self.size, + hash: self.hash, } } } @@ -94,4 +100,8 @@ impl<'t> FileTrait<'t> for Uploading { fn get_size(&self) -> u64 { self.size } + + fn check_hash(&self, on_progress: impl Fn(u64)) -> io::Result { + super::check_file_hash(&self.path, self.size, &self.hash, on_progress) + } } diff --git a/src/main.rs b/src/main.rs index ac8c9b3..797a25f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -24,6 +24,32 @@ use output::{Log, SHRUPL}; use sharry::{ClientError, Parameter}; fn main() { + let args = Cli::parse(); + + env_logger::Builder::new() + .filter_module("shrupl", args.get_level_filter()) + .parse_default_env() + .init(); + + info!("args: {args:#?}"); + + println!("{} to {}!", style("Welcome").magenta().bold(), *SHRUPL); + + let mut state = AppState::try_resume(&args) + .and_then(|state| output::prompt_continue().then_some(state)) + .unwrap_or_else(|| match AppState::from_args(&args) { + Ok(state) => { + state.save().unwrap_or_else(|e| { + Log::warning(format_args!("Failed to save state: {e}")); + }); + state + } + Err(e) => { + Log::handle(&e); + Log::error(format_args!("Failed to create state: {e}")); + } + }); + let check_ctrlc = { let stop = Arc::new(AtomicBool::new(false)); let stop_ctrlc = stop.clone(); @@ -41,36 +67,6 @@ fn main() { } }; - let args = Cli::parse(); - - env_logger::Builder::new() - .filter_module("shrupl", args.get_level_filter()) - .parse_default_env() - .init(); - - info!("args: {args:#?}"); - - println!("{} to {}!", style("Welcome").magenta().bold(), *SHRUPL); - - let mut state = AppState::try_resume(&args) - .and_then(|state| output::prompt_continue().then_some(state)) - .unwrap_or_else(|| { - check_ctrlc(); - - match AppState::from_args(&args) { - Ok(state) => { - state.save().unwrap_or_else(|e| { - Log::warning(format_args!("Failed to save state: {e}")); - }); - state - } - Err(e) => { - Log::handle(&e); - Log::error(format_args!("Failed to create state: {e}")); - } - } - }); - info!("continuing with state: {state:#?}"); let fns_magenta = output::style_all(&args.file_names(), StyledObject::magenta).join(", "); From 11a510647308650d5a67130a1e4bbcbdbe765a9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rn-Michael=20Miehe?= <40151420+ldericher@users.noreply.github.com> Date: Tue, 24 Jun 2025 21:58:18 +0000 Subject: [PATCH 4/6] implement better hashing - call `file::Checked::hash` in `CacheFile::from_args` instead --- src/appstate.rs | 45 ++++----------------------------------------- src/cachefile.rs | 40 ++++++++++++++++++++++++++++------------ src/output.rs | 15 +++++++++++++++ 3 files changed, 47 insertions(+), 53 deletions(-) diff --git a/src/appstate.rs b/src/appstate.rs index 19eb63e..3c3fbf3 100644 --- a/src/appstate.rs +++ b/src/appstate.rs @@ -1,13 +1,13 @@ use std::{fmt, io, time::Duration}; -use console::style; -use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle}; +use indicatif::{ProgressBar, ProgressDrawTarget}; use log::{debug, warn}; use crate::{ cachefile::CacheFile, cli::Cli, file::{Chunk, FileTrait}, + output::new_progressbar, sharry::{self, Client}, }; @@ -32,20 +32,6 @@ fn new_http(timeout: Option) -> ureq::Agent { .into() } -fn new_progressbar() -> ProgressBar { - ProgressBar::hidden().with_style( - ProgressStyle::with_template(&format!( - concat!( - "{{bar:50.cyan/blue}} {{msg:.magenta}}: ", - "{{binary_bytes:.yellow}}{}{{binary_total_bytes:.yellow}} ", - "({{eta}})", - ), - style("/").magenta(), - )) - .expect("invalid style template"), - ) -} - impl AppState { fn new(http: ureq::Agent, inner: CacheFile) -> Self { Self { @@ -64,34 +50,11 @@ impl AppState { } pub fn from_args(args: &Cli) -> sharry::Result { - let mut files = args.files.clone(); - - // TODO CLI switch begin - - let bar = new_progressbar(); - bar.set_draw_target(ProgressDrawTarget::stderr()); - // BOOKMARK assumption: total file size < 2 EiB - bar.set_length(files.iter().map(|f| f.get_size()).sum()); - bar.enable_steady_tick(Duration::from_millis(50)); - - for chk in &mut files { - bar.set_message(format!("hashing {:?}", chk.get_name())); - chk.hash(|bytes| bar.inc(bytes))?; - debug!("{chk:?}"); - } - - bar.finish(); - - // TODO CLI switch end - let http = new_http(args.get_timeout()); let share_id = http.share_create(&args.get_uri(), &args.alias, args.get_share_request())?; - Ok(Self::new( - http, - CacheFile::from_args(args, share_id).replace_files(files), - )) + Ok(Self::new(http, CacheFile::from_args(args, share_id)?)) } fn with_progressbar(&mut self, f: impl FnOnce(&ProgressBar), drop_bar: bool) { @@ -179,7 +142,7 @@ impl AppState { self.http .share_create(&args.get_uri(), &args.alias, args.get_share_request())?; - Ok(Self::new(self.http, CacheFile::from_args(args, share_id))) + Ok(Self::new(self.http, CacheFile::from_args(args, share_id)?)) } pub fn save(&self) -> io::Result<()> { diff --git a/src/cachefile.rs b/src/cachefile.rs index 1aed7bb..f595a29 100644 --- a/src/cachefile.rs +++ b/src/cachefile.rs @@ -3,14 +3,17 @@ use std::{ fs, io::{self, Write}, path::PathBuf, + time::Duration, }; -use log::trace; +use indicatif::ProgressDrawTarget; +use log::{debug, trace}; use serde::{Deserialize, Serialize}; use crate::{ cli::Cli, - file::{self, Chunk}, + file::{self, Chunk, FileTrait}, + output::new_progressbar, sharry::{self, Client, Uri}, }; @@ -56,22 +59,35 @@ impl CacheFile { Ok(Self { file_name, ..state }) } - pub fn from_args(args: &Cli, share_id: String) -> Self { - Self { + pub fn from_args(args: &Cli, share_id: String) -> io::Result { + // TODO CLI switch begin + + let mut files = args.files.clone(); + + let bar = new_progressbar(); + bar.set_draw_target(ProgressDrawTarget::stderr()); + // BOOKMARK assumption: total file size < 2 EiB + bar.set_length(files.iter().map(|f| f.get_size()).sum()); + bar.enable_steady_tick(Duration::from_millis(50)); + + for chk in &mut files { + bar.set_message(format!("hashing {:?}", chk.get_name())); + chk.hash(|bytes| bar.inc(bytes))?; + debug!("{chk:?}"); + } + + bar.finish_with_message("finished hashing files"); + + // TODO CLI switch end + + Ok(Self { file_name: Self::cache_file(args), uri: args.get_uri(), alias_id: args.alias.clone(), share_id, uploading: None, - files: args.files.clone().into(), - } - } - - pub fn replace_files(self, files: Vec) -> Self { - Self { files: files.into(), - ..self - } + }) } pub fn queue_empty(&self) -> bool { diff --git a/src/output.rs b/src/output.rs index 0ca1038..73bd80c 100644 --- a/src/output.rs +++ b/src/output.rs @@ -2,6 +2,7 @@ use std::{fmt, process, sync::LazyLock}; use console::{StyledObject, style}; use dialoguer::{Select, theme::ColorfulTheme}; +use indicatif::{ProgressBar, ProgressStyle}; use log::{error, info}; use crate::sharry; @@ -43,6 +44,20 @@ where strs.iter().map(|&s| f(style(s)).to_string()).collect() } +pub fn new_progressbar() -> ProgressBar { + ProgressBar::hidden().with_style( + ProgressStyle::with_template(&format!( + concat!( + "{{bar:50.cyan/blue}} {{msg:.magenta}}: ", + "{{binary_bytes:.yellow}}{}{{binary_total_bytes:.yellow}} ", + "({{eta}})", + ), + style("/").magenta(), + )) + .expect("invalid style template"), + ) +} + pub enum Log {} impl Log { From 0b8bebad6ef07d9c76bafb51d8a03c6a6069ef97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rn-Michael=20Miehe?= <40151420+ldericher@users.noreply.github.com> Date: Wed, 25 Jun 2025 10:44:36 +0000 Subject: [PATCH 5/6] implement better hashing - actually, calling `file::Checked::hash` in `AppState::from_args` was correct: If `AppState::rebuild_share` is called, hashes are in a known state. - call `check_hash` in `AppState::try_resume` - minor renamings --- src/appstate.rs | 89 ++++++++++++++++++++++++++++++++++++------- src/cachefile.rs | 53 +++++++++----------------- src/file/checked.rs | 2 +- src/file/mod.rs | 26 +++++++------ src/file/uploading.rs | 2 +- src/main.rs | 47 ++++++++++++++--------- src/sharry/api.rs | 2 +- src/sharry/client.rs | 2 +- 8 files changed, 142 insertions(+), 81 deletions(-) diff --git a/src/appstate.rs b/src/appstate.rs index 3c3fbf3..e04e8d6 100644 --- a/src/appstate.rs +++ b/src/appstate.rs @@ -1,7 +1,7 @@ use std::{fmt, io, time::Duration}; use indicatif::{ProgressBar, ProgressDrawTarget}; -use log::{debug, warn}; +use log::{debug, info, warn}; use crate::{ cachefile::CacheFile, @@ -41,20 +41,79 @@ impl AppState { } } - pub fn try_resume(args: &Cli) -> Option { - let inner = CacheFile::try_resume(args) - .inspect_err(|e| debug!("could not resume from hash {:?}: {e}", args.get_hash())) - .ok()?; + pub fn try_resume(args: &Cli) -> sharry::Result { + fn check_hash<'a>(file: &'a impl FileTrait<'a>, bar: &ProgressBar) -> sharry::Result<()> { + bar.set_message(format!("checking {:?}", file.get_name())); + match file.check_hash(|bytes| bar.inc(bytes)) { + Ok(true) => Ok(()), + Ok(false) => Err(sharry::ClientError::unknown(format!( + "Hash mismatch for file {:?}!", + file.get_name() + ))), + Err(e) => Err(e.into()), + } + } - Some(Self::new(new_http(args.get_timeout()), inner)) + let inner = CacheFile::try_resume(args)?; + + // TODO CLI switch begin + + info!("Checking hashes for {inner:?}"); + + // BOOKMARK assumption: total file size < 2 EiB + let total_size = { + let upl_size = if let Some(upl) = inner.peek_uploading() { + upl.get_size() + } else { + 0 + }; + upl_size + inner.queue().iter().map(|&f| f.get_size()).sum::() + }; + + let bar = new_progressbar(); + bar.set_draw_target(ProgressDrawTarget::stderr()); + bar.set_length(total_size); + bar.enable_steady_tick(Duration::from_millis(50)); + + if let Some(upl) = inner.peek_uploading() { + check_hash(upl, &bar)?; + } + + for chk in inner.queue() { + check_hash(chk, &bar)?; + } + + bar.finish_with_message("finished checking files"); + + // TODO CLI switch end + + Ok(Self::new(new_http(args.get_timeout()), inner)) } pub fn from_args(args: &Cli) -> sharry::Result { + // TODO CLI switch begin + + let mut files = args.files.clone(); + + let bar = new_progressbar(); + bar.set_draw_target(ProgressDrawTarget::stderr()); + // BOOKMARK assumption: total file size < 2 EiB + bar.set_length(files.iter().map(FileTrait::get_size).sum()); + bar.enable_steady_tick(Duration::from_millis(50)); + + for chk in &mut files { + bar.set_message(format!("hashing {:?}", chk.get_name())); + chk.hash(|bytes| bar.inc(bytes))?; + debug!("{chk:?}"); + } + + bar.finish_with_message("finished hashing files"); + + // TODO CLI switch end + let http = new_http(args.get_timeout()); - let share_id = http.share_create(&args.get_uri(), &args.alias, args.get_share_request())?; - - Ok(Self::new(http, CacheFile::from_args(args, share_id)?)) + Ok(Self::new(http, CacheFile::from_args(args, share_id, files))) } fn with_progressbar(&mut self, f: impl FnOnce(&ProgressBar), drop_bar: bool) { @@ -123,7 +182,7 @@ impl AppState { self.drop_progressbar(ProgressBar::finish); } - Ok(self.inner.peek_uploading().is_none() && self.inner.queue_empty()) + Ok(self.inner.peek_uploading().is_none() && self.inner.queue().is_empty()) } pub fn rewind_chunk(mut self) -> Option { @@ -141,15 +200,19 @@ impl AppState { let share_id = self.http .share_create(&args.get_uri(), &args.alias, args.get_share_request())?; + let files = args.files.clone(); - Ok(Self::new(self.http, CacheFile::from_args(args, share_id)?)) + Ok(Self::new( + self.http, + CacheFile::from_args(args, share_id, files), + )) } pub fn save(&self) -> io::Result<()> { self.inner.save() } - pub fn clear(self) -> io::Result<()> { - self.inner.clear() + pub fn discard(self) -> io::Result<()> { + self.inner.discard() } } diff --git a/src/cachefile.rs b/src/cachefile.rs index f595a29..b91eb88 100644 --- a/src/cachefile.rs +++ b/src/cachefile.rs @@ -2,18 +2,15 @@ use std::{ collections::VecDeque, fs, io::{self, Write}, - path::PathBuf, - time::Duration, + path::{Path, PathBuf}, }; -use indicatif::ProgressDrawTarget; -use log::{debug, trace}; +use log::trace; use serde::{Deserialize, Serialize}; use crate::{ cli::Cli, - file::{self, Chunk, FileTrait}, - output::new_progressbar, + file::{self, Chunk}, sharry::{self, Client, Uri}, }; @@ -59,39 +56,19 @@ impl CacheFile { Ok(Self { file_name, ..state }) } - pub fn from_args(args: &Cli, share_id: String) -> io::Result { - // TODO CLI switch begin - - let mut files = args.files.clone(); - - let bar = new_progressbar(); - bar.set_draw_target(ProgressDrawTarget::stderr()); - // BOOKMARK assumption: total file size < 2 EiB - bar.set_length(files.iter().map(|f| f.get_size()).sum()); - bar.enable_steady_tick(Duration::from_millis(50)); - - for chk in &mut files { - bar.set_message(format!("hashing {:?}", chk.get_name())); - chk.hash(|bytes| bar.inc(bytes))?; - debug!("{chk:?}"); - } - - bar.finish_with_message("finished hashing files"); - - // TODO CLI switch end - - Ok(Self { + pub fn from_args(args: &Cli, share_id: String, files: Vec) -> Self { + Self { file_name: Self::cache_file(args), uri: args.get_uri(), alias_id: args.alias.clone(), share_id, uploading: None, files: files.into(), - }) + } } - pub fn queue_empty(&self) -> bool { - self.files.is_empty() + pub fn queue(&self) -> Vec<&file::Checked> { + self.files.iter().collect() } pub fn get_uploading( @@ -173,10 +150,18 @@ impl CacheFile { Ok(()) } - pub fn clear(self) -> io::Result<()> { - fs::remove_file(&self.file_name)?; + fn remove(path: &Path) -> io::Result<()> { + fs::remove_file(path)?; - trace!("removed {:?}", self.file_name.display()); + trace!("removed {:?}", path.display()); Ok(()) } + + pub fn clear_any(args: &Cli) { + let _ = Self::remove(&Self::cache_file(args)); + } + + pub fn discard(self) -> io::Result<()> { + Self::remove(&self.file_name) + } } diff --git a/src/file/checked.rs b/src/file/checked.rs index 6dbf7a4..689bdb3 100644 --- a/src/file/checked.rs +++ b/src/file/checked.rs @@ -102,6 +102,6 @@ impl<'t> FileTrait<'t> for Checked { } fn check_hash(&self, on_progress: impl Fn(u64)) -> io::Result { - super::check_file_hash(&self.path, self.size, &self.hash, on_progress) + super::check_file_hash(&self.path, self.size, self.hash.as_ref(), on_progress) } } diff --git a/src/file/mod.rs b/src/file/mod.rs index 1e99682..f2df684 100644 --- a/src/file/mod.rs +++ b/src/file/mod.rs @@ -14,12 +14,10 @@ use blake2b_simd::Params as Blake2b; pub use checked::Checked; pub use chunk::Chunk; +use log::debug; pub use uploading::Uploading; -fn compute_file_hash

(path: P, size: u64, on_progress: impl Fn(u64)) -> io::Result -where - P: AsRef, -{ +fn compute_file_hash(path: &Path, size: u64, on_progress: impl Fn(u64)) -> io::Result { let mut file = fs::File::open(path)?; let mut hasher = Blake2b::new().hash_length(64).to_state(); @@ -39,23 +37,29 @@ where if bytes_read != size { return Err(io::Error::other(format!( - "Hashed {bytes_read:?} bytes, known file size {:?}!", - size + "Hashed {bytes_read:?} bytes, known file size {size:?}!" ))); } - Ok(Base64::encode_string(hasher.finalize().as_bytes())) + let result = Base64::encode_string(hasher.finalize().as_bytes()); + debug!("hashed {:?}: {result:?}", path.display()); + Ok(result) } fn check_file_hash( - path: impl AsRef, + path: &Path, size: u64, - hash: &Option, + hash: Option<&String>, on_progress: impl Fn(u64), ) -> io::Result { - let Some(hash) = hash else { return Ok(false) }; + let Some(hash) = hash else { + debug!("no hash to check for {:?}!", path.display()); + return Ok(false); + }; - Ok(*hash == compute_file_hash(path, size, on_progress)?) + let result = *hash == compute_file_hash(path, size, on_progress)?; + debug!("matches {:?}: {result:?}", *hash); + Ok(result) } pub trait FileTrait<'t> { diff --git a/src/file/uploading.rs b/src/file/uploading.rs index cb91d07..079317b 100644 --- a/src/file/uploading.rs +++ b/src/file/uploading.rs @@ -102,6 +102,6 @@ impl<'t> FileTrait<'t> for Uploading { } fn check_hash(&self, on_progress: impl Fn(u64)) -> io::Result { - super::check_file_hash(&self.path, self.size, &self.hash, on_progress) + super::check_file_hash(&self.path, self.size, self.hash.as_ref(), on_progress) } } diff --git a/src/main.rs b/src/main.rs index 797a25f..e1b8655 100644 --- a/src/main.rs +++ b/src/main.rs @@ -35,7 +35,32 @@ fn main() { println!("{} to {}!", style("Welcome").magenta().bold(), *SHRUPL); - let mut state = AppState::try_resume(&args) + let resumed = AppState::try_resume(&args); + + let check_ctrlc = { + let stop = Arc::new(AtomicBool::new(false)); + let stop_ctrlc = stop.clone(); + + ctrlc::set_handler(move || { + stop_ctrlc.store(true, Ordering::SeqCst); + eprintln!("{} stopping as soon as possible!", *SHRUPL); + }) + .expect("Error setting Ctrl-C handler"); + + move || { + if stop.load(Ordering::SeqCst) { + process::exit(255); + } + } + }; + + let mut state = resumed + .inspect_err(|e| { + cachefile::CacheFile::clear_any(&args); + Log::handle(e); + info!("could not resume from hash {:?}: {e}", args.get_hash()); + }) + .ok() .and_then(|state| output::prompt_continue().then_some(state)) .unwrap_or_else(|| match AppState::from_args(&args) { Ok(state) => { @@ -50,27 +75,11 @@ fn main() { } }); - let check_ctrlc = { - let stop = Arc::new(AtomicBool::new(false)); - let stop_ctrlc = stop.clone(); - - ctrlc::set_handler(move || { - stop_ctrlc.store(true, Ordering::SeqCst); - info!("stopping as soon as possible ..."); - }) - .expect("Error setting Ctrl-C handler"); - - move || { - if stop.load(Ordering::SeqCst) { - process::exit(255); - } - } - }; + check_ctrlc(); info!("continuing with state: {state:#?}"); let fns_magenta = output::style_all(&args.file_names(), StyledObject::magenta).join(", "); - println!("{} is uploading: {fns_magenta}", *SHRUPL); let mut buffer = vec![0; args.chunk_size * 1024 * 1024]; @@ -133,7 +142,7 @@ fn main() { check_ctrlc(); } - state.clear().unwrap_or_else(|e| { + state.discard().unwrap_or_else(|e| { Log::warning(format_args!("Failed to remove state: {e}")); }); diff --git a/src/sharry/api.rs b/src/sharry/api.rs index 3f11560..c34ab31 100644 --- a/src/sharry/api.rs +++ b/src/sharry/api.rs @@ -20,7 +20,7 @@ impl AsRef<[u8]> for Uri { impl Uri { pub fn new(protocol: impl fmt::Display, base_url: impl fmt::Display) -> Self { - Self(format!("{}://{}", protocol, base_url)) + Self(format!("{protocol}://{base_url}")) } fn endpoint(&self, path: fmt::Arguments) -> String { diff --git a/src/sharry/client.rs b/src/sharry/client.rs index d3fdac0..6f87916 100644 --- a/src/sharry/client.rs +++ b/src/sharry/client.rs @@ -118,7 +118,7 @@ pub enum ClientError { #[error("Invalid {0}")] InvalidParameter(Parameter), - #[error("Unknown error: {0}")] + #[error("{0}")] Unknown(String), } From 465c857126bdd8a4a9e12711836ba2245dbabaae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rn-Michael=20Miehe?= <40151420+ldericher@users.noreply.github.com> Date: Wed, 25 Jun 2025 22:47:55 +0000 Subject: [PATCH 6/6] implement better hashing - move hashing back into `cachefile` to correctly handle the `rebuild_share` case - add `-n` / `--no-hash` CLI switch to explicitly skip hashing --- notes.md | 5 --- src/appstate.rs | 94 +++++++----------------------------------------- src/cachefile.rs | 84 +++++++++++++++++++++++++++++++++++++++---- src/cli.rs | 13 +++++-- 4 files changed, 101 insertions(+), 95 deletions(-) diff --git a/notes.md b/notes.md index 7603faa..7fc5428 100644 --- a/notes.md +++ b/notes.md @@ -49,8 +49,3 @@ - "continue" and "new" flags to avoid user interaction - "quiet" flag to disable output entirely - some switch to change log to "pretty-print" - -- hashing - - store file hashes with all `file::*` variants - - check hashes on "continue" - - CLI switch to skip hashing diff --git a/src/appstate.rs b/src/appstate.rs index 126059a..fae1a55 100644 --- a/src/appstate.rs +++ b/src/appstate.rs @@ -1,7 +1,7 @@ use std::{fmt, io, time::Duration}; use indicatif::{ProgressBar, ProgressDrawTarget}; -use log::{debug, info, warn}; +use log::{debug, warn}; use crate::{ cachefile::CacheFile, @@ -26,13 +26,17 @@ impl fmt::Debug for AppState { } } -fn new_http(timeout: Option) -> ureq::Agent { +fn new_http(args: &Cli) -> ureq::Agent { ureq::Agent::config_builder() - .timeout_global(timeout) + .timeout_global(args.get_timeout()) .build() .into() } +fn new_share(args: &Cli) -> error::Result { + new_http(args).share_create(&args.get_uri(), &args.alias, args.get_share_request()) +} + impl AppState { fn new(http: ureq::Agent, inner: CacheFile) -> Self { Self { @@ -43,78 +47,14 @@ impl AppState { } pub fn try_resume(args: &Cli) -> error::Result { - fn check_hash<'a>(file: &'a impl FileTrait<'a>, bar: &ProgressBar) -> error::Result<()> { - bar.set_message(format!("checking {:?}", file.get_name())); - match file.check_hash(|bytes| bar.inc(bytes)) { - Ok(true) => Ok(()), - Ok(false) => Err(error::Error::unknown(format!( - "Hash mismatch for file {:?}!", - file.get_name() - ))), - Err(e) => Err(e.into()), - } - } - - let inner = CacheFile::try_resume(args)?; - - // TODO CLI switch begin - - info!("Checking hashes for {inner:?}"); - - // BOOKMARK assumption: total file size < 2 EiB - let total_size = { - let upl_size = if let Some(upl) = inner.peek_uploading() { - upl.get_size() - } else { - 0 - }; - upl_size + inner.queue().iter().map(|&f| f.get_size()).sum::() - }; - - let bar = new_progressbar(); - bar.set_draw_target(ProgressDrawTarget::stderr()); - bar.set_length(total_size); - bar.enable_steady_tick(Duration::from_millis(50)); - - if let Some(upl) = inner.peek_uploading() { - check_hash(upl, &bar)?; - } - - for chk in inner.queue() { - check_hash(chk, &bar)?; - } - - bar.finish_with_message("finished checking files"); - - // TODO CLI switch end - - Ok(Self::new(new_http(args.get_timeout()), inner)) + Ok(Self::new(new_http(args), CacheFile::try_resume(args)?)) } pub fn from_args(args: &Cli) -> error::Result { - // TODO CLI switch begin - - let mut files = args.files.clone(); - - let bar = new_progressbar(); - bar.set_draw_target(ProgressDrawTarget::stderr()); - // BOOKMARK assumption: total file size < 2 EiB - bar.set_length(files.iter().map(FileTrait::get_size).sum()); - bar.enable_steady_tick(Duration::from_millis(50)); - - for chk in &mut files { - bar.set_message(format!("hashing {:?}", chk.get_name())); - chk.hash(|bytes| bar.inc(bytes))?; - debug!("{chk:?}"); - } - - bar.finish_with_message("finished hashing files"); - - // TODO CLI switch end - - let http = new_http(args.get_timeout()); - let share_id = http.share_create(&args.get_uri(), &args.alias, args.get_share_request())?; - Ok(Self::new(http, CacheFile::from_args(args, share_id, files))) + Ok(Self::new( + new_http(args), + CacheFile::from_args(args, new_share)?, + )) } fn with_progressbar(&mut self, f: impl FnOnce(&ProgressBar), drop_bar: bool) { @@ -199,15 +139,7 @@ impl AppState { } pub fn rebuild_share(self, args: &Cli) -> error::Result { - let share_id = - self.http - .share_create(&args.get_uri(), &args.alias, args.get_share_request())?; - let files = args.files.clone(); - - Ok(Self::new( - self.http, - CacheFile::from_args(args, share_id, files), - )) + Ok(Self::new(self.http, CacheFile::from_args(args, new_share)?)) } pub fn save(&self) -> io::Result<()> { diff --git a/src/cachefile.rs b/src/cachefile.rs index 3860fe1..d0a0592 100644 --- a/src/cachefile.rs +++ b/src/cachefile.rs @@ -3,15 +3,18 @@ use std::{ fs, io::{self, Write}, path::{Path, PathBuf}, + time::Duration, }; -use log::trace; +use indicatif::{ProgressBar, ProgressDrawTarget}; +use log::{info, trace}; use serde::{Deserialize, Serialize}; use crate::{ cli::Cli, error, - file::{self, Chunk}, + file::{self, Chunk, FileTrait}, + output::new_progressbar, sharry::{Client, Uri}, }; @@ -45,7 +48,7 @@ impl CacheFile { file_name } - pub fn try_resume(args: &Cli) -> io::Result { + pub fn try_resume(args: &Cli) -> error::Result { let file_name = Self::cache_file(args); let state: Self = { @@ -54,18 +57,85 @@ impl CacheFile { serde_json::from_reader(reader).map_err(io::Error::other)? }; + if args.should_hash() { + fn check_hash<'a>( + file: &'a impl FileTrait<'a>, + bar: &ProgressBar, + ) -> error::Result<()> { + bar.set_message(format!("checking {:?}", file.get_name())); + + match file.check_hash(|bytes| bar.inc(bytes)) { + Ok(true) => Ok(()), + Ok(false) => Err(error::Error::unknown(format!( + "Hash mismatch for file {:?}!", + file.get_name() + ))), + Err(e) => Err(e.into()), + } + } + + info!("checking files in {state:?}"); + + // BOOKMARK assumption: total file size < 2 EiB + let total_size = { + let upl_size = if let Some(upl) = state.peek_uploading() { + upl.get_size() + } else { + 0 + }; + upl_size + state.queue().iter().map(|&f| f.get_size()).sum::() + }; + + let bar = new_progressbar(); + bar.set_draw_target(ProgressDrawTarget::stderr()); + bar.set_length(total_size); + bar.enable_steady_tick(Duration::from_millis(50)); + + if let Some(upl) = state.peek_uploading() { + check_hash(upl, &bar)?; + } + + for chk in state.queue() { + check_hash(chk, &bar)?; + } + + bar.finish_with_message("finished checking files"); + } + Ok(Self { file_name, ..state }) } - pub fn from_args(args: &Cli, share_id: String, files: Vec) -> Self { - Self { + pub fn from_args( + args: &Cli, + new_share: impl FnOnce(&Cli) -> error::Result, + ) -> error::Result { + let mut files = args.files.clone(); + + if args.should_hash() { + info!("hashing files {files:?}"); + + let bar = new_progressbar(); + bar.set_draw_target(ProgressDrawTarget::stderr()); + // BOOKMARK assumption: total file size < 2 EiB + bar.set_length(files.iter().map(FileTrait::get_size).sum()); + bar.enable_steady_tick(Duration::from_millis(50)); + + for chk in &mut files { + bar.set_message(format!("hashing {:?}", chk.get_name())); + chk.hash(|bytes| bar.inc(bytes))?; + } + + bar.finish_with_message("finished hashing files"); + } + + Ok(Self { file_name: Self::cache_file(args), uri: args.get_uri(), alias_id: args.alias.clone(), - share_id, + share_id: new_share(args)?, uploading: None, files: files.into(), - } + }) } pub fn queue(&self) -> Vec<&file::Checked> { diff --git a/src/cli.rs b/src/cli.rs index ad96bb5..8013f9c 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -39,7 +39,7 @@ pub struct Cli { /// Name of the new share #[arg(short, long, default_value = "ShrUpl Upload", value_name = "TEXT")] - name: String, + share_name: String, /// Description of the new share #[arg(short, long, value_name = "TEXT")] @@ -57,6 +57,10 @@ pub struct Cli { )] pub chunk_size: usize, + /// Don't hash files before uploading + #[arg(short, long)] + no_hash: bool, + /// Increase output verbosity #[arg(short, long, action = clap::ArgAction::Count)] verbose: u8, @@ -125,9 +129,14 @@ impl Cli { } } + #[must_use] + pub fn should_hash(&self) -> bool { + !self.no_hash + } + #[must_use] pub fn get_share_request(&self) -> NewShareRequest { - NewShareRequest::new(&self.name, self.description.as_ref(), self.max_views) + NewShareRequest::new(&self.share_name, self.description.as_ref(), self.max_views) } #[must_use]