Compare commits

...

3 commits

Author SHA1 Message Date
351596d56d implement better hashing
- call `file::Checked::hash` in `CacheFile::from_args` instead
2025-06-24 21:58:18 +00:00
ea5ef1fa10 implement better hashing
- call `file::Checked::hash` in `AppState::from_args`
2025-06-24 19:34:11 +00:00
2bcbc0d71c implement better hashing
- use `blake2b_simd` crate and optimize
2025-06-24 19:27:13 +00:00
11 changed files with 208 additions and 116 deletions

View file

@ -1,2 +1,9 @@
[build]
target = "x86_64-unknown-linux-musl"
# rustflags = [
# # emit instructions tuned to the current CPU
# "-C", "target-cpu=native",
# # assume CPU features
# "-C", "target-feature=+avx2,+sse4.1,+ssse3,+aes",
# ]

76
Cargo.lock generated
View file

@ -67,6 +67,18 @@ dependencies = [
"windows-sys 0.59.0",
]
[[package]]
name = "arrayref"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb"
[[package]]
name = "arrayvec"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
[[package]]
name = "base64"
version = "0.22.1"
@ -86,21 +98,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967"
[[package]]
name = "blake2"
version = "0.10.6"
name = "blake2b_simd"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe"
checksum = "06e903a20b159e944f91ec8499fe1e55651480c541ea0a584f5d967c49ad9d99"
dependencies = [
"digest",
]
[[package]]
name = "block-buffer"
version = "0.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
dependencies = [
"generic-array",
"arrayref",
"arrayvec",
"constant_time_eq",
]
[[package]]
@ -195,6 +200,12 @@ dependencies = [
"windows-sys 0.59.0",
]
[[package]]
name = "constant_time_eq"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6"
[[package]]
name = "cookie"
version = "0.18.1"
@ -233,16 +244,6 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "crypto-common"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
dependencies = [
"generic-array",
"typenum",
]
[[package]]
name = "ctrlc"
version = "3.4.7"
@ -273,17 +274,6 @@ dependencies = [
"thiserror 1.0.69",
]
[[package]]
name = "digest"
version = "0.10.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
dependencies = [
"block-buffer",
"crypto-common",
"subtle",
]
[[package]]
name = "dirs-next"
version = "2.0.0"
@ -385,16 +375,6 @@ dependencies = [
"percent-encoding",
]
[[package]]
name = "generic-array"
version = "0.14.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
dependencies = [
"typenum",
"version_check",
]
[[package]]
name = "getrandom"
version = "0.2.16"
@ -902,7 +882,7 @@ name = "shrupl"
version = "0.1.0-alpha"
dependencies = [
"base64ct",
"blake2",
"blake2b_simd",
"clap",
"console",
"ctrlc",
@ -1045,12 +1025,6 @@ dependencies = [
"zerovec",
]
[[package]]
name = "typenum"
version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f"
[[package]]
name = "unicode-ident"
version = "1.0.18"

View file

@ -6,7 +6,7 @@ description = "ShrUpl is a tool to upload files to a Sharry Instance through a p
[dependencies]
base64ct = { version = "1.8.0", default-features = false, features = ["alloc"] }
blake2 = { version = "0.10.6", default-features = false }
blake2b_simd = "1.0.3"
clap = { version = "4.5.38", features = ["derive"] }
console = { version = "0.15.11", default-features = false }
ctrlc = { version = "3.4.7", features = ["termination"] }
@ -22,5 +22,11 @@ thiserror = "2.0.12"
ureq = { version = "3.0.11", features = ["json"] }
[profile.release]
# Optimize aggressively for binary size ("z"); use opt-level = 3 to optimize for speed instead
opt-level = "z"
# better inlining
codegen-units = 1
# link-time optimization
lto = true
debug = false
panic = "abort"

View file

@ -1,13 +1,13 @@
use std::{fmt, io, time::Duration};
use console::style;
use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
use indicatif::{ProgressBar, ProgressDrawTarget};
use log::{debug, warn};
use crate::{
cachefile::CacheFile,
cli::Cli,
file::{Chunk, FileTrait},
output::new_progressbar,
sharry::{self, Client},
};
@ -46,6 +46,8 @@ impl AppState {
.inspect_err(|e| debug!("could not resume from hash {:?}: {e}", args.get_hash()))
.ok()?;
inner.peek_uploading();
Some(Self::new(new_http(args.get_timeout()), inner))
}
@ -54,23 +56,11 @@ impl AppState {
let share_id = http.share_create(&args.get_uri(), &args.alias, args.get_share_request())?;
Ok(Self::new(http, CacheFile::from_args(args, share_id)))
Ok(Self::new(http, CacheFile::from_args(args, share_id)?))
}
fn with_progressbar(&mut self, f: impl FnOnce(&ProgressBar), drop_bar: bool) {
let bar = &*self.progress.get_or_insert_with(|| {
ProgressBar::hidden().with_style(
ProgressStyle::with_template(&format!(
concat!(
"{{bar:50.cyan/blue}} {{msg:.magenta}}: ",
"{{binary_bytes:.yellow}}{}{{binary_total_bytes:.yellow}} ",
"({{eta}})",
),
style("/").magenta(),
))
.expect("style template is not valid"),
)
});
let bar = &*self.progress.get_or_insert_with(new_progressbar);
if let Some(upl) = self.inner.peek_uploading() {
if bar.length().is_none() {
@ -154,7 +144,7 @@ impl AppState {
self.http
.share_create(&args.get_uri(), &args.alias, args.get_share_request())?;
Ok(Self::new(self.http, CacheFile::from_args(args, share_id)))
Ok(Self::new(self.http, CacheFile::from_args(args, share_id)?))
}
pub fn save(&self) -> io::Result<()> {

View file

@ -3,14 +3,17 @@ use std::{
fs,
io::{self, Write},
path::PathBuf,
time::Duration,
};
use log::trace;
use indicatif::ProgressDrawTarget;
use log::{debug, trace};
use serde::{Deserialize, Serialize};
use crate::{
cli::Cli,
file::{self, Chunk},
file::{self, Chunk, FileTrait},
output::new_progressbar,
sharry::{self, Client, Uri},
};
@ -56,15 +59,35 @@ impl CacheFile {
Ok(Self { file_name, ..state })
}
pub fn from_args(args: &Cli, share_id: String) -> Self {
Self {
pub fn from_args(args: &Cli, share_id: String) -> io::Result<Self> {
// TODO CLI switch begin
let mut files = args.files.clone();
let bar = new_progressbar();
bar.set_draw_target(ProgressDrawTarget::stderr());
// BOOKMARK assumption: total file size < 2 EiB
bar.set_length(files.iter().map(|f| f.get_size()).sum());
bar.enable_steady_tick(Duration::from_millis(50));
for chk in &mut files {
bar.set_message(format!("hashing {:?}", chk.get_name()));
chk.hash(|bytes| bar.inc(bytes))?;
debug!("{chk:?}");
}
bar.finish_with_message("finished hashing files");
// TODO CLI switch end
Ok(Self {
file_name: Self::cache_file(args),
uri: args.get_uri(),
alias_id: args.alias.clone(),
share_id,
uploading: None,
files: args.files.clone().into(),
}
files: files.into(),
})
}
pub fn queue_empty(&self) -> bool {

View file

@ -1,7 +1,7 @@
use std::{convert::Infallible, fmt, io, time::Duration};
use base64ct::{Base64UrlUnpadded, Encoding};
use blake2::{Blake2b, Digest, digest::consts::U16};
use blake2b_simd::Params as Blake2b;
use clap::{
Parser,
builder::{PossibleValuesParser, TypedValueParser},
@ -96,8 +96,6 @@ fn parse_sharry_file(data: &str) -> io::Result<Checked> {
Checked::new(data)
}
type Blake2b128 = Blake2b<U16>;
fn sorted<T>(values: &[T]) -> Vec<&T>
where
T: Ord,
@ -143,14 +141,15 @@ impl Cli {
}
pub fn get_hash(&self) -> String {
let mut hasher = Blake2b128::new();
hasher.update(self.get_uri());
hasher.update(&self.alias);
let mut hasher = Blake2b::new().hash_length(16).to_state();
hasher.update(self.get_uri().as_ref());
hasher.update(self.alias.as_bytes());
for chk in sorted(&self.files) {
hasher.update(chk);
hasher.update(chk.as_ref());
}
Base64UrlUnpadded::encode_string(&hasher.finalize())
Base64UrlUnpadded::encode_string(hasher.finalize().as_bytes())
}
}

View file

@ -20,6 +20,8 @@ pub struct Checked {
pub(super) path: PathBuf,
/// size of that file
pub(super) size: u64,
/// hash of that file
pub(super) hash: Option<String>,
}
impl AsRef<[u8]> for Checked {
@ -41,6 +43,7 @@ impl Checked {
Ok(Self {
path: fs::canonicalize(&value)?,
size: meta.len(),
hash: None,
})
} else {
Err(io::Error::new(
@ -50,6 +53,19 @@ impl Checked {
}
}
/// Compute and store the hash of this file.
///
/// `f` is forwarded to the hashing routine as its progress callback.
///
/// # Errors
///
/// Fails if a hash is already stored for this file, or if computing
/// the hash fails.
pub fn hash(&mut self, f: impl Fn(u64)) -> io::Result<()> {
    match self.hash {
        // refuse to hash the same file twice
        Some(_) => Err(io::Error::other(format!(
            "file {:?} is already hashed!",
            self.path.display()
        ))),
        None => {
            let digest = super::compute_file_hash(&self.path, self.size, f)?;
            self.hash = Some(digest);
            Ok(())
        }
    }
}
/// start uploading this file
///
/// - tries to create a new entry in a share
@ -68,7 +84,7 @@ impl Checked {
) -> sharry::Result<Uploading> {
let file_id = client.file_create(uri, alias_id, share_id, &self)?;
Ok(Uploading::new(self.path, self.size, file_id))
Ok(Uploading::new(self.path, self.size, self.hash, file_id))
}
}
@ -84,4 +100,8 @@ impl<'t> FileTrait<'t> for Checked {
fn get_size(&self) -> u64 {
self.size
}
fn check_hash(&self, on_progress: impl Fn(u64)) -> io::Result<bool> {
super::check_file_hash(&self.path, self.size, &self.hash, on_progress)
}
}

View file

@ -2,12 +2,62 @@ mod checked;
mod chunk;
mod uploading;
use std::{ffi::OsStr, path::Path};
use std::{
ffi::OsStr,
fs,
io::{self, Read},
path::Path,
};
use base64ct::{Base64, Encoding};
use blake2b_simd::Params as Blake2b;
pub use checked::Checked;
pub use chunk::Chunk;
pub use uploading::Uploading;
/// Hash the contents of the file at `path` and return the digest as a
/// Base64-encoded string.
///
/// The file is read in chunks of up to 4 MiB and fed into a BLAKE2b state
/// configured for a 64-byte digest. After each chunk, `on_progress` is
/// called with the number of bytes that were just read.
///
/// # Errors
///
/// - the file cannot be opened or read
/// - the number of bytes actually read differs from the expected `size`
fn compute_file_hash<P>(path: P, size: u64, on_progress: impl Fn(u64)) -> io::Result<String>
where
    P: AsRef<Path>,
{
    let mut file = fs::File::open(path)?;
    let mut hasher = Blake2b::new().hash_length(64).to_state();

    let mut buf = vec![0u8; 4 * 1024 * 1024];
    let mut bytes_read = 0;

    loop {
        let n = match file.read(&mut buf) {
            // end of file
            Ok(0) => break,
            Ok(n) => n,
            // `Interrupted` is non-fatal: no data was consumed, so retry the read
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };

        hasher.update(&buf[..n]);
        bytes_read += n as u64;
        on_progress(n as u64);
    }

    // the file must not have changed size since it was checked
    if bytes_read != size {
        return Err(io::Error::other(format!(
            "Hashed {bytes_read:?} bytes, known file size {:?}!",
            size
        )));
    }

    Ok(Base64::encode_string(hasher.finalize().as_bytes()))
}
/// Check whether the file at `path` matches a previously stored hash.
///
/// Returns `Ok(false)` without touching the file when `hash` is `None`.
/// Otherwise the file is hashed again via `compute_file_hash` and the
/// result is compared to the stored value.
///
/// # Errors
///
/// Propagates any error from `compute_file_hash`.
fn check_file_hash(
    path: impl AsRef<Path>,
    size: u64,
    hash: &Option<String>,
    on_progress: impl Fn(u64),
) -> io::Result<bool> {
    match hash {
        Some(expected) => {
            let actual = compute_file_hash(path, size, on_progress)?;
            Ok(*expected == actual)
        }
        // nothing to compare against
        None => Ok(false),
    }
}
pub trait FileTrait<'t> {
/// extract the filename part of a `Path` reference
///
@ -25,4 +75,6 @@ pub trait FileTrait<'t> {
/// get the file's size
fn get_size(&self) -> u64;
fn check_hash(&self, on_progress: impl Fn(u64)) -> io::Result<bool>;
}

View file

@ -11,8 +11,12 @@ use super::{Checked, Chunk, FileTrait};
#[derive(Serialize, Deserialize, Debug)]
pub struct Uploading {
/// canonical path to a regular file
path: PathBuf,
/// size of that file
size: u64,
/// hash of that file
hash: Option<String>,
file_id: String,
#[serde(skip)]
last_offset: Option<u64>,
@ -20,10 +24,11 @@ pub struct Uploading {
}
impl Uploading {
pub(super) fn new(path: PathBuf, size: u64, file_id: String) -> Self {
pub(super) fn new(path: PathBuf, size: u64, hash: Option<String>, file_id: String) -> Self {
Self {
path,
size,
hash,
file_id,
last_offset: None,
offset: 0,
@ -79,6 +84,7 @@ impl Uploading {
Checked {
path: self.path,
size: self.size,
hash: self.hash,
}
}
}
@ -94,4 +100,8 @@ impl<'t> FileTrait<'t> for Uploading {
fn get_size(&self) -> u64 {
self.size
}
fn check_hash(&self, on_progress: impl Fn(u64)) -> io::Result<bool> {
super::check_file_hash(&self.path, self.size, &self.hash, on_progress)
}
}

View file

@ -24,6 +24,32 @@ use output::{Log, SHRUPL};
use sharry::{ClientError, Parameter};
fn main() {
let args = Cli::parse();
env_logger::Builder::new()
.filter_module("shrupl", args.get_level_filter())
.parse_default_env()
.init();
info!("args: {args:#?}");
println!("{} to {}!", style("Welcome").magenta().bold(), *SHRUPL);
let mut state = AppState::try_resume(&args)
.and_then(|state| output::prompt_continue().then_some(state))
.unwrap_or_else(|| match AppState::from_args(&args) {
Ok(state) => {
state.save().unwrap_or_else(|e| {
Log::warning(format_args!("Failed to save state: {e}"));
});
state
}
Err(e) => {
Log::handle(&e);
Log::error(format_args!("Failed to create state: {e}"));
}
});
let check_ctrlc = {
let stop = Arc::new(AtomicBool::new(false));
let stop_ctrlc = stop.clone();
@ -41,36 +67,6 @@ fn main() {
}
};
let args = Cli::parse();
env_logger::Builder::new()
.filter_module("shrupl", args.get_level_filter())
.parse_default_env()
.init();
info!("args: {args:#?}");
println!("{} to {}!", style("Welcome").magenta().bold(), *SHRUPL);
let mut state = AppState::try_resume(&args)
.and_then(|state| output::prompt_continue().then_some(state))
.unwrap_or_else(|| {
check_ctrlc();
match AppState::from_args(&args) {
Ok(state) => {
state.save().unwrap_or_else(|e| {
Log::warning(format_args!("Failed to save state: {e}"));
});
state
}
Err(e) => {
Log::handle(&e);
Log::error(format_args!("Failed to create state: {e}"));
}
}
});
info!("continuing with state: {state:#?}");
let fns_magenta = output::style_all(&args.file_names(), StyledObject::magenta).join(", ");

View file

@ -2,6 +2,7 @@ use std::{fmt, process, sync::LazyLock};
use console::{StyledObject, style};
use dialoguer::{Select, theme::ColorfulTheme};
use indicatif::{ProgressBar, ProgressStyle};
use log::{error, info};
use crate::sharry;
@ -43,6 +44,20 @@ where
strs.iter().map(|&s| f(style(s)).to_string()).collect()
}
/// Create a hidden progress bar with the application's byte-progress style.
///
/// The template shows a bar, a message, processed and total bytes separated
/// by a magenta `/`, and the ETA.
///
/// # Panics
///
/// Panics if the style template is rejected by `ProgressStyle::with_template`.
pub fn new_progressbar() -> ProgressBar {
    let separator = style("/").magenta();

    // doubled braces survive `format!` as single braces for the template
    let template = format!(
        concat!(
            "{{bar:50.cyan/blue}} {{msg:.magenta}}: ",
            "{{binary_bytes:.yellow}}{}{{binary_total_bytes:.yellow}} ",
            "({{eta}})",
        ),
        separator,
    );

    let bar_style = ProgressStyle::with_template(&template).expect("invalid style template");

    ProgressBar::hidden().with_style(bar_style)
}
pub enum Log {}
impl Log {