From 2c4439f32986e03077e224369be9bb2b449c0181 Mon Sep 17 00:00:00 2001 From: Alexis Praga Date: Sat, 15 Jun 2024 16:58:16 +0200 Subject: [PATCH] Pull only a single file or a whole folder. This is set using -l or --limit flag. If a single file is given, it should be the full path (from the repo) otherwise it won't match. If a directory is given, match all files and subdirectories. If a subdirectory is given, it should have all parents from the repo root. Split the generation of a download queue in `pull` function into a separate function for readability. --- src/lib/data.rs | 99 +++++++++++++++++++++++++++++++++------------- src/lib/project.rs | 16 +++++--- src/main.rs | 11 ++++-- 3 files changed, 89 insertions(+), 37 deletions(-) diff --git a/src/lib/data.rs b/src/lib/data.rs index 41a0767..c6d8324 100644 --- a/src/lib/data.rs +++ b/src/lib/data.rs @@ -397,11 +397,11 @@ impl DataFile { pub fn directory(&self) -> Result { let path = std::path::Path::new(&self.path); Ok(path - .parent() - .unwrap_or(path) - .to_str() - .unwrap_or("") - .to_string()) + .parent() + .unwrap_or(path) + .to_str() + .unwrap_or("") + .to_string()) } pub async fn get_md5(&self, path_context: &Path) -> Result> { @@ -587,7 +587,7 @@ impl DataCollection { } else { Err(anyhow!( "File '{}' is already registered in the data manifest.\n\ - If you wish to update the MD5 or metadata, use: sdf update FILE", + If you wish to update the MD5 or metadata, use: sdf update FILE", &data_file.path )) } @@ -719,7 +719,7 @@ impl DataCollection { match data_file { None => Err(anyhow!( "Data file '{}' is not in the data manifest. Add it first using:\n \ - $ sdf track {}\n", + $ sdf track {}\n", filepath, filepath )), @@ -742,7 +742,7 @@ impl DataCollection { match data_file { None => Err(anyhow!( "Cannot untrack data file '{}' since it was never added to\ - the data manifest.", + the data manifest.", filepath )), Some(file) => file.set_untracked(), @@ -794,7 +794,7 @@ impl DataCollection { match result { Ok((key, value)) => { pb.bar - .set_message(format!("Fetching remote files... {} done.", key.0)); + .set_message(format!("Fetching remote files... {} done.", key.0)); all_remote_files.insert(key, value); pb.bar.inc(1); } @@ -913,7 +913,7 @@ impl DataCollection { while let Some(result) = statuses_futures.next().await { if let Ok((key, value)) = result { pb.bar - .set_message(format!("Calculating MD5s... {} done.", &value.name)); + .set_message(format!("Calculating MD5s... {} done.", &value.name)); statuses.entry(key).or_insert_with(Vec::new).push(value); pb.bar.inc(1); } else { @@ -1059,13 +1059,40 @@ impl DataCollection { Ok(()) } - pub async fn pull_urls(&mut self, path_context: &Path, overwrite: bool) -> Result<()> { + // Compare a local path `local` to a user request `request` (either a path or a directory) + // - if request is a path, the full paths of both must match exactly + // - otherwise request is a directory and one of the ancestor must match + // Exemple + // - "a/test.txt" would not match "test.text" (first case) + // - "a/b/c/test.txt" would match "a/b/c" or "a/b" (second case) + // - "a/b/c/test.txt" would not match "b/c" + fn match_user_file(local: &String, request: &Option) -> bool { + let p = PathBuf::from(local); + if let Some(req) = request { + p == *req || Self::has_common_ancestor(p, req) + } + else { + false + } + } + + // Common ancestor starting from root between a filepath and a directory + // - "a/b/c/test.txt" and "a/b/c" have one + // - "a/b/c/test.txt" and "b/c" and don't + fn has_common_ancestor(file: PathBuf, dir: &PathBuf) -> bool { + file.ancestors().filter(|x| !x.as_os_str().is_empty() && x == dir).count() > 0 + } + + pub async fn pull_urls(&mut self, path_context: &Path, overwrite: bool, limit: &Option) -> Result<()> { let mut downloads = Downloads::new(); let mut filepaths = Vec::new(); let mut skipped = Vec::new(); let mut num_downloaded = 0; for data_file in self.files.values() { if let Some(url) = &data_file.url { + if !Self::match_user_file(&data_file.path, limit) { + continue; + } let full_path = data_file.full_path(path_context)?; let download = downloads.add(url.clone(), Some(&full_path.to_string_lossy()), overwrite)?; @@ -1088,35 +1115,33 @@ impl DataCollection { let num_skipped = skipped.len(); println!( "{} files were downloaded.\n\ - {} files were skipped because they existed (and --overwrite was not specified).", + {} files were skipped because they existed (and --overwrite was not specified).", num_downloaded, num_skipped ); Ok(()) } - // Download all files - // - // TODO: code redundancy with the push method's tracking of - // why stuff is skipped; split out info enum, etc. - pub async fn pull(&mut self, path_context: &Path, overwrite: bool) -> Result<()> { - let all_files = self.merge(true).await?; - - let mut downloads = Downloads::new(); - - let mut current_skipped = Vec::new(); - let mut messy_skipped = Vec::new(); - let mut overwrite_skipped = Vec::new(); - + async fn pull_get_downloads(&mut self, all_files: &HashMap>, path_context: &Path, + overwrite: bool, + request: &Option, + current_skipped: &mut Vec, + messy_skipped: &mut Vec, + overwrite_skipped: &mut Vec, + downloads: &mut Downloads) -> Result<()> { for (dir, merged_files) in all_files.iter() { - // can_download() is true only if local and remote are not None. + // 1. Skip non matching files if needed + // 2. can_download() is true only if local and remote are not None. // (local file can be deleted, but will only be None if not in manifest also) - for merged_file in merged_files.values().filter(|f| f.can_download()) { + let filtered = merged_files.iter().filter(|(k,v)| Self::match_user_file(k, request) + && v.can_download()); + for (_, merged_file) in filtered { let path = merged_file.name()?; + println!("filtered {:?}", merged_file); let do_download = match merged_file.status(path_context).await? { RemoteStatusCode::NoLocal => { return Err(anyhow!("Internal error: execution should not have reached this point, please report.\n\ - 'sdf pull' filtered by MergedFile.can_download() but found a RemoteStatusCode::NoLocal status.")); + 'sdf pull' filtered by MergedFile.can_download() but found a RemoteStatusCode::NoLocal status.")); } RemoteStatusCode::Current => { current_skipped.push(path); @@ -1159,6 +1184,24 @@ impl DataCollection { } } } + Ok(()) + } + + // Download all files, or a set of matching files + // + // TODO: code redundancy with the push method's tracking of + // why stuff is skipped; split out info enum, etc. + pub async fn pull(&mut self, path_context: &Path, overwrite: bool, limit: &Option) -> Result<()> { + let all_files = self.merge(true).await?; + + let mut current_skipped = Vec::new(); + let mut messy_skipped = Vec::new(); + let mut overwrite_skipped = Vec::new(); + + let mut downloads = Downloads::new(); + self.pull_get_downloads(&all_files, path_context, overwrite, limit, + &mut current_skipped, &mut messy_skipped, + &mut overwrite_skipped, &mut downloads).await?; // now retrieve all the files in the queue. downloads diff --git a/src/lib/project.rs b/src/lib/project.rs index 3402f94..07eab2d 100644 --- a/src/lib/project.rs +++ b/src/lib/project.rs @@ -634,16 +634,22 @@ impl Project { self.save() } - pub async fn pull(&mut self, overwrite: bool, url: bool, all: bool) -> Result<()> { + pub async fn pull( + &mut self, + overwrite: bool, + url: bool, + all: bool, + limit: &Option, + ) -> Result<()> { let path_context = self.path_context(); if all { - self.data.pull_urls(&path_context, overwrite).await?; - return self.data.pull(&path_context, overwrite).await; + self.data.pull_urls(&path_context, overwrite, limit).await?; + return self.data.pull(&path_context, overwrite, limit).await; } if url { - return self.data.pull_urls(&path_context, overwrite).await; + return self.data.pull_urls(&path_context, overwrite, limit).await; } - self.data.pull(&path_context, overwrite).await + self.data.pull(&path_context, overwrite, limit).await } pub async fn push(&mut self, overwrite: bool) -> Result<()> { diff --git a/src/main.rs b/src/main.rs index 56c4ef2..ad8d952 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,4 @@ -use std::path::Path; +use std::path::{Path, PathBuf}; use anyhow::{anyhow, Result}; use clap::{Parser, Subcommand}; @@ -216,8 +216,10 @@ enum Commands { /// Pull in files from remotes and URLs. #[arg(long)] all: bool, - // multiple optional directories - //directories: Vec, + + /// Pull in matching file or directory + #[arg(short, long, value_name = "FILE/DIRECTORY")] + limit: Option, }, /// Change the project metadata. Metadata { @@ -343,9 +345,10 @@ async fn run() -> Result<()> { overwrite, urls, all, + limit, }) => { let mut proj = Project::new()?; - proj.pull(*overwrite, *urls, *all).await + proj.pull(*overwrite, *urls, *all, limit).await } Some(Commands::Metadata { title, description }) => { let mut proj = Project::new()?;