Skip to content

Commit

Permalink
Pull only a single file or a whole folder.
Browse files Browse the repository at this point in the history
This is set using the -l or --limit flag.
If a single file is given, it must be the full path (from the repo root);
otherwise it won't match.
If a directory is given, all of its files and subdirectories match. If a
subdirectory is given, it must include all its parents from the repo root.

Split the generation of the download queue in the `pull` function into a
separate function for readability.
  • Loading branch information
apraga committed Jun 16, 2024
1 parent 14e1ccf commit fa2ee70
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 37 deletions.
99 changes: 71 additions & 28 deletions src/lib/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -397,11 +397,11 @@ impl DataFile {
pub fn directory(&self) -> Result<String> {
let path = std::path::Path::new(&self.path);
Ok(path
.parent()
.unwrap_or(path)
.to_str()
.unwrap_or("")
.to_string())
.parent()
.unwrap_or(path)
.to_str()
.unwrap_or("")
.to_string())
}

pub async fn get_md5(&self, path_context: &Path) -> Result<Option<String>> {
Expand Down Expand Up @@ -587,7 +587,7 @@ impl DataCollection {
} else {
Err(anyhow!(
"File '{}' is already registered in the data manifest.\n\
If you wish to update the MD5 or metadata, use: sdf update FILE",
If you wish to update the MD5 or metadata, use: sdf update FILE",
&data_file.path
))
}
Expand Down Expand Up @@ -719,7 +719,7 @@ impl DataCollection {
match data_file {
None => Err(anyhow!(
"Data file '{}' is not in the data manifest. Add it first using:\n \
$ sdf track {}\n",
$ sdf track {}\n",
filepath,
filepath
)),
Expand All @@ -742,7 +742,7 @@ impl DataCollection {
match data_file {
None => Err(anyhow!(
"Cannot untrack data file '{}' since it was never added to\
the data manifest.",
the data manifest.",
filepath
)),
Some(file) => file.set_untracked(),
Expand Down Expand Up @@ -794,7 +794,7 @@ impl DataCollection {
match result {
Ok((key, value)) => {
pb.bar
.set_message(format!("Fetching remote files... {} done.", key.0));
.set_message(format!("Fetching remote files... {} done.", key.0));
all_remote_files.insert(key, value);
pb.bar.inc(1);
}
Expand Down Expand Up @@ -913,7 +913,7 @@ impl DataCollection {
while let Some(result) = statuses_futures.next().await {
if let Ok((key, value)) = result {
pb.bar
.set_message(format!("Calculating MD5s... {} done.", &value.name));
.set_message(format!("Calculating MD5s... {} done.", &value.name));
statuses.entry(key).or_insert_with(Vec::new).push(value);
pb.bar.inc(1);
} else {
Expand Down Expand Up @@ -1059,13 +1059,40 @@ impl DataCollection {
Ok(())
}

pub async fn pull_urls(&mut self, path_context: &Path, overwrite: bool) -> Result<()> {
// Decide whether the local path `local` is selected by the user request
// `request` (either a file path or a directory).
// - if no request was given (`None`), nothing is filtered: every file matches
// - if request is a file path, the full paths of both must match exactly
// - otherwise request is a directory and one of the ancestors of `local`,
//   counted from the root, must match it
// Examples
// - "a/test.txt" would not match "test.txt" (first case: not the full path)
// - "a/b/c/test.txt" would match "a/b/c" or "a/b" (second case)
// - "a/b/c/test.txt" would not match "b/c" (not anchored at the root)
fn match_user_file(local: &String, request: &Option<PathBuf>) -> bool {
    match request {
        // Without a limit the callers must still process every file; returning
        // `false` here would make them skip the whole manifest.
        None => true,
        Some(req) => {
            let p = PathBuf::from(local);
            p == *req || Self::has_common_ancestor(p, req)
        }
    }
}

// Report whether `dir` is `file` itself or one of its ancestors, comparing
// whole path components starting from the root.
// - "a/b/c/test.txt" and "a/b/c" match
// - "a/b/c/test.txt" and "b/c" do not (not anchored at the root)
// The empty path that `ancestors()` yields last for a relative path is
// ignored, so an empty `dir` never matches.
fn has_common_ancestor(file: PathBuf, dir: &PathBuf) -> bool {
    // `any` stops at the first hit instead of counting every match.
    file.ancestors()
        .any(|ancestor| !ancestor.as_os_str().is_empty() && ancestor == dir.as_path())
}

pub async fn pull_urls(&mut self, path_context: &Path, overwrite: bool, limit: &Option<PathBuf>) -> Result<()> {
let mut downloads = Downloads::new();
let mut filepaths = Vec::new();
let mut skipped = Vec::new();
let mut num_downloaded = 0;
for data_file in self.files.values() {
if let Some(url) = &data_file.url {
if !Self::match_user_file(&data_file.path, limit) {
continue;
}
let full_path = data_file.full_path(path_context)?;
let download =
downloads.add(url.clone(), Some(&full_path.to_string_lossy()), overwrite)?;
Expand All @@ -1088,35 +1115,33 @@ impl DataCollection {
let num_skipped = skipped.len();
println!(
"{} files were downloaded.\n\
{} files were skipped because they existed (and --overwrite was not specified).",
{} files were skipped because they existed (and --overwrite was not specified).",
num_downloaded, num_skipped
);
Ok(())
}

// Download all files
//
// TODO: code redundancy with the push method's tracking of
// why stuff is skipped; split out info enum, etc.
pub async fn pull(&mut self, path_context: &Path, overwrite: bool) -> Result<()> {
let all_files = self.merge(true).await?;

let mut downloads = Downloads::new();

let mut current_skipped = Vec::new();
let mut messy_skipped = Vec::new();
let mut overwrite_skipped = Vec::new();

async fn pull_get_downloads(&mut self, all_files: &HashMap<String, HashMap<String, MergedFile>>, path_context: &Path,
overwrite: bool,
request: &Option<PathBuf>,
current_skipped: &mut Vec<String>,
messy_skipped: &mut Vec<String>,
overwrite_skipped: &mut Vec<String>,
downloads: &mut Downloads) -> Result<()> {
for (dir, merged_files) in all_files.iter() {
// can_download() is true only if local and remote are not None.
// 1. Skip non matching files if needed
// 2. can_download() is true only if local and remote are not None.
// (local file can be deleted, but will only be None if not in manifest also)
for merged_file in merged_files.values().filter(|f| f.can_download()) {
let filtered = merged_files.iter().filter(|(k,v)| Self::match_user_file(&k, request)
&& v.can_download());
for (_, merged_file) in filtered {
let path = merged_file.name()?;
println!("filtered {:?}", merged_file);

let do_download = match merged_file.status(path_context).await? {
RemoteStatusCode::NoLocal => {
return Err(anyhow!("Internal error: execution should not have reached this point, please report.\n\
'sdf pull' filtered by MergedFile.can_download() but found a RemoteStatusCode::NoLocal status."));
'sdf pull' filtered by MergedFile.can_download() but found a RemoteStatusCode::NoLocal status."));
}
RemoteStatusCode::Current => {
current_skipped.push(path);
Expand Down Expand Up @@ -1159,6 +1184,24 @@ impl DataCollection {
}
}
}
Ok(())
}

// Download all files, or a set of matching files
//
// TODO: code redundancy with the push method's tracking of
// why stuff is skipped; split out info enum, etc.
pub async fn pull(&mut self, path_context: &Path, overwrite: bool, limit: &Option<PathBuf>) -> Result<()> {
let all_files = self.merge(true).await?;

let mut current_skipped = Vec::new();
let mut messy_skipped = Vec::new();
let mut overwrite_skipped = Vec::new();

let mut downloads = Downloads::new();
self.pull_get_downloads(&all_files, path_context, overwrite, limit,
&mut current_skipped, &mut messy_skipped,
&mut overwrite_skipped, &mut downloads).await?;

// now retrieve all the files in the queue.
downloads
Expand Down
16 changes: 11 additions & 5 deletions src/lib/project.rs
Original file line number Diff line number Diff line change
Expand Up @@ -634,16 +634,22 @@ impl Project {
self.save()
}

pub async fn pull(&mut self, overwrite: bool, url: bool, all: bool) -> Result<()> {
pub async fn pull(
&mut self,
overwrite: bool,
url: bool,
all: bool,
limit: &Option<PathBuf>,
) -> Result<()> {
let path_context = self.path_context();
if all {
self.data.pull_urls(&path_context, overwrite).await?;
return self.data.pull(&path_context, overwrite).await;
self.data.pull_urls(&path_context, overwrite, limit).await?;
return self.data.pull(&path_context, overwrite, limit).await;
}
if url {
return self.data.pull_urls(&path_context, overwrite).await;
return self.data.pull_urls(&path_context, overwrite, limit).await;
}
self.data.pull(&path_context, overwrite).await
self.data.pull(&path_context, overwrite, limit).await
}

pub async fn push(&mut self, overwrite: bool) -> Result<()> {
Expand Down
11 changes: 7 additions & 4 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::path::Path;
use std::path::{Path, PathBuf};

use anyhow::{anyhow, Result};
use clap::{Parser, Subcommand};
Expand Down Expand Up @@ -216,8 +216,10 @@ enum Commands {
/// Pull in files from remotes and URLs.
#[arg(long)]
all: bool,
// multiple optional directories
//directories: Vec<PathBuf>,

/// Pull in matching file or directory
#[arg(short, long, value_name = "FILE/DIRECTORY")]
limit: Option<PathBuf>,
},
/// Change the project metadata.
Metadata {
Expand Down Expand Up @@ -343,9 +345,10 @@ async fn run() -> Result<()> {
overwrite,
urls,
all,
limt,
}) => {
let mut proj = Project::new()?;
proj.pull(*overwrite, *urls, *all).await
proj.pull(*overwrite, *urls, *all, limit).await
}
Some(Commands::Metadata { title, description }) => {
let mut proj = Project::new()?;
Expand Down

0 comments on commit fa2ee70

Please sign in to comment.