From cc35b797478875e05015d5db860db637a45f7dcf Mon Sep 17 00:00:00 2001 From: Vince Buffalo Date: Mon, 28 Aug 2023 22:56:45 -0700 Subject: [PATCH] more minimal data manifest --- Cargo.toml | 2 +- README.md | 6 +++- src/lib/api/dryad.rs | 2 +- src/lib/api/figshare.rs | 2 +- src/lib/api/zenodo.rs | 3 +- src/lib/data.rs | 74 ++++++++++++++++++++++++++++++----------- src/lib/project.rs | 18 +++++----- src/lib/remote.rs | 2 +- src/main.rs | 11 ++++-- 9 files changed, 83 insertions(+), 37 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 88e0ff6..f379b6a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,7 @@ name = "scidataflow" path = "src/lib.rs" [[bin]] -name = "scf" +name = "sdf" path = "src/main.rs" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/README.md b/README.md index 237cea0..5d9971c 100644 --- a/README.md +++ b/README.md @@ -51,11 +51,15 @@ anywhere with the commands: - [x] FigShare - [ ] Data Dryad - - [ ] Zenodo + - [x] Zenodo - [ ] static remotes (i.e. just URLs) ## TODO + - remote_init for zenodo needs to check for existing. + + - link_only should propagate remote IDs, etc + - we need to be more strict about whether the remotes have files that are listed as tracked in *subdirectories*. E.g. 
we should, when a link to a remote is added to track a directory, check that that diff --git a/src/lib/api/dryad.rs b/src/lib/api/dryad.rs index 0568f42..ad0effa 100644 --- a/src/lib/api/dryad.rs +++ b/src/lib/api/dryad.rs @@ -1,6 +1,6 @@ use serde_derive::{Serialize,Deserialize}; -#[derive(Debug, Serialize, Deserialize, PartialEq)] +#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)] pub struct DataDryadAPI { base_url: String, diff --git a/src/lib/api/figshare.rs b/src/lib/api/figshare.rs index 580c847..a27fabc 100644 --- a/src/lib/api/figshare.rs +++ b/src/lib/api/figshare.rs @@ -37,7 +37,7 @@ fn figshare_api_url() -> String { FIGSHARE_BASE_URL.to_string() } -#[derive(Debug, Serialize, Deserialize, PartialEq)] +#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)] pub struct FigShareAPI { #[serde(skip_serializing, skip_deserializing,default="figshare_api_url")] base_url: String, diff --git a/src/lib/api/zenodo.rs b/src/lib/api/zenodo.rs index 55374b3..3408e36 100644 --- a/src/lib/api/zenodo.rs +++ b/src/lib/api/zenodo.rs @@ -159,7 +159,7 @@ fn zenodo_api_url() -> String { } -#[derive(Debug, Serialize, Deserialize, PartialEq)] +#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)] pub struct ZenodoAPI { #[serde(skip_serializing, skip_deserializing,default="zenodo_api_url")] base_url: String, @@ -252,6 +252,7 @@ impl ZenodoAPI { // For Zenodo, this creates a new "deposition" #[allow(unused)] pub async fn remote_init(&mut self, local_metadata: LocalMetadata) -> Result<()> { + // TODO URGENT: check for existing entries! 
let mut headers = HeaderMap::new(); headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json")); let metadata: ZenodoDepositionData = local_metadata.try_into()?; diff --git a/src/lib/data.rs b/src/lib/data.rs index 222d6f7..8eba3cd 100644 --- a/src/lib/data.rs +++ b/src/lib/data.rs @@ -2,8 +2,8 @@ use std::path::{PathBuf,Path}; use anyhow::{anyhow,Result}; use std::fs::{metadata}; use serde_derive::{Serialize,Deserialize}; -use serde::ser::SerializeMap; use serde; +use crate::lib::data::serde::{Serializer,Deserializer}; #[allow(unused_imports)] use log::{info, trace, debug}; use chrono::prelude::*; @@ -441,23 +441,8 @@ impl DataFile { } } -fn ordered_map(value: &HashMap, serializer: S) -> Result -where -K: serde::Serialize + Ord, -V: serde::Serialize, -S: serde::ser::Serializer, -{ - let mut ordered: Vec<_> = value.iter().collect(); - ordered.sort_by_key(|a| a.0); - - let mut map = serializer.serialize_map(Some(ordered.len()))?; - for (k, v) in ordered { - map.serialize_entry(k, v)?; - } - map.end() -} -#[derive(Debug, Serialize, Deserialize, Default, PartialEq)] +#[derive(Debug, Serialize, Deserialize, Default, PartialEq, Clone)] pub struct DataCollectionMetadata { pub title: Option, pub description: Option, @@ -465,15 +450,64 @@ pub struct DataCollectionMetadata { /// DataCollection structure for managing the data manifest /// and how it talks to the outside world. 
-#[derive(Debug, PartialEq, Serialize, Deserialize, Default)] +#[derive(Debug, PartialEq, Default)] pub struct DataCollection { - #[serde(serialize_with = "ordered_map")] pub files: HashMap, - #[serde(serialize_with = "ordered_map")] pub remotes: HashMap, // key is tracked directory pub metadata: DataCollectionMetadata, } +#[derive(Debug, Serialize, Deserialize, Default, PartialEq, Clone)] +pub struct MinimalDataCollection { + pub files: Vec, + pub remotes: HashMap, + pub metadata: DataCollectionMetadata, + +} + +impl serde::Serialize for DataCollection { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + // Collect the values of `files` into a vector; NOTE: no sort is applied, so the order is the HashMap's arbitrary iteration order + let sorted_files: Vec = self.files.values().cloned().collect(); + + // Construct a new struct to hold the serializable parts + let to_serialize = MinimalDataCollection { + files: sorted_files, + remotes: self.remotes.clone(), + metadata: self.metadata.clone(), + }; + + to_serialize.serialize(serializer) + } +} + +impl<'de> serde::Deserialize<'de> for DataCollection { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + // Deserialize into a temporary struct + let temp = MinimalDataCollection::deserialize(deserializer)?; + + // Build the HashMap for files based on the path + let files = temp + .files + .into_iter() + .map(|df| (df.path.clone(), df)) + .collect(); + + Ok(DataCollection { + files, + remotes: temp.remotes, + metadata: temp.metadata, + }) + } +} + + /// DataCollection methods: these should *only* be for /// interacting with the data manifest (including remotes). 
impl DataCollection { diff --git a/src/lib/project.rs b/src/lib/project.rs index 1f93867..7bf486a 100644 --- a/src/lib/project.rs +++ b/src/lib/project.rs @@ -305,7 +305,7 @@ impl Project { } pub async fn link(&mut self, dir: &str, service: &str, - key: &str, name: &Option) -> Result<()> { + key: &str, name: &Option, link_only: &bool) -> Result<()> { // (0) get the relative directory path let dir = self.relative_path_string(Path::new(dir))?; @@ -337,13 +337,15 @@ impl Project { // is already done. self.data.validate_remote_directory(&dir)?; - // (5) initialize the remote (e.g. for FigShare, this - // checks that the article doesn't exist (error if it - // does), creates it, and sets the FigShare.article_id - // once it is assigned by the remote). - // Note: we pass the Project to remote_init - let local_metadata = LocalMetadata::from_project(self); - remote.remote_init(local_metadata).await?; + if !link_only { + // (5) initialize the remote (e.g. for FigShare, this + // checks that the article doesn't exist (error if it + // does), creates it, and sets the FigShare.article_id + // once it is assigned by the remote). 
+ // Note: we pass the Project to remote_init + let local_metadata = LocalMetadata::from_project(self); + remote.remote_init(local_metadata).await?; + } // (6) register the remote in the manifest self.data.register_remote(&dir, remote)?; diff --git a/src/lib/remote.rs b/src/lib/remote.rs index 45f54a5..cee0bce 100644 --- a/src/lib/remote.rs +++ b/src/lib/remote.rs @@ -146,7 +146,7 @@ impl AuthKeys { } } -#[derive(Debug, PartialEq, Serialize, Deserialize)] +#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)] pub enum Remote { FigShareAPI(FigShareAPI), DataDryadAPI(DataDryadAPI), diff --git a/src/main.rs b/src/main.rs index bc8aa28..3f57f13 100644 --- a/src/main.rs +++ b/src/main.rs @@ -104,7 +104,12 @@ enum Commands { key: String, /// project name for remote (default: directory name) #[structopt(long)] - name: Option + name: Option, + + /// don't initialize remote, only add to manifest + #[structopt(long)] + link_only: bool + }, #[structopt(name = "ls")] @@ -193,9 +198,9 @@ async fn run() -> Result<()> { let mut proj = Project::new()?; proj.update(filename.as_ref()) } - Some(Commands::Link { dir, service, key, name }) => { + Some(Commands::Link { dir, service, key, name, link_only }) => { let mut proj = Project::new()?; - proj.link(dir, service, key, name).await + proj.link(dir, service, key, name, link_only).await } Some(Commands::Ls {}) => { let mut proj = Project::new()?;