Skip to content

Commit

Permalink
more minimal data manifest
Browse files Browse the repository at this point in the history
  • Loading branch information
vsbuffalo committed Aug 29, 2023
1 parent 8eb2f61 commit cc35b79
Show file tree
Hide file tree
Showing 9 changed files with 83 additions and 37 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ name = "scidataflow"
path = "src/lib.rs"

[[bin]]
name = "scf"
name = "sdf"
path = "src/main.rs"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
Expand Down
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,15 @@ anywhere with the commands:

- [x] FigShare
- [ ] Data Dryad
- [ ] Zenodo
- [x] Zenodo
- [ ] static remotes (i.e. just URLs)

## TODO

- `remote_init` for Zenodo needs to check for an existing deposition before creating a new one.

- link_only should propagate remote IDs, etc

- we need to be more strict about whether the remotes have files that
are listed as tracked in *subdirectories*. E.g. we should, when a
link to a remote is added to track a directory, check that that
Expand Down
2 changes: 1 addition & 1 deletion src/lib/api/dryad.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use serde_derive::{Serialize,Deserialize};

#[derive(Debug, Serialize, Deserialize, PartialEq)]
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
pub struct DataDryadAPI {
base_url: String,

Expand Down
2 changes: 1 addition & 1 deletion src/lib/api/figshare.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ fn figshare_api_url() -> String {
FIGSHARE_BASE_URL.to_string()
}

#[derive(Debug, Serialize, Deserialize, PartialEq)]
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
pub struct FigShareAPI {
#[serde(skip_serializing, skip_deserializing,default="figshare_api_url")]
base_url: String,
Expand Down
3 changes: 2 additions & 1 deletion src/lib/api/zenodo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ fn zenodo_api_url() -> String {
}


#[derive(Debug, Serialize, Deserialize, PartialEq)]
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
pub struct ZenodoAPI {
#[serde(skip_serializing, skip_deserializing,default="zenodo_api_url")]
base_url: String,
Expand Down Expand Up @@ -252,6 +252,7 @@ impl ZenodoAPI {
// For Zenodo, this creates a new "deposition"
#[allow(unused)]
pub async fn remote_init(&mut self, local_metadata: LocalMetadata) -> Result<()> {
// TODO URGENT: check for existing entries!
let mut headers = HeaderMap::new();
headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
let metadata: ZenodoDepositionData = local_metadata.try_into()?;
Expand Down
74 changes: 54 additions & 20 deletions src/lib/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ use std::path::{PathBuf,Path};
use anyhow::{anyhow,Result};
use std::fs::{metadata};
use serde_derive::{Serialize,Deserialize};
use serde::ser::SerializeMap;
use serde;
use crate::lib::data::serde::{Serializer,Deserializer};
#[allow(unused_imports)]
use log::{info, trace, debug};
use chrono::prelude::*;
Expand Down Expand Up @@ -441,39 +441,73 @@ impl DataFile {
}
}

/// Serialize a `HashMap` as a map whose entries are written in ascending
/// key order.
///
/// Written for use as a `#[serde(serialize_with = "ordered_map")]` helper:
/// the entries are borrowed, ordered by key, and then fed one by one to the
/// serializer's map state.
fn ordered_map<K, V, S>(value: &HashMap<K, V>, serializer: S) -> Result<S::Ok, S::Error>
where
    K: serde::Serialize + Ord,
    V: serde::Serialize,
    S: serde::ser::Serializer,
{
    // Borrow every (key, value) pair and order the borrowed pairs by key.
    let mut entries: Vec<(&K, &V)> = value.iter().collect();
    entries.sort_by(|lhs, rhs| lhs.0.cmp(rhs.0));

    // Emit the pairs in their sorted order; stop at the first serializer error.
    let mut state = serializer.serialize_map(Some(entries.len()))?;
    entries
        .into_iter()
        .try_for_each(|(key, val)| state.serialize_entry(key, val))?;
    state.end()
}

/// Optional descriptive metadata for a data collection.
///
/// Both fields are `Option`s, so a collection may carry a title, a
/// description, both, or neither.
#[derive(Debug, Serialize, Deserialize, Default, PartialEq)]
#[derive(Debug, Serialize, Deserialize, Default, PartialEq, Clone)]
pub struct DataCollectionMetadata {
// Title of the collection, if one is set.
pub title: Option<String>,
// Free-text description of the collection, if one is set.
pub description: Option<String>,
}

/// DataCollection structure for managing the data manifest
/// and how it talks to the outside world.
///
/// Holds the tracked files, the remotes linked to tracked directories,
/// and the collection-level metadata.
#[derive(Debug, PartialEq, Serialize, Deserialize, Default)]
#[derive(Debug, PartialEq, Default)]
pub struct DataCollection {
#[serde(serialize_with = "ordered_map")]
// Tracked files; the Deserialize impl for this type keys each entry by
// the DataFile's `path`.
pub files: HashMap<String, DataFile>,
#[serde(serialize_with = "ordered_map")]
pub remotes: HashMap<String, Remote>, // key is tracked directory
// Title/description metadata for the whole collection.
pub metadata: DataCollectionMetadata,
}

/// Flat, directly (de)serializable form of a `DataCollection`.
///
/// It differs from `DataCollection` only in how files are held: as a plain
/// list instead of a map. The hand-written `Serialize`/`Deserialize` impls
/// for `DataCollection` convert to and from this type.
#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
pub struct MinimalDataCollection {
    /// Tracked files as a list.
    pub files: Vec<DataFile>,
    /// Remotes, using the same map type as `DataCollection::remotes`.
    pub remotes: HashMap<String, Remote>,
    /// Collection-level title and description.
    pub metadata: DataCollectionMetadata,
}

/// Serializes a `DataCollection` in the minimal manifest layout.
///
/// The `files` map is flattened into a list of `DataFile` entries ordered
/// by `path`; the map keys are not written separately (the `Deserialize`
/// impl rebuilds them from each entry's `path`). `remotes` and `metadata`
/// are copied unchanged into a `MinimalDataCollection`, which does the
/// actual serialization.
impl serde::Serialize for DataCollection {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // Flatten `files` into a vector and sort it by path. `HashMap`
        // iteration order is unspecified, so without the explicit sort the
        // order of files in the manifest could change from run to run.
        let mut sorted_files: Vec<DataFile> = self.files.values().cloned().collect();
        sorted_files.sort_by(|a, b| a.path.cmp(&b.path));

        // NOTE(review): `remotes` is still a `HashMap` in
        // `MinimalDataCollection`, so its entry order in the output is not
        // fixed by this impl.
        let to_serialize = MinimalDataCollection {
            files: sorted_files,
            remotes: self.remotes.clone(),
            metadata: self.metadata.clone(),
        };

        to_serialize.serialize(serializer)
    }
}

impl<'de> serde::Deserialize<'de> for DataCollection {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
// Deserialize into a temporary struct
let temp = MinimalDataCollection::deserialize(deserializer)?;

// Build the HashMap for files based on the path
let files = temp
.files
.into_iter()
.map(|df| (df.path.clone(), df))
.collect();

Ok(DataCollection {
files,
remotes: temp.remotes,
metadata: temp.metadata,
})
}
}


/// DataCollection methods: these should *only* be for
/// interacting with the data manifest (including remotes).
impl DataCollection {
Expand Down
18 changes: 10 additions & 8 deletions src/lib/project.rs
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ impl Project {
}

pub async fn link(&mut self, dir: &str, service: &str,
key: &str, name: &Option<String>) -> Result<()> {
key: &str, name: &Option<String>, link_only: &bool) -> Result<()> {
// (0) get the relative directory path
let dir = self.relative_path_string(Path::new(dir))?;

Expand Down Expand Up @@ -337,13 +337,15 @@ impl Project {
// is already done.
self.data.validate_remote_directory(&dir)?;

// (5) initialize the remote (e.g. for FigShare, this
// checks that the article doesn't exist (error if it
// does), creates it, and sets the FigShare.article_id
// once it is assigned by the remote).
// Note: we pass the Project to remote_init
let local_metadata = LocalMetadata::from_project(self);
remote.remote_init(local_metadata).await?;
if !link_only {
// (5) initialize the remote (e.g. for FigShare, this
// checks that the article doesn't exist (error if it
// does), creates it, and sets the FigShare.article_id
// once it is assigned by the remote).
// Note: we pass the Project to remote_init
let local_metadata = LocalMetadata::from_project(self);
remote.remote_init(local_metadata).await?;
}

// (6) register the remote in the manifest
self.data.register_remote(&dir, remote)?;
Expand Down
2 changes: 1 addition & 1 deletion src/lib/remote.rs
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ impl AuthKeys {
}
}

#[derive(Debug, PartialEq, Serialize, Deserialize)]
#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
pub enum Remote {
FigShareAPI(FigShareAPI),
DataDryadAPI(DataDryadAPI),
Expand Down
11 changes: 8 additions & 3 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,12 @@ enum Commands {
key: String,
/// project name for remote (default: directory name)
#[structopt(long)]
name: Option<String>
name: Option<String>,

/// don't initialize remote, only add to manifest
#[structopt(long)]
link_only: bool

},

#[structopt(name = "ls")]
Expand Down Expand Up @@ -193,9 +198,9 @@ async fn run() -> Result<()> {
let mut proj = Project::new()?;
proj.update(filename.as_ref())
}
Some(Commands::Link { dir, service, key, name }) => {
Some(Commands::Link { dir, service, key, name, link_only }) => {
let mut proj = Project::new()?;
proj.link(dir, service, key, name).await
proj.link(dir, service, key, name, link_only).await
}
Some(Commands::Ls {}) => {
let mut proj = Project::new()?;
Expand Down

0 comments on commit cc35b79

Please sign in to comment.