From 2d4d04d0fe73ae135165d9fca08ce9181e824fcf Mon Sep 17 00:00:00 2001
From: Brendan Schell
Date: Wed, 3 Jul 2024 18:06:00 -0400
Subject: [PATCH 1/8] add new Dataset and CreateDataset classes with summary
 field
---
.../syft/src/syft/service/dataset/dataset.py | 64 ++++++++++++++++---
1 file changed, 56 insertions(+), 8 deletions(-)
diff --git a/packages/syft/src/syft/service/dataset/dataset.py b/packages/syft/src/syft/service/dataset/dataset.py
index 5ab32347136..4d31827e602 100644
--- a/packages/syft/src/syft/service/dataset/dataset.py
+++ b/packages/syft/src/syft/service/dataset/dataset.py
@@ -24,6 +24,7 @@
from ...types.datetime import DateTime
from ...types.dicttuple import DictTuple
from ...types.syft_object import SYFT_OBJECT_VERSION_2
+from ...types.syft_object import SYFT_OBJECT_VERSION_3
from ...types.syft_object import SyftObject
from ...types.transforms import TransformContext
from ...types.transforms import generate_id
@@ -451,7 +452,7 @@ def get_shape_or_len(obj: Any) -> tuple[int, ...] | int | None:
@serializable()
-class Dataset(SyftObject):
+class DatasetV2(SyftObject):
# version
__canonical_name__: str = "Dataset"
__version__ = SYFT_OBJECT_VERSION_2
@@ -470,9 +471,38 @@ class Dataset(SyftObject):
created_at: DateTime = DateTime.now()
uploader: Contributor
- __attr_searchable__ = ["name", "citation", "url", "description", "action_ids"]
+
+@serializable()
+class Dataset(SyftObject):
+ # version
+ __canonical_name__: str = "Dataset"
+ __version__ = SYFT_OBJECT_VERSION_3
+
+ id: UID
+ name: str
+ node_uid: UID | None = None
+ asset_list: list[Asset] = []
+ contributors: set[Contributor] = set()
+ citation: str | None = None
+ url: str | None = None
+ description: MarkdownDescription | None = None
+ updated_at: str | None = None
+ requests: int | None = 0
+ mb_size: float | None = None
+ created_at: DateTime = DateTime.now()
+ uploader: Contributor
+ summary: str | None = None
+
+ __attr_searchable__ = [
+ "name",
+ "citation",
+ "url",
+ "description",
+ "action_ids",
+ "summary",
+ ]
__attr_unique__ = ["name"]
- __repr_attrs__ = ["name", "url", "created_at"]
+ __repr_attrs__ = ["name", "summary", "url", "created_at"]
__table_sort_attr__ = "Created at"
def __init__(
@@ -491,6 +521,7 @@ def icon(self) -> str:
def _coll_repr_(self) -> dict[str, Any]:
return {
"Name": self.name,
+ "Summary": self.summary,
"Assets": len(self.asset_list),
"Size": f"{self.mb_size} (MB)",
"Url": self.url,
@@ -501,12 +532,11 @@ def _repr_html_(self) -> Any:
uploaded_by_line = (
(
""
- + f"Uploaded by:{self.uploader.name} ({self.uploader.email})
"
+ + f"Uploaded by: {self.uploader.name} ({self.uploader.email})
"
)
if self.uploader
else ""
)
- description_text: str = self.description.text if self.description else ""
return f"""
{self.name}
-
{description_text}
+
Summary: {self.summary}
+ {"
A more detailed description is available by calling dataset.description
" if self.description is not None and self.description.text else ""}
{uploaded_by_line}
Created on: {self.created_at}
URL:
@@ -605,13 +636,27 @@ class DatasetPageView(SyftObject):
@serializable()
-class CreateDataset(Dataset):
+class CreateDatasetV2(DatasetV2):
# version
__canonical_name__ = "CreateDataset"
__version__ = SYFT_OBJECT_VERSION_2
asset_list: list[CreateAsset] = []
- __repr_attrs__ = ["name", "url"]
+ __repr_attrs__ = ["name", "summary", "url"]
+
+ id: UID | None = None # type: ignore[assignment]
+ created_at: DateTime | None = None # type: ignore[assignment]
+ uploader: Contributor | None = None # type: ignore[assignment]
+
+
+@serializable()
+class CreateDataset(Dataset):
+ # version
+ __canonical_name__ = "CreateDataset"
+ __version__ = SYFT_OBJECT_VERSION_3
+ asset_list: list[CreateAsset] = []
+
+ __repr_attrs__ = ["name", "summary", "url"]
id: UID | None = None # type: ignore[assignment]
created_at: DateTime | None = None # type: ignore[assignment]
@@ -633,6 +678,9 @@ def __assets_must_contain_mock(
def set_description(self, description: str) -> None:
self.description = MarkdownDescription(text=description)
+ def set_summary(self, summary: str) -> None:
+ self.summary = summary
+
def add_citation(self, citation: str) -> None:
self.citation = citation
From 372ca0cd9839aeb67991f8758f8e9aa406144736 Mon Sep 17 00:00:00 2001
From: Brendan Schell
Date: Wed, 3 Jul 2024 18:22:34 -0400
Subject: [PATCH 2/8] slight text cleanup and add migrations
---
.../syft/src/syft/service/dataset/dataset.py | 44 +++++++++++++++++--
1 file changed, 41 insertions(+), 3 deletions(-)
diff --git a/packages/syft/src/syft/service/dataset/dataset.py b/packages/syft/src/syft/service/dataset/dataset.py
index 4d31827e602..157550e4184 100644
--- a/packages/syft/src/syft/service/dataset/dataset.py
+++ b/packages/syft/src/syft/service/dataset/dataset.py
@@ -23,11 +23,14 @@
from ...store.document_store import PartitionKey
from ...types.datetime import DateTime
from ...types.dicttuple import DictTuple
+from ...types.syft_migration import migrate
from ...types.syft_object import SYFT_OBJECT_VERSION_2
from ...types.syft_object import SYFT_OBJECT_VERSION_3
from ...types.syft_object import SyftObject
from ...types.transforms import TransformContext
+from ...types.transforms import drop
from ...types.transforms import generate_id
+from ...types.transforms import make_set_default
from ...types.transforms import transform
from ...types.transforms import validate_url
from ...types.uid import UID
@@ -537,6 +540,13 @@ def _repr_html_(self) -> Any:
if self.uploader
else ""
)
+ if self.description is not None and self.description.text:
+ description_info_message = (
+ " A more detailed description is available by calling \
+ dataset.description.
"
+ )
+ else:
+ description_info_message = ""
return f"""
-
{self.name}
-
Summary: {self.summary}
+
{self.name}
+
Summary
+
{self.summary}
{description_info_message}
+
Dataset Details
{uploaded_by_line}
Created on: {self.created_at}
URL:
{self.url}
Contributors:
To see full details call dataset.contributors.
+
Assets
{self.assets._repr_html_()}
"""
From 1e75adc61cf9b9c028bccd7733f84436b0ec7977 Mon Sep 17 00:00:00 2001
From: Brendan Schell
Date: Thu, 4 Jul 2024 19:03:17 -0400
Subject: [PATCH 8/8] update notebooks and make summary optional in repr
---
notebooks/api/0.8/00-load-data.ipynb | 3 ++-
.../tutorials/data-owner/01-uploading-private-data.ipynb | 6 ++++++
packages/syft/src/syft/service/dataset/dataset.py | 2 +-
3 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/notebooks/api/0.8/00-load-data.ipynb b/notebooks/api/0.8/00-load-data.ipynb
index c61fab17f41..ad98a5ac361 100644
--- a/notebooks/api/0.8/00-load-data.ipynb
+++ b/notebooks/api/0.8/00-load-data.ipynb
@@ -348,7 +348,8 @@
},
"outputs": [],
"source": [
- "dataset.set_description(\"Canada Trade Data\")"
+ "dataset.set_description(\"Canada Trade Data Markdown Description\")\n",
+ "dataset.set_summary(\"Canada Trade Data Short Summary\")"
]
},
{
diff --git a/notebooks/tutorials/data-owner/01-uploading-private-data.ipynb b/notebooks/tutorials/data-owner/01-uploading-private-data.ipynb
index 02ed5576cb0..3a1863c0bdd 100644
--- a/notebooks/tutorials/data-owner/01-uploading-private-data.ipynb
+++ b/notebooks/tutorials/data-owner/01-uploading-private-data.ipynb
@@ -124,8 +124,14 @@
"metadata": {},
"outputs": [],
"source": [
+ "dataset_markdown_description = \"\"\"\n",
+ "### Contents\n",
+ "Numpy arrays of length 3 with integers ranging from 1 - 3.\n",
+ "\"\"\"\n",
"dataset = sy.Dataset(\n",
" name=\"my dataset\",\n",
+ " summary=\"Contains private and mock versions of data\",\n",
+ " description=dataset_markdown_description,\n",
" asset_list=[\n",
" sy.Asset(name=\"my asset\", data=np.array([1, 2, 3]), mock=np.array([1, 1, 1]))\n",
" ],\n",
diff --git a/packages/syft/src/syft/service/dataset/dataset.py b/packages/syft/src/syft/service/dataset/dataset.py
index 950153d6a7d..c6d49799c6e 100644
--- a/packages/syft/src/syft/service/dataset/dataset.py
+++ b/packages/syft/src/syft/service/dataset/dataset.py
@@ -572,7 +572,7 @@ def _repr_html_(self) -> Any:
{self.name}
Summary
-
{self.summary}
+ {f"
{self.summary}
" if self.summary else ""}
{description_info_message}
Dataset Details
{uploaded_by_line}