From 2d4d04d0fe73ae135165d9fca08ce9181e824fcf Mon Sep 17 00:00:00 2001 From: Brendan Schell Date: Wed, 3 Jul 2024 18:06:00 -0400 Subject: [PATCH 1/8] add new dataset and create dataset classes with summary field --- .../syft/src/syft/service/dataset/dataset.py | 64 ++++++++++++++++--- 1 file changed, 56 insertions(+), 8 deletions(-) diff --git a/packages/syft/src/syft/service/dataset/dataset.py b/packages/syft/src/syft/service/dataset/dataset.py index 5ab32347136..4d31827e602 100644 --- a/packages/syft/src/syft/service/dataset/dataset.py +++ b/packages/syft/src/syft/service/dataset/dataset.py @@ -24,6 +24,7 @@ from ...types.datetime import DateTime from ...types.dicttuple import DictTuple from ...types.syft_object import SYFT_OBJECT_VERSION_2 +from ...types.syft_object import SYFT_OBJECT_VERSION_3 from ...types.syft_object import SyftObject from ...types.transforms import TransformContext from ...types.transforms import generate_id @@ -451,7 +452,7 @@ def get_shape_or_len(obj: Any) -> tuple[int, ...] | int | None: @serializable() -class Dataset(SyftObject): +class DatasetV2(SyftObject): # version __canonical_name__: str = "Dataset" __version__ = SYFT_OBJECT_VERSION_2 @@ -470,9 +471,38 @@ class Dataset(SyftObject): created_at: DateTime = DateTime.now() uploader: Contributor - __attr_searchable__ = ["name", "citation", "url", "description", "action_ids"] + +@serializable() +class Dataset(SyftObject): + # version + __canonical_name__: str = "Dataset" + __version__ = SYFT_OBJECT_VERSION_3 + + id: UID + name: str + node_uid: UID | None = None + asset_list: list[Asset] = [] + contributors: set[Contributor] = set() + citation: str | None = None + url: str | None = None + description: MarkdownDescription | None = None + updated_at: str | None = None + requests: int | None = 0 + mb_size: float | None = None + created_at: DateTime = DateTime.now() + uploader: Contributor + summary: str | None = None + + __attr_searchable__ = [ + "name", + "citation", + "url", + "description", + "action_ids", + "summary", + ] __attr_unique__ = ["name"] - __repr_attrs__ = ["name", "url", "created_at"] + __repr_attrs__ = ["name", "summary", "url", "created_at"] __table_sort_attr__ = "Created at" def __init__( @@ -491,6 +521,7 @@ def icon(self) -> str: def _coll_repr_(self) -> dict[str, Any]: return { "Name": self.name, + "Summary": self.summary, "Assets": len(self.asset_list), "Size": f"{self.mb_size} (MB)", "Url": self.url, @@ -501,12 +532,11 @@ def _repr_html_(self) -> Any: uploaded_by_line = ( ( "

" - + f"Uploaded by:{self.uploader.name} ({self.uploader.email})

" + + f"Uploaded by: {self.uploader.name} ({self.uploader.email})

" ) if self.uploader else "" ) - description_text: str = self.description.text if self.description else "" return f"""

{self.name}

-

{description_text}

+

Summary: {self.summary}

+ {"

A more detailed description is available by calling dataset.description

" if self.description is not None and self.description.text else ""} {uploaded_by_line}

Created on: {self.created_at}

URL: @@ -605,13 +636,27 @@ class DatasetPageView(SyftObject): @serializable() -class CreateDataset(Dataset): +class CreateDatasetV2(DatasetV2): # version __canonical_name__ = "CreateDataset" __version__ = SYFT_OBJECT_VERSION_2 asset_list: list[CreateAsset] = [] - __repr_attrs__ = ["name", "url"] + __repr_attrs__ = ["name", "summary", "url"] + + id: UID | None = None # type: ignore[assignment] + created_at: DateTime | None = None # type: ignore[assignment] + uploader: Contributor | None = None # type: ignore[assignment] + + +@serializable() +class CreateDataset(Dataset): + # version + __canonical_name__ = "CreateDataset" + __version__ = SYFT_OBJECT_VERSION_3 + asset_list: list[CreateAsset] = [] + + __repr_attrs__ = ["name", "summary", "url"] id: UID | None = None # type: ignore[assignment] created_at: DateTime | None = None # type: ignore[assignment] @@ -633,6 +678,9 @@ def __assets_must_contain_mock( def set_description(self, description: str) -> None: self.description = MarkdownDescription(text=description) + def set_summary(self, summary: str) -> None: + self.summary = summary + def add_citation(self, citation: str) -> None: self.citation = citation From 372ca0cd9839aeb67991f8758f8e9aa406144736 Mon Sep 17 00:00:00 2001 From: Brendan Schell Date: Wed, 3 Jul 2024 18:22:34 -0400 Subject: [PATCH 2/8] slight text cleanup and add migrations --- .../syft/src/syft/service/dataset/dataset.py | 44 +++++++++++++++++-- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/packages/syft/src/syft/service/dataset/dataset.py b/packages/syft/src/syft/service/dataset/dataset.py index 4d31827e602..157550e4184 100644 --- a/packages/syft/src/syft/service/dataset/dataset.py +++ b/packages/syft/src/syft/service/dataset/dataset.py @@ -23,11 +23,14 @@ from ...store.document_store import PartitionKey from ...types.datetime import DateTime from ...types.dicttuple import DictTuple +from ...types.syft_migration import migrate from ...types.syft_object import SYFT_OBJECT_VERSION_2 from ...types.syft_object import SYFT_OBJECT_VERSION_3 from ...types.syft_object import SyftObject from ...types.transforms import TransformContext +from ...types.transforms import drop from ...types.transforms import generate_id +from ...types.transforms import make_set_default from ...types.transforms import transform from ...types.transforms import validate_url from ...types.uid import UID @@ -537,6 +540,13 @@ def _repr_html_(self) -> Any: if self.uploader else "" ) + if self.description is not None and self.description.text: + description_info_message = ( + "

A more detailed description is available by calling \ + dataset.description.

" + ) + else: + description_info_message = "" return f"""
-

{self.name}

-

Summary: {self.summary}

+

{self.name}

+

Summary

+

{self.summary}

{description_info_message} +

Dataset Details

{uploaded_by_line}

Created on: {self.created_at}

URL: {self.url}

Contributors: To see full details call dataset.contributors.

+

Assets

{self.assets._repr_html_()} """ From 1e75adc61cf9b9c028bccd7733f84436b0ec7977 Mon Sep 17 00:00:00 2001 From: Brendan Schell Date: Thu, 4 Jul 2024 19:03:17 -0400 Subject: [PATCH 8/8] update notebooks and make summary optional in repr --- notebooks/api/0.8/00-load-data.ipynb | 3 ++- .../tutorials/data-owner/01-uploading-private-data.ipynb | 6 ++++++ packages/syft/src/syft/service/dataset/dataset.py | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/notebooks/api/0.8/00-load-data.ipynb b/notebooks/api/0.8/00-load-data.ipynb index c61fab17f41..ad98a5ac361 100644 --- a/notebooks/api/0.8/00-load-data.ipynb +++ b/notebooks/api/0.8/00-load-data.ipynb @@ -348,7 +348,8 @@ }, "outputs": [], "source": [ - "dataset.set_description(\"Canada Trade Data\")" + "dataset.set_description(\"Canada Trade Data Markdown Description\")\n", + "dataset.set_summary(\"Canada Trade Data Short Summary\")" ] }, { diff --git a/notebooks/tutorials/data-owner/01-uploading-private-data.ipynb b/notebooks/tutorials/data-owner/01-uploading-private-data.ipynb index 02ed5576cb0..3a1863c0bdd 100644 --- a/notebooks/tutorials/data-owner/01-uploading-private-data.ipynb +++ b/notebooks/tutorials/data-owner/01-uploading-private-data.ipynb @@ -124,8 +124,14 @@ "metadata": {}, "outputs": [], "source": [ + "dataset_markdown_description = \"\"\"\n", + "### Contents\n", + "Numpy arrays of length 3 with integers ranging from 1 - 3.\n", + "\"\"\"\n", "dataset = sy.Dataset(\n", " name=\"my dataset\",\n", + " summary=\"Contains private and mock versions of data\",\n", + " description=dataset_markdown_description,\n", " asset_list=[\n", " sy.Asset(name=\"my asset\", data=np.array([1, 2, 3]), mock=np.array([1, 1, 1]))\n", " ],\n", diff --git a/packages/syft/src/syft/service/dataset/dataset.py b/packages/syft/src/syft/service/dataset/dataset.py index 950153d6a7d..c6d49799c6e 100644 --- a/packages/syft/src/syft/service/dataset/dataset.py +++ b/packages/syft/src/syft/service/dataset/dataset.py @@ -572,7 +572,7 @@ def _repr_html_(self) -> Any:

{self.name}

Summary

-

{self.summary}

+ {f"

{self.summary}

" if self.summary else ""} {description_info_message}

Dataset Details

{uploaded_by_line}