diff --git a/notebooks/api/0.8/00-load-data.ipynb b/notebooks/api/0.8/00-load-data.ipynb index c61fab17f41..ad98a5ac361 100644 --- a/notebooks/api/0.8/00-load-data.ipynb +++ b/notebooks/api/0.8/00-load-data.ipynb @@ -348,7 +348,8 @@ }, "outputs": [], "source": [ - "dataset.set_description(\"Canada Trade Data\")" + "dataset.set_description(\"Canada Trade Data Markdown Description\")\n", + "dataset.set_summary(\"Canada Trade Data Short Summary\")" ] }, { diff --git a/notebooks/tutorials/data-owner/01-uploading-private-data.ipynb b/notebooks/tutorials/data-owner/01-uploading-private-data.ipynb index 02ed5576cb0..3a1863c0bdd 100644 --- a/notebooks/tutorials/data-owner/01-uploading-private-data.ipynb +++ b/notebooks/tutorials/data-owner/01-uploading-private-data.ipynb @@ -124,8 +124,14 @@ "metadata": {}, "outputs": [], "source": [ + "dataset_markdown_description = \"\"\"\n", + "### Contents\n", + "Numpy arrays of length 3 with integers ranging from 1 - 3.\n", + "\"\"\"\n", "dataset = sy.Dataset(\n", " name=\"my dataset\",\n", + " summary=\"Contains private and mock versions of data\",\n", + " description=dataset_markdown_description,\n", " asset_list=[\n", " sy.Asset(name=\"my asset\", data=np.array([1, 2, 3]), mock=np.array([1, 1, 1]))\n", " ],\n", diff --git a/packages/syft/src/syft/assets/css/style.css b/packages/syft/src/syft/assets/css/style.css index beece1fa2c0..5fdee82e95d 100644 --- a/packages/syft/src/syft/assets/css/style.css +++ b/packages/syft/src/syft/assets/css/style.css @@ -6,6 +6,7 @@ body.vscode-dark { --button-color: #111111; --colors-black: #ffffff; --surface-color: #fff; + --text-color: #ffffff; } body { @@ -15,6 +16,7 @@ body { --button-color: #d1d5db; --colors-black: #17161d; --surface-color: #464158; + --text-color: #2e2b3b; } .header-1 { @@ -64,7 +66,7 @@ body { line-height: 100%; leading-trim: both; text-edge: cap; - color: #2e2b3b; + color: var(--text-color); } .paragraph-sm { @@ -75,7 +77,7 @@ body { line-height: 100%; leading-trim: both; text-edge: cap; - color: #2e2b3b; + color: var(--text-color); } .code-text { @@ -86,7 +88,7 @@ body { line-height: 130%; leading-trim: both; text-edge: cap; - color: #2e2b3b; + color: var(--text-color); } .numbering-entry { @@ -580,6 +582,8 @@ body { color: var(--surface-color); } +.syft-dataset h1, +.syft-dataset h2, .syft-dataset h3, .syft-dataset p, .syft-asset h3, diff --git a/packages/syft/src/syft/service/dataset/dataset.py b/packages/syft/src/syft/service/dataset/dataset.py index 54a84878379..7a24a0175d7 100644 --- a/packages/syft/src/syft/service/dataset/dataset.py +++ b/packages/syft/src/syft/service/dataset/dataset.py @@ -9,6 +9,7 @@ # third party from IPython.display import display import itables +import markdown import pandas as pd from pydantic import ConfigDict from pydantic import field_validator @@ -23,10 +24,14 @@ from ...store.document_store import PartitionKey from ...types.datetime import DateTime from ...types.dicttuple import DictTuple +from ...types.syft_migration import migrate from ...types.syft_object import SYFT_OBJECT_VERSION_2 +from ...types.syft_object import SYFT_OBJECT_VERSION_3 from ...types.syft_object import SyftObject from ...types.transforms import TransformContext +from ...types.transforms import drop from ...types.transforms import generate_id +from ...types.transforms import make_set_default from ...types.transforms import transform from ...types.transforms import validate_url from ...types.uid import UID @@ -451,7 +456,7 @@ def get_shape_or_len(obj: Any) -> tuple[int, ...] | int | None: @serializable() -class Dataset(SyftObject): +class DatasetV2(SyftObject): # version __canonical_name__: str = "Dataset" __version__ = SYFT_OBJECT_VERSION_2 @@ -470,11 +475,52 @@ class Dataset(SyftObject): created_at: DateTime = DateTime.now() uploader: Contributor - __attr_searchable__ = ["name", "citation", "url", "description", "action_ids"] + __attr_searchable__ = [ + "name", + "citation", + "url", + "description", + "action_ids", + "summary", + ] __attr_unique__ = ["name"] __repr_attrs__ = ["name", "url", "created_at"] __table_sort_attr__ = "Created at" + +@serializable() +class Dataset(SyftObject): + # version + __canonical_name__: str = "Dataset" + __version__ = SYFT_OBJECT_VERSION_3 + + id: UID + name: str + node_uid: UID | None = None + asset_list: list[Asset] = [] + contributors: set[Contributor] = set() + citation: str | None = None + url: str | None = None + description: MarkdownDescription | None = None + updated_at: str | None = None + requests: int | None = 0 + mb_size: float | None = None + created_at: DateTime = DateTime.now() + uploader: Contributor + summary: str | None = None + + __attr_searchable__ = [ + "name", + "citation", + "url", + "description", + "action_ids", + "summary", + ] + __attr_unique__ = ["name"] + __repr_attrs__ = ["name", "summary", "url", "created_at"] + __table_sort_attr__ = "Created at" + def __init__( self, description: str | MarkdownDescription | None = "", @@ -491,6 +537,7 @@ def icon(self) -> str: def _coll_repr_(self) -> dict[str, Any]: return { "Name": self.name, + "Summary": self.summary, "Assets": len(self.asset_list), "Size": f"{self.mb_size} (MB)", "Url": self.url, @@ -501,12 +548,18 @@ def _repr_html_(self) -> Any: uploaded_by_line = ( ( "

" - + f"Uploaded by:{self.uploader.name} ({self.uploader.email})

" + + f"Uploaded by: {self.uploader.name} ({self.uploader.email})

" ) if self.uploader else "" ) - description_text: str = self.description.text if self.description else "" + if self.description is not None and self.description.text: + description_info_message = f""" +

Description

+ {markdown.markdown(self.description.text, extensions=["extra"])} + """ + else: + description_info_message = "" return f"""
-

{self.name}

-

{description_text}

+

{self.name}

+

Summary

+ {f"

{self.summary}

" if self.summary else ""} + {description_info_message} +

Dataset Details

{uploaded_by_line}

Created on: {self.created_at}

URL: {self.url}

Contributors: - to see full details call dataset.contributors

+ To see full details call dataset.contributors.

+

Assets

{self.assets._repr_html_()} """ @@ -605,7 +662,7 @@ class DatasetPageView(SyftObject): @serializable() -class CreateDataset(Dataset): +class CreateDatasetV2(DatasetV2): # version __canonical_name__ = "CreateDataset" __version__ = SYFT_OBJECT_VERSION_2 @@ -617,6 +674,20 @@ class CreateDataset(Dataset): created_at: DateTime | None = None # type: ignore[assignment] uploader: Contributor | None = None # type: ignore[assignment] + +@serializable() +class CreateDataset(Dataset): + # version + __canonical_name__ = "CreateDataset" + __version__ = SYFT_OBJECT_VERSION_3 + asset_list: list[CreateAsset] = [] + + __repr_attrs__ = ["name", "summary", "url"] + + id: UID | None = None # type: ignore[assignment] + created_at: DateTime | None = None # type: ignore[assignment] + uploader: Contributor | None = None # type: ignore[assignment] + model_config = ConfigDict(validate_assignment=True, extra="forbid") def _check_asset_must_contain_mock(self) -> None: @@ -633,6 +704,9 @@ def __assets_must_contain_mock( def set_description(self, description: str) -> None: self.description = MarkdownDescription(text=description) + def set_summary(self, summary: str) -> None: + self.summary = summary + def add_citation(self, citation: str) -> None: self.citation = citation @@ -856,5 +930,41 @@ def createdataset_to_dataset() -> list[Callable]: ] +@migrate(DatasetV2, Dataset) +def migrate_dataset_v2_to_v3() -> list[Callable]: + return [ + make_set_default("summary", None), + drop("__repr_attrs__"), + make_set_default("__repr_attrs__", ["name", "summary", "url", "created_at"]), + ] + + +@migrate(Dataset, DatasetV2) +def migrate_dataset_v3_to_v2() -> list[Callable]: + return [ + drop("summary"), + drop("__repr_attrs__"), + make_set_default("__repr_attrs__", ["name", "url", "created_at"]), + ] + + +@migrate(CreateDatasetV2, CreateDataset) +def migrate_create_dataset_v2_to_v3() -> list[Callable]: + return [ + make_set_default("summary", None), + drop("__repr_attrs__"), + make_set_default("__repr_attrs__", ["name", "summary", "url"]), + ] + + +@migrate(CreateDataset, CreateDatasetV2) +def migrate_create_dataset_v3_to_v2() -> list[Callable]: + return [ + drop("summary"), + drop("__repr_attrs__"), + make_set_default("__repr_attrs__", ["name", "url"]), + ] + + class DatasetUpdate: pass