Commit

Merge pull request #9015 from OpenMined/bschell/fix-dataset-markdown-repr

Add short summary field to dataset and clean up repr
IonesioJunior committed Jul 5, 2024
2 parents e2bd35e + 567bc3b commit 0629260
Showing 4 changed files with 133 additions and 12 deletions.
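Taken together, the notebook and `dataset.py` changes add a short `summary` field alongside the existing Markdown `description`, and both now appear in the reworked HTML repr. A minimal sketch of the updated API, assuming a working `syft` install imported as `sy` (it mirrors the notebook cells changed below):

```python
# Minimal sketch based on the notebook changes in this commit
# (assumes `syft` is importable as `sy` and numpy as `np`).
import numpy as np
import syft as sy

dataset = sy.Dataset(
    name="my dataset",
    summary="Contains private and mock versions of data",  # new short summary field
    description="### Contents\nNumpy arrays of length 3 with integers ranging from 1 - 3.",
    asset_list=[
        sy.Asset(name="my asset", data=np.array([1, 2, 3]), mock=np.array([1, 1, 1]))
    ],
)
dataset  # in a notebook, renders the new repr with Summary and Description sections
```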
3 changes: 2 additions & 1 deletion notebooks/api/0.8/00-load-data.ipynb
@@ -348,7 +348,8 @@
},
"outputs": [],
"source": [
"dataset.set_description(\"Canada Trade Data\")"
"dataset.set_description(\"Canada Trade Data Markdown Description\")\n",
"dataset.set_summary(\"Canada Trade Data Short Summary\")"
]
},
{
@@ -124,8 +124,14 @@
"metadata": {},
"outputs": [],
"source": [
"dataset_markdown_description = \"\"\"\n",
"### Contents\n",
"Numpy arrays of length 3 with integers ranging from 1 - 3.\n",
"\"\"\"\n",
"dataset = sy.Dataset(\n",
" name=\"my dataset\",\n",
" summary=\"Contains private and mock versions of data\",\n",
" description=dataset_markdown_description,\n",
" asset_list=[\n",
" sy.Asset(name=\"my asset\", data=np.array([1, 2, 3]), mock=np.array([1, 1, 1]))\n",
" ],\n",
10 changes: 7 additions & 3 deletions packages/syft/src/syft/assets/css/style.css
@@ -6,6 +6,7 @@ body.vscode-dark {
--button-color: #111111;
--colors-black: #ffffff;
--surface-color: #fff;
--text-color: #ffffff;
}

body {
@@ -15,6 +16,7 @@ body {
--button-color: #d1d5db;
--colors-black: #17161d;
--surface-color: #464158;
--text-color: #2e2b3b;
}

.header-1 {
@@ -64,7 +66,7 @@
line-height: 100%;
leading-trim: both;
text-edge: cap;
color: #2e2b3b;
color: var(--text-color);
}

.paragraph-sm {
@@ -75,7 +77,7 @@
line-height: 100%;
leading-trim: both;
text-edge: cap;
color: #2e2b3b;
color: var(--text-color);
}

.code-text {
@@ -86,7 +88,7 @@
line-height: 130%;
leading-trim: both;
text-edge: cap;
color: #2e2b3b;
color: var(--text-color);
}

.numbering-entry {
@@ -580,6 +582,8 @@ body {
color: var(--surface-color);
}

.syft-dataset h1,
.syft-dataset h2,
.syft-dataset h3,
.syft-dataset p,
.syft-asset h3,
126 changes: 118 additions & 8 deletions packages/syft/src/syft/service/dataset/dataset.py
@@ -9,6 +9,7 @@
# third party
from IPython.display import display
import itables
import markdown
import pandas as pd
from pydantic import ConfigDict
from pydantic import field_validator
@@ -23,10 +24,14 @@
from ...store.document_store import PartitionKey
from ...types.datetime import DateTime
from ...types.dicttuple import DictTuple
from ...types.syft_migration import migrate
from ...types.syft_object import SYFT_OBJECT_VERSION_2
from ...types.syft_object import SYFT_OBJECT_VERSION_3
from ...types.syft_object import SyftObject
from ...types.transforms import TransformContext
from ...types.transforms import drop
from ...types.transforms import generate_id
from ...types.transforms import make_set_default
from ...types.transforms import transform
from ...types.transforms import validate_url
from ...types.uid import UID
@@ -451,7 +456,7 @@ def get_shape_or_len(obj: Any) -> tuple[int, ...] | int | None:


@serializable()
class Dataset(SyftObject):
class DatasetV2(SyftObject):
# version
__canonical_name__: str = "Dataset"
__version__ = SYFT_OBJECT_VERSION_2
@@ -470,11 +475,52 @@ class Dataset(SyftObject):
created_at: DateTime = DateTime.now()
uploader: Contributor

__attr_searchable__ = ["name", "citation", "url", "description", "action_ids"]
__attr_searchable__ = [
"name",
"citation",
"url",
"description",
"action_ids",
"summary",
]
__attr_unique__ = ["name"]
__repr_attrs__ = ["name", "url", "created_at"]
__table_sort_attr__ = "Created at"


@serializable()
class Dataset(SyftObject):
# version
__canonical_name__: str = "Dataset"
__version__ = SYFT_OBJECT_VERSION_3

id: UID
name: str
node_uid: UID | None = None
asset_list: list[Asset] = []
contributors: set[Contributor] = set()
citation: str | None = None
url: str | None = None
description: MarkdownDescription | None = None
updated_at: str | None = None
requests: int | None = 0
mb_size: float | None = None
created_at: DateTime = DateTime.now()
uploader: Contributor
summary: str | None = None

__attr_searchable__ = [
"name",
"citation",
"url",
"description",
"action_ids",
"summary",
]
__attr_unique__ = ["name"]
__repr_attrs__ = ["name", "summary", "url", "created_at"]
__table_sort_attr__ = "Created at"

def __init__(
self,
description: str | MarkdownDescription | None = "",
@@ -491,6 +537,7 @@ def icon(self) -> str:
def _coll_repr_(self) -> dict[str, Any]:
return {
"Name": self.name,
"Summary": self.summary,
"Assets": len(self.asset_list),
"Size": f"{self.mb_size} (MB)",
"Url": self.url,
@@ -501,12 +548,18 @@ def _repr_html_(self) -> Any:
uploaded_by_line = (
(
"<p class='paragraph-sm'><strong>"
+ f"<span class='pr-8'>Uploaded by:</span></strong>{self.uploader.name} ({self.uploader.email})</p>"
+ f"<span class='pr-8'>Uploaded by: </span></strong>{self.uploader.name} ({self.uploader.email})</p>"
)
if self.uploader
else ""
)
description_text: str = self.description.text if self.description else ""
if self.description is not None and self.description.text:
description_info_message = f"""
<h2><strong><span class='pr-8'>Description</span></strong></h2>
{markdown.markdown(self.description.text, extensions=["extra"])}
"""
else:
description_info_message = ""
return f"""
<style>
{FONT_CSS}
@@ -517,14 +570,18 @@ def _repr_html_(self) -> Any:
{ITABLES_CSS}
</style>
<div class='syft-dataset'>
<h3>{self.name}</h3>
<p>{description_text}</p>
<h1>{self.name}</h1>
<h2><strong><span class='pr-8'>Summary</span></strong></h2>
{f"<p>{self.summary}</p>" if self.summary else ""}
{description_info_message}
<h2><strong><span class='pr-8'>Dataset Details</span></strong></h2>
{uploaded_by_line}
<p class='paragraph-sm'><strong><span class='pr-8'>Created on: </span></strong>{self.created_at}</p>
<p class='paragraph-sm'><strong><span class='pr-8'>URL:
</span></strong><a href='{self.url}'>{self.url}</a></p>
<p class='paragraph-sm'><strong><span class='pr-8'>Contributors:</span></strong>
to see full details call <strong>dataset.contributors</strong></p>
To see full details call <strong>dataset.contributors</strong>.</p>
<h2><strong><span class='pr-8'>Assets</span></strong></h2>
{self.assets._repr_html_()}
"""

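Inside `_repr_html_`, the Markdown description is now rendered to HTML via the `markdown` package with its `extra` extension, instead of being dropped into the repr as raw text. In isolation, that call behaves roughly like this:

```python
import markdown  # same third-party package imported at the top of dataset.py

text = "### Contents\nNumpy arrays of length 3 with integers ranging from 1 - 3."
html = markdown.markdown(text, extensions=["extra"])
# html is roughly "<h3>Contents</h3>\n<p>Numpy arrays of length 3 ...</p>",
# which the repr embeds under its "Description" heading.
print(html)
```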
@@ -605,7 +662,7 @@ class DatasetPageView(SyftObject):


@serializable()
class CreateDataset(Dataset):
class CreateDatasetV2(DatasetV2):
# version
__canonical_name__ = "CreateDataset"
__version__ = SYFT_OBJECT_VERSION_2
@@ -617,6 +674,20 @@ class CreateDataset(Dataset):
created_at: DateTime | None = None # type: ignore[assignment]
uploader: Contributor | None = None # type: ignore[assignment]


@serializable()
class CreateDataset(Dataset):
# version
__canonical_name__ = "CreateDataset"
__version__ = SYFT_OBJECT_VERSION_3
asset_list: list[CreateAsset] = []

__repr_attrs__ = ["name", "summary", "url"]

id: UID | None = None # type: ignore[assignment]
created_at: DateTime | None = None # type: ignore[assignment]
uploader: Contributor | None = None # type: ignore[assignment]

model_config = ConfigDict(validate_assignment=True, extra="forbid")

def _check_asset_must_contain_mock(self) -> None:
@@ -633,6 +704,9 @@ def __assets_must_contain_mock(
def set_description(self, description: str) -> None:
self.description = MarkdownDescription(text=description)

def set_summary(self, summary: str) -> None:
self.summary = summary

def add_citation(self, citation: str) -> None:
self.citation = citation
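
`CreateDataset.set_summary` mirrors the existing `set_description` helper, so both fields can also be filled in after construction, as the first notebook change above does (assuming a `dataset` built like the sketch near the top):

```python
# Post-construction setters (see the 00-load-data.ipynb change above);
# set_description wraps the text in a MarkdownDescription, set_summary stores a plain string.
dataset.set_description("Canada Trade Data Markdown Description")
dataset.set_summary("Canada Trade Data Short Summary")
```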

@@ -856,5 +930,41 @@ def createdataset_to_dataset() -> list[Callable]:
]


@migrate(DatasetV2, Dataset)
def migrate_dataset_v2_to_v3() -> list[Callable]:
return [
make_set_default("summary", None),
drop("__repr_attrs__"),
make_set_default("__repr_attrs__", ["name", "summary", "url", "created_at"]),
]


@migrate(Dataset, DatasetV2)
def migrate_dataset_v3_to_v2() -> list[Callable]:
return [
drop("summary"),
drop("__repr_attrs__"),
make_set_default("__repr_attrs__", ["name", "url", "created_at"]),
]


@migrate(CreateDatasetV2, CreateDataset)
def migrate_create_dataset_v2_to_v3() -> list[Callable]:
return [
make_set_default("summary", None),
drop("__repr_attrs__"),
make_set_default("__repr_attrs__", ["name", "summary", "url"]),
]


@migrate(CreateDataset, CreateDatasetV2)
def migrate_create_dataset_v3_to_v2() -> list[Callable]:
return [
drop("summary"),
drop("__repr_attrs__"),
make_set_default("__repr_attrs__", ["name", "url"]),
]


class DatasetUpdate:
pass

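The four `@migrate` registrations above keep version 2 and version 3 objects convertible in both directions: upgrading defaults the new `summary` field to `None` and swaps in the new `__repr_attrs__`; downgrading drops them again. As an illustrative sketch only (Syft's `make_set_default` and `drop` helpers operate on its own transform machinery, not on plain dicts), the two directions for `Dataset` amount to:

```python
# Illustrative sketch only -- not Syft's actual transform machinery.
# It mirrors what the transform lists in migrate_dataset_v2_to_v3 and
# migrate_dataset_v3_to_v2 accomplish, expressed over a plain dict.
def dataset_v2_to_v3(record: dict) -> dict:
    upgraded = dict(record)
    upgraded.setdefault("summary", None)  # make_set_default("summary", None)
    upgraded["__repr_attrs__"] = ["name", "summary", "url", "created_at"]
    return upgraded


def dataset_v3_to_v2(record: dict) -> dict:
    downgraded = dict(record)
    downgraded.pop("summary", None)  # drop("summary")
    downgraded["__repr_attrs__"] = ["name", "url", "created_at"]
    return downgraded
```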