Skip to content

Commit

Permalink
add datasets to cloud workspace (#1192)
Browse files Browse the repository at this point in the history
* add datasets to cloud workspace

* add read/write parquet

* mypy
  • Loading branch information
mike0sv committed Jul 10, 2024
1 parent ff37584 commit fdd12d2
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 12 deletions.
1 change: 1 addition & 0 deletions src/evidently/ui/type_aliases.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
STR_UUID = Union[str, uuid.UUID]
PanelID = uuid.UUID
TabID = uuid.UUID
DatasetID = uuid.UUID
ZERO_UUID = uuid.UUID(int=0)


Expand Down
82 changes: 76 additions & 6 deletions src/evidently/ui/workspace/cloud.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
from io import BytesIO
from typing import BinaryIO
from typing import Dict
from typing import List
from typing import NamedTuple
from typing import Optional
from typing import Union
from uuid import UUID

import pandas as pd
from requests import HTTPError

from evidently.ui.api.models import OrgModel
Expand All @@ -11,7 +16,9 @@
from evidently.ui.base import ProjectManager
from evidently.ui.base import Team
from evidently.ui.storage.common import NoopAuthManager
from evidently.ui.type_aliases import STR_UUID
from evidently.ui.type_aliases import ZERO_UUID
from evidently.ui.type_aliases import DatasetID
from evidently.ui.type_aliases import OrgID
from evidently.ui.type_aliases import TeamID
from evidently.ui.workspace.remote import NoopBlobStorage
Expand Down Expand Up @@ -61,8 +68,17 @@ def _prepare_request(
body: Optional[dict] = None,
cookies=None,
headers: Dict[str, str] = None,
form_data: bool = False,
):
r = super()._prepare_request(path, method, query_params, body, cookies, headers)
r = super()._prepare_request(
path=path,
method=method,
query_params=query_params,
body=body,
cookies=cookies,
headers=headers,
form_data=form_data,
)
if path == "/api/users/login":
return r
r.cookies[self.token_cookie_name] = self.jwt_token
Expand All @@ -77,16 +93,18 @@ def _request(
response_model=None,
cookies=None,
headers: Dict[str, str] = None,
form_data: bool = False,
):
try:
res = super()._request(
path,
method,
query_params,
body,
response_model,
path=path,
method=method,
query_params=query_params,
body=body,
response_model=response_model,
cookies=cookies,
headers=headers,
form_data=form_data,
)
self._logged_in = True
return res
Expand All @@ -103,6 +121,7 @@ def _request(
response_model,
cookies=cookies,
headers=headers,
form_data=form_data,
)
raise

Expand All @@ -120,6 +139,28 @@ def create_team(self, team: Team, org_id: OrgID = None) -> TeamModel:
response_model=TeamModel,
)

def add_dataset(
    self, file: BinaryIO, name: str, org_id: OrgID, team_id: TeamID, description: Optional[str]
) -> DatasetID:
    """Upload a dataset file and return the identifier assigned to it.

    Issues a form-data ``POST`` to ``/api/datasets/``. The ``name``,
    ``description`` and ``file`` travel in the request body, while
    ``org_id`` and ``team_id`` are sent as query parameters.

    Args:
        file: Open binary stream holding the dataset contents.
        name: Name to give the dataset.
        org_id: Organisation passed as the ``org_id`` query parameter.
        team_id: Team passed as the ``team_id`` query parameter.
        description: Optional description sent alongside the file.

    Returns:
        ``DatasetID`` built from the ``dataset_id`` field of the JSON response.
    """
    form_fields = {"name": name, "description": description, "file": file}
    owner_params = {"org_id": org_id, "team_id": team_id}
    response = self._request(
        "/api/datasets/",
        "POST",
        body=form_fields,
        query_params=owner_params,
        form_data=True,
    )
    payload = response.json()
    return DatasetID(payload["dataset_id"])

def load_dataset(self, dataset_id: DatasetID) -> pd.DataFrame:
    """Download a dataset and parse it into a DataFrame.

    Sends a ``GET`` to ``/api/datasets/{dataset_id}/download`` and reads the
    raw response content as parquet.

    Args:
        dataset_id: Identifier of the dataset to fetch.

    Returns:
        The dataset contents as a ``pandas.DataFrame``.
    """
    download_path = f"/api/datasets/{dataset_id}/download"
    response = self._request(download_path, "GET")
    # read_parquet needs a file-like object, so wrap the raw bytes in memory.
    parquet_buffer = BytesIO(response.content)
    return pd.read_parquet(parquet_buffer)


class NamedBytesIO(BytesIO):
    """In-memory bytes buffer that additionally carries a ``name`` attribute.

    A plain ``BytesIO`` has no ``name``; this subclass stores one so the
    buffer can stand in for a named file object.
    NOTE(review): presumably the name is what the upload layer uses as the
    filename of the multipart part - confirm against the HTTP client.
    """

    def __init__(self, initial_bytes: bytes, name: str):
        """Create the buffer pre-filled with ``initial_bytes`` and labelled ``name``."""
        super().__init__(initial_bytes)
        self.name = name


class CloudWorkspace(WorkspaceView):
token: str
Expand Down Expand Up @@ -164,6 +205,35 @@ def create_team(self, name: str, org_id: OrgID) -> Team:
assert isinstance(self.project_manager.metadata, CloudMetadataStorage)
return self.project_manager.metadata.create_team(Team(name=name), org_id).to_team()

def add_dataset(
    self,
    data_or_path: Union[str, pd.DataFrame],
    name: str,
    org_id: STR_UUID,
    team_id: STR_UUID,
    description: Optional[str] = None,
) -> DatasetID:
    """Upload a dataset from a file path or an in-memory DataFrame.

    Args:
        data_or_path: A string path, which is opened in binary mode and sent
            as is, or a DataFrame, which is serialised to parquet in memory
            under the name ``data.parquet``.
        name: Name to give the dataset.
        org_id: Organisation id; a string value is converted to ``UUID``.
        team_id: Team id; a string value is converted to ``UUID``.
        description: Optional dataset description.

    Returns:
        The ``DatasetID`` returned by the metadata storage.
    """
    metadata = self.project_manager.metadata
    assert isinstance(metadata, CloudMetadataStorage)

    org_uuid = UUID(org_id) if isinstance(org_id, str) else org_id
    team_uuid = UUID(team_id) if isinstance(team_id, str) else team_id

    upload: Union[NamedBytesIO, BinaryIO]
    if not isinstance(data_or_path, str):
        upload = NamedBytesIO(b"", "data.parquet")
        data_or_path.to_parquet(upload)
        # Rewind so the upload reads the parquet bytes from the beginning.
        upload.seek(0)
    else:
        upload = open(data_or_path, "rb")

    try:
        return metadata.add_dataset(upload, name, org_uuid, team_uuid, description)
    finally:
        # Close the stream whether or not the upload succeeded.
        upload.close()

def load_dataset(self, dataset_id: DatasetID) -> pd.DataFrame:
    """Fetch a dataset by id through the cloud metadata storage.

    Args:
        dataset_id: Identifier of the dataset to load.

    Returns:
        The dataset as a ``pandas.DataFrame``, as produced by the metadata
        storage's ``load_dataset``.
    """
    metadata = self.project_manager.metadata
    assert isinstance(metadata, CloudMetadataStorage)
    return metadata.load_dataset(dataset_id)


class CloudAuthManager(NoopAuthManager):
def get_team(self, team_id: TeamID) -> Optional[Team]:
Expand Down
29 changes: 23 additions & 6 deletions src/evidently/ui/workspace/remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from typing import Optional
from typing import Set
from typing import Type
from typing import Union
from urllib.error import HTTPError

from requests import Request
Expand Down Expand Up @@ -57,20 +58,26 @@ def _prepare_request(
body: Optional[dict] = None,
cookies=None,
headers: Dict[str, str] = None,
form_data: bool = False,
):
# todo: better encoding
cookies = cookies or {}
headers = headers or {}
data = None
data: Optional[Union[Dict, bytes]] = None
files = None
if body is not None:
headers["Content-Type"] = "application/json"

data = json.dumps(body, allow_nan=True, cls=NumpyEncoder).encode("utf8")
if form_data:
data = body
files = {k: body.pop(k) for k in list(body.keys()) if isinstance(body[k], io.IOBase)}
else:
headers["Content-Type"] = "application/json"
data = json.dumps(body, allow_nan=True, cls=NumpyEncoder).encode("utf8")
return Request(
method,
urllib.parse.urljoin(self.get_url(), path),
params=query_params,
data=data,
files=files,
headers=headers,
cookies=cookies,
)
Expand All @@ -84,8 +91,9 @@ def _request(
response_model=None,
cookies=None,
headers: Dict[str, str] = None,
form_data: bool = False,
):
request = self._prepare_request(path, method, query_params, body, cookies, headers)
request = self._prepare_request(path, method, query_params, body, cookies, headers, form_data=form_data)
s = Session()
response = s.send(request.prepare())

Expand Down Expand Up @@ -117,8 +125,17 @@ def _prepare_request(
body: Optional[dict] = None,
cookies=None,
headers: Dict[str, str] = None,
form_data: bool = False,
):
r = super()._prepare_request(path, method, query_params, body, cookies, headers)
r = super()._prepare_request(
path=path,
method=method,
query_params=query_params,
body=body,
cookies=cookies,
headers=headers,
form_data=form_data,
)
if self.secret is not None:
r.headers[SECRET_HEADER_NAME] = self.secret
return r
Expand Down

0 comments on commit fdd12d2

Please sign in to comment.