Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Widget #29

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,13 @@ Add interactive Code Editor-style HTML reprs to Earth Engine objects in a Jupyte

![eerepr demo expanding the metadata for an image collection](assets/eerepr.gif)


## Features

- **⚡ Async**: `eerepr` uses threading to grab data asynchronously from Earth Engine, meaning that you can display big objects without blocking execution!
- **📦 Caching**: Earth Engine objects are automatically cached to speed up subsequent prints.
- **⚠️ Error Handling**: `eerepr` handles Earth Engine errors gracefully, displaying the message instead of crashing the notebook.

## Setup

Install from PyPI:
Expand All @@ -35,9 +42,3 @@ Importing `eerepr` in a Jupyter notebook adds an HTML repr method to all Earth E

> **Note**
> Just like in the Code Editor, printing huge collections can be slow and may hit memory limits.

## Caching

`eerepr` uses caching to improve performance. Server data will only be requested once for each unique Earth Engine object, and all subsequent requests will be retrieved from the cache until the Jupyter session is restarted.

When you import `eerepr`, it is automatically initialized with an unlimited cache size. You can manually set the number of unique objects to cache using `eerepr.initialize(max_cache_size=n)`. A value of `None` sets an unlimited cache while a value of `0` disables caching.
779 changes: 172 additions & 607 deletions docs/notebooks/demo.ipynb

Large diffs are not rendered by default.

10 changes: 9 additions & 1 deletion eerepr/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,23 @@


class Config:
def __init__(self, max_cache_size: int | None, max_repr_mbs: int):
def __init__(
self, max_cache_size: int | None, max_repr_mbs: int, communication_delay: float
):
self.max_cache_size = max_cache_size
self.max_repr_mbs = max_repr_mbs
self.communication_delay = communication_delay

def __repr__(self):
return json.dumps(self.__dict__, indent=2)


options = Config(
# Max number of EE objects to cache. Set to 0 to disable caching.
max_cache_size=None,
# Max size of repr content in MB to prevent performance issues
max_repr_mbs=100,
# Minimum delay in seconds before updating widgets to prevent communication timing
# issues. Delays that are too low can break collapsing behavior.
communication_delay=0.1,
)
31 changes: 27 additions & 4 deletions eerepr/html.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
from __future__ import annotations

import datetime
from html import escape
from itertools import chain
from typing import Any, Hashable

import ee

# Max characters to display for a list before truncating to "List (n elements)"
MAX_INLINE_LENGTH = 50
# Sorting priority for Earth Engine properties
Expand All @@ -18,8 +21,28 @@
]


def convert_to_html(obj: Any, key: Hashable | None = None) -> str:
"""Converts a Python object (not list or dict) to an HTML <li> element.
def build_loading_html(obj: Any) -> str:
    """Return the placeholder HTML shown while an object's repr is being computed.

    The placeholder is a spinner element followed by the object's class name and
    the text "(Computing)".
    """
    class_name = obj.__class__.__name__
    return (
        "<span class='ee-loading'>"
        "<div class='ee-spinner'></div>"
        f" {class_name} (Computing)"
        "</span>"
    )


def build_error_html(err: ee.EEException) -> str:
    """Return an HTML element displaying the message of an Earth Engine exception.

    The message is HTML-escaped before being embedded.
    """
    message = escape(str(err))
    return "<div class='ee-error'>" + message + "</div>"


def build_fallback_html(obj: Any) -> str:
    """Return a plain, escaped ``repr`` of the object wrapped in a <pre> element.

    Used as a fallback when the main HTML repr cannot be used.
    """
    escaped_repr = escape(repr(obj))
    return f"<pre>{escaped_repr}</pre>"


def build_object_html(obj: Any, key: Hashable | None = None) -> str:
"""Build an HTML <li> element from a Python object.

Parameters
----------
Expand All @@ -45,7 +68,7 @@ def list_to_html(obj: list, key: Hashable | None = None) -> str:
noun = "element" if n == 1 else "elements"
header = f"{key}: " if key is not None else ""
header += f"List ({n} {noun})" if len(contents) > MAX_INLINE_LENGTH else contents
children = [convert_to_html(item, key=i) for i, item in enumerate(obj)]
children = [build_object_html(item, key=i) for i, item in enumerate(obj)]

return _make_collapsible_li(header, children)

Expand All @@ -57,7 +80,7 @@ def dict_to_html(obj: dict, key: Hashable | None = None) -> str:

header = f"{key}: " if key is not None else ""
header += label
children = [convert_to_html(value, key=key) for key, value in obj.items()]
children = [build_object_html(value, key=key) for key, value in obj.items()]

return _make_collapsible_li(header, children)

Expand Down
185 changes: 103 additions & 82 deletions eerepr/repr.py
Original file line number Diff line number Diff line change
@@ -1,108 +1,61 @@
from __future__ import annotations

import threading
import time
import uuid
from functools import _lru_cache_wrapper, lru_cache
from html import escape
from importlib.resources import read_text
from typing import Any, Union
from typing import Callable, Type, Union
from warnings import warn

import anywidget
import ee
import traitlets

from eerepr.config import options
from eerepr.html import convert_to_html
from eerepr.html import (
build_error_html,
build_fallback_html,
build_loading_html,
build_object_html,
)
from eerepr.utils import is_nondeterministic, load_css, load_js

REPR_HTML = "_repr_html_"
EEObject = Union[ee.Element, ee.ComputedObject]
EEClass = Type[Union[ee.Element, ee.ComputedObject]]

# Track which html reprs have been set so we can overwrite them if needed.
reprs_set = set()


@lru_cache(maxsize=None)
def _load_css() -> str:
return read_text("eerepr.static.css", "style.css")


@lru_cache(maxsize=None)
def _load_js() -> str:
"""Note: JS is only needed because the CSS `:has()` selector isn't well supported
yet, preventing a pure CSS solution to the collapsible lists.
"""
return read_text("eerepr.static.js", "script.js")


def _attach_html_repr(cls: type, repr: Any) -> None:
"""Add a HTML repr method to an EE class. Only overwrite the method if it was set by
this function.
def _attach_repr(cls: EEClass, repr: Callable) -> None:
"""Add a custom repr method to an EE class. Only overwrite the method if it was set
by this function.
"""
if not hasattr(cls, REPR_HTML) or cls.__name__ in reprs_set:
if not hasattr(cls, "_ipython_display_") or cls.__name__ in reprs_set:
reprs_set.update([cls.__name__])
setattr(cls, REPR_HTML, repr)
cls._ipython_display_ = repr


def _is_nondeterministic(obj: EEObject) -> bool:
"""Check if an object returns nondeterministic results which would break caching.

Currently, this only tests for the case of `ee.List.shuffle(seed=False)`.
"""
invocation = obj.serialize()
shuffled = "List.shuffle" in invocation
false_seed = '"seed": {"constantValue": false}' in invocation
return shuffled and false_seed
def _ipython_display_(obj: EEObject, **kwargs) -> None:
    """IPython display wrapper for Earth Engine objects.

    Wraps the object in an ``EEReprWidget`` and displays it. Display happens as a
    side effect; nothing is returned.
    """
    EEReprWidget(obj)._ipython_display_(**kwargs)


@lru_cache(maxsize=None)
def _repr_html_(obj: EEObject) -> str:
"""Generate an HTML representation of an EE object."""
def _get_cached_repr(obj: EEObject) -> str:
"""Build or retrieve an HTML repr from an Earth Engine object."""
if is_nondeterministic(obj):
# Prevent caching non-deterministic objects (e.g. ee.List([]).shuffle(False))
obj._eerepr_id = uuid.uuid4()

try:
info = obj.getInfo()
# Fall back to a string repr if getInfo fails
content = f"<ul>{build_object_html(info)}</ul>"
except ee.EEException as e:
warn(
f"Getting info failed with: '{e}'. Falling back to string repr.",
stacklevel=2,
)
return f"<pre>{escape(repr(obj))}</pre>"

css = _load_css()
js = _load_js()
body = convert_to_html(info)

return (
"<div>"
f"<style>{css}</style>"
"<div class='ee'>"
f"<ul>{body}</ul>"
"</div>"
f"<script>{js}</script>"
"</div>"
)


def _ee_repr(obj: EEObject) -> str:
"""Wrapper around _repr_html_ to prevent cache hits on nondeterministic objects."""
if _is_nondeterministic(obj):
# We don't want to cache nondeterministic objects, so we'll add a unique
# attribute that causes ee.ComputedObject.__eq__ to return False, preventing a
# cache hit.
obj._eerepr_id = uuid.uuid4()

rep = _repr_html_(obj)
mbs = len(rep) / 1e6
if mbs > options.max_repr_mbs:
warn(
message=(
f"HTML repr size ({mbs:.0f}mB) exceeds maximum"
f" ({options.max_repr_mbs:.0f}mB), falling back to string repr. You"
" can set `eerepr.options.max_repr_mbs` to print larger objects,"
" but this may cause performance issues."
),
stacklevel=2,
)
return f"<pre>{escape(repr(obj))}</pre>"
content = build_error_html(e)

return rep
return content


def initialize(max_cache_size: int | None = None) -> None:
Expand All @@ -116,12 +69,80 @@ def initialize(max_cache_size: int | None = None) -> None:
The maximum number of EE objects to cache. If None, the cache size is unlimited.
Set to 0 to disable caching.
"""
global _repr_html_
if isinstance(_repr_html_, _lru_cache_wrapper):
_repr_html_ = _repr_html_.__wrapped__ # type: ignore
global _get_cached_repr
# Unwrap from the LRU cache so we can reset it
if isinstance(_get_cached_repr, _lru_cache_wrapper):
_get_cached_repr = _get_cached_repr.__wrapped__ # type: ignore

# If caching is enabled, rewrap in a new LRU cache
if max_cache_size != 0:
_repr_html_ = lru_cache(maxsize=max_cache_size)(_repr_html_)
_get_cached_repr = lru_cache(maxsize=max_cache_size)(_get_cached_repr)

for cls in [ee.Element, ee.ComputedObject]:
_attach_html_repr(cls, _ee_repr)
_attach_repr(cls, _ipython_display_)


class EEReprWidget(anywidget.AnyWidget):
    """Widget that shows a loading placeholder, then the HTML repr of an EE object.

    The repr is computed on a background thread started in ``__init__``, so
    constructing (and displaying) the widget does not wait for the repr to be built.
    """

    _esm = load_js()
    _css = load_css()

    # HTML content of the widget, synced to the frontend.
    content = traitlets.Unicode().tag(sync=True)

    def __init__(self, obj: EEObject, *args, **kwargs):
        """Show the loading placeholder and start computing the repr of ``obj``."""
        super().__init__(*args, **kwargs)
        self.obj = obj
        self.set_content(build_loading_html(obj))
        threading.Thread(target=self.update_content).start()

    def update_content(self) -> None:
        """
        Update the widget content with the cached repr. Currently, this
        implementation delays as needed to give time for communication between
        the kernel and the frontend to complete.

        If building the repr raises unexpectedly, the fallback repr is displayed
        (so the widget never stays stuck on the loading placeholder) and the
        exception is then re-raised so it is still reported for this thread.
        """
        # Monotonic clock: elapsed time must not be affected by system clock changes.
        start = time.monotonic()
        failure = None
        try:
            rep = _get_cached_repr(self.obj)
        except Exception as exc:
            # This is the thread's top-level boundary: replace the spinner with
            # something useful now, and re-raise after the content is updated.
            rep = build_fallback_html(self.obj)
            failure = exc

        remaining = options.communication_delay - (time.monotonic() - start)
        if remaining > 0:
            time.sleep(remaining)

        self.set_content(rep)

        if failure is not None:
            raise failure

    def set_content(self, content: str) -> None:
        """Set the widget content, checking content size to avoid crashes from huge
        reprs.

        Content larger than ``options.max_repr_mbs`` (measured as characters / 1e6)
        is replaced by the fallback repr and a warning is issued.
        """
        mbs = len(content) / 1e6
        if mbs > options.max_repr_mbs:
            warn(
                message=(
                    f"HTML repr size ({mbs:.0f}mB) exceeds maximum"
                    f" ({options.max_repr_mbs:.0f}mB), falling back to string repr. You"
                    " can set `eerepr.options.max_repr_mbs` to display larger objects,"
                    " but this may cause performance issues."
                ),
                stacklevel=2,
            )
            content = build_fallback_html(self.obj)

        self.content = content

    def _ipython_display_(self, **kwargs):
        """
        Display the widget in an IPython kernel.

        We dynamically choose _ipython_display_ or _repr_mimebundle_ based on which
        is supported by the widget, which is determined by anywidget based on the
        environment. See https://github.com/manzt/anywidget/issues/48 for details.
        """

        if hasattr(super(), "_ipython_display_"):
            # ipywidgets v7
            super()._ipython_display_(**kwargs)
        else:
            import ipywidgets
            from IPython.display import display

            # ipywidgets v8
            data = ipywidgets.DOMWidget._repr_mimebundle_(self, **kwargs)
            display(data, raw=True)
Loading