Skip to content

Commit

Permalink
Tidy up
Browse files (browse the repository at this point in the history)
  • Loading branch information
ines committed Dec 9, 2024
1 parent caa393e commit 9647ce6
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 8 deletions.
4 changes: 1 addition & 3 deletions spacy_layout/layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,16 +150,14 @@ def _texts_to_doc(
     def _get_span_layout(
         self, item: DoclingItem, pages: dict[int, PageLayout]
     ) -> SpanLayout | None:
-        bounding_box = None
         if item.prov:
             prov = item.prov[0]
             page = pages[prov.page_no]
             if page.width and page.height:
                 x, y, width, height = get_bounding_box(prov.bbox, page.height)
-                bounding_box = SpanLayout(
+                return SpanLayout(
                     x=x, y=y, width=width, height=height, page_no=prov.page_no
                 )
-        return bounding_box

def get_pages(self, doc: Doc) -> list[tuple[PageLayout, list[Span]]]:
"""Get all pages and their layout spans."""
Expand Down
10 changes: 5 additions & 5 deletions spacy_layout/util.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
 import dataclasses
-from typing import TYPE_CHECKING, Callable
+from typing import TYPE_CHECKING, Any, Callable

from docling_core.types.doc.base import CoordOrigin
from pandas import DataFrame
Expand All @@ -13,7 +13,7 @@
OBJ_TYPES = {"SpanLayout": SpanLayout, "DocLayout": DocLayout, "PageLayout": PageLayout}


-def encode_obj(obj, chain: Callable | None = None):
+def encode_obj(obj: Any, chain: Callable | None = None) -> Any:
     """Convert custom dataclass to dict for serialization."""
     if isinstance(obj, tuple(OBJ_TYPES.values())):
         result = dataclasses.asdict(obj)
Expand All @@ -22,22 +22,22 @@ def encode_obj(obj, chain: Callable | None = None):
return obj if chain is None else chain(obj)


-def decode_obj(obj, chain: Callable | None = None):
+def decode_obj(obj: Any, chain: Callable | None = None) -> Any:
     """Load custom dataclass from serialized dict."""
     if isinstance(obj, dict) and obj.get(TYPE_ATTR) in OBJ_TYPES:
         obj_type = obj.pop(TYPE_ATTR)
         return OBJ_TYPES[obj_type].from_dict(obj)
     return obj if chain is None else chain(obj)


-def encode_df(obj, chain: Callable | None = None):
+def encode_df(obj: Any, chain: Callable | None = None) -> Any:
     """Convert pandas.DataFrame for serialization."""
     if isinstance(obj, DataFrame):
         return {"data": obj.to_dict(), TYPE_ATTR: "DataFrame"}
     return obj if chain is None else chain(obj)


-def decode_df(obj, chain: Callable | None = None):
+def decode_df(obj: Any, chain: Callable | None = None) -> Any:
     """Load pandas.DataFrame from serialized data."""
     if isinstance(obj, dict) and obj.get(TYPE_ATTR) == "DataFrame":
         return DataFrame(obj["data"])
Expand Down

0 comments on commit 9647ce6

Please sign in to comment.