Skip to content

Commit

Permalink
Add SerpMetadata fields to Metadata (#123)
Browse files Browse the repository at this point in the history
  • Loading branch information
Gallaecio authored Nov 21, 2024
1 parent 6264eb4 commit f87d98b
Show file tree
Hide file tree
Showing 8 changed files with 63 additions and 10 deletions.
6 changes: 5 additions & 1 deletion docs/reference/components.rst
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ Item metadata components

.. autoclass:: zyte_common_items.Metadata(**kwargs)
:show-inheritance:
:members: dateDownloaded, probability, searchText, validationMessages
:members: dateDownloaded, displayedQuery, probability, searchedQuery, searchText, totalOrganicResults, validationMessages, get_date_downloaded_parsed

.. autoclass:: zyte_common_items.ProbabilityMetadata(**kwargs)
:show-inheritance:
Expand All @@ -109,6 +109,10 @@ Item metadata components
:show-inheritance:
:members: probability, dateDownloaded, validationMessages, get_date_downloaded_parsed

.. autoclass:: zyte_common_items.SearchMetadata(**kwargs)
:show-inheritance:
:members: dateDownloaded, probability, searchText, validationMessages, get_date_downloaded_parsed

.. autoclass:: zyte_common_items.BaseMetadata(**kwargs)
:show-inheritance:
:members: cast
Expand Down
27 changes: 27 additions & 0 deletions tests/test_components.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import datetime

import attrs
from web_poet import RequestUrl

from zyte_common_items import (
Address,
AggregateRating,
Amenity,
BaseMetadata,
BaseSalary,
Breadcrumb,
BusinessPlaceMetadata,
Expand Down Expand Up @@ -72,6 +74,31 @@ def test_metadata_get_date_downloaded():
)


def get_all_subclasses(cls):
    """Return the set of all direct and indirect subclasses of *cls*.

    *cls* itself is not included in the result. The class tree is walked
    iteratively, and classes that were already collected are skipped, so a
    class reachable through several parents (diamond inheritance) is only
    expanded once.
    """
    found = set()
    pending = list(cls.__subclasses__())
    while pending:
        candidate = pending.pop()
        if candidate in found:
            # Already collected through another parent; its own subclasses
            # have been queued before.
            continue
        found.add(candidate)
        pending.extend(candidate.__subclasses__())
    return found


def test_metadata_fields():
    """Metadata must contain a superset of the fields of all metadata
    classes."""
    superset = set(attrs.fields_dict(Metadata))
    # Visit classes in name order so that, when several classes are out of
    # sync, the reported failure is the same on every run.
    metadata_classes = sorted(
        get_all_subclasses(BaseMetadata), key=lambda cls: cls.__name__
    )
    for cls in metadata_classes:
        missing = set(attrs.fields_dict(cls)) - superset
        assert (
            not missing
        ), f"Metadata is missing some fields from {cls.__name__}: {sorted(missing)}"


def test_metadata_subclasses():
    """Metadata should not be subclassed, since its fields will grow as new
    specific metadata classes are added."""
    subclasses = get_all_subclasses(Metadata)
    # Name the offending classes so a failure points at what to fix.
    offenders = sorted(cls.__name__ for cls in subclasses)
    assert not subclasses, f"Metadata must not be subclassed, found: {offenders}"


def test_named_link_optional_fields():
NamedLink(name="foo")
NamedLink(url="https://example.com")
Expand Down
1 change: 1 addition & 0 deletions zyte_common_items/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
Reactions,
RealEstateArea,
Request,
SearchMetadata,
SocialMediaPostAuthor,
StarRating,
Topic,
Expand Down
1 change: 1 addition & 0 deletions zyte_common_items/components/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
Metadata,
MetadataT,
ProbabilityMetadata,
SearchMetadata,
)
from .ratings import AggregateRating, StarRating
from .real_estate import RealEstateArea
Expand Down
21 changes: 18 additions & 3 deletions zyte_common_items/components/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,28 @@ class DetailsMetadata(ListMetadata):


@attrs.define(kw_only=True)
class SearchMetadata(DetailsMetadata):
    """Minimal metadata for classes of items that can declare search
    metadata."""

    #: The search text used to find the item.
    searchText: Optional[str] = None


@attrs.define(kw_only=True)
class Metadata(SearchMetadata):
    """Generic metadata class.

    It defines all attributes of metadata classes for specific item types, so
    that it can be used during extraction instead of a more specific class, and
    later converted to the corresponding, more specific metadata class.
    """

    # ``searchText`` is inherited from SearchMetadata and is deliberately not
    # redeclared here.

    #: Search query as seen in the webpage.
    displayedQuery: Optional[str] = None

    #: Search query as specified in the input URL.
    searchedQuery: Optional[str] = None

    #: Total number of organic results reported by the search engine.
    totalOrganicResults: Optional[int] = None
4 changes: 2 additions & 2 deletions zyte_common_items/items/business_place.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,17 @@
AggregateRating,
Amenity,
Image,
Metadata,
NamedLink,
OpeningHoursItem,
ParentPlace,
SearchMetadata,
StarRating,
)
from zyte_common_items.converters import to_metadata_optional, url_to_str_optional


@attrs.define(kw_only=True)
class BusinessPlaceMetadata(Metadata):
class BusinessPlaceMetadata(SearchMetadata):
"""Metadata class for :data:`zyte_common_items.BusinessPlace.metadata`."""


Expand Down
4 changes: 2 additions & 2 deletions zyte_common_items/items/job_posting.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@
BaseSalary,
HiringOrganization,
JobLocation,
Metadata,
SearchMetadata,
)
from zyte_common_items.converters import to_metadata_optional, url_to_str


@attrs.define(kw_only=True)
class JobPostingMetadata(Metadata):
class JobPostingMetadata(SearchMetadata):
"""Metadata class for :data:`zyte_common_items.JobPosting.metadata`."""


Expand Down
9 changes: 7 additions & 2 deletions zyte_common_items/items/social_media_post.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,17 @@
import attrs

from zyte_common_items.base import Item
from zyte_common_items.components import Metadata, Reactions, SocialMediaPostAuthor, Url
from zyte_common_items.components import (
Reactions,
SearchMetadata,
SocialMediaPostAuthor,
Url,
)
from zyte_common_items.converters import to_metadata_optional, url_to_str


@attrs.define(kw_only=True)
class SocialMediaPostMetadata(Metadata):
class SocialMediaPostMetadata(SearchMetadata):
"""Metadata class for :data:`zyte_common_items.SocialMediaPost.metadata`."""


Expand Down

0 comments on commit f87d98b

Please sign in to comment.