diff --git a/extruct/w3cmicrodata.py b/extruct/w3cmicrodata.py
index 89a79b3c..0a7dbe6a 100644
--- a/extruct/w3cmicrodata.py
+++ b/extruct/w3cmicrodata.py
@@ -235,6 +235,20 @@ def _extract_property_value(self, node, items_seen, base_url, itemids, force=Fal
         elif node.get("content"):
             return node.get("content")
 
+        # schema.org Actions "-input"/"-output" annotations, see
+        # https://schema.org/docs/actions.html#part-4 : the value is built
+        # from the element's ``required`` and ``name`` attributes instead of
+        # its text content (an empty dict when neither attribute is set).
+        elif (itemprop := node.get("itemprop")) and itemprop.endswith(
+            ("-input", "-output")
+        ):
+            result = {}
+            if "required" in node.attrib:
+                result["valueRequired"] = True
+            if name := node.get("name"):
+                result["valueName"] = name
+            return result
+
         else:
             return self._extract_textContent(node)
 
diff --git a/tests/samples/schema.org/SearchAction.001.html b/tests/samples/schema.org/SearchAction.001.html
new file mode 100644
index 00000000..ef493125
--- /dev/null
+++ b/tests/samples/schema.org/SearchAction.001.html
@@ -0,0 +1,8 @@
+
+ +
+ + + +
+
diff --git a/tests/samples/schema.org/SearchAction.001.json b/tests/samples/schema.org/SearchAction.001.json
new file mode 100644
index 00000000..bd967365
--- /dev/null
+++ b/tests/samples/schema.org/SearchAction.001.json
@@ -0,0 +1 @@
+[{"type": "https://schema.org/WebSite", "properties": {"url": "https://www.example.com/", "potentialAction": {"type": "https://schema.org/SearchAction", "properties": {"target": "https://query.example.com/search?q={search_term_string}", "query-input": {"valueRequired": true, "valueName": "search_term_string"}}}}}]
\ No newline at end of file
diff --git a/tests/test_microdata.py b/tests/test_microdata.py
index c1168a02..eda2d26c 100644
--- a/tests/test_microdata.py
+++ b/tests/test_microdata.py
@@ -10,6 +10,24 @@
 class TestMicrodata(unittest.TestCase):
     maxDiff = None
 
+    def _test_schemaorg(self, schema, indexes=None):
+        """Compare extracted microdata with the expected JSON for *schema*.
+
+        For every index ``i`` in *indexes* (``[1]`` when omitted or empty),
+        ``{schema}.{i:03d}.html`` is extracted and compared with the parsed
+        ``{schema}.{i:03d}.json`` from the ``schema.org`` test data.
+        """
+        for i in indexes or [1]:
+            # One sub-test per sample, so a failing index is named in the
+            # report and does not stop the remaining indexes from running.
+            with self.subTest(schema=schema, index=i):
+                body = get_testdata("schema.org", f"{schema}.{i:03d}.html")
+                expected = json.loads(
+                    get_testdata("schema.org", f"{schema}.{i:03d}.json").decode()
+                )
+                data = MicrodataExtractor().extract(body)
+                self.assertEqual(data, expected)
+
     def test_schemaorg_CreativeWork(self):
         for i in [1]:
             body = get_testdata("schema.org", "CreativeWork.{:03d}.html".format(i))
@@ -63,6 +81,10 @@ def test_schemaorg_Event(self):
 
             self.assertEqual(data, expected)
 
+    def test_schemaorg_SearchAction(self):
+        """A SearchAction ``query-input`` yields valueRequired/valueName."""
+        self._test_schemaorg("SearchAction")
+
     def test_w3c_textContent_values(self):
         body = get_testdata("w3c", "microdata.4.2.strings.html")
         expected = json.loads(