some text
") - self.assertRaises(TypeError, lambda s: pickle.dumps(s, protocol=2), sel) + self.assertRaises( + TypeError, lambda s: pickle.dumps(s, protocol=2), sel + ) def test_pickle_selector_list(self) -> None: sel = self.sscls( @@ -33,7 +34,9 @@ def test_pickle_selector_list(self) -> None: empty_sel_list = sel.css("p") self.assertIsSelectorList(sel_list) self.assertIsSelectorList(empty_sel_list) - self.assertRaises(TypeError, lambda s: pickle.dumps(s, protocol=2), sel_list) + self.assertRaises( + TypeError, lambda s: pickle.dumps(s, protocol=2), sel_list + ) self.assertRaises( TypeError, lambda s: pickle.dumps(s, protocol=2), empty_sel_list ) @@ -49,7 +52,8 @@ def test_simple_selection(self) -> None: self.assertIsSelector(x) self.assertEqual( - sel.xpath("//input").extract(), [x.extract() for x in sel.xpath("//input")] + sel.xpath("//input").extract(), + [x.extract() for x in sel.xpath("//input")], ) self.assertEqual( @@ -84,7 +88,10 @@ def test_simple_selection_with_variables(self) -> None: sel = self.sscls(text=body) self.assertEqual( - [x.extract() for x in sel.xpath("//input[@value=$number]/@name", number=1)], + [ + x.extract() + for x in sel.xpath("//input[@value=$number]/@name", number=1) + ], ["a"], ) self.assertEqual( @@ -97,18 +104,24 @@ def test_simple_selection_with_variables(self) -> None: self.assertEqual( sel.xpath( - "count(//input[@value=$number or @name=$letter])", number=2, letter="a" + "count(//input[@value=$number or @name=$letter])", + number=2, + letter="a", ).extract(), ["2.0"], ) # you can also pass booleans self.assertEqual( - sel.xpath("boolean(count(//input)=$cnt)=$test", cnt=2, test=True).extract(), + sel.xpath( + "boolean(count(//input)=$cnt)=$test", cnt=2, test=True + ).extract(), ["1"], ) self.assertEqual( - sel.xpath("boolean(count(//input)=$cnt)=$test", cnt=4, test=True).extract(), + sel.xpath( + "boolean(count(//input)=$cnt)=$test", cnt=4, test=True + ).extract(), ["0"], ) self.assertEqual( @@ -138,11 +151,16 @@ def test_simple_selection_with_variables_escape_friendly(self) -> None: t = 'I say "Yeah!"' # naive string formatting with give something like: # ValueError: XPath error: Invalid predicate in //input[@value="I say "Yeah!""]/@name - self.assertRaises(ValueError, sel.xpath, f'//input[@value="{t}"]/@name') + self.assertRaises( + ValueError, sel.xpath, f'//input[@value="{t}"]/@name' + ) # with XPath variables, escaping is done for you self.assertEqual( - [x.extract() for x in sel.xpath("//input[@value=$text]/@name", text=t)], + [ + x.extract() + for x in sel.xpath("//input[@value=$text]/@name", text=t) + ], ["a"], ) lt = """I'm mixing single and "double quotes" and I don't care :)""" @@ -155,7 +173,9 @@ def test_simple_selection_with_variables_escape_friendly(self) -> None: self.assertEqual( [ x.extract() - for x in sel.xpath("//p[normalize-space()=$lng]//@name", lng=lt) + for x in sel.xpath( + "//p[normalize-space()=$lng]//@name", lng=lt + ) ], ["a"], ) @@ -179,7 +199,9 @@ def test_accessing_attributes(self) -> None: ) # for a SelectorList, bring the attributes of first-element only - self.assertEqual({"id": "some-list", "class": "list-cls"}, sel.css("ul").attrib) + self.assertEqual( + {"id": "some-list", "class": "list-cls"}, sel.css("ul").attrib + ) self.assertEqual( {"class": "item-cls", "id": "list-item-1"}, sel.css("li").attrib ) @@ -199,7 +221,9 @@ def test_representation_slice(self) -> None: body = f"" sel = self.sscls(text=body) - representation = f"test
" self.assertEqual( - type(self.sscls(text=text).xpath("//p")[0]), type(self.sscls(text=text)) + type(self.sscls(text=text).xpath("//p")[0]), + type(self.sscls(text=text)), ) self.assertEqual( - type(self.sscls(text=text).css("p")[0]), type(self.sscls(text=text)) + type(self.sscls(text=text).css("p")[0]), + type(self.sscls(text=text)), ) def test_boolean_result(self) -> None: body = "
" xs = self.sscls(text=body) - self.assertEqual(xs.xpath("//input[@name='a']/@name='a'").extract(), ["1"]) - self.assertEqual(xs.xpath("//input[@name='a']/@name='n'").extract(), ["0"]) + self.assertEqual( + xs.xpath("//input[@name='a']/@name='a'").extract(), ["1"] + ) + self.assertEqual( + xs.xpath("//input[@name='a']/@name='n'").extract(), ["0"] + ) def test_differences_parsing_xml_vs_html(self) -> None: """Test that XML and HTML Selector's behave differently""" @@ -355,7 +397,8 @@ def test_differences_parsing_xml_vs_html(self) -> None: text = 'Hello
Hello
Hello
Grainy
" self.assertEqual( - "Grainy
", self.sscls(text).extract() + "Grainy
", + self.sscls(text).extract(), ) def test_remove_selector_list(self) -> None: @@ -1006,6 +1100,41 @@ def test_remove_root_element_selector(self) -> None: sel.css("body").remove() self.assertEqual(sel.get(), "") + def test_selector_init_with_etree_element(self): + from lxml import etree + + element = etree.Element("root") + sel = self.sscls(root=element) + self.assertEqual(sel.type, "html") + + def test_invalid_json(self) -> None: + invalid_json = "root" + sel = self.sscls(text=invalid_json, type="json") + self.assertEqual(sel.root, None) + + def test_invalid_selector_calls(self) -> None: + json = '{"attrib":value}' + sel = self.sscls(text=json, type="json") + + with self.assertRaises(ValueError): + sel.xpath("query") + with self.assertRaises(ValueError): + sel.css("query") + + def test_xpath_selector_on_type_text(self) -> None: + html = "" + sel = self.sscls(text=html, type="text") + + sel_list = sel.xpath("//html") + self.assertEqual(sel_list[0].type, "html") + + def test_css_selector_on_type_text(self) -> None: + html = "" + sel = self.sscls(text=html, type="text") + + sel_list = sel.css("html") + self.assertEqual(sel_list[0].type, "html") + def test_jsonpath_selectors(self) -> None: json_data = """{ @@ -1046,13 +1175,14 @@ def test_jsonpath_selectors(self) -> None: self.assertIsSelector(sel) self.assertIsSelectorList(sel_list) self.assertEqual( - sel_list.getall(), ['"Nigel Rees"', '"Herman Melville"', '"J.R.R. Tolkien"'] + sel_list.getall(), + ['"Nigel Rees"', '"Herman Melville"', '"J.R.R. Tolkien"'], ) sel_list = sel.jsonpath("$..bicycle") self.assertIsSelectorList(sel_list) - self.assertEqual(sel_list.getall(), ['{"color": "red", "price": 19.95}']) + self.assertEqual(sel_list.get(), '{"color": "red", "price": 19.95}') inner_lst = sel_list[0].jsonpath("$..color") @@ -1064,9 +1194,16 @@ def test_jsonpath_selectors(self) -> None: self.assertIsSelectorList(sel_list) self.assertEqual( sel_list.getall(), - ['"Sayings of the Century"', '"Moby Dick"', '"The Lord of the Rings"'], + [ + '"Sayings of the Century"', + '"Moby Dick"', + '"The Lord of the Rings"', + ], ) + sel_list_empty = sel.jsonpath("$..contact") + self.assertEqual(sel_list_empty, []) + class ExsltTestCase(unittest.TestCase): @@ -1087,18 +1224,30 @@ def test_regexp(self) -> None: # re:test() self.assertEqual( sel.xpath('//input[re:test(@name, "[A-Z]+", "i")]').extract(), - [x.extract() for x in sel.xpath('//input[re:test(@name, "[A-Z]+", "i")]')], + [ + x.extract() + for x in sel.xpath('//input[re:test(@name, "[A-Z]+", "i")]') + ], ) self.assertEqual( - [x.extract() for x in sel.xpath(r'//a[re:test(@href, "\.html$")]/text()')], + [ + x.extract() + for x in sel.xpath(r'//a[re:test(@href, "\.html$")]/text()') + ], ["first link", "second link"], ) self.assertEqual( - [x.extract() for x in sel.xpath('//a[re:test(@href, "first")]/text()')], + [ + x.extract() + for x in sel.xpath('//a[re:test(@href, "first")]/text()') + ], ["first link"], ) self.assertEqual( - [x.extract() for x in sel.xpath('//a[re:test(@href, "second")]/text()')], + [ + x.extract() + for x in sel.xpath('//a[re:test(@href, "second")]/text()') + ], ["second link"], ) @@ -1128,7 +1277,9 @@ def test_regexp(self) -> None: r're:replace(//a[re:test(@href, "\.xml$")]/@href,' r'"(\w+)://(.+)(\.xml)", "","https://\2.html")' ).extract(), - ["https://www.bayes.co.uk/xml/index.xml?/xml/utils/rechecker.html"], + [ + "https://www.bayes.co.uk/xml/index.xml?/xml/utils/rechecker.html" + ], ) def test_set(self) -> None: diff --git a/tests/test_selector_csstranslator.py b/tests/test_selector_csstranslator.py index 1df3cfe4..c720006f 100644 --- a/tests/test_selector_csstranslator.py +++ b/tests/test_selector_csstranslator.py @@ -53,7 +53,10 @@ def test_attr_function(self): cases = [ ("::attr(name)", "descendant-or-self::*/@name"), ("a::attr(href)", "descendant-or-self::a/@href"), - ("a ::attr(img)", "descendant-or-self::a/descendant-or-self::*/@img"), + ( + "a ::attr(img)", + "descendant-or-self::a/descendant-or-self::*/@img", + ), ("a > ::attr(class)", "descendant-or-self::a/*/@class"), ] for css, xpath in cases: @@ -149,7 +152,9 @@ def setUp(self): self.sel = self.sscls(text=HTMLBODY) def x(self, *a, **kw): - return [v.strip() for v in self.sel.css(*a, **kw).extract() if v.strip()] + return [ + v.strip() for v in self.sel.css(*a, **kw).extract() if v.strip() + ] def test_selector_simple(self): for x in self.sel.css("input"): @@ -165,10 +170,13 @@ def test_text_pseudo_element(self): self.assertEqual(self.x("#p-b2 ::text"), ["guy"]) self.assertEqual(self.x("#paragraph::text"), ["lorem ipsum text"]) self.assertEqual( - self.x("#paragraph ::text"), ["lorem ipsum text", "hi", "there", "guy"] + self.x("#paragraph ::text"), + ["lorem ipsum text", "hi", "there", "guy"], ) self.assertEqual(self.x("p::text"), ["lorem ipsum text"]) - self.assertEqual(self.x("p ::text"), ["lorem ipsum text", "hi", "there", "guy"]) + self.assertEqual( + self.x("p ::text"), ["lorem ipsum text", "hi", "there", "guy"] + ) def test_attribute_function(self): self.assertEqual(self.x("#p-b2::attr(id)"), ["p-b2"]) @@ -181,7 +189,9 @@ def test_attribute_function(self): ) def test_nested_selector(self): - self.assertEqual(self.sel.css("p").css("b::text").extract(), ["hi", "guy"]) + self.assertEqual( + self.sel.css("p").css("b::text").extract(), ["hi", "guy"] + ) self.assertEqual( self.sel.css("div").css("area:last-child").extract(), [''], diff --git a/tests/test_utils.py b/tests/test_utils.py index 556892c1..e2bca559 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -46,7 +46,12 @@ def test_shorten(width, expected): True, ["October"], ], - [r"\w+\s*\d+\s*\,?\s*\d+", "October 25 2019", True, ["October 25 2019"]], + [ + r"\w+\s*\d+\s*\,?\s*\d+", + "October 25 2019", + True, + ["October 25 2019"], + ], [ r"^.*$", ""sometext" & "moretext"", diff --git a/tests/test_xpathfuncs.py b/tests/test_xpathfuncs.py index af710946..744472a9 100644 --- a/tests/test_xpathfuncs.py +++ b/tests/test_xpathfuncs.py @@ -17,13 +17,21 @@ def test_has_class_simple(self): ["First", "Second"], ) self.assertEqual( - [x.extract() for x in sel.xpath('//p[has-class("bar")]/text()')], ["Third"] + [x.extract() for x in sel.xpath('//p[has-class("bar")]/text()')], + ["Third"], ) self.assertEqual( - [x.extract() for x in sel.xpath('//p[has-class("foo","bar")]/text()')], [] + [ + x.extract() + for x in sel.xpath('//p[has-class("foo","bar")]/text()') + ], + [], ) self.assertEqual( - [x.extract() for x in sel.xpath('//p[has-class("foo","bar-baz")]/text()')], + [ + x.extract() + for x in sel.xpath('//p[has-class("foo","bar-baz")]/text()') + ], ["First"], ) @@ -45,7 +53,10 @@ def test_has_class_error_invalid_arg_type(self): """ sel = Selector(text=body) self.assertRaisesRegex( - ValueError, "has-class arguments must be strings", sel.xpath, "has-class(.)" + ValueError, + "has-class arguments must be strings", + sel.xpath, + "has-class(.)", ) def test_has_class_error_invalid_unicode(self): @@ -66,7 +77,8 @@ def test_has_class_unicode(self): """ sel = Selector(text=body) self.assertEqual( - [x.extract() for x in sel.xpath('//p[has-class("fóó")]/text()')], ["First"] + [x.extract() for x in sel.xpath('//p[has-class("fóó")]/text()')], + ["First"], ) def test_has_class_uppercase(self): @@ -75,7 +87,8 @@ def test_has_class_uppercase(self): """ sel = Selector(text=body) self.assertEqual( - [x.extract() for x in sel.xpath('//p[has-class("foo")]/text()')], ["First"] + [x.extract() for x in sel.xpath('//p[has-class("foo")]/text()')], + ["First"], ) def test_has_class_newline(self): @@ -85,7 +98,8 @@ def test_has_class_newline(self): """ sel = Selector(text=body) self.assertEqual( - [x.extract() for x in sel.xpath('//p[has-class("foo")]/text()')], ["First"] + [x.extract() for x in sel.xpath('//p[has-class("foo")]/text()')], + ["First"], ) def test_has_class_tab(self): @@ -94,7 +108,8 @@ def test_has_class_tab(self): """ sel = Selector(text=body) self.assertEqual( - [x.extract() for x in sel.xpath('//p[has-class("foo")]/text()')], ["First"] + [x.extract() for x in sel.xpath('//p[has-class("foo")]/text()')], + ["First"], ) def test_set_xpathfunc(self): @@ -108,7 +123,10 @@ def myfunc(ctx): """ sel = Selector(text=body) self.assertRaisesRegex( - ValueError, "Unregistered function in myfunc", sel.xpath, "myfunc()" + ValueError, + "Unregistered function in myfunc", + sel.xpath, + "myfunc()", ) set_xpathfunc("myfunc", myfunc) @@ -117,5 +135,8 @@ def myfunc(ctx): set_xpathfunc("myfunc", None) self.assertRaisesRegex( - ValueError, "Unregistered function in myfunc", sel.xpath, "myfunc()" + ValueError, + "Unregistered function in myfunc", + sel.xpath, + "myfunc()", ) diff --git a/tox.ini b/tox.ini index 58dbc47c..8dba8d30 100644 --- a/tox.ini +++ b/tox.ini @@ -39,7 +39,7 @@ commands = deps = black commands = - black --check {posargs:parsel tests docs conftest.py setup.py} + black --line-length=79 --check {posargs:parsel tests docs conftest.py setup.py} [docs] changedir = docs