From 1218ba92b6df39941160b641606ddd03b602ccd1 Mon Sep 17 00:00:00 2001 From: Michael Mintz Date: Thu, 11 Apr 2024 18:58:30 -0400 Subject: [PATCH 1/6] Add UC Mode "driver" methods directly into the "SB" API --- seleniumbase/fixtures/base_case.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/seleniumbase/fixtures/base_case.py b/seleniumbase/fixtures/base_case.py index 2316c721df1..0a23225eb1e 100644 --- a/seleniumbase/fixtures/base_case.py +++ b/seleniumbase/fixtures/base_case.py @@ -4134,6 +4134,23 @@ def get_new_driver( self.__dont_record_open = True self.open(new_start_page) self.__dont_record_open = False + if undetectable: + if hasattr(new_driver, "uc_open"): + self.uc_open = new_driver.uc_open + if hasattr(new_driver, "uc_open_with_tab"): + self.uc_open_with_tab = new_driver.uc_open_with_tab + if hasattr(new_driver, "uc_open_with_reconnect"): + self.uc_open_with_reconnect = new_driver.uc_open_with_reconnect + if hasattr(new_driver, "reconnect"): + self.reconnect = new_driver.reconnect + if hasattr(new_driver, "disconnect"): + self.disconnect = new_driver.disconnect + if hasattr(new_driver, "connect"): + self.connect = new_driver.connect + if hasattr(new_driver, "uc_click"): + self.uc_click = new_driver.uc_click + if hasattr(new_driver, "uc_switch_to_frame"): + self.uc_switch_to_frame = new_driver.uc_switch_to_frame return new_driver def switch_to_driver(self, driver): From ce3d5147bbb338a51ac422e8df47cc5a168b9f32 Mon Sep 17 00:00:00 2001 From: Michael Mintz Date: Thu, 11 Apr 2024 19:00:32 -0400 Subject: [PATCH 2/6] Update tag names for the UC Mode delayed click --- seleniumbase/undetected/webelement.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seleniumbase/undetected/webelement.py b/seleniumbase/undetected/webelement.py index 6b540f0c52d..1db44f30494 100644 --- a/seleniumbase/undetected/webelement.py +++ b/seleniumbase/undetected/webelement.py @@ -14,7 +14,7 @@ def uc_click( ): if driver and selector and by: 
delayed_click = False - if tag_name == "span" or tag_name == "button" or tag_name == "div": + if tag_name in ["span", "button", "div", "a"]: delayed_click = True if delayed_click and ":contains" not in selector: selector = js_utils.convert_to_css_selector(selector, by) From 7f9dba82f98635f0e3e71553f7d973a88dbe9e1a Mon Sep 17 00:00:00 2001 From: Michael Mintz Date: Thu, 11 Apr 2024 19:04:23 -0400 Subject: [PATCH 3/6] Refresh Python dependencies --- mkdocs_build/requirements.txt | 2 +- requirements.txt | 6 +++--- setup.py | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/mkdocs_build/requirements.txt b/mkdocs_build/requirements.txt index 58890a07f06..4b0523b3b95 100644 --- a/mkdocs_build/requirements.txt +++ b/mkdocs_build/requirements.txt @@ -3,7 +3,7 @@ regex>=2023.12.25 pymdown-extensions>=10.7.1 -pipdeptree>=2.17.0 +pipdeptree>=2.18.0 python-dateutil>=2.8.2 Markdown==3.6 markdown2==2.4.13 diff --git a/requirements.txt b/requirements.txt index d3f3c1f0bb7..9e6dd59c157 100755 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ wheel>=0.43.0;python_version>="3.8" attrs>=23.2.0 certifi>=2024.2.2 filelock>=3.12.2;python_version<"3.8" -filelock>=3.13.3;python_version>="3.8" +filelock>=3.13.4;python_version>="3.8" platformdirs>=4.0.0;python_version<"3.8" platformdirs>=4.2.0;python_version>="3.8" typing-extensions>=4.11.0;python_version>="3.8" @@ -15,7 +15,7 @@ parse>=1.20.1 parse-type>=0.6.2 pyyaml>=6.0.1 six==1.16.0 -idna==3.6 +idna==3.7 chardet==5.2.0 charset-normalizer==3.3.2 urllib3>=1.26.18,<2;python_version<"3.10" @@ -35,7 +35,7 @@ cssselect==1.2.0 sortedcontainers==2.4.0 fasteners==0.19 execnet==2.0.2;python_version<"3.8" -execnet==2.1.0;python_version>="3.8" +execnet==2.1.1;python_version>="3.8" iniconfig==2.0.0 pluggy==1.2.0;python_version<"3.8" pluggy==1.4.0;python_version>="3.8" diff --git a/setup.py b/setup.py index 64cd01c7dea..61476ff7734 100755 --- a/setup.py +++ b/setup.py @@ -155,7 +155,7 @@ 'attrs>=23.2.0', 
"certifi>=2024.2.2", 'filelock>=3.12.2;python_version<"3.8"', - 'filelock>=3.13.3;python_version>="3.8"', + 'filelock>=3.13.4;python_version>="3.8"', 'platformdirs>=4.0.0;python_version<"3.8"', 'platformdirs>=4.2.0;python_version>="3.8"', 'typing-extensions>=4.11.0;python_version>="3.8"', @@ -163,7 +163,7 @@ 'parse-type>=0.6.2', 'pyyaml>=6.0.1', "six==1.16.0", - "idna==3.6", + "idna==3.7", 'chardet==5.2.0', 'charset-normalizer==3.3.2', 'urllib3>=1.26.18,<2;python_version<"3.10"', @@ -183,7 +183,7 @@ "sortedcontainers==2.4.0", 'fasteners==0.19', 'execnet==2.0.2;python_version<"3.8"', - 'execnet==2.1.0;python_version>="3.8"', + 'execnet==2.1.1;python_version>="3.8"', 'iniconfig==2.0.0', 'pluggy==1.2.0;python_version<"3.8"', 'pluggy==1.4.0;python_version>="3.8"', From 9361c1c88b7680c0ad86ef6541f6519b4a5e08c8 Mon Sep 17 00:00:00 2001 From: Michael Mintz Date: Thu, 11 Apr 2024 19:05:46 -0400 Subject: [PATCH 4/6] Update examples --- examples/raw_ahrefs.py | 1 - examples/raw_form_turnstile.py | 3 ++- examples/raw_nopecha.py | 2 +- examples/raw_order_tickets.py | 11 +++++++++++ examples/raw_turnstile.py | 5 ++--- 5 files changed, 16 insertions(+), 6 deletions(-) create mode 100644 examples/raw_order_tickets.py diff --git a/examples/raw_ahrefs.py b/examples/raw_ahrefs.py index e3460528729..35ae621fbea 100644 --- a/examples/raw_ahrefs.py +++ b/examples/raw_ahrefs.py @@ -1,6 +1,5 @@ from seleniumbase import SB - with SB(uc=True, test=True, locale_code="en") as sb: url = "https://ahrefs.com/website-authority-checker" input_field = 'input[placeholder="Enter domain"]' diff --git a/examples/raw_form_turnstile.py b/examples/raw_form_turnstile.py index e6597bf3376..3b383b99cbc 100644 --- a/examples/raw_form_turnstile.py +++ b/examples/raw_form_turnstile.py @@ -1,7 +1,8 @@ from seleniumbase import SB with SB(uc=True, test=True) as sb: - sb.driver.uc_open_with_reconnect("seleniumbase.io/apps/form_turnstile", 3) + url = "seleniumbase.io/apps/form_turnstile" + 
sb.driver.uc_open_with_reconnect(url, 2) sb.press_keys("#name", "SeleniumBase") sb.press_keys("#email", "test@test.test") sb.press_keys("#phone", "1-555-555-5555") diff --git a/examples/raw_nopecha.py b/examples/raw_nopecha.py index 0d193fe7b68..28f3523ce98 100644 --- a/examples/raw_nopecha.py +++ b/examples/raw_nopecha.py @@ -1,7 +1,7 @@ from seleniumbase import SB with SB(uc=True, test=True) as sb: - sb.driver.uc_open_with_reconnect("nopecha.com/demo/turnstile", 3.4) + sb.driver.uc_open_with_reconnect("nopecha.com/demo/turnstile", 4) if sb.is_element_visible("#example-container0 iframe"): sb.switch_to_frame("#example-container0 iframe") if not sb.is_element_visible("circle.success-circle"): diff --git a/examples/raw_order_tickets.py b/examples/raw_order_tickets.py new file mode 100644 index 00000000000..1f53bde42fb --- /dev/null +++ b/examples/raw_order_tickets.py @@ -0,0 +1,11 @@ +from seleniumbase import SB + +with SB(uc=True, test=True, ad_block_on=True) as sb: + url = "https://www.thaiticketmajor.com/concert/" + sb.driver.uc_open_with_reconnect(url, 5.5) + sb.driver.uc_click("button.btn-signin", 4) + sb.switch_to_frame('iframe[title*="Cloudflare"]') + sb.assert_element("div#success svg#success-icon") + sb.switch_to_default_content() + sb.set_messenger_theme(location="top_center") + sb.post_message("SeleniumBase wasn't detected!") diff --git a/examples/raw_turnstile.py b/examples/raw_turnstile.py index 59f0c648d02..7a2ecee07a7 100644 --- a/examples/raw_turnstile.py +++ b/examples/raw_turnstile.py @@ -2,9 +2,8 @@ def open_the_turnstile_page(sb): - sb.driver.uc_open_with_reconnect( - "seleniumbase.io/apps/turnstile", reconnect_time=3, - ) + url = "seleniumbase.io/apps/turnstile" + sb.driver.uc_open_with_reconnect(url, reconnect_time=2) def click_turnstile_and_verify(sb): From 5e1a757de41624c4dd1f59b2d546c827db228702 Mon Sep 17 00:00:00 2001 From: Michael Mintz Date: Thu, 11 Apr 2024 19:06:15 -0400 Subject: [PATCH 5/6] Update the documentation --- README.md | 6 
++-- help_docs/uc_mode.md | 69 +++++++++++++++++++++++++++++++++++++++----- 2 files changed, 65 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index c1b188d9acf..3364795f3fd 100755 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@

SeleniumBase

-

SeleniumBase

+

SeleniumBase

All-in-one Browser Automation Framework:
Web Crawling / Testing / Scraping / Stealth

@@ -102,7 +102,7 @@ pytest test_demo_site.py -------- -

SeleniumBase

+

SeleniumBase

Explore the README:

@@ -1371,7 +1371,7 @@ pytest --reruns=1 --reruns-delay=1

-
+
SeleniumBase Docs
Tested with SeleniumBase
Gitter chat
SeleniumBase PyPI downloads
diff --git a/help_docs/uc_mode.md b/help_docs/uc_mode.md index 20ba73f61cd..48ebbc47d41 100644 --- a/help_docs/uc_mode.md +++ b/help_docs/uc_mode.md @@ -19,17 +19,21 @@ from seleniumbase import Driver driver = Driver(uc=True) -driver.uc_open_with_reconnect("https://gitlab.com/users/sign_in", 3) +url = "https://gitlab.com/users/sign_in" +driver.uc_open_with_reconnect(url, 3) driver.quit() ``` + + 👤 Here's an example with the SB manager (which has more methods and functionality than the Driver format): ```python from seleniumbase import SB with SB(uc=True) as sb: - sb.driver.uc_open_with_reconnect("https://gitlab.com/users/sign_in", 3) + url = "https://gitlab.com/users/sign_in" + sb.driver.uc_open_with_reconnect(url, 3) ``` 👤 Here's a longer example, which includes a retry if the CAPTCHA isn't bypassed on the first attempt: @@ -55,9 +59,8 @@ with SB(uc=True, test=True) as sb: from seleniumbase import SB def open_the_turnstile_page(sb): - sb.driver.uc_open_with_reconnect( - "https://seleniumbase.io/apps/turnstile", reconnect_time=3, - ) + url = "seleniumbase.io/apps/turnstile" + sb.driver.uc_open_with_reconnect(url, reconnect_time=2) def click_turnstile_and_verify(sb): sb.switch_to_frame("iframe") @@ -77,6 +80,46 @@ with SB(uc=True, test=True) as sb: +👤 Here's an example where the CAPTCHA appears after submitting a form: + +```python +from seleniumbase import SB + +with SB(uc=True, test=True, locale_code="en") as sb: + url = "https://ahrefs.com/website-authority-checker" + input_field = 'input[placeholder="Enter domain"]' + submit_button = 'span:contains("Check Authority")' + sb.driver.uc_open_with_reconnect(url, 1) # The bot-check is later + sb.type(input_field, "github.com/seleniumbase/SeleniumBase") + sb.driver.reconnect(0.1) + sb.driver.uc_click(submit_button, reconnect_time=4) + sb.wait_for_text_not_visible("Checking", timeout=10) + sb.highlight('p:contains("github.com/seleniumbase/SeleniumBase")') + sb.highlight('a:contains("Top 100 backlinks")') + 
sb.set_messenger_theme(location="bottom_center") + sb.post_message("SeleniumBase wasn't detected!") +``` + + + +👤 Here, the CAPTCHA appears after clicking to go to the sign-in screen: + +```python +from seleniumbase import SB + +with SB(uc=True, test=True, ad_block_on=True) as sb: + url = "https://www.thaiticketmajor.com/concert/" + sb.driver.uc_open_with_reconnect(url, 5.5) + sb.driver.uc_click("button.btn-signin", 4) + sb.switch_to_frame('iframe[title*="Cloudflare"]') + sb.assert_element("div#success svg#success-icon") + sb.switch_to_default_content() + sb.set_messenger_theme(location="top_center") + sb.post_message("SeleniumBase wasn't detected!") +``` + + + -------- 👤 In UC Mode, driver.get(url) has been modified from its original version: If anti-bot services are detected from a requests.get(url) call that's made before navigating to the website, then driver.uc_open_with_reconnect(url) will be used instead. To open a URL normally in UC Mode, use driver.default_get(url). @@ -247,7 +290,7 @@ Here are the 3 primary things that UC Mode does to make bo For example, if the Chrome DevTools Console variables aren't renamed, you can expect to find them easily when using selenium for browser automation: - + (If those variables are still there, then websites can easily detect your bots.) 
@@ -278,7 +321,7 @@ The above JS method is used within the SeleniumBaseChoosing the right CAPTCHA service for your business / website: - + As an ethical hacker / cybersecurity researcher who builds bots that bypass CAPTCHAs for sport, the CAPTCHA service that I personally recommend for keeping bots out is Google's reCAPTCHA: @@ -288,6 +331,18 @@ Since Google makes Chrome, Google's own reCAPTCHA service -------- +⚖️ Legal implications of web-scraping: + +Based on the following article, https://nubela.co/blog/meta-lost-the-scraping-legal-battle-to-bright-data/, (which outlines a court case where social-networking company: Meta lost the legal battle to data-scraping company: Bright Data), it was determined that web scraping is 100% legal in the eyes of the courts as long as: +1. The scraping is only done with public data and not private data. +2. The scraping isn’t done while logged in on the site being scraped. + +If the above criteria are met, then scrape away! (According to the article) + +(Note: I'm not a lawyer, so I can't officially offer legal advice, but I can direct people to existing articles online where people can find their own answers.) + +-------- + SeleniumBase
SeleniumBase
From 9ede89b4c765ab67a490348fe863e07098ef9165 Mon Sep 17 00:00:00 2001 From: Michael Mintz Date: Thu, 11 Apr 2024 19:08:10 -0400 Subject: [PATCH 6/6] Version 4.25.3 --- seleniumbase/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seleniumbase/__version__.py b/seleniumbase/__version__.py index c827c4659c4..1f75a215f59 100755 --- a/seleniumbase/__version__.py +++ b/seleniumbase/__version__.py @@ -1,2 +1,2 @@ # seleniumbase package -__version__ = "4.25.2" +__version__ = "4.25.3"