diff --git a/README.md b/README.md index c1b188d9acf..3364795f3fd 100755 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@

SeleniumBase

-

SeleniumBase

+

SeleniumBase

All-in-one Browser Automation Framework:
Web Crawling / Testing / Scraping / Stealth

@@ -102,7 +102,7 @@ pytest test_demo_site.py -------- -

SeleniumBase

+

SeleniumBase

Explore the README:

@@ -1371,7 +1371,7 @@ pytest --reruns=1 --reruns-delay=1

https://github.com/mdmintz

-
+
SeleniumBase Docs
Tested with SeleniumBase
Gitter chat
SeleniumBase PyPI downloads
diff --git a/examples/raw_ahrefs.py b/examples/raw_ahrefs.py index e3460528729..35ae621fbea 100644 --- a/examples/raw_ahrefs.py +++ b/examples/raw_ahrefs.py @@ -1,6 +1,5 @@ from seleniumbase import SB - with SB(uc=True, test=True, locale_code="en") as sb: url = "https://ahrefs.com/website-authority-checker" input_field = 'input[placeholder="Enter domain"]' diff --git a/examples/raw_form_turnstile.py b/examples/raw_form_turnstile.py index e6597bf3376..3b383b99cbc 100644 --- a/examples/raw_form_turnstile.py +++ b/examples/raw_form_turnstile.py @@ -1,7 +1,8 @@ from seleniumbase import SB with SB(uc=True, test=True) as sb: - sb.driver.uc_open_with_reconnect("seleniumbase.io/apps/form_turnstile", 3) + url = "seleniumbase.io/apps/form_turnstile" + sb.driver.uc_open_with_reconnect(url, 2) sb.press_keys("#name", "SeleniumBase") sb.press_keys("#email", "test@test.test") sb.press_keys("#phone", "1-555-555-5555") diff --git a/examples/raw_nopecha.py b/examples/raw_nopecha.py index 0d193fe7b68..28f3523ce98 100644 --- a/examples/raw_nopecha.py +++ b/examples/raw_nopecha.py @@ -1,7 +1,7 @@ from seleniumbase import SB with SB(uc=True, test=True) as sb: - sb.driver.uc_open_with_reconnect("nopecha.com/demo/turnstile", 3.4) + sb.driver.uc_open_with_reconnect("nopecha.com/demo/turnstile", 4) if sb.is_element_visible("#example-container0 iframe"): sb.switch_to_frame("#example-container0 iframe") if not sb.is_element_visible("circle.success-circle"): diff --git a/examples/raw_order_tickets.py b/examples/raw_order_tickets.py new file mode 100644 index 00000000000..1f53bde42fb --- /dev/null +++ b/examples/raw_order_tickets.py @@ -0,0 +1,11 @@ +from seleniumbase import SB + +with SB(uc=True, test=True, ad_block_on=True) as sb: + url = "https://www.thaiticketmajor.com/concert/" + sb.driver.uc_open_with_reconnect(url, 5.5) + sb.driver.uc_click("button.btn-signin", 4) + sb.switch_to_frame('iframe[title*="Cloudflare"]') + sb.assert_element("div#success svg#success-icon") + 
sb.switch_to_default_content() + sb.set_messenger_theme(location="top_center") + sb.post_message("SeleniumBase wasn't detected!") diff --git a/examples/raw_turnstile.py b/examples/raw_turnstile.py index 59f0c648d02..7a2ecee07a7 100644 --- a/examples/raw_turnstile.py +++ b/examples/raw_turnstile.py @@ -2,9 +2,8 @@ def open_the_turnstile_page(sb): - sb.driver.uc_open_with_reconnect( - "seleniumbase.io/apps/turnstile", reconnect_time=3, - ) + url = "seleniumbase.io/apps/turnstile" + sb.driver.uc_open_with_reconnect(url, reconnect_time=2) def click_turnstile_and_verify(sb): diff --git a/help_docs/uc_mode.md b/help_docs/uc_mode.md index 20ba73f61cd..48ebbc47d41 100644 --- a/help_docs/uc_mode.md +++ b/help_docs/uc_mode.md @@ -19,17 +19,21 @@ from seleniumbase import Driver driver = Driver(uc=True) -driver.uc_open_with_reconnect("https://gitlab.com/users/sign_in", 3) +url = "https://gitlab.com/users/sign_in" +driver.uc_open_with_reconnect(url, 3) driver.quit() ``` + + 👤 Here's an example with the SB manager (which has more methods and functionality than the Driver format): ```python from seleniumbase import SB with SB(uc=True) as sb: - sb.driver.uc_open_with_reconnect("https://gitlab.com/users/sign_in", 3) + url = "https://gitlab.com/users/sign_in" + sb.driver.uc_open_with_reconnect(url, 3) ``` 👤 Here's a longer example, which includes a retry if the CAPTCHA isn't bypassed on the first attempt: @@ -55,9 +59,8 @@ with SB(uc=True, test=True) as sb: from seleniumbase import SB def open_the_turnstile_page(sb): - sb.driver.uc_open_with_reconnect( - "https://seleniumbase.io/apps/turnstile", reconnect_time=3, - ) + url = "seleniumbase.io/apps/turnstile" + sb.driver.uc_open_with_reconnect(url, reconnect_time=2) def click_turnstile_and_verify(sb): sb.switch_to_frame("iframe") @@ -77,6 +80,46 @@ with SB(uc=True, test=True) as sb: +👤 Here's an example where the CAPTCHA appears after submitting a form: + +```python +from seleniumbase import SB + +with SB(uc=True, test=True, 
locale_code="en") as sb: + url = "https://ahrefs.com/website-authority-checker" + input_field = 'input[placeholder="Enter domain"]' + submit_button = 'span:contains("Check Authority")' + sb.driver.uc_open_with_reconnect(url, 1) # The bot-check is later + sb.type(input_field, "github.com/seleniumbase/SeleniumBase") + sb.driver.reconnect(0.1) + sb.driver.uc_click(submit_button, reconnect_time=4) + sb.wait_for_text_not_visible("Checking", timeout=10) + sb.highlight('p:contains("github.com/seleniumbase/SeleniumBase")') + sb.highlight('a:contains("Top 100 backlinks")') + sb.set_messenger_theme(location="bottom_center") + sb.post_message("SeleniumBase wasn't detected!") +``` + + + +👤 Here, the CAPTCHA appears after clicking to go to the sign-in screen: + +```python +from seleniumbase import SB + +with SB(uc=True, test=True, ad_block_on=True) as sb: + url = "https://www.thaiticketmajor.com/concert/" + sb.driver.uc_open_with_reconnect(url, 5.5) + sb.driver.uc_click("button.btn-signin", 4) + sb.switch_to_frame('iframe[title*="Cloudflare"]') + sb.assert_element("div#success svg#success-icon") + sb.switch_to_default_content() + sb.set_messenger_theme(location="top_center") + sb.post_message("SeleniumBase wasn't detected!") +``` + + + -------- 👤 In UC Mode, driver.get(url) has been modified from its original version: If anti-bot services are detected from a requests.get(url) call that's made before navigating to the website, then driver.uc_open_with_reconnect(url) will be used instead. To open a URL normally in UC Mode, use driver.default_get(url). @@ -247,7 +290,7 @@ Here are the 3 primary things that UC Mode does to make bo For example, if the Chrome DevTools Console variables aren't renamed, you can expect to find them easily when using selenium for browser automation: - + (If those variables are still there, then websites can easily detect your bots.) 
@@ -278,7 +321,7 @@ The above JS method is used within the SeleniumBaseChoosing the right CAPTCHA service for your business / website: - + As an ethical hacker / cybersecurity researcher who builds bots that bypass CAPTCHAs for sport, the CAPTCHA service that I personally recommend for keeping bots out is Google's reCAPTCHA: @@ -288,6 +331,18 @@ Since Google makes Chrome, Google's own reCAPTCHA service -------- +⚖️ Legal implications of web-scraping: + +Based on the following article, https://nubela.co/blog/meta-lost-the-scraping-legal-battle-to-bright-data/, (which outlines a court case where social-networking company: Meta lost the legal battle to data-scraping company: Bright Data), it was determined that web scraping is 100% legal in the eyes of the courts as long as: +1. The scraping is only done with public data and not private data. +2. The scraping isn’t done while logged in on the site being scraped. + +If the above criteria are met, then scrape away! (According to the article) + +(Note: I'm not a lawyer, so I can't officially offer legal advice, but I can direct people to existing articles online where people can find their own answers.) + +-------- + SeleniumBase
SeleniumBase
diff --git a/mkdocs_build/requirements.txt b/mkdocs_build/requirements.txt index 58890a07f06..4b0523b3b95 100644 --- a/mkdocs_build/requirements.txt +++ b/mkdocs_build/requirements.txt @@ -3,7 +3,7 @@ regex>=2023.12.25 pymdown-extensions>=10.7.1 -pipdeptree>=2.17.0 +pipdeptree>=2.18.0 python-dateutil>=2.8.2 Markdown==3.6 markdown2==2.4.13 diff --git a/requirements.txt b/requirements.txt index d3f3c1f0bb7..9e6dd59c157 100755 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ wheel>=0.43.0;python_version>="3.8" attrs>=23.2.0 certifi>=2024.2.2 filelock>=3.12.2;python_version<"3.8" -filelock>=3.13.3;python_version>="3.8" +filelock>=3.13.4;python_version>="3.8" platformdirs>=4.0.0;python_version<"3.8" platformdirs>=4.2.0;python_version>="3.8" typing-extensions>=4.11.0;python_version>="3.8" @@ -15,7 +15,7 @@ parse>=1.20.1 parse-type>=0.6.2 pyyaml>=6.0.1 six==1.16.0 -idna==3.6 +idna==3.7 chardet==5.2.0 charset-normalizer==3.3.2 urllib3>=1.26.18,<2;python_version<"3.10" @@ -35,7 +35,7 @@ cssselect==1.2.0 sortedcontainers==2.4.0 fasteners==0.19 execnet==2.0.2;python_version<"3.8" -execnet==2.1.0;python_version>="3.8" +execnet==2.1.1;python_version>="3.8" iniconfig==2.0.0 pluggy==1.2.0;python_version<"3.8" pluggy==1.4.0;python_version>="3.8" diff --git a/seleniumbase/__version__.py b/seleniumbase/__version__.py index c827c4659c4..1f75a215f59 100755 --- a/seleniumbase/__version__.py +++ b/seleniumbase/__version__.py @@ -1,2 +1,2 @@ # seleniumbase package -__version__ = "4.25.2" +__version__ = "4.25.3" diff --git a/seleniumbase/fixtures/base_case.py b/seleniumbase/fixtures/base_case.py index 2316c721df1..0a23225eb1e 100644 --- a/seleniumbase/fixtures/base_case.py +++ b/seleniumbase/fixtures/base_case.py @@ -4134,6 +4134,23 @@ def get_new_driver( self.__dont_record_open = True self.open(new_start_page) self.__dont_record_open = False + if undetectable: + if hasattr(new_driver, "uc_open"): + self.uc_open = new_driver.uc_open + if hasattr(new_driver, 
"uc_open_with_tab"): + self.uc_open_with_tab = new_driver.uc_open_with_tab + if hasattr(new_driver, "uc_open_with_reconnect"): + self.uc_open_with_reconnect = new_driver.uc_open_with_reconnect + if hasattr(new_driver, "reconnect"): + self.reconnect = new_driver.reconnect + if hasattr(new_driver, "disconnect"): + self.disconnect = new_driver.disconnect + if hasattr(new_driver, "connect"): + self.connect = new_driver.connect + if hasattr(new_driver, "uc_click"): + self.uc_click = new_driver.uc_click + if hasattr(new_driver, "uc_switch_to_frame"): + self.uc_switch_to_frame = new_driver.uc_switch_to_frame return new_driver def switch_to_driver(self, driver): diff --git a/seleniumbase/undetected/webelement.py b/seleniumbase/undetected/webelement.py index 6b540f0c52d..1db44f30494 100644 --- a/seleniumbase/undetected/webelement.py +++ b/seleniumbase/undetected/webelement.py @@ -14,7 +14,7 @@ def uc_click( ): if driver and selector and by: delayed_click = False - if tag_name == "span" or tag_name == "button" or tag_name == "div": + if tag_name in ["span", "button", "div", "a"]: delayed_click = True if delayed_click and ":contains" not in selector: selector = js_utils.convert_to_css_selector(selector, by) diff --git a/setup.py b/setup.py index 64cd01c7dea..61476ff7734 100755 --- a/setup.py +++ b/setup.py @@ -155,7 +155,7 @@ 'attrs>=23.2.0', "certifi>=2024.2.2", 'filelock>=3.12.2;python_version<"3.8"', - 'filelock>=3.13.3;python_version>="3.8"', + 'filelock>=3.13.4;python_version>="3.8"', 'platformdirs>=4.0.0;python_version<"3.8"', 'platformdirs>=4.2.0;python_version>="3.8"', 'typing-extensions>=4.11.0;python_version>="3.8"', @@ -163,7 +163,7 @@ 'parse-type>=0.6.2', 'pyyaml>=6.0.1', "six==1.16.0", - "idna==3.6", + "idna==3.7", 'chardet==5.2.0', 'charset-normalizer==3.3.2', 'urllib3>=1.26.18,<2;python_version<"3.10"', @@ -183,7 +183,7 @@ "sortedcontainers==2.4.0", 'fasteners==0.19', 'execnet==2.0.2;python_version<"3.8"', - 'execnet==2.1.0;python_version>="3.8"', + 
'execnet==2.1.1;python_version>="3.8"', 'iniconfig==2.0.0', 'pluggy==1.2.0;python_version<"3.8"', 'pluggy==1.4.0;python_version>="3.8"',