Skip to content

Commit

Permalink
Merge pull request #2685 from seleniumbase/refresh-uc-mode-and-dependencies
Browse files Browse the repository at this point in the history

Refresh UC Mode and dependencies
  • Loading branch information
mdmintz authored Apr 11, 2024
2 parents f32657c + 9ede89b commit e87d738
Show file tree
Hide file tree
Showing 13 changed files with 107 additions and 25 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

<h1>SeleniumBase</h1>

<p align="center"><a href="https://github.com/seleniumbase/SeleniumBase/"><img src="https://seleniumbase.github.io/cdn/img/super_logo_sb2.png" alt="SeleniumBase" title="SeleniumBase" width="350" /></a></p>
<p align="center"><a href="https://github.com/seleniumbase/SeleniumBase/"><img src="https://seleniumbase.github.io/cdn/img/super_logo_sb3.png" alt="SeleniumBase" title="SeleniumBase" width="350" /></a></p>


<p align="center" class="hero__title"><b>All-in-one Browser Automation Framework:<br />Web Crawling / Testing / Scraping / Stealth</b></p>
Expand Down Expand Up @@ -102,7 +102,7 @@ pytest test_demo_site.py

--------

<p align="left"><a href="https://github.com/seleniumbase/SeleniumBase/"><img src="https://seleniumbase.github.io/cdn/img/super_logo_sb2.png" alt="SeleniumBase" title="SeleniumBase" width="232" /></a></p>
<p align="left"><a href="https://github.com/seleniumbase/SeleniumBase/"><img src="https://seleniumbase.github.io/cdn/img/super_logo_sb3.png" alt="SeleniumBase" title="SeleniumBase" width="232" /></a></p>

<blockquote>
<p dir="auto"><strong>Explore the README:</strong></p>
Expand Down Expand Up @@ -1371,7 +1371,7 @@ pytest --reruns=1 --reruns-delay=1
<p><div><b><a href="https://github.com/mdmintz">https://github.com/mdmintz</a></b></div></p>
<div><a href="https://github.com/seleniumbase/SeleniumBase/"><img src="https://seleniumbase.github.io/cdn/img/super_logo_sb2.png" title="SeleniumBase" width="240" /></a></div>
<div><a href="https://github.com/seleniumbase/SeleniumBase/"><img src="https://seleniumbase.github.io/cdn/img/super_logo_sb3.png" title="SeleniumBase" width="240" /></a></div>
<div><a href="https://seleniumbase.io"><img src="https://img.shields.io/badge/docs-seleniumbase.io-11BBAA.svg" alt="SeleniumBase Docs" /></a></div> <div><a href="https://github.com/seleniumbase/SeleniumBase"><img src="https://img.shields.io/badge/tested%20with-SeleniumBase-04C38E.svg" alt="Tested with SeleniumBase" /></a></div> <div><a href="https://github.com/seleniumbase/SeleniumBase/blob/master/LICENSE"><img src="https://img.shields.io/badge/license-MIT-22BBCC.svg" title="SeleniumBase" /></a> <a href="https://gitter.im/seleniumbase/SeleniumBase" target="_blank"><img src="https://img.shields.io/gitter/room/seleniumbase/SeleniumBase.svg" alt="Gitter chat"/></a></div>
<div><a href="https://pepy.tech/project/seleniumbase" target="_blank"><img src="https://static.pepy.tech/badge/seleniumbase" alt="SeleniumBase PyPI downloads" /></a></div>
<div><a href="https://github.com/seleniumbase/SeleniumBase/stargazers"><img src="https://img.shields.io/github/stars/seleniumbase/seleniumbase.svg?color=19A57B" title="Stargazers" /></a></div>
Expand Down
1 change: 0 additions & 1 deletion examples/raw_ahrefs.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from seleniumbase import SB


with SB(uc=True, test=True, locale_code="en") as sb:
url = "https://ahrefs.com/website-authority-checker"
input_field = 'input[placeholder="Enter domain"]'
Expand Down
3 changes: 2 additions & 1 deletion examples/raw_form_turnstile.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from seleniumbase import SB

with SB(uc=True, test=True) as sb:
sb.driver.uc_open_with_reconnect("seleniumbase.io/apps/form_turnstile", 3)
url = "seleniumbase.io/apps/form_turnstile"
sb.driver.uc_open_with_reconnect(url, 2)
sb.press_keys("#name", "SeleniumBase")
sb.press_keys("#email", "test@test.com")
sb.press_keys("#phone", "1-555-555-5555")
Expand Down
2 changes: 1 addition & 1 deletion examples/raw_nopecha.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from seleniumbase import SB

with SB(uc=True, test=True) as sb:
sb.driver.uc_open_with_reconnect("nopecha.com/demo/turnstile", 3.4)
sb.driver.uc_open_with_reconnect("nopecha.com/demo/turnstile", 4)
if sb.is_element_visible("#example-container0 iframe"):
sb.switch_to_frame("#example-container0 iframe")
if not sb.is_element_visible("circle.success-circle"):
Expand Down
11 changes: 11 additions & 0 deletions examples/raw_order_tickets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from seleniumbase import SB

with SB(uc=True, test=True, ad_block_on=True) as sb:
url = "https://www.thaiticketmajor.com/concert/"
sb.driver.uc_open_with_reconnect(url, 5.5)
sb.driver.uc_click("button.btn-signin", 4)
sb.switch_to_frame('iframe[title*="Cloudflare"]')
sb.assert_element("div#success svg#success-icon")
sb.switch_to_default_content()
sb.set_messenger_theme(location="top_center")
sb.post_message("SeleniumBase wasn't detected!")
5 changes: 2 additions & 3 deletions examples/raw_turnstile.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@


def open_the_turnstile_page(sb):
sb.driver.uc_open_with_reconnect(
"seleniumbase.io/apps/turnstile", reconnect_time=3,
)
url = "seleniumbase.io/apps/turnstile"
sb.driver.uc_open_with_reconnect(url, reconnect_time=2)


def click_turnstile_and_verify(sb):
Expand Down
69 changes: 62 additions & 7 deletions help_docs/uc_mode.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,21 @@
from seleniumbase import Driver

driver = Driver(uc=True)
driver.uc_open_with_reconnect("https://gitlab.com/users/sign_in", 3)
url = "https://gitlab.com/users/sign_in"
driver.uc_open_with_reconnect(url, 3)
driver.quit()
```

<img src="https://seleniumbase.github.io/other/gitlab_bypass.png" title="SeleniumBase" width="370">

👤 Here's an example with the <b><code translate="no">SB</code></b> manager (which has more methods and functionality than the <b><code translate="no">Driver</code></b> format):

```python
from seleniumbase import SB

with SB(uc=True) as sb:
sb.driver.uc_open_with_reconnect("https://gitlab.com/users/sign_in", 3)
url = "https://gitlab.com/users/sign_in"
sb.driver.uc_open_with_reconnect(url, 3)
```

👤 Here's a longer example, which includes a retry if the CAPTCHA isn't bypassed on the first attempt:
Expand All @@ -55,9 +59,8 @@ with SB(uc=True, test=True) as sb:
from seleniumbase import SB

def open_the_turnstile_page(sb):
sb.driver.uc_open_with_reconnect(
"https://seleniumbase.io/apps/turnstile", reconnect_time=3,
)
url = "seleniumbase.io/apps/turnstile"
sb.driver.uc_open_with_reconnect(url, reconnect_time=2)

def click_turnstile_and_verify(sb):
sb.switch_to_frame("iframe")
Expand All @@ -77,6 +80,46 @@ with SB(uc=True, test=True) as sb:

<img src="https://seleniumbase.github.io/other/turnstile_click.jpg" title="SeleniumBase" width="440">

👤 Here's an example <b>where the CAPTCHA appears after submitting a form</b>:

```python
from seleniumbase import SB

with SB(uc=True, test=True, locale_code="en") as sb:
url = "https://ahrefs.com/website-authority-checker"
input_field = 'input[placeholder="Enter domain"]'
submit_button = 'span:contains("Check Authority")'
sb.driver.uc_open_with_reconnect(url, 1) # The bot-check is later
sb.type(input_field, "github.com/seleniumbase/SeleniumBase")
sb.driver.reconnect(0.1)
sb.driver.uc_click(submit_button, reconnect_time=4)
sb.wait_for_text_not_visible("Checking", timeout=10)
sb.highlight('p:contains("github.com/seleniumbase/SeleniumBase")')
sb.highlight('a:contains("Top 100 backlinks")')
sb.set_messenger_theme(location="bottom_center")
sb.post_message("SeleniumBase wasn't detected!")
```

<img src="https://seleniumbase.github.io/other/ahrefs_bypass.png" title="SeleniumBase" width="540">

👤 Here, <b>the CAPTCHA appears after clicking to go to the sign-in screen</b>:

```python
from seleniumbase import SB

with SB(uc=True, test=True, ad_block_on=True) as sb:
url = "https://www.thaiticketmajor.com/concert/"
sb.driver.uc_open_with_reconnect(url, 5.5)
sb.driver.uc_click("button.btn-signin", 4)
sb.switch_to_frame('iframe[title*="Cloudflare"]')
sb.assert_element("div#success svg#success-icon")
sb.switch_to_default_content()
sb.set_messenger_theme(location="top_center")
sb.post_message("SeleniumBase wasn't detected!")
```

<img src="https://seleniumbase.github.io/other/ttm_bypass.png" title="SeleniumBase" width="540">

--------

👤 In <b translate="no">UC Mode</b>, <code translate="no">driver.get(url)</code> has been modified from its original version: If anti-bot services are detected from a <code translate="no">requests.get(url)</code> call that's made before navigating to the website, then <code translate="no">driver.uc_open_with_reconnect(url)</code> will be used instead. To open a URL normally in <b translate="no">UC Mode</b>, use <code translate="no">driver.default_get(url)</code>.
Expand Down Expand Up @@ -247,7 +290,7 @@ Here are the 3 primary things that <b translate="no">UC Mode</b> does to make bo

For example, if the <b translate="no">Chrome DevTools Console</b> variables aren't renamed, you can expect to find them easily when using <b><code translate="no">selenium</code></b> for browser automation:

<img src="https://seleniumbase.github.io/other/cdc_args.png" title="SeleniumBase" width="380">
<img src="https://seleniumbase.github.io/other/cdc_args.png" title="SeleniumBase" width="390">

(If those variables are still there, then websites can easily detect your bots.)

Expand Down Expand Up @@ -278,7 +321,7 @@ The above JS method is used within the <b><code translate="no">SeleniumBase</cod

🏆 <b>Choosing the right CAPTCHA service</b> for your business / website:

<img src="https://seleniumbase.github.io/other/me_se_conf.jpg" title="SeleniumBase" width="340">
<img src="https://seleniumbase.github.io/other/me_se_conf.jpg" title="SeleniumBase" width="370">

As an ethical hacker / cybersecurity researcher who builds bots that bypass CAPTCHAs for sport, <b>the CAPTCHA service that I personally recommend</b> for keeping bots out is <b translate="no">Google's reCAPTCHA</b>:

Expand All @@ -288,6 +331,18 @@ Since Google makes Chrome, Google's own <b translate="no">reCAPTCHA</b> service

--------

⚖️ <b>Legal implications of web-scraping</b>:

According to the following article, https://nubela.co/blog/meta-lost-the-scraping-legal-battle-to-bright-data/ (which outlines a court case where the social-networking company Meta lost a legal battle to the data-scraping company Bright Data), web scraping is 100% legal in the eyes of the courts as long as:
1. The scraping is only done with <b>public data</b> and <b>not private data</b>.
2. The scraping isn’t done while logged in on the site being scraped.

If the above criteria are met, then scrape away! (According to the article)

(Note: I'm not a lawyer, so I can't officially offer legal advice, but I can direct people to existing articles online where people can find their own answers.)

--------

<img src="https://seleniumbase.github.io/cdn/img/sb_text_f.png" alt="SeleniumBase" title="SeleniumBase" align="center" width="335">

<div><a href="https://github.com/seleniumbase/SeleniumBase"><img src="https://seleniumbase.github.io/cdn/img/sb_logo_gs.png" alt="SeleniumBase" title="SeleniumBase" width="335" /></a></div>
2 changes: 1 addition & 1 deletion mkdocs_build/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

regex>=2023.12.25
pymdown-extensions>=10.7.1
pipdeptree>=2.17.0
pipdeptree>=2.18.0
python-dateutil>=2.8.2
Markdown==3.6
markdown2==2.4.13
Expand Down
6 changes: 3 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@ wheel>=0.43.0;python_version>="3.8"
attrs>=23.2.0
certifi>=2024.2.2
filelock>=3.12.2;python_version<"3.8"
filelock>=3.13.3;python_version>="3.8"
filelock>=3.13.4;python_version>="3.8"
platformdirs>=4.0.0;python_version<"3.8"
platformdirs>=4.2.0;python_version>="3.8"
typing-extensions>=4.11.0;python_version>="3.8"
parse>=1.20.1
parse-type>=0.6.2
pyyaml>=6.0.1
six==1.16.0
idna==3.6
idna==3.7
chardet==5.2.0
charset-normalizer==3.3.2
urllib3>=1.26.18,<2;python_version<"3.10"
Expand All @@ -35,7 +35,7 @@ cssselect==1.2.0
sortedcontainers==2.4.0
fasteners==0.19
execnet==2.0.2;python_version<"3.8"
execnet==2.1.0;python_version>="3.8"
execnet==2.1.1;python_version>="3.8"
iniconfig==2.0.0
pluggy==1.2.0;python_version<"3.8"
pluggy==1.4.0;python_version>="3.8"
Expand Down
2 changes: 1 addition & 1 deletion seleniumbase/__version__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# seleniumbase package
__version__ = "4.25.2"
__version__ = "4.25.3"
17 changes: 17 additions & 0 deletions seleniumbase/fixtures/base_case.py
Original file line number Diff line number Diff line change
Expand Up @@ -4134,6 +4134,23 @@ def get_new_driver(
self.__dont_record_open = True
self.open(new_start_page)
self.__dont_record_open = False
if undetectable:
if hasattr(new_driver, "uc_open"):
self.uc_open = new_driver.uc_open
if hasattr(new_driver, "uc_open_with_tab"):
self.uc_open_with_tab = new_driver.uc_open_with_tab
if hasattr(new_driver, "uc_open_with_reconnect"):
self.uc_open_with_reconnect = new_driver.uc_open_with_reconnect
if hasattr(new_driver, "reconnect"):
self.reconnect = new_driver.reconnect
if hasattr(new_driver, "disconnect"):
self.disconnect = new_driver.disconnect
if hasattr(new_driver, "connect"):
self.connect = new_driver.connect
if hasattr(new_driver, "uc_click"):
self.uc_click = new_driver.uc_click
if hasattr(new_driver, "uc_switch_to_frame"):
self.uc_switch_to_frame = new_driver.uc_switch_to_frame
return new_driver

def switch_to_driver(self, driver):
Expand Down
2 changes: 1 addition & 1 deletion seleniumbase/undetected/webelement.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def uc_click(
):
if driver and selector and by:
delayed_click = False
if tag_name == "span" or tag_name == "button" or tag_name == "div":
if tag_name in ["span", "button", "div", "a"]:
delayed_click = True
if delayed_click and ":contains" not in selector:
selector = js_utils.convert_to_css_selector(selector, by)
Expand Down
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,15 +155,15 @@
'attrs>=23.2.0',
"certifi>=2024.2.2",
'filelock>=3.12.2;python_version<"3.8"',
'filelock>=3.13.3;python_version>="3.8"',
'filelock>=3.13.4;python_version>="3.8"',
'platformdirs>=4.0.0;python_version<"3.8"',
'platformdirs>=4.2.0;python_version>="3.8"',
'typing-extensions>=4.11.0;python_version>="3.8"',
'parse>=1.20.1',
'parse-type>=0.6.2',
'pyyaml>=6.0.1',
"six==1.16.0",
"idna==3.6",
"idna==3.7",
'chardet==5.2.0',
'charset-normalizer==3.3.2',
'urllib3>=1.26.18,<2;python_version<"3.10"',
Expand All @@ -183,7 +183,7 @@
"sortedcontainers==2.4.0",
'fasteners==0.19',
'execnet==2.0.2;python_version<"3.8"',
'execnet==2.1.0;python_version>="3.8"',
'execnet==2.1.1;python_version>="3.8"',
'iniconfig==2.0.0',
'pluggy==1.2.0;python_version<"3.8"',
'pluggy==1.4.0;python_version>="3.8"',
Expand Down

0 comments on commit e87d738

Please sign in to comment.