Skip to content

Commit

Permalink
Merge pull request #3224 from seleniumbase/cdp-mode-patch-3
Browse files Browse the repository at this point in the history
CDP Mode - Patch 3
  • Loading branch information
mdmintz authored Oct 26, 2024
2 parents 32f1288 + 1aaee33 commit e401902
Show file tree
Hide file tree
Showing 26 changed files with 287 additions and 125 deletions.
31 changes: 16 additions & 15 deletions examples/cdp_mode/ReadMe.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

## [<img src="https://seleniumbase.github.io/img/logo6.png" title="SeleniumBase" width="32">](https://github.com/seleniumbase/SeleniumBase/) CDP Mode 🐙

🐙 <b translate="no">SeleniumBase</b> <b translate="no">CDP Mode</b> (Chrome Devtools Protocol Mode) is a special mode inside of <b><a href="https://github.com/seleniumbase/SeleniumBase/blob/master/help_docs/uc_mode.md" translate="no"><span translate="no">SeleniumBase UC Mode</span></a></b> that lets bots appear human while controlling the browser with the <b translate="no">CDP-Driver</b>. Although regular <span translate="no">UC Mode</span> can't perform <span translate="no">WebDriver</span> actions while the <code>driver</code> is disconnected from the browser, the <span translate="no">CDP-Driver</span> can still perform actions (while maintaining its cover).
🐙 <b translate="no">SeleniumBase</b> <b translate="no">CDP Mode</b> (Chrome DevTools Protocol Mode) is a special mode inside <b><a href="https://github.com/seleniumbase/SeleniumBase/blob/master/help_docs/uc_mode.md" translate="no"><span translate="no">SeleniumBase UC Mode</span></a></b> that lets bots appear human while controlling the browser with the <b translate="no">CDP-Driver</b>. Although regular <span translate="no">UC Mode</span> can't perform <span translate="no">WebDriver</span> actions while the <code>driver</code> is disconnected from the browser, the <span translate="no">CDP-Driver</span> can still perform actions while maintaining its cover. (For Python 3.11 or newer!)

👤 <b translate="no">UC Mode</b> avoids bot-detection by first disconnecting WebDriver from the browser at strategic times, calling special <code>PyAutoGUI</code> methods to bypass CAPTCHAs (as needed), and finally reconnecting the <code>driver</code> afterwards so that WebDriver actions can be performed again. Although this approach works for bypassing simple CAPTCHAs, more flexibility is needed for bypassing bot-detection on websites with advanced protection. (That's where <b translate="no">CDP Mode</b> comes in.)

Expand Down Expand Up @@ -74,15 +74,15 @@ from seleniumbase import SB
with SB(uc=True, test=True, locale_code="en") as sb:
url = "https://www.pokemon.com/us"
sb.activate_cdp_mode(url)
sb.sleep(1.5)
sb.sleep(3)
sb.cdp.click_if_visible("button#onetrust-reject-all-handler")
sb.sleep(0.5)
sb.sleep(1)
sb.cdp.click('a[href="https://www.pokemon.com/us/pokedex/"]')
sb.sleep(1)
sb.cdp.click('b:contains("Show Advanced Search")')
sb.sleep(1)
sb.cdp.click('span[data-type="type"][data-value="electric"]')
sb.sleep(0.5)
sb.sleep(1)
sb.cdp.click("a#advSearch")
sb.sleep(1)
sb.cdp.click('img[src*="img/pokedex/detail/025.png"]')
Expand Down Expand Up @@ -130,13 +130,13 @@ from seleniumbase import SB
with SB(uc=True, test=True, locale_code="en") as sb:
url = "https://www.hyatt.com/"
sb.activate_cdp_mode(url)
sb.sleep(1.5)
sb.sleep(2)
sb.cdp.click_if_visible('button[aria-label="Close"]')
sb.sleep(0.5)
sb.sleep(1)
sb.cdp.click('span:contains("Explore")')
sb.sleep(1)
sb.cdp.click('a:contains("Hotels & Resorts")')
sb.sleep(2.5)
sb.sleep(3)
location = "Anaheim, CA, USA"
sb.cdp.press_keys("input#searchbox", location)
sb.sleep(1)
Expand All @@ -145,7 +145,7 @@ with SB(uc=True, test=True, locale_code="en") as sb:
sb.cdp.click('div.hotel-card-footer button')
sb.sleep(1)
sb.cdp.click('button[data-locator="find-hotels"]')
sb.sleep(4)
sb.sleep(5)
hotel_names = sb.cdp.select_all(
'div[data-booking-status="BOOKABLE"] [class*="HotelCard_header"]'
)
Expand Down Expand Up @@ -177,20 +177,20 @@ from seleniumbase import SB
with SB(uc=True, test=True, locale_code="en") as sb:
url = "https://www.bestwestern.com/en_US.html"
sb.activate_cdp_mode(url)
sb.sleep(1.5)
sb.sleep(2.5)
sb.cdp.click_if_visible("div.onetrust-close-btn-handler")
sb.sleep(0.5)
sb.sleep(1)
sb.cdp.click("input#destination-input")
sb.sleep(1.5)
sb.sleep(2)
location = "Palm Springs, CA, USA"
sb.cdp.press_keys("input#destination-input", location)
sb.sleep(0.6)
sb.sleep(1)
sb.cdp.click("ul#google-suggestions li")
sb.sleep(0.6)
sb.sleep(1)
sb.cdp.click("button#btn-modify-stay-update")
sb.sleep(1.5)
sb.cdp.click("label#available-label")
sb.sleep(4)
sb.cdp.click("label#available-label")
sb.sleep(2.5)
print("Best Western Hotels in %s:" % location)
summary_details = sb.cdp.get_text("#summary-details-column")
dates = summary_details.split("ROOM")[0].split("DATES")[-1].strip()
Expand Down Expand Up @@ -288,6 +288,7 @@ sb.cdp.get_document()
sb.cdp.get_flattened_document()
sb.cdp.get_element_attributes(selector)
sb.cdp.get_element_html(selector)
sb.cdp.set_locale(locale)
sb.cdp.set_attributes(selector, attribute, value)
sb.cdp.internalize_links()
sb.cdp.is_element_present(selector)
Expand Down
24 changes: 18 additions & 6 deletions examples/cdp_mode/raw_async.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
import asyncio
import time
from contextlib import suppress
from seleniumbase.core import sb_cdp
from seleniumbase.undetected import cdp_driver


async def main():
driver = await cdp_driver.cdp_util.start_async()
page = await driver.get("https://www.priceline.com/")
page = await driver.get("about:blank")
await page.set_locale("en")
await page.get("https://www.priceline.com/")
time.sleep(3)
print(await page.evaluate("document.title"))
element = await page.select('[data-testid*="endLocation"]')
Expand All @@ -22,24 +25,33 @@ async def main():

# Call everything without using async / await
driver = cdp_driver.cdp_util.start_sync()
page = loop.run_until_complete(driver.get("https://www.pokemon.com/us"))
page = loop.run_until_complete(driver.get("about:blank"))
loop.run_until_complete(page.set_locale("en"))
loop.run_until_complete(page.get("https://www.pokemon.com/us"))
time.sleep(3)
print(loop.run_until_complete(page.evaluate("document.title")))
with suppress(Exception):
selector = "button#onetrust-reject-all-handler"
element = loop.run_until_complete(page.select(selector, timeout=1))
loop.run_until_complete(element.click_async())
time.sleep(1)
element = loop.run_until_complete(page.select("span.icon_pokeball"))
loop.run_until_complete(element.click_async())
time.sleep(1)
time.sleep(1.5)
print(loop.run_until_complete(page.evaluate("document.title")))
time.sleep(1)

# Call CDP methods via the simplified CDP API
page = loop.run_until_complete(driver.get("https://www.priceline.com/"))
page = loop.run_until_complete(driver.get("about:blank"))
sb = sb_cdp.CDPMethods(loop, page, driver)
sb.set_locale("en")
sb.open("https://www.priceline.com/")
sb.sleep(3)
sb.internalize_links() # Don't open links in a new tab
sb.click("#link_header_nav_experiences")
sb.sleep(2)
sb.sleep(2.5)
sb.remove_elements("msm-cookie-banner")
sb.sleep(1)
sb.sleep(1.5)
sb.press_keys('input[data-test-id*="search"]', "Amsterdam")
sb.sleep(2)
sb.click('span[data-test-id*="autocomplete"]')
Expand Down
14 changes: 7 additions & 7 deletions examples/cdp_mode/raw_bestwestern.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,20 @@
with SB(uc=True, test=True, locale_code="en") as sb:
url = "https://www.bestwestern.com/en_US.html"
sb.activate_cdp_mode(url)
sb.sleep(1.5)
sb.sleep(2.5)
sb.cdp.click_if_visible("div.onetrust-close-btn-handler")
sb.sleep(0.5)
sb.sleep(1)
sb.cdp.click("input#destination-input")
sb.sleep(1.5)
sb.sleep(2)
location = "Palm Springs, CA, USA"
sb.cdp.press_keys("input#destination-input", location)
sb.sleep(0.6)
sb.sleep(1)
sb.cdp.click("ul#google-suggestions li")
sb.sleep(0.6)
sb.sleep(1)
sb.cdp.click("button#btn-modify-stay-update")
sb.sleep(1.5)
sb.cdp.click("label#available-label")
sb.sleep(4)
sb.cdp.click("label#available-label")
sb.sleep(2.5)
print("Best Western Hotels in %s:" % location)
summary_details = sb.cdp.get_text("#summary-details-column")
dates = summary_details.split("ROOM")[0].split("DATES")[-1].strip()
Expand Down
13 changes: 7 additions & 6 deletions examples/cdp_mode/raw_cdp.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,22 @@

@decorators.print_runtime("CDP Priceline Example")
def main():
url = "https://www.priceline.com/"
url0 = "about:blank" # Set Locale code from here first
url1 = "https://www.priceline.com/" # (The "real" URL)
loop = asyncio.new_event_loop()
driver = cdp_driver.cdp_util.start_sync()
page = loop.run_until_complete(driver.get(url))
page = loop.run_until_complete(driver.get(url0))
sb = sb_cdp.CDPMethods(loop, page, driver)
sb.set_locale("en") # This test expects English locale
sb.open(url1)
sb.sleep(3)
sb.internalize_links() # Don't open links in a new tab
sb.click("#link_header_nav_experiences")
sb.sleep(2)
sb.sleep(2.5)
sb.remove_elements("msm-cookie-banner")
sb.sleep(1)
sb.sleep(1.5)
location = "Amsterdam"
sb.press_keys('input[data-test-id*="search"]', location)
sb.sleep(1)
sb.click('input[data-test-id*="search"]')
sb.sleep(2)
sb.click('span[data-test-id*="autocomplete"]')
sb.sleep(5)
Expand Down
8 changes: 3 additions & 5 deletions examples/cdp_mode/raw_cdp_with_sb.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,17 @@
from seleniumbase import SB


with SB(uc=True, test=True) as sb:
with SB(uc=True, test=True, locale_code="en") as sb:
url = "https://www.priceline.com/"
sb.activate_cdp_mode(url)
sb.sleep(3)
sb.internalize_links() # Don't open links in a new tab
sb.click("#link_header_nav_experiences")
sb.sleep(2)
sb.sleep(2.5)
sb.remove_elements("msm-cookie-banner")
sb.sleep(1)
sb.sleep(1.5)
location = "Amsterdam"
sb.press_keys('input[data-test-id*="search"]', location)
sb.sleep(1)
sb.click('input[data-test-id*="search"]')
sb.sleep(2)
sb.click('span[data-test-id*="autocomplete"]')
sb.sleep(5)
Expand Down
2 changes: 1 addition & 1 deletion examples/cdp_mode/raw_footlocker.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
sb.cdp.press_keys('input[aria-label="Search"]', search)
sb.sleep(2)
sb.cdp.mouse_click('ul[id*="typeahead"] li div')
sb.sleep(2)
sb.sleep(3)
elements = sb.cdp.select_all("a.ProductCard-link")
if elements:
print('**** Found results for "%s": ****' % search)
Expand Down
8 changes: 4 additions & 4 deletions examples/cdp_mode/raw_hyatt.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
with SB(uc=True, test=True, locale_code="en") as sb:
url = "https://www.hyatt.com/"
sb.activate_cdp_mode(url)
sb.sleep(1.5)
sb.sleep(2)
sb.cdp.click_if_visible('button[aria-label="Close"]')
sb.sleep(0.5)
sb.sleep(1)
sb.cdp.click('span:contains("Explore")')
sb.sleep(1)
sb.cdp.click('a:contains("Hotels & Resorts")')
sb.sleep(2.5)
sb.sleep(3)
location = "Anaheim, CA, USA"
sb.cdp.press_keys("input#searchbox", location)
sb.sleep(1)
Expand All @@ -18,7 +18,7 @@
sb.cdp.click('div.hotel-card-footer button')
sb.sleep(1)
sb.cdp.click('button[data-locator="find-hotels"]')
sb.sleep(4)
sb.sleep(5)
hotel_names = sb.cdp.select_all(
'div[data-booking-status="BOOKABLE"] [class*="HotelCard_header"]'
)
Expand Down
6 changes: 3 additions & 3 deletions examples/cdp_mode/raw_pokemon.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
with SB(uc=True, test=True, locale_code="en") as sb:
url = "https://www.pokemon.com/us"
sb.activate_cdp_mode(url)
sb.sleep(1.5)
sb.sleep(3)
sb.cdp.click_if_visible("button#onetrust-reject-all-handler")
sb.sleep(0.5)
sb.sleep(1)
sb.cdp.click('a[href="https://www.pokemon.com/us/pokedex/"]')
sb.sleep(1)
sb.cdp.click('b:contains("Show Advanced Search")')
sb.sleep(1)
sb.cdp.click('span[data-type="type"][data-value="electric"]')
sb.sleep(0.5)
sb.sleep(1)
sb.cdp.click("a#advSearch")
sb.sleep(1)
sb.cdp.click('img[src*="img/pokedex/detail/025.png"]')
Expand Down
14 changes: 7 additions & 7 deletions examples/cdp_mode/raw_priceline.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,26 @@
window_handle = sb.driver.current_window_handle
url = "https://www.priceline.com"
sb.activate_cdp_mode(url)
sb.sleep(1)
sb.sleep(3)
sb.cdp.click('input[name="endLocation"]')
sb.sleep(1)
location = "Portland, OR, USA"
selection = "Oregon, United States" # (Dropdown option)
sb.cdp.press_keys('input[name="endLocation"]', location)
sb.sleep(1)
sb.click_if_visible('input[name="endLocation"]')
sb.sleep(1)
sb.cdp.click("Oregon, United States")
sb.sleep(1)
sb.sleep(0.5)
sb.cdp.click(selection)
sb.sleep(1.5)
sb.cdp.click('button[aria-label="Dismiss calendar"]')
sb.sleep(3)
sb.sleep(5)
sb.connect()
if len(sb.driver.window_handles) > 1:
sb.switch_to_window(window_handle)
sb.driver.close()
sb.sleep(0.1)
sb.sleep(0.2)
sb.switch_to_newest_window()
sb.sleep(1)
sb.sleep(0.6)
hotel_names = sb.find_elements('a[data-autobot-element-id*="HOTEL_NAME"]')
hotel_prices = sb.find_elements('span[font-size="4,,,5"]')
print("Priceline Hotels in %s:" % location)
Expand Down
7 changes: 3 additions & 4 deletions examples/cdp_mode/raw_req_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,11 @@ class RequestPausedTest():
async def request_paused_handler(self, event, tab):
r = event.request
is_image = ".png" in r.url or ".jpg" in r.url or ".gif" in r.url
is_blocked = True if is_image else False
if not is_blocked:
if not is_image: # Let the data through
tab.feed_cdp(
mycdp.fetch.continue_request(request_id=event.request_id)
)
else:
else: # Block the data (images)
TIMED_OUT = mycdp.network.ErrorReason.TIMED_OUT
s = f"BLOCKING | {r.method} | {r.url}"
print(f" >>> ------------\n{s}")
Expand All @@ -23,7 +22,7 @@ async def request_paused_handler(self, event, tab):
)

async def start_test(self):
driver = await cdp_driver.cdp_util.start_async(incognito=True)
driver = await cdp_driver.cdp_util.start_async()
tab = await driver.get("about:blank")
tab.add_handler(mycdp.fetch.RequestPaused, self.request_paused_handler)
url = "https://gettyimages.com/photos/firefly-2003-nathan"
Expand Down
7 changes: 3 additions & 4 deletions examples/cdp_mode/raw_req_sb.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,16 @@
async def request_paused_handler(event, tab):
r = event.request
is_image = ".png" in r.url or ".jpg" in r.url or ".gif" in r.url
is_blocked = True if is_image else False
if not is_blocked:
if not is_image: # Let the data through
tab.feed_cdp(mycdp.fetch.continue_request(request_id=event.request_id))
else:
else: # Block the data (images)
TIMED_OUT = mycdp.network.ErrorReason.TIMED_OUT
s = f"BLOCKING | {r.method} | {r.url}"
print(f" >>> ------------\n{s}")
tab.feed_cdp(mycdp.fetch.fail_request(event.request_id, TIMED_OUT))


with SB(uc=True, test=True, locale_code="en", incognito=True) as sb:
with SB(uc=True, test=True, locale_code="en") as sb:
sb.activate_cdp_mode("about:blank")
sb.cdp.add_handler(mycdp.fetch.RequestPaused, request_paused_handler)
url = "https://gettyimages.com/photos/firefly-2003-nathan"
Expand Down
2 changes: 1 addition & 1 deletion examples/raw_gui_click.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

with SB(uc=True, test=True, rtf=True, agent=agent) as sb:
url = "https://gitlab.com/users/sign_in"
sb.activate_cdp_mode(url)
sb.uc_open_with_reconnect(url)
sb.uc_gui_click_captcha() # Only if needed
sb.assert_element('label[for="user_login"]')
sb.assert_element('input[data-testid*="username"]')
Expand Down
2 changes: 1 addition & 1 deletion examples/raw_pyautogui.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

with SB(uc=True, test=True, rtf=True, agent=agent) as sb:
url = "https://gitlab.com/users/sign_in"
sb.uc_open_with_reconnect(url, 4)
sb.uc_open_with_reconnect(url)
sb.uc_gui_handle_captcha() # Only if needed
sb.assert_element('label[for="user_login"]')
sb.assert_element('input[data-testid*="username"]')
Expand Down
2 changes: 0 additions & 2 deletions examples/raw_recaptcha.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,13 @@

with SB(uc=True, test=True) as sb:
url = "https://seleniumbase.io/apps/recaptcha"
sb.activate_cdp_mode(url)
sb.uc_gui_handle_captcha() # Try with TAB + SPACEBAR
sb.assert_element("img#captcha-success", timeout=3)
sb.set_messenger_theme(location="top_left")
sb.post_message("SeleniumBase wasn't detected", duration=3)

with SB(uc=True, test=True) as sb:
url = "https://seleniumbase.io/apps/recaptcha"
sb.activate_cdp_mode(url)
sb.uc_gui_click_captcha('iframe[src*="/recaptcha/"]')
sb.assert_element("img#captcha-success", timeout=3)
sb.set_messenger_theme(location="top_left")
Expand Down
Loading

0 comments on commit e401902

Please sign in to comment.