From 270006657a362fc6a4d13eee8135b0862e187154 Mon Sep 17 00:00:00 2001 From: Michael Mintz Date: Sat, 26 Oct 2024 15:58:49 -0400 Subject: [PATCH 1/4] Update CDP Mode / UC Mode --- seleniumbase/core/browser_launcher.py | 11 ++- seleniumbase/core/sb_cdp.py | 78 +++++++++++++++--- seleniumbase/fixtures/base_case.py | 80 ++++++++++++++++--- seleniumbase/fixtures/shared_utils.py | 18 +++++ seleniumbase/plugins/driver_manager.py | 15 ++-- .../undetected/cdp_driver/cdp_util.py | 47 ++++++----- seleniumbase/undetected/cdp_driver/config.py | 6 ++ .../undetected/cdp_driver/connection.py | 5 ++ seleniumbase/undetected/cdp_driver/tab.py | 2 +- 9 files changed, 208 insertions(+), 54 deletions(-) diff --git a/seleniumbase/core/browser_launcher.py b/seleniumbase/core/browser_launcher.py index c176f9f77ea..3b1501a3715 100644 --- a/seleniumbase/core/browser_launcher.py +++ b/seleniumbase/core/browser_launcher.py @@ -549,7 +549,7 @@ def uc_open_with_cdp_mode(driver, url=None): cdp = types.SimpleNamespace() CDPM = sb_cdp.CDPMethods(loop, page, driver) cdp.get = CDPM.get - cdp.open = CDPM.get + cdp.open = CDPM.open cdp.reload = CDPM.reload cdp.refresh = CDPM.refresh cdp.add_handler = CDPM.add_handler @@ -590,6 +590,7 @@ def uc_open_with_cdp_mode(driver, url=None): cdp.medimize = CDPM.medimize cdp.set_window_rect = CDPM.set_window_rect cdp.reset_window_size = CDPM.reset_window_size + cdp.set_locale = CDPM.set_locale cdp.set_attributes = CDPM.set_attributes cdp.internalize_links = CDPM.internalize_links cdp.get_window = CDPM.get_window @@ -2179,8 +2180,13 @@ def _set_chrome_options( or IS_LINUX # switches to Xvfb (non-headless) ) ): + chrome_options.add_argument("--no-pings") chrome_options.add_argument("--disable-popup-blocking") - chrome_options.add_argument("--homepage=chrome://new-tab-page/") + chrome_options.add_argument("--homepage=chrome://version/") + chrome_options.add_argument("--animation-duration-scale=0") + chrome_options.add_argument("--wm-window-animations-disabled") + chrome_options.add_argument("--enable-privacy-sandbox-ads-apis") + chrome_options.add_argument("--disable-background-timer-throttling") # Skip remaining options that trigger anti-bot services return chrome_options chrome_options.add_argument("--test-type") @@ -4523,6 +4529,7 @@ def get_local_driver( and uc_chrome_version and uc_chrome_version >= 117 and (headless or headless2) + and chromium_arg != "decoy" ): from seleniumbase.console_scripts import ( sb_install diff --git a/seleniumbase/core/sb_cdp.py b/seleniumbase/core/sb_cdp.py index 44aa07237d6..7686fd04d3f 100644 --- a/seleniumbase/core/sb_cdp.py +++ b/seleniumbase/core/sb_cdp.py @@ -76,13 +76,21 @@ def __add_sync_methods(self, element): def get(self, url): url = shared_utils.fix_url_as_needed(url) - self.page = self.loop.run_until_complete(self.driver.cdp_base.get(url)) + driver = self.driver + if hasattr(driver, "cdp_base"): + driver = driver.cdp_base + self.page = self.loop.run_until_complete(driver.get(url)) url_protocol = url.split(":")[0] safe_url = True if url_protocol not in ["about", "data", "chrome"]: safe_url = False if not safe_url: time.sleep(constants.UC.CDP_MODE_OPEN_WAIT) + self.__slow_mode_pause_if_set() + self.loop.run_until_complete(self.page.wait()) + + def open(self, url): + self.get(url) def reload(self, ignore_cache=True, script_to_evaluate_on_load=None): self.loop.run_until_complete( @@ -111,18 +119,28 @@ def find_element( with the closest text-length to the text being searched for.""" self.__add_light_pause() selector = self.__convert_to_css_if_xpath(selector) + early_failure = False if (":contains(" in selector): tag_name = selector.split(":contains(")[0].split(" ")[-1] text = selector.split(":contains(")[1].split(")")[0][1:-1] with suppress(Exception): self.loop.run_until_complete( - self.page.select(tag_name, timeout=3) + self.page.select(tag_name, timeout=timeout) ) - self.loop.run_until_complete(self.page.find(text, timeout=3)) - element = self.find_elements_by_text(text, tag_name=tag_name)[0] - return self.__add_sync_methods(element) + self.loop.run_until_complete( + self.page.find(text, timeout=timeout) + ) + elements = [] + with suppress(Exception): + elements = self.find_elements_by_text(text, tag_name=tag_name) + if elements: + return self.__add_sync_methods(elements[0]) + else: + early_failure = True failure = False try: + if early_failure: + raise Exception("Failed!") element = self.loop.run_until_complete( self.page.find( selector, best_match=best_match, timeout=timeout @@ -230,9 +248,11 @@ def __clear_input(self, element): ) def __click(self, element): - return ( + result = ( self.loop.run_until_complete(element.click_async()) ) + self.loop.run_until_complete(self.page.wait()) + return result def __flash(self, element): return ( @@ -250,9 +270,11 @@ def __highlight_overlay(self, element): ) def __mouse_click(self, element): - return ( + result = ( self.loop.run_until_complete(element.mouse_click_async()) ) + self.loop.run_until_complete(self.page.wait()) + return result def __mouse_drag(self, element, destination): return ( @@ -353,33 +375,51 @@ def __get_js_attributes(self, element): def tile_windows(self, windows=None, max_columns=0): """Tile windows and return the grid of tiled windows.""" + driver = self.driver + if hasattr(driver, "cdp_base"): + driver = driver.cdp_base return self.loop.run_until_complete( - self.driver.cdp_base.tile_windows(windows, max_columns) + driver.tile_windows(windows, max_columns) ) def get_all_cookies(self, *args, **kwargs): + driver = self.driver + if hasattr(driver, "cdp_base"): + driver = driver.cdp_base return self.loop.run_until_complete( - self.driver.cdp_base.cookies.get_all(*args, **kwargs) + driver.cookies.get_all(*args, **kwargs) ) def set_all_cookies(self, *args, **kwargs): + driver = self.driver + if hasattr(driver, "cdp_base"): + driver = driver.cdp_base return self.loop.run_until_complete( - self.driver.cdp_base.cookies.set_all(*args, **kwargs) + driver.cookies.set_all(*args, **kwargs) ) def save_cookies(self, *args, **kwargs): + driver = self.driver + if hasattr(driver, "cdp_base"): + driver = driver.cdp_base return self.loop.run_until_complete( - self.driver.cdp_base.cookies.save(*args, **kwargs) + driver.cookies.save(*args, **kwargs) ) def load_cookies(self, *args, **kwargs): + driver = self.driver + if hasattr(driver, "cdp_base"): + driver = driver.cdp_base return self.loop.run_until_complete( - self.driver.cdp_base.cookies.load(*args, **kwargs) + driver.cookies.load(*args, **kwargs) ) def clear_cookies(self, *args, **kwargs): + driver = self.driver + if hasattr(driver, "cdp_base"): + driver = driver.cdp_base return self.loop.run_until_complete( - self.driver.cdp_base.cookies.clear(*args, **kwargs) + driver.cookies.clear(*args, **kwargs) ) def sleep(self, seconds): @@ -408,17 +448,20 @@ def click(self, selector, timeout=settings.SMALL_TIMEOUT): self.__add_light_pause() element.click() self.__slow_mode_pause_if_set() + self.loop.run_until_complete(self.page.wait()) def click_active_element(self): self.loop.run_until_complete( self.page.evaluate("document.activeElement.click()") ) self.__slow_mode_pause_if_set() + self.loop.run_until_complete(self.page.wait()) def click_if_visible(self, selector): if self.is_element_visible(selector): self.find_element(selector).click() self.__slow_mode_pause_if_set() + self.loop.run_until_complete(self.page.wait()) def mouse_click(self, selector, timeout=settings.SMALL_TIMEOUT): """(Attempt simulating a mouse click)""" @@ -427,6 +470,7 @@ def mouse_click(self, selector, timeout=settings.SMALL_TIMEOUT): self.__add_light_pause() element.mouse_click() self.__slow_mode_pause_if_set() + self.loop.run_until_complete(self.page.wait()) def nested_click(self, parent_selector, selector): """ @@ -436,6 +480,7 @@ def nested_click(self, parent_selector, selector): element = self.find_element(parent_selector) element.query_selector(selector).mouse_click() self.__slow_mode_pause_if_set() + self.loop.run_until_complete(self.page.wait()) def get_nested_element(self, parent_selector, selector): """(Can be used to find an element inside an iframe)""" @@ -483,6 +528,7 @@ def send_keys(self, selector, text, timeout=settings.SMALL_TIMEOUT): text = text[:-1] + "\r\n" element.send_keys(text) self.__slow_mode_pause_if_set() + self.loop.run_until_complete(self.page.wait()) def press_keys(self, selector, text, timeout=settings.SMALL_TIMEOUT): """Similar to send_keys(), but presses keys at human speed.""" @@ -499,6 +545,7 @@ def press_keys(self, selector, text, timeout=settings.SMALL_TIMEOUT): element.send_keys("\r\n") time.sleep(0.0375) self.__slow_mode_pause_if_set() + self.loop.run_until_complete(self.page.wait()) def type(self, selector, text, timeout=settings.SMALL_TIMEOUT): """Similar to send_keys(), but clears the text field first.""" @@ -510,6 +557,7 @@ def type(self, selector, text, timeout=settings.SMALL_TIMEOUT): text = text[:-1] + "\r\n" element.send_keys(text) self.__slow_mode_pause_if_set() + self.loop.run_until_complete(self.page.wait()) def evaluate(self, expression): """Run a JavaScript expression and return the result.""" @@ -760,6 +808,10 @@ def get_element_html(self, selector): ) ) + def set_locale(self, locale): + """(Settings will take effect on the next page load)""" + self.loop.run_until_complete(self.page.set_locale(locale)) + def set_attributes(self, selector, attribute, value): """This method uses JavaScript to set/update a common attribute. All matching selectors from querySelectorAll() are used. diff --git a/seleniumbase/fixtures/base_case.py b/seleniumbase/fixtures/base_case.py index 2d37b533135..90399539e03 100644 --- a/seleniumbase/fixtures/base_case.py +++ b/seleniumbase/fixtures/base_case.py @@ -4099,6 +4099,7 @@ def get_new_driver( or not any(os.scandir(user_data_dir)) ) and self.browser == "chrome" + and shared_utils.is_chrome_130_or_newer(binary_location) ): import tempfile if not user_data_dir: @@ -4108,7 +4109,7 @@ def get_new_driver( try: decoy_driver = browser_launcher.get_driver( browser_name=browser_name, - headless=headless, + headless=False, locale_code=locale_code, use_grid=use_grid, protocol=protocol, @@ -4134,7 +4135,7 @@ def get_new_driver( log_cdp_events=log_cdp_events, no_sandbox=no_sandbox, disable_gpu=disable_gpu, - headless1=headless1, + headless1=False, headless2=True, incognito=incognito, guest_mode=guest_mode, @@ -4147,12 +4148,12 @@ def get_new_driver( host_resolver_rules=host_resolver_rules, block_images=block_images, do_not_track=do_not_track, - chromium_arg=chromium_arg, + chromium_arg="decoy", firefox_arg=firefox_arg, firefox_pref=firefox_pref, user_data_dir=user_data_dir, - extension_zip=extension_zip, - extension_dir=extension_dir, + extension_zip=None, + extension_dir=None, disable_features=disable_features, binary_location=binary_location, driver_version=driver_version, @@ -4166,12 +4167,13 @@ def get_new_driver( device_pixel_ratio=d_p_r, browser=browser_name, ) - time.sleep(0.555) + time.sleep(0.2) except Exception: pass finally: with suppress(Exception): decoy_driver.quit() + time.sleep(0.1) # Launch a web browser new_driver = browser_launcher.get_driver( browser_name=browser_name, @@ -4513,8 +4515,15 @@ def save_cookies(self, name="cookies.txt"): cookies_file.writelines(json_cookies) cookies_file.close() - def load_cookies(self, name="cookies.txt"): - """Loads the page cookies from the "saved_cookies" folder.""" + def load_cookies(self, name="cookies.txt", expiry=False): + """ + Loads the page cookies from the "saved_cookies" folder. + Usage for setting expiry: + If expiry == 0 or False: Delete "expiry". + If expiry == -1 (or < 0): Do not modify "expiry". + If expiry > 0: Set "expiry" to expiry minutes in the future. + If expiry == True: Set "expiry" to 24 hours in the future. + """ cookies = self.get_saved_cookies(name) self.wait_for_ready_state_complete() origin = self.get_origin() @@ -4523,8 +4532,14 @@ def load_cookies(self, name="cookies.txt"): if "domain" in cookie: if cookie["domain"] not in origin: cookie["domain"] = trim_origin - if "expiry" in cookie: + if "expiry" in cookie and (not expiry or expiry == 0): del cookie["expiry"] + elif isinstance(expiry, (int, float)) and expiry < 0: + pass + elif isinstance(expiry, (int, float)) and expiry > 0: + cookie["expiry"] = int(time.time()) + int(expiry * 60.0) + elif expiry: + cookie["expiry"] = int(time.time()) + 86400 self.driver.add_cookie(cookie) def delete_all_cookies(self): @@ -4585,18 +4600,57 @@ def get_cookie(self, name): def get_cookies(self): return self.driver.get_cookies() - def add_cookie(self, cookie_dict): + def add_cookie(self, cookie_dict, expiry=False): """Usage examples: self.add_cookie({'name': 'foo', 'value': 'bar'}) self.add_cookie({'name': 'foo', 'value': 'bar', 'path': '/'}) self.add_cookie({'name': 'foo', 'value': 'bar', 'secure': True}) self.add_cookie({'name': 'foo', 'value': 'bar', 'sameSite': 'Strict'}) + Usage for setting expiry: + If expiry == 0 or False: Delete "expiry". + If expiry == -1 (or < 0): Do not modify "expiry". + If expiry > 0: Set "expiry" to expiry minutes in the future. + If expiry == True: Set "expiry" to 24 hours in the future. """ + cookie = cookie_dict + if "domain" in cookie: + origin = self.get_origin() + trim_origin = origin.split("://")[-1] + if cookie["domain"] not in origin: + cookie["domain"] = trim_origin + if "expiry" in cookie and (not expiry or expiry == 0): + del cookie["expiry"] + elif isinstance(expiry, (int, float)) and expiry < 0: + pass + elif isinstance(expiry, (int, float)) and expiry > 0: + cookie["expiry"] = int(time.time()) + int(expiry * 60.0) + elif expiry: + cookie["expiry"] = int(time.time()) + 86400 self.driver.add_cookie(cookie_dict) - def add_cookies(self, cookies): - for cookie_dict in cookies: - self.driver.add_cookie(cookie_dict) + def add_cookies(self, cookies, expiry=False): + """ + Usage for setting expiry: + If expiry == 0 or False: Delete "expiry". + If expiry == -1 (or < 0): Do not modify "expiry". + If expiry > 0: Set "expiry" to expiry minutes in the future. + If expiry == True: Set "expiry" to 24 hours in the future. + """ + origin = self.get_origin() + trim_origin = origin.split("://")[-1] + for cookie in cookies: + if "domain" in cookie: + if cookie["domain"] not in origin: + cookie["domain"] = trim_origin + if "expiry" in cookie and (not expiry or expiry == 0): + del cookie["expiry"] + elif isinstance(expiry, (int, float)) and expiry < 0: + pass + elif isinstance(expiry, (int, float)) and expiry > 0: + cookie["expiry"] = int(time.time()) + int(expiry * 60.0) + elif expiry: + cookie["expiry"] = int(time.time()) + 86400 + self.driver.add_cookie(cookie) def __set_esc_skip(self): if hasattr(self, "esc_end") and self.esc_end: diff --git a/seleniumbase/fixtures/shared_utils.py b/seleniumbase/fixtures/shared_utils.py index d7e85f77e89..dbfd8ac7064 100644 --- a/seleniumbase/fixtures/shared_utils.py +++ b/seleniumbase/fixtures/shared_utils.py @@ -4,6 +4,7 @@ import platform import sys import time +from contextlib import suppress from seleniumbase import config as sb_config from seleniumbase.fixtures import constants @@ -99,6 +100,23 @@ def is_cdp_swap_needed(driver): ) +def is_chrome_130_or_newer(self, binary_location=None): + from seleniumbase.core import detect_b_ver + + """Due to changes in Chrome-130, UC Mode freezes at start-up + unless the user-data-dir already exists and is populated.""" + with suppress(Exception): + if not binary_location: + ver = detect_b_ver.get_browser_version_from_os("google-chrome") + else: + ver = detect_b_ver.get_browser_version_from_binary( + binary_location + ) + if ver and len(ver) > 3 and int(ver.split(".")[0]) >= 130: + return True + return False + + def format_exc(exception, message): """Formats an exception message to make the output cleaner.""" from selenium.common.exceptions import ElementNotVisibleException diff --git a/seleniumbase/plugins/driver_manager.py b/seleniumbase/plugins/driver_manager.py index 192e93f71d4..3f0e8e83e41 100644 --- a/seleniumbase/plugins/driver_manager.py +++ b/seleniumbase/plugins/driver_manager.py @@ -233,6 +233,7 @@ def Driver( wire (bool): Shortcut / Duplicate of "use_wire". pls (str): Shortcut / Duplicate of "page_load_strategy". """ + from contextlib import suppress from seleniumbase import config as sb_config from seleniumbase.config import settings from seleniumbase.fixtures import constants @@ -800,6 +801,7 @@ def Driver( or not any(os.scandir(user_data_dir)) ) and browser == "chrome" + and shared_utils.is_chrome_130_or_newer(binary_location) ): import tempfile import time @@ -849,12 +851,12 @@ def Driver( host_resolver_rules=host_resolver_rules, block_images=block_images, do_not_track=do_not_track, - chromium_arg=chromium_arg, + chromium_arg="decoy", firefox_arg=firefox_arg, firefox_pref=firefox_pref, user_data_dir=user_data_dir, - extension_zip=extension_zip, - extension_dir=extension_dir, + extension_zip=None, + extension_dir=None, disable_features=disable_features, binary_location=binary_location, driver_version=driver_version, @@ -868,14 +870,13 @@ def Driver( device_pixel_ratio=d_p_r, browser=browser_name, ) - time.sleep(0.555) + time.sleep(0.2) except Exception: pass finally: - try: + with suppress(Exception): decoy_driver.quit() - except Exception: - pass + time.sleep(0.1) driver = browser_launcher.get_driver( browser_name=browser_name, diff --git a/seleniumbase/undetected/cdp_driver/cdp_util.py b/seleniumbase/undetected/cdp_driver/cdp_util.py index 1307343625a..afa65b6b528 100644 --- a/seleniumbase/undetected/cdp_driver/cdp_util.py +++ b/seleniumbase/undetected/cdp_driver/cdp_util.py @@ -5,6 +5,7 @@ import time import types import typing +from seleniumbase.fixtures import shared_utils from typing import Optional, List, Union, Callable from .element import Element from .browser import Browser @@ -92,30 +93,40 @@ async def start( async def start_async(*args, **kwargs) -> Browser: headless = False - if "headless" in kwargs: - headless = kwargs["headless"] - decoy_args = kwargs - decoy_args["headless"] = True - driver = await start(**decoy_args) - kwargs["headless"] = headless - kwargs["user_data_dir"] = driver.config.user_data_dir - driver.stop() # Due to Chrome-130, must stop & start - time.sleep(0.15) + binary_location = None + if "browser_executable_path" in kwargs: + binary_location = kwargs["browser_executable_path"] + if shared_utils.is_chrome_130_or_newer(binary_location): + if "headless" in kwargs: + headless = kwargs["headless"] + decoy_args = kwargs + decoy_args["headless"] = True + driver = await start(**decoy_args) + kwargs["headless"] = headless + kwargs["user_data_dir"] = driver.config.user_data_dir + time.sleep(0.2) + driver.stop() # Due to Chrome-130, must stop & start + time.sleep(0.1) return await start(*args, **kwargs) def start_sync(*args, **kwargs) -> Browser: loop = asyncio.get_event_loop() headless = False - if "headless" in kwargs: - headless = kwargs["headless"] - decoy_args = kwargs - decoy_args["headless"] = True - driver = loop.run_until_complete(start(**decoy_args)) - kwargs["headless"] = headless - kwargs["user_data_dir"] = driver.config.user_data_dir - driver.stop() # Due to Chrome-130, must stop & start - time.sleep(0.15) + binary_location = None + if "browser_executable_path" in kwargs: + binary_location = kwargs["browser_executable_path"] + if shared_utils.is_chrome_130_or_newer(binary_location): + if "headless" in kwargs: + headless = kwargs["headless"] + decoy_args = kwargs + decoy_args["headless"] = True + driver = loop.run_until_complete(start(**decoy_args)) + kwargs["headless"] = headless + kwargs["user_data_dir"] = driver.config.user_data_dir + time.sleep(0.2) + driver.stop() # Due to Chrome-130, must stop & start + time.sleep(0.1) return loop.run_until_complete(start(*args, **kwargs)) diff --git a/seleniumbase/undetected/cdp_driver/config.py b/seleniumbase/undetected/cdp_driver/config.py index 37c86892970..b5641eb8e3e 100644 --- a/seleniumbase/undetected/cdp_driver/config.py +++ b/seleniumbase/undetected/cdp_driver/config.py @@ -105,9 +105,13 @@ def __init__( "--remote-allow-origins=*", "--no-first-run", "--no-service-autorun", + "--disable-auto-reload", "--no-default-browser-check", "--homepage=about:blank", "--no-pings", + "--wm-window-animations-disabled", + "--animation-duration-scale=0", + "--enable-privacy-sandbox-ads-apis", "--safebrowsing-disable-download-protection", '--simulate-outdated-no-au="Tue, 31 Dec 2099 23:59:59 GMT"', "--password-store=basic", @@ -118,10 +122,12 @@ def __init__( "--disable-prompt-on-repost", "--disable-password-generation", "--disable-ipc-flooding-protection", + "--disable-background-timer-throttling", "--disable-search-engine-choice-screen", "--disable-backgrounding-occluded-windows", "--disable-client-side-phishing-detection", "--disable-top-sites", + "--disable-translate", "--disable-renderer-backgrounding", "--disable-background-networking", "--disable-dev-shm-usage", diff --git a/seleniumbase/undetected/cdp_driver/connection.py b/seleniumbase/undetected/cdp_driver/connection.py index 81c519b7bca..4ecfbe5624d 100644 --- a/seleniumbase/undetected/cdp_driver/connection.py +++ b/seleniumbase/undetected/cdp_driver/connection.py @@ -9,6 +9,7 @@ import types from asyncio import iscoroutine, iscoroutinefunction from typing import ( + Optional, Generator, Union, Awaitable, @@ -336,6 +337,10 @@ async def wait(self, t: Union[int, float] = None): # No listener created yet. pass + async def set_locale(self, locale: Optional[str] = None): + """Sets the Language Locale code via set_user_agent_override.""" + await self.send(cdp.network.set_user_agent_override("", locale)) + def __getattr__(self, item): """:meta private:""" try: diff --git a/seleniumbase/undetected/cdp_driver/tab.py b/seleniumbase/undetected/cdp_driver/tab.py index 6cdf9b8334b..96bdbade2b6 100644 --- a/seleniumbase/undetected/cdp_driver/tab.py +++ b/seleniumbase/undetected/cdp_driver/tab.py @@ -323,7 +323,7 @@ async def select_all( async def get( self, - url="chrome://welcome", + url="about:blank", new_tab: bool = False, new_window: bool = False, ): From fbe10bb68b524885dc12b5cbc7e7471547f1c842 Mon Sep 17 00:00:00 2001 From: Michael Mintz Date: Sat, 26 Oct 2024 16:00:40 -0400 Subject: [PATCH 2/4] Update examples for CDP Mode / UC Mode --- examples/cdp_mode/raw_async.py | 24 ++++++++++++++++++------ examples/cdp_mode/raw_bestwestern.py | 14 +++++++------- examples/cdp_mode/raw_cdp.py | 13 +++++++------ examples/cdp_mode/raw_cdp_with_sb.py | 8 +++----- examples/cdp_mode/raw_footlocker.py | 2 +- examples/cdp_mode/raw_hyatt.py | 8 ++++---- examples/cdp_mode/raw_pokemon.py | 6 +++--- examples/cdp_mode/raw_priceline.py | 14 +++++++------- examples/cdp_mode/raw_req_async.py | 7 +++---- examples/cdp_mode/raw_req_sb.py | 7 +++---- examples/raw_gui_click.py | 2 +- examples/raw_pyautogui.py | 2 +- examples/raw_recaptcha.py | 2 -- examples/verify_undetected.py | 2 +- 14 files changed, 59 insertions(+), 52 deletions(-) diff --git a/examples/cdp_mode/raw_async.py b/examples/cdp_mode/raw_async.py index 779afd3d9e2..6835286498e 100644 --- a/examples/cdp_mode/raw_async.py +++ b/examples/cdp_mode/raw_async.py @@ -1,12 +1,15 @@ import asyncio import time +from contextlib import suppress from seleniumbase.core import sb_cdp from seleniumbase.undetected import cdp_driver async def main(): driver = await cdp_driver.cdp_util.start_async() - page = await driver.get("https://www.priceline.com/") + page = await driver.get("about:blank") + await page.set_locale("en") + await page.get("https://www.priceline.com/") time.sleep(3) print(await page.evaluate("document.title")) element = await page.select('[data-testid*="endLocation"]') @@ -22,24 +25,33 @@ async def main(): # Call everything without using async / await driver = cdp_driver.cdp_util.start_sync() - page = loop.run_until_complete(driver.get("https://www.pokemon.com/us")) + page = loop.run_until_complete(driver.get("about:blank")) + loop.run_until_complete(page.set_locale("en")) + loop.run_until_complete(page.get("https://www.pokemon.com/us")) time.sleep(3) print(loop.run_until_complete(page.evaluate("document.title"))) + with suppress(Exception): + selector = "button#onetrust-reject-all-handler" + element = loop.run_until_complete(page.select(selector, timeout=1)) + loop.run_until_complete(element.click_async()) + time.sleep(1) element = loop.run_until_complete(page.select("span.icon_pokeball")) loop.run_until_complete(element.click_async()) - time.sleep(1) + time.sleep(1.5) print(loop.run_until_complete(page.evaluate("document.title"))) time.sleep(1) # Call CDP methods via the simplified CDP API - page = loop.run_until_complete(driver.get("https://www.priceline.com/")) + page = loop.run_until_complete(driver.get("about:blank")) sb = sb_cdp.CDPMethods(loop, page, driver) + sb.set_locale("en") + sb.open("https://www.priceline.com/") sb.sleep(3) sb.internalize_links() # Don't open links in a new tab sb.click("#link_header_nav_experiences") - sb.sleep(2) + sb.sleep(2.5) sb.remove_elements("msm-cookie-banner") - sb.sleep(1) + sb.sleep(1.5) sb.press_keys('input[data-test-id*="search"]', "Amsterdam") sb.sleep(2) sb.click('span[data-test-id*="autocomplete"]') diff --git a/examples/cdp_mode/raw_bestwestern.py b/examples/cdp_mode/raw_bestwestern.py index a4288bbdcbe..da2dfddb51a 100644 --- a/examples/cdp_mode/raw_bestwestern.py +++ b/examples/cdp_mode/raw_bestwestern.py @@ -3,20 +3,20 @@ with SB(uc=True, test=True, locale_code="en") as sb: url = "https://www.bestwestern.com/en_US.html" sb.activate_cdp_mode(url) - sb.sleep(1.5) + sb.sleep(2.5) sb.cdp.click_if_visible("div.onetrust-close-btn-handler") - sb.sleep(0.5) + sb.sleep(1) sb.cdp.click("input#destination-input") - sb.sleep(1.5) + sb.sleep(2) location = "Palm Springs, CA, USA" sb.cdp.press_keys("input#destination-input", location) - sb.sleep(0.6) + sb.sleep(1) sb.cdp.click("ul#google-suggestions li") - sb.sleep(0.6) + sb.sleep(1) sb.cdp.click("button#btn-modify-stay-update") - sb.sleep(1.5) - sb.cdp.click("label#available-label") sb.sleep(4) + sb.cdp.click("label#available-label") + sb.sleep(2.5) print("Best Western Hotels in %s:" % location) summary_details = sb.cdp.get_text("#summary-details-column") dates = summary_details.split("ROOM")[0].split("DATES")[-1].strip() diff --git a/examples/cdp_mode/raw_cdp.py b/examples/cdp_mode/raw_cdp.py index 1d70cb66e1e..54d6b28c01d 100644 --- a/examples/cdp_mode/raw_cdp.py +++ b/examples/cdp_mode/raw_cdp.py @@ -8,21 +8,22 @@ @decorators.print_runtime("CDP Priceline Example") def main(): - url = "https://www.priceline.com/" + url0 = "about:blank" # Set Locale code from here first + url1 = "https://www.priceline.com/" # (The "real" URL) loop = asyncio.new_event_loop() driver = cdp_driver.cdp_util.start_sync() - page = loop.run_until_complete(driver.get(url)) + page = loop.run_until_complete(driver.get(url0)) sb = sb_cdp.CDPMethods(loop, page, driver) + sb.set_locale("en") # This test expects English locale + sb.open(url1) sb.sleep(3) sb.internalize_links() # Don't open links in a new tab sb.click("#link_header_nav_experiences") - sb.sleep(2) + sb.sleep(2.5) sb.remove_elements("msm-cookie-banner") - sb.sleep(1) + sb.sleep(1.5) location = "Amsterdam" sb.press_keys('input[data-test-id*="search"]', location) - sb.sleep(1) - sb.click('input[data-test-id*="search"]') sb.sleep(2) sb.click('span[data-test-id*="autocomplete"]') sb.sleep(5) diff --git a/examples/cdp_mode/raw_cdp_with_sb.py b/examples/cdp_mode/raw_cdp_with_sb.py index 7ed71eabe0b..6b1d74a1f59 100644 --- a/examples/cdp_mode/raw_cdp_with_sb.py +++ b/examples/cdp_mode/raw_cdp_with_sb.py @@ -3,19 +3,17 @@ from seleniumbase import SB -with SB(uc=True, test=True) as sb: +with SB(uc=True, test=True, locale_code="en") as sb: url = "https://www.priceline.com/" sb.activate_cdp_mode(url) sb.sleep(3) sb.internalize_links() # Don't open links in a new tab sb.click("#link_header_nav_experiences") - sb.sleep(2) + sb.sleep(2.5) sb.remove_elements("msm-cookie-banner") - sb.sleep(1) + sb.sleep(1.5) location = "Amsterdam" sb.press_keys('input[data-test-id*="search"]', location) - sb.sleep(1) - sb.click('input[data-test-id*="search"]') sb.sleep(2) sb.click('span[data-test-id*="autocomplete"]') sb.sleep(5) diff --git a/examples/cdp_mode/raw_footlocker.py b/examples/cdp_mode/raw_footlocker.py index 33463b0cf2a..93dcf37dac3 100644 --- a/examples/cdp_mode/raw_footlocker.py +++ b/examples/cdp_mode/raw_footlocker.py @@ -12,7 +12,7 @@ sb.cdp.press_keys('input[aria-label="Search"]', search) sb.sleep(2) sb.cdp.mouse_click('ul[id*="typeahead"] li div') - sb.sleep(2) + sb.sleep(3) elements = sb.cdp.select_all("a.ProductCard-link") if elements: print('**** Found results for "%s": ****' % search) diff --git a/examples/cdp_mode/raw_hyatt.py b/examples/cdp_mode/raw_hyatt.py index ecdf3fc4dd6..38d372ae890 100644 --- a/examples/cdp_mode/raw_hyatt.py +++ b/examples/cdp_mode/raw_hyatt.py @@ -3,13 +3,13 @@ with SB(uc=True, test=True, locale_code="en") as sb: url = "https://www.hyatt.com/" sb.activate_cdp_mode(url) - sb.sleep(1.5) + sb.sleep(2) sb.cdp.click_if_visible('button[aria-label="Close"]') - sb.sleep(0.5) + sb.sleep(1) sb.cdp.click('span:contains("Explore")') sb.sleep(1) sb.cdp.click('a:contains("Hotels & Resorts")') - sb.sleep(2.5) + sb.sleep(3) location = "Anaheim, CA, USA" sb.cdp.press_keys("input#searchbox", location) sb.sleep(1) @@ -18,7 +18,7 @@ sb.cdp.click('div.hotel-card-footer button') sb.sleep(1) sb.cdp.click('button[data-locator="find-hotels"]') - sb.sleep(4) + sb.sleep(5) hotel_names = sb.cdp.select_all( 'div[data-booking-status="BOOKABLE"] [class*="HotelCard_header"]' ) diff --git a/examples/cdp_mode/raw_pokemon.py b/examples/cdp_mode/raw_pokemon.py index 6706dcdb5fa..c7d2d9de057 100644 --- a/examples/cdp_mode/raw_pokemon.py +++ b/examples/cdp_mode/raw_pokemon.py @@ -3,15 +3,15 @@ with SB(uc=True, test=True, locale_code="en") as sb: url = "https://www.pokemon.com/us" sb.activate_cdp_mode(url) - sb.sleep(1.5) + sb.sleep(3) sb.cdp.click_if_visible("button#onetrust-reject-all-handler") - sb.sleep(0.5) + sb.sleep(1) sb.cdp.click('a[href="https://www.pokemon.com/us/pokedex/"]') sb.sleep(1) sb.cdp.click('b:contains("Show Advanced Search")') sb.sleep(1) sb.cdp.click('span[data-type="type"][data-value="electric"]') - sb.sleep(0.5) + sb.sleep(1) sb.cdp.click("a#advSearch") sb.sleep(1) sb.cdp.click('img[src*="img/pokedex/detail/025.png"]') diff --git a/examples/cdp_mode/raw_priceline.py b/examples/cdp_mode/raw_priceline.py index 38fc9482c8b..b79f4de1e46 100644 --- a/examples/cdp_mode/raw_priceline.py +++ b/examples/cdp_mode/raw_priceline.py @@ -4,7 +4,7 @@ window_handle = sb.driver.current_window_handle url = "https://www.priceline.com" sb.activate_cdp_mode(url) - sb.sleep(1) + sb.sleep(3) sb.cdp.click('input[name="endLocation"]') sb.sleep(1) location = "Portland, OR, USA" @@ -12,18 +12,18 @@ sb.cdp.press_keys('input[name="endLocation"]', location) sb.sleep(1) sb.click_if_visible('input[name="endLocation"]') - sb.sleep(1) - sb.cdp.click("Oregon, United States") - sb.sleep(1) + sb.sleep(0.5) + sb.cdp.click(selection) + sb.sleep(1.5) sb.cdp.click('button[aria-label="Dismiss calendar"]') - sb.sleep(3) + sb.sleep(5) sb.connect() if len(sb.driver.window_handles) > 1: sb.switch_to_window(window_handle) sb.driver.close() - sb.sleep(0.1) + sb.sleep(0.2) sb.switch_to_newest_window() - sb.sleep(1) + sb.sleep(0.6) hotel_names = sb.find_elements('a[data-autobot-element-id*="HOTEL_NAME"]') hotel_prices = sb.find_elements('span[font-size="4,,,5"]') print("Priceline Hotels in %s:" % location) diff --git a/examples/cdp_mode/raw_req_async.py b/examples/cdp_mode/raw_req_async.py index ea2150fd90a..1f5abffacc4 100644 --- a/examples/cdp_mode/raw_req_async.py +++ b/examples/cdp_mode/raw_req_async.py @@ -9,12 +9,11 @@ class RequestPausedTest(): async def request_paused_handler(self, event, tab): r = event.request is_image = ".png" in r.url or ".jpg" in r.url or ".gif" in r.url - is_blocked = True if is_image else False - if not is_blocked: + if not is_image: # Let the data through tab.feed_cdp( mycdp.fetch.continue_request(request_id=event.request_id) ) - else: + else: # Block the data (images) TIMED_OUT = mycdp.network.ErrorReason.TIMED_OUT s = f"BLOCKING | {r.method} | {r.url}" print(f" >>> ------------\n{s}") @@ -23,7 +22,7 @@ async def request_paused_handler(self, event, tab): ) async def start_test(self): - driver = await cdp_driver.cdp_util.start_async(incognito=True) + driver = await cdp_driver.cdp_util.start_async() tab = await driver.get("about:blank") tab.add_handler(mycdp.fetch.RequestPaused, self.request_paused_handler) url = "https://gettyimages.com/photos/firefly-2003-nathan" diff --git a/examples/cdp_mode/raw_req_sb.py b/examples/cdp_mode/raw_req_sb.py index 2528f5c599d..0de2ceae450 100644 --- a/examples/cdp_mode/raw_req_sb.py +++ b/examples/cdp_mode/raw_req_sb.py @@ -6,17 +6,16 @@ async def request_paused_handler(event, tab): r = event.request is_image = ".png" in r.url or ".jpg" in r.url or ".gif" in r.url - is_blocked = True if is_image else False - if not is_blocked: + if not is_image: # Let the data through tab.feed_cdp(mycdp.fetch.continue_request(request_id=event.request_id)) - else: + else: # Block the data (images) TIMED_OUT = mycdp.network.ErrorReason.TIMED_OUT s = f"BLOCKING | {r.method} | {r.url}" print(f" >>> ------------\n{s}") tab.feed_cdp(mycdp.fetch.fail_request(event.request_id, TIMED_OUT)) -with SB(uc=True, test=True, locale_code="en", incognito=True) as sb: +with SB(uc=True, test=True, locale_code="en") as sb: sb.activate_cdp_mode("about:blank") sb.cdp.add_handler(mycdp.fetch.RequestPaused, request_paused_handler) url = "https://gettyimages.com/photos/firefly-2003-nathan" diff --git a/examples/raw_gui_click.py b/examples/raw_gui_click.py index a2502a800df..acb2d12304a 100644 --- a/examples/raw_gui_click.py +++ b/examples/raw_gui_click.py @@ -8,7 +8,7 @@ with SB(uc=True, test=True, rtf=True, agent=agent) as sb: url = "https://gitlab.com/users/sign_in" - sb.activate_cdp_mode(url) + sb.uc_open_with_reconnect(url) sb.uc_gui_click_captcha() # Only if needed sb.assert_element('label[for="user_login"]') sb.assert_element('input[data-testid*="username"]') diff --git a/examples/raw_pyautogui.py b/examples/raw_pyautogui.py index 7bc4eb27926..bbf8b4f57f7 100644 --- a/examples/raw_pyautogui.py +++ b/examples/raw_pyautogui.py @@ -8,7 +8,7 @@ with SB(uc=True, test=True, rtf=True, agent=agent) as sb: url = "https://gitlab.com/users/sign_in" - sb.uc_open_with_reconnect(url, 4) + sb.uc_open_with_reconnect(url) sb.uc_gui_handle_captcha() # Only if needed sb.assert_element('label[for="user_login"]') sb.assert_element('input[data-testid*="username"]') diff --git a/examples/raw_recaptcha.py b/examples/raw_recaptcha.py index da37b648a9a..1073ea4309f 100644 --- a/examples/raw_recaptcha.py +++ b/examples/raw_recaptcha.py @@ -2,7 +2,6 @@ with SB(uc=True, test=True) as sb: url = "https://seleniumbase.io/apps/recaptcha" - sb.activate_cdp_mode(url) sb.uc_gui_handle_captcha() # Try with TAB + SPACEBAR sb.assert_element("img#captcha-success", timeout=3) sb.set_messenger_theme(location="top_left") @@ -10,7 +9,6 @@ with SB(uc=True, test=True) as sb: url = "https://seleniumbase.io/apps/recaptcha" - sb.activate_cdp_mode(url) sb.uc_gui_click_captcha('iframe[src*="/recaptcha/"]') sb.assert_element("img#captcha-success", timeout=3) sb.set_messenger_theme(location="top_left") diff --git a/examples/verify_undetected.py b/examples/verify_undetected.py index 96a88633fc1..e00ab35ac15 100644 --- a/examples/verify_undetected.py +++ b/examples/verify_undetected.py @@ -10,7 +10,7 @@ def test_browser_is_undetected(self): url = "https://gitlab.com/users/sign_in" if not self.undetectable: self.get_new_driver(undetectable=True) - self.activate_cdp_mode(url) + self.uc_open_with_reconnect(url) self.uc_gui_click_captcha() self.assert_text("Username", '[for="user_login"]', timeout=3) self.post_message("SeleniumBase wasn't detected", duration=4) From 15a401eefa52961e1eaa574b0fbbfad96dae0040 Mon Sep 17 00:00:00 2001 From: Michael Mintz Date: Sat, 26 Oct 2024 16:01:21 -0400 Subject: [PATCH 3/4] Update the documentation --- examples/cdp_mode/ReadMe.md | 31 ++++++++++++++++--------------- help_docs/method_summary.md | 6 +++--- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/examples/cdp_mode/ReadMe.md b/examples/cdp_mode/ReadMe.md index 2c2315b5229..bf6ca1ea82b 100644 --- a/examples/cdp_mode/ReadMe.md +++ b/examples/cdp_mode/ReadMe.md @@ -2,7 +2,7 @@ ## [](https://github.com/seleniumbase/SeleniumBase/) CDP Mode 🐙 -🐙 SeleniumBase CDP Mode (Chrome Devtools Protocol Mode) is a special mode inside of SeleniumBase UC Mode that lets bots appear human while controlling the browser with the CDP-Driver. Although regular UC Mode can't perform WebDriver actions while the driver is disconnected from the browser, the CDP-Driver can still perform actions (while maintaining its cover). +🐙 SeleniumBase CDP Mode (Chrome Devtools Protocol Mode) is a special mode inside of SeleniumBase UC Mode that lets bots appear human while controlling the browser with the CDP-Driver. Although regular UC Mode can't perform WebDriver actions while the driver is disconnected from the browser, the CDP-Driver can still perform actions while maintaining its cover. (For Python 3.11 or newer!) 👤 UC Mode avoids bot-detection by first disconnecting WebDriver from the browser at strategic times, calling special PyAutoGUI methods to bypass CAPTCHAs (as needed), and finally reconnecting the driver afterwards so that WebDriver actions can be performed again. Although this approach works for bypassing simple CAPTCHAs, more flexibility is needed for bypassing bot-detection on websites with advanced protection. (That's where CDP Mode comes in.) @@ -74,15 +74,15 @@ from seleniumbase import SB with SB(uc=True, test=True, locale_code="en") as sb: url = "https://www.pokemon.com/us" sb.activate_cdp_mode(url) - sb.sleep(1.5) + sb.sleep(3) sb.cdp.click_if_visible("button#onetrust-reject-all-handler") - sb.sleep(0.5) + sb.sleep(1) sb.cdp.click('a[href="https://www.pokemon.com/us/pokedex/"]') sb.sleep(1) sb.cdp.click('b:contains("Show Advanced Search")') sb.sleep(1) sb.cdp.click('span[data-type="type"][data-value="electric"]') - sb.sleep(0.5) + sb.sleep(1) sb.cdp.click("a#advSearch") sb.sleep(1) sb.cdp.click('img[src*="img/pokedex/detail/025.png"]') @@ -130,13 +130,13 @@ from seleniumbase import SB with SB(uc=True, test=True, locale_code="en") as sb: url = "https://www.hyatt.com/" sb.activate_cdp_mode(url) - sb.sleep(1.5) + sb.sleep(2) sb.cdp.click_if_visible('button[aria-label="Close"]') - sb.sleep(0.5) + sb.sleep(1) sb.cdp.click('span:contains("Explore")') sb.sleep(1) sb.cdp.click('a:contains("Hotels & Resorts")') - sb.sleep(2.5) + sb.sleep(3) location = "Anaheim, CA, USA" sb.cdp.press_keys("input#searchbox", location) sb.sleep(1) @@ -145,7 +145,7 @@ with SB(uc=True, test=True, locale_code="en") as sb: sb.cdp.click('div.hotel-card-footer button') sb.sleep(1) sb.cdp.click('button[data-locator="find-hotels"]') - sb.sleep(4) + sb.sleep(5) hotel_names = sb.cdp.select_all( 'div[data-booking-status="BOOKABLE"] [class*="HotelCard_header"]' ) @@ -177,20 +177,20 @@ from seleniumbase import SB with SB(uc=True, test=True, locale_code="en") as sb: url = "https://www.bestwestern.com/en_US.html" sb.activate_cdp_mode(url) - sb.sleep(1.5) + sb.sleep(2.5) sb.cdp.click_if_visible("div.onetrust-close-btn-handler") - sb.sleep(0.5) + sb.sleep(1) sb.cdp.click("input#destination-input") - sb.sleep(1.5) + sb.sleep(2) location = "Palm Springs, CA, USA" sb.cdp.press_keys("input#destination-input", location) - sb.sleep(0.6) + sb.sleep(1) sb.cdp.click("ul#google-suggestions li") - sb.sleep(0.6) + sb.sleep(1) sb.cdp.click("button#btn-modify-stay-update") - sb.sleep(1.5) - sb.cdp.click("label#available-label") sb.sleep(4) + sb.cdp.click("label#available-label") + sb.sleep(2.5) print("Best Western Hotels in %s:" % location) summary_details = sb.cdp.get_text("#summary-details-column") dates = summary_details.split("ROOM")[0].split("DATES")[-1].strip() @@ -288,6 +288,7 @@ sb.cdp.get_document() sb.cdp.get_flattened_document() sb.cdp.get_element_attributes(selector) sb.cdp.get_element_html(selector) +sb.cdp.set_locale(locale) sb.cdp.set_attributes(selector, attribute, value) sb.cdp.internalize_links() sb.cdp.is_element_present(selector) diff --git a/help_docs/method_summary.md b/help_docs/method_summary.md index d79ba678b36..091f2bf71ef 100644 --- a/help_docs/method_summary.md +++ b/help_docs/method_summary.md @@ -349,7 +349,7 @@ self.save_page_source(name, folder=None) self.save_cookies(name="cookies.txt") -self.load_cookies(name="cookies.txt") +self.load_cookies(name="cookies.txt", expiry=False) self.delete_all_cookies() # Duplicates: self.clear_all_cookies() @@ -362,9 +362,9 @@ self.get_cookie(name) self.get_cookies() -self.add_cookie(cookie_dict) +self.add_cookie(cookie_dict, expiry=False) -self.add_cookies(cookies) +self.add_cookies(cookies, expiry=False) self.wait_for_ready_state_complete(timeout=None) From 1aaee3361e9738033f16ca4fdb7c0d303c8aefea Mon Sep 17 00:00:00 2001 From: Michael Mintz Date: Sat, 26 Oct 2024 16:01:36 -0400 Subject: [PATCH 4/4] Version 4.32.3 --- seleniumbase/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seleniumbase/__version__.py b/seleniumbase/__version__.py index eac1b105033..24c8669f810 100755 --- a/seleniumbase/__version__.py +++ b/seleniumbase/__version__.py @@ -1,2 +1,2 @@ # seleniumbase package -__version__ = "4.32.2" +__version__ = "4.32.3"