tidyverse · jonthegeek · Jul 9, 2024 · Jul 9, 2024 · Jul 11, 2024 · Jul 11, 2024
diff --git a/NEWS.md b/NEWS.md
@@ -1,6 +1,7 @@
 # rvest (development version)
 
 * New example vignette displays the same starwars data but rendered dynamically using JS, so you need to use `read_html_live()` to get the data.
+* The `click()` method for `LiveHTML` objects gains a `new_page` argument to deal with situations where a click loads a new web page (@jonthegeek, #405).
 
 # rvest 1.0.4
 

diff --git a/R/live.R b/R/live.R
@@ -92,7 +92,7 @@ LiveHTML <- R6::R6Class(
       self$session$Page$navigate(url, wait_ = FALSE)
       self$session$wait_for(p)
 
-      private$root_id <- self$session$DOM$getDocument(0)$root$nodeId
+      private$refresh_root()
     },
 
     #' @description Called when `print()`ed
@@ -129,10 +129,21 @@ LiveHTML <- R6::R6Class(
     #' @description Simulate a click on an HTML element.
     #' @param css CSS selector or xpath expression.
     #' @param n_clicks Number of clicks
-    click = function(css, n_clicks = 1) {
+    #' @param new_page Whether to wait for a new page to load, such as after
+    #'   clicking a link.
+    click = function(css, n_clicks = 1, new_page = FALSE) {
       private$check_active()
       check_number_whole(n_clicks, min = 1)
 
+      # Wait for new page, #405. NOTE(review): on.exit also waits if click fails.
+      if (new_page) {
+        p <- self$session$Page$loadEventFired(wait_ = FALSE)
+        on.exit({
+          self$session$wait_for(p)
+          private$refresh_root()
+        }, add = TRUE)
+      }
+
       # Implementation based on puppeteer as described in
       # https://medium.com/@aslushnikov/automating-clicks-in-chromium-a50e7f01d3fb
       # With code from https://github.com/puppeteer/puppeteer/blob/b53de4e0942e93c/packages/puppeteer-core/src/cdp/Input.ts#L431-L459
@@ -170,6 +181,7 @@ LiveHTML <- R6::R6Class(
           button = "left"
         )
       }
+
       invisible(self)
     },
 
@@ -224,6 +236,7 @@ LiveHTML <- R6::R6Class(
         deltaX = left,
         deltaY = top
       )
+
       invisible(self)
     },
 
@@ -268,14 +281,14 @@ LiveHTML <- R6::R6Class(
       if (new_chromote && !self$session$is_active()) {
         suppressMessages({
           self$session <- self$session$respawn()
-          private$root_id <- self$session$DOM$getDocument(0)$root$nodeId
+          private$refresh_root()
         })
       }
     },
 
     wait_for_selector = function(css, timeout = 5) {
       done <- now() + timeout
-      while(now() < done) {
+      while (now() < done) {
         nodes <- private$find_nodes(css)
         if (length(nodes) > 0) {
           return(nodes)
@@ -289,7 +302,22 @@ LiveHTML <- R6::R6Class(
     find_nodes = function(css, xpath) {
       check_exclusive(css, xpath)
       if (!missing(css)) {
-        unlist(self$session$DOM$querySelectorAll(private$root_id, css)$nodeIds)
+        node_ids <- try_fetch(
+          self$session$DOM$querySelectorAll(private$root_id, css)$nodeIds,
+          error = function(cnd) {
+            if (grepl("-32000", cnd_message(cnd))) {
+              cli::cli_abort(
+                c(
+                  "Can't find root node.",
+                  i = "Did you issue a {.code click()} without waiting for a {.arg new_page}?"
+                ),
+                class = "rvest_error-missing_node", parent = cnd
+              )
+            }
+            zap() # decline: errors without -32000 keep propagating, not NULL
+          }
+        )
+        unlist(node_ids)
       } else {
         search <- glue::glue("
           (function() {{
@@ -324,6 +352,10 @@ LiveHTML <- R6::R6Class(
     object_id = function(node_id) {
       # https://chromedevtools.github.io/devtools-protocol/tot/DOM/#method-resolveNode
       self$session$DOM$resolveNode(node_id)$object$objectId
+    },
+
+    refresh_root = function() { # re-fetch the document; cache its root nodeId
+      private$root_id <- self$session$DOM$getDocument(0)$root$nodeId
     }
   )
 )

diff --git a/man/LiveHTML.Rd b/man/LiveHTML.Rd
diff --git a/tests/testthat/_snaps/session.md b/tests/testthat/_snaps/session.md
@@ -7,12 +7,12 @@
       <session> https://hadley.nz/
         Status: 200
         Type:   text/html; charset=utf-8
-        Size:   821273
+        Size:   821905
     Code
       expect_true(is.session(s))
       s <- session_follow_link(s, css = "p a")
     Message
-      Navigating to <http://rstudio.com>.
+      Navigating to <https://posit.co>.
     Code
       session_history(s)
     Output

diff --git a/tests/testthat/html/navigate1.html b/tests/testthat/html/navigate1.html
@@ -0,0 +1,8 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <title>Navigate 1</title>
+</head>
+<body>
+  <a href="navigate2.html">Navigate to Page 2</a>
+</body>
diff --git a/tests/testthat/html/navigate2.html b/tests/testthat/html/navigate2.html
@@ -0,0 +1,8 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <title>Navigate 2</title>
+</head>
+<body>
+  <p>Success!</p>
+</body>
diff --git a/tests/testthat/test-live.R b/tests/testthat/test-live.R
@@ -50,6 +50,14 @@ test_that("can click a button", {
   expect_equal(html_text(html_element(sess, "p")), "double clicked")
 })
 
+test_that("can find elements after click that navigates", {
+  skip_if_no_chromote()
+  # The navigate1 fixture links to navigate2, whose <p> is read once loaded.
+  live_page <- read_html_live(html_test_path("navigate1"))
+  live_page$click("a", new_page = TRUE)
+  expect_equal(html_text2(html_element(live_page, "p")), "Success!")
+})
+
 test_that("can scroll in various ways", {
   skip_if_no_chromote()
 
@@ -88,6 +96,16 @@ test_that("can press special keys",{
   expect_equal(html_text(html_element(sess, "#keyInfo")), "]/BracketRight")
 })
 
+test_that("gracefully errors on missing root node", {
+  skip_if_no_chromote()
+  # NOTE(review): expects the cached root to be stale already -- timing-safe?
+  sess <- read_html_live(html_test_path("navigate1"))
+  sess$click("a")
+  expect_error(
+    html_element(sess, "p"),
+    class = "rvest_error-missing_node"
+  )
+})
 
 # as_key_desc -------------------------------------------------------------