diff --git a/R/encoding.R b/R/encoding.R index 8e265a0e..df622f3d 100644 --- a/R/encoding.R +++ b/R/encoding.R @@ -11,12 +11,12 @@ #' # A file with bad encoding included in the package #' path <- system.file("html-ex", "bad-encoding.html", package = "rvest") #' x <- read_html(path) -#' x %>% html_elements("p") %>% html_text() +#' x |> html_elements("p") |> html_text() #' #' html_encoding_guess(x) #' # Two valid encodings, only one of which is correct -#' read_html(path, encoding = "ISO-8859-1") %>% html_elements("p") %>% html_text() -#' read_html(path, encoding = "ISO-8859-2") %>% html_elements("p") %>% html_text() +#' read_html(path, encoding = "ISO-8859-1") |> html_elements("p") |> html_text() +#' read_html(path, encoding = "ISO-8859-2") |> html_elements("p") |> html_text() html_encoding_guess <- function(x) { check_installed("stringi") diff --git a/R/form.R b/R/form.R index d39f81f3..525b993d 100644 --- a/R/form.R +++ b/R/form.R @@ -22,11 +22,11 @@ #' html <- read_html("http://www.google.com") #' search <- html_form(html)[[1]] #' -#' search <- search %>% html_form_set(q = "My little pony", hl = "fr") +#' search <- search |> html_form_set(q = "My little pony", hl = "fr") #' #' # Or if you have a list of values, use !!! #' vals <- list(q = "web scraping", hl = "en") -#' search <- search %>% html_form_set(!!!vals) +#' search <- search |> html_form_set(!!!vals) #' #' # To submit and get result: #' \dontrun{ diff --git a/R/html.R b/R/html.R index 8f2aef77..f8098817 100644 --- a/R/html.R +++ b/R/html.R @@ -8,9 +8,9 @@ #' url <- "https://rvest.tidyverse.org/articles/starwars.html" #' html <- read_html(url) #' -#' html %>% -#' html_element("div") %>% -#' html_children() %>% +#' html |> +#' html_element("div") |> +#' html_children() |> #' html_name() #' @export #' @importFrom xml2 xml_name @@ -35,11 +35,11 @@ html_name <- function(x) { #'
This is an important paragraph
#' ") #' -#' html %>% html_element("h1") -#' html %>% html_elements("p") -#' html %>% html_elements(".important") -#' html %>% html_elements("#first") +#' html |> html_element("h1") +#' html |> html_elements("p") +#' html |> html_elements(".important") +#' html |> html_elements("#first") #' #' # html_element() vs html_elements() -------------------------------------- #' html <- minimal_html(" @@ -54,18 +54,18 @@ #'x y
") -#' x1 <- html %>% html_element("p") %>% html_text() -#' x2 <- html %>% html_element("p") %>% html_text2() +#' x1 <- html |> html_element("p") |> html_text() +#' x2 <- html |> html_element("p") |> html_text2() #' #' # When printed, non-breaking spaces look exactly like regular spaces #' x1 diff --git a/README.Rmd b/README.Rmd index c9cd5788..3811fb8e 100644 --- a/README.Rmd +++ b/README.Rmd @@ -50,21 +50,21 @@ starwars <- read_html("https://rvest.tidyverse.org/articles/starwars.html") # Then find elements that match a css selector or XPath expression # using html_elements(). In this example, each\nReleased: 1999 ...
@@ -57,23 +57,29 @@ films
#> [5] \nReleased: ...
#> [6] \nReleased: 1983 ...
#> [7] \nReleased: 2015- ...
+```
+
+``` r
# Then use html_element() to extract one element per film. Here
# we the title is given by the text inside This is an important paragraph x y ", .) %>%
- gsub("\n", " ", .) %>%
+ x |>
+ gsub("\r", "", .) |>
+ gsub("\n\n", " ", .) |>
+ gsub("\n", " ", .) |>
paste0(" ", ., "\nThe Empire Strikes Back\n
\n\nReturn of the Jedi\n
\n\nThe Force Awakens\n
\n
-title <- films %>%
- html_element("h2") %>%
+title <- films |>
+ html_element("h2") |>
html_text2()
title
#> [1] "The Phantom Menace" "Attack of the Clones"
#> [3] "Revenge of the Sith" "A New Hope"
#> [5] "The Empire Strikes Back" "Return of the Jedi"
#> [7] "The Force Awakens"
+```
+
+``` r
# Or use html_attr() to get data out of attributes. html_attr() always
# returns a string so we convert it to an integer using a readr function
-episode <- films %>%
- html_element("h2") %>%
- html_attr("data-id") %>%
+episode <- films |>
+ html_element("h2") |>
+ html_attr("data-id") |>
readr::parse_integer()
episode
#> [1] 1 2 3 4 5 6 7
@@ -85,8 +91,8 @@ frame with `html_table()`:
``` r
html <- read_html("https://en.wikipedia.org/w/index.php?title=The_Lego_Movie&oldid=998422565")
-html %>%
- html_element(".tracklist") %>%
+html |>
+ html_element(".tracklist") |>
html_table()
#> # A tibble: 29 × 4
#> No. Title `Performer(s)` Length
diff --git a/demo/tripadvisor.R b/demo/tripadvisor.R
index 7ece89c8..4246a923 100644
--- a/demo/tripadvisor.R
+++ b/demo/tripadvisor.R
@@ -5,32 +5,32 @@ library(rvest)
url <- "http://www.tripadvisor.com/Hotel_Review-g37209-d1762915-Reviews-JW_Marriott_Indianapolis-Indianapolis_Indiana.html"
-reviews <- url %>%
- read_html() %>%
+reviews <- url |>
+ read_html() |>
html_elements("#REVIEWS .innerBubble")
-id <- reviews %>%
- html_element(".quote a") %>%
+id <- reviews |>
+ html_element(".quote a") |>
html_attr("id")
-quote <- reviews %>%
- html_element(".quote span") %>%
+quote <- reviews |>
+ html_element(".quote span") |>
html_text()
-rating <- reviews %>%
- html_element(".rating .rating_s_fill") %>%
- html_attr("alt") %>%
- gsub(" of 5 stars", "", .) %>%
+rating <- reviews |>
+ html_element(".rating .rating_s_fill") |>
+ html_attr("alt") |>
+ gsub(pattern = " of 5 stars", replacement = "") |> # named args: |> has no `.` placeholder, piped value binds to `x`
 as.integer()
-date <- reviews %>%
- html_element(".rating .ratingDate") %>%
- html_attr("title") %>%
- strptime("%b %d, %Y") %>%
+date <- reviews |>
+ html_element(".rating .ratingDate") |>
+ html_attr("title") |>
+ strptime("%b %d, %Y") |>
as.POSIXct()
-review <- reviews %>%
- html_element(".entry .partial_entry") %>%
+review <- reviews |>
+ html_element(".entry .partial_entry") |>
html_text()
-data.frame(id, quote, rating, date, review, stringsAsFactors = FALSE) %>% View()
+data.frame(id, quote, rating, date, review, stringsAsFactors = FALSE) |> View()
diff --git a/demo/united.R b/demo/united.R
index b4ba358e..24feccfb 100644
--- a/demo/united.R
+++ b/demo/united.R
@@ -3,18 +3,18 @@ library(rvest)
united <- session("http://www.united.com/")
-login <- united %>%
- html_element("form[name=LoginForm]") %>%
- html_form() %>%
+login <- united |>
+ html_element("form[name=LoginForm]") |>
+ html_form() |>
html_form_set(
MpNumber = "GY797363",
Password = password
)
-logged_in <- united %>% session_submit(login)
+logged_in <- united |> session_submit(login)
-logged_in %>%
- follow_link("View account") %>%
- html_element("#ctl00_ContentInfo_AccountSummary_spanEliteMilesNew") %>%
- html_text() %>%
+logged_in |>
+ session_follow_link("View account") |> # replaces deprecated follow_link(); matches session()/session_submit() used above
+ html_element("#ctl00_ContentInfo_AccountSummary_spanEliteMilesNew") |>
+ html_text() |>
 readr::parse_number()
diff --git a/demo/zillow.R b/demo/zillow.R
index e98d1f14..94401225 100644
--- a/demo/zillow.R
+++ b/demo/zillow.R
@@ -4,25 +4,25 @@ library(tidyr)
page <- read_html("http://www.zillow.com/homes/for_sale/Greenwood-IN/fsba,fsbo,fore,cmsn_lt/house_type/52333_rid/39.638414,-86.011362,39.550714,-86.179419_rect/12_zm/0_mmm/")
-houses <- page %>%
+houses <- page |>
html_elements(".photo-cards li article")
-z_id <- houses %>% html_attr("id")
+z_id <- houses |> html_attr("id")
-address <- houses %>%
- html_element(".zsg-photo-card-address") %>%
+address <- houses |>
+ html_element(".zsg-photo-card-address") |>
html_text()
-price <- houses %>%
- html_element(".zsg-photo-card-price") %>%
- html_text() %>%
+price <- houses |>
+ html_element(".zsg-photo-card-price") |>
+ html_text() |>
readr::parse_number()
-params <- houses %>%
- html_element(".zsg-photo-card-info") %>%
- html_text() %>%
+params <- houses |>
+ html_element(".zsg-photo-card-info") |>
+ html_text() |>
strsplit("\u00b7")
-beds <- params %>% purrr::map_chr(1) %>% readr::parse_number()
-baths <- params %>% purrr::map_chr(2) %>% readr::parse_number()
-house_area <- params %>% purrr::map_chr(3) %>% readr::parse_number()
+beds <- params |> purrr::map_chr(1) |> readr::parse_number()
+baths <- params |> purrr::map_chr(2) |> readr::parse_number()
+house_area <- params |> purrr::map_chr(3) |> readr::parse_number()
diff --git a/man/LiveHTML.Rd b/man/LiveHTML.Rd
index a568e669..5b3aaa98 100644
--- a/man/LiveHTML.Rd
+++ b/man/LiveHTML.Rd
@@ -24,11 +24,11 @@ that exposes a more powerful user interface, like
sess <- read_html_live("https://www.bodybuilding.com/exercises/finder")
sess$view()
-sess \%>\% html_elements(".ExResult-row") \%>\% length()
+sess |> html_elements(".ExResult-row") |> length()
sess$click(".ExLoadMore-btn")
-sess \%>\% html_elements(".ExResult-row") \%>\% length()
+sess |> html_elements(".ExResult-row") |> length()
sess$click(".ExLoadMore-btn")
-sess \%>\% html_elements(".ExResult-row") \%>\% length()
+sess |> html_elements(".ExResult-row") |> length()
}
}
\section{Public fields}{
diff --git a/man/html_attr.Rd b/man/html_attr.Rd
index 75ac0ccf..3a9860a4 100644
--- a/man/html_attr.Rd
+++ b/man/html_attr.Rd
@@ -32,9 +32,9 @@ html <- minimal_html('')
-html \%>\% html_elements("a") \%>\% html_attrs()
+html |> html_elements("a") |> html_attrs()
-html \%>\% html_elements("a") \%>\% html_attr("href")
-html \%>\% html_elements("li") \%>\% html_attr("class")
-html \%>\% html_elements("li") \%>\% html_attr("class", default = "inactive")
+html |> html_elements("a") |> html_attr("href")
+html |> html_elements("li") |> html_attr("class")
+html |> html_elements("li") |> html_attr("class", default = "inactive")
}
diff --git a/man/html_element.Rd b/man/html_element.Rd
index cc2e24ef..53a12a58 100644
--- a/man/html_element.Rd
+++ b/man/html_element.Rd
@@ -57,10 +57,10 @@ html <- minimal_html("
")
-sample1 \%>\%
- html_element("table") \%>\%
+sample1 |>
+ html_element("table") |>
html_table()
# Values in merged cells will be duplicated
@@ -65,8 +65,8 @@ sample2 <- minimal_html("4 y 10 z
")
-sample2 \%>\%
- html_element("table") \%>\%
+sample2 |>
+ html_element("table") |>
html_table()
# If a row is missing cells, they'll be filled with NAs
@@ -76,7 +76,7 @@ sample3 <- minimal_html("4 5 6 7
")
-sample3 \%>\%
- html_element("table") \%>\%
+sample3 |>
+ html_element("table") |>
html_table()
}
diff --git a/man/html_text.Rd b/man/html_text.Rd
index c65afdb4..a25875b2 100644
--- a/man/html_text.Rd
+++ b/man/html_text.Rd
@@ -47,17 +47,17 @@ html <- minimal_html(
# html_text() returns the raw underlying text, which includes whitespace
# that would be ignored by a browser, and ignores the 3 4
-html \%>\% html_element("p") \%>\% html_text() \%>\% writeLines()
+html |> html_element("p") |> html_text() |> writeLines()
# html_text2() simulates what a browser would display. Non-significant
# whitespace is collapsed, and
is turned into a line break
-html \%>\% html_element("p") \%>\% html_text2() \%>\% writeLines()
+html |> html_element("p") |> html_text2() |> writeLines()
# By default, html_text2() also converts non-breaking spaces to regular
# spaces:
html <- minimal_html("
-title <- films \%>\%
- html_element("h2") \%>\%
+title <- films |>
+ html_element("h2") |>
html_text2()
title
# Or use html_attr() to get data out of attributes. html_attr() always
# returns a string so we convert it to an integer using a readr function
-episode <- films \%>\%
- html_element("h2") \%>\%
- html_attr("data-id") \%>\%
+episode <- films |>
+ html_element("h2") |>
+ html_attr("data-id") |>
readr::parse_integer()
episode
}
diff --git a/man/read_html_live.Rd b/man/read_html_live.Rd
index 81489d0d..474ef478 100644
--- a/man/read_html_live.Rd
+++ b/man/read_html_live.Rd
@@ -36,15 +36,15 @@ on your machine.
# When we retrieve the raw HTML for this site, it doesn't contain the
# data we're interested in:
static <- read_html("https://www.forbes.com/top-colleges/")
-static \%>\% html_elements(".TopColleges2023_tableRow__BYOSU")
+static |> html_elements(".TopColleges2023_tableRow__BYOSU")
# Instead, we need to run the site in a real web browser, causing it to
# download a JSON file and then dynamically generate the html:
sess <- read_html_live("https://www.forbes.com/top-colleges/")
sess$view()
-rows <- sess \%>\% html_elements(".TopColleges2023_tableRow__BYOSU")
-rows \%>\% html_element(".TopColleges2023_organizationName__J1lEV") \%>\% html_text()
-rows \%>\% html_element(".grant-aid") \%>\% html_text()
+rows <- sess |> html_elements(".TopColleges2023_tableRow__BYOSU")
+rows |> html_element(".TopColleges2023_organizationName__J1lEV") |> html_text()
+rows |> html_element(".grant-aid") |> html_text()
}
}
diff --git a/man/session.Rd b/man/session.Rd
index 4e0a296a..9295b126 100644
--- a/man/session.Rd
+++ b/man/session.Rd
@@ -68,19 +68,19 @@ and \code{\link[httr:status_code]{httr::status_code()}}.
}
\examples{
s <- session("http://hadley.nz")
-s \%>\%
- session_jump_to("hadley-wickham.jpg") \%>\%
- session_jump_to("/") \%>\%
+s |>
+ session_jump_to("hadley-wickham.jpg") |>
+ session_jump_to("/") |>
session_history()
-s \%>\%
- session_jump_to("hadley-wickham.jpg") \%>\%
- session_back() \%>\%
+s |>
+ session_jump_to("hadley-wickham.jpg") |>
+ session_back() |>
session_history()
\donttest{
-s \%>\%
- session_follow_link(css = "p a") \%>\%
+s |>
+ session_follow_link(css = "p a") |>
html_elements("p")
}
}
diff --git a/tests/testthat/_snaps/session.md b/tests/testthat/_snaps/session.md
index ae4f055b..cfab7cad 100644
--- a/tests/testthat/_snaps/session.md
+++ b/tests/testthat/_snaps/session.md
@@ -7,12 +7,12 @@