-
Notifications
You must be signed in to change notification settings - Fork 0
/
racingchartscript
103 lines (78 loc) · 3.48 KB
/
racingchartscript
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#load the necessary packages
library(rvest)
library(tidyverse)
library(lubridate)
library(purrr)
options(scipen = 999)
library(gganimate)
library(glue)
# Country slugs for which daily data is scraped; each one is appended to the
# country URL when a page is requested.
list_countries <- c(
  "china", "italy", "iran", "south-korea", "spain",
  "germany", "france", "us", "uk"
)

# Number of days to track per country
days_since <- 50

# Root of the per-country data pages
base_url <- "https://www.worldometers.info/coronavirus/country"
# Scrape the chart data for one country and return it as a data frame.
#
# Args:
#   page:   country slug appended to `base_url` (e.g. "italy", "south-korea").
#   n_days: number of leading chart points to keep. Defaults to the global
#           `days_since`.
#
# Returns:
#   A data.frame with exactly `n_days` rows and two columns: `day` (Date) and
#   `number` (numeric). Rows beyond the available chart points are NA.
#
# Raises:
#   An error when the page has fewer <script> elements than expected or when
#   the selected script does not contain the expected chart fields.
get_data <- function(page, n_days = days_since) {
  # Position of the <script> element that is parsed as the total-cases chart.
  # NOTE(review): a positional lookup is brittle -- confirm it still matches
  # the page layout before trusting the output.
  chart_script_index <- 24

  days_pattern <- ".*categories:*(.*?) *yAxis.*"
  numbers_pattern <- ".*data:*(.*?) *responsive.*"

  # Split a comma-separated series and keep exactly `n` entries: surplus
  # entries are dropped and missing ones become NA. (A fixed-width split would
  # instead leave the unsplit remainder in the last piece.)
  first_n <- function(series, n) {
    pieces <- str_trim(str_split(series, ",")[[1]])
    pieces[seq_len(n)]
  }

  url <- glue("{base_url}/{page}")
  scripts <- read_html(url) %>% html_nodes("script")
  if (length(scripts) < chart_script_index) {
    stop(
      "Expected at least ", chart_script_index, " <script> elements at ", url,
      " but found ", length(scripts),
      call. = FALSE
    )
  }

  total_cases <- scripts[chart_script_index] %>% html_text()
  if (!grepl(days_pattern, total_cases) || !grepl(numbers_pattern, total_cases)) {
    stop(
      "Script ", chart_script_index, " at ", url,
      " does not contain the expected chart fields",
      call. = FALSE
    )
  }

  # The labels carry no year, so as.Date() fills in the current year.
  # NOTE(review): dates will be wrong if the series is scraped in a later year
  # than the one it describes.
  days <- gsub(days_pattern, "\\1", total_cases) %>%
    gsub("\\[|\\]|}", "", .) %>%
    gsub('\"', "", ., fixed = TRUE) %>%
    str_trim() %>%
    first_n(n_days) %>%
    as.Date(format = "%b%d")

  # Every "null" point is treated as zero, not only the first one.
  numbers <- sub(numbers_pattern, "\\1", total_cases) %>%
    gsub("\\[|\\]|}", "", .) %>%
    str_replace_all("null", "0") %>%
    str_trim() %>%
    first_n(n_days) %>%
    as.numeric()

  data.frame(day = days, number = numbers)
}
# Build the scraped data into one tidy data frame with a `Country` column.

# Display label for each country slug. Labels are keyed by slug so they stay
# attached to the right data when `list_countries` is reordered or extended.
country_labels <- c(
  "china" = "China",
  "italy" = "Italy",
  "iran" = "Iran",
  "south-korea" = "South Korea",
  "spain" = "Spain",
  "germany" = "Germany",
  "france" = "France",
  "us" = "USA",
  "uk" = "UK"
)

unlabelled <- setdiff(list_countries, names(country_labels))
if (length(unlabelled) > 0) {
  stop(
    "No display label defined for: ", paste(unlabelled, collapse = ", "),
    call. = FALSE
  )
}

# One scraped data frame per slug, in the order of `list_countries`.
b <- lapply(list_countries, get_data)

# Attach the display label to every row of each country's frame.
by_slug <- map2(b, list_countries, function(country_df, slug) {
  mutate(country_df, Country = country_labels[[slug]])
}) %>%
  setNames(list_countries)

# Per-country frames, looked up by slug rather than by position.
china <- by_slug[["china"]]
italy <- by_slug[["italy"]]
iran <- by_slug[["iran"]]
skorea <- by_slug[["south-korea"]]
spain <- by_slug[["spain"]]
germany <- by_slug[["germany"]]
france <- by_slug[["france"]]
us <- by_slug[["us"]]
uk <- by_slug[["uk"]]

overall_data <- bind_rows(by_slug)
# Cumulative numbers per country over time, drawn as lines on a log10 y axis
ggplot(overall_data, aes(x = day, y = number, colour = Country)) +
  geom_line() +
  scale_y_log10() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Leave China out for a more proportional graph, then number the rows within
# each day from the largest count downwards.
data <- overall_data %>%
  filter(Country != "China") %>%
  group_by(day) %>%
  arrange(desc(number)) %>%
  mutate(rank = row_number())
# Animated bar chart: one state per day, bars ordered by number and scaled
# relative to that day's largest value.

# Number of bars per day; sizes the rank axis so it is not tied to a fixed
# country count.
n_bars <- n_distinct(data$Country)

p <- data %>%
  group_by(day) %>%
  mutate(
    # ties.method = "first" keeps ranks as unique integers, so tied countries
    # get separate bar slots instead of sharing one fractional position.
    Rank = rank(-number, ties.method = "first"),
    # Bar length relative to the day's leader.
    Value_rel = number / max(number, na.rm = TRUE),
    Value_lbl = paste0(" ", number)
  ) %>%
  ungroup() %>%
  # An explicit group lets gganimate follow each country's bar and labels as
  # the same element from one day to the next.
  ggplot(aes(-Rank, Value_rel, fill = Country, group = Country)) +
  geom_col(width = 0.9, position = "identity") +
  coord_flip(clip = "off", expand = FALSE) +
  scale_y_continuous(labels = scales::comma) +
  scale_x_continuous(limits = c(-(n_bars + 1), 0)) +
  # Country name to the left of the bar, value at the bar's end.
  geom_text(aes(y = 0, label = paste(Country, "")), hjust = 1, vjust = 0.2) +
  geom_text(aes(label = Value_lbl), hjust = 0) +
  theme_minimal() +
  labs(title='Date: {closest_state}', x = "", y = "",
       caption = "Data from https://www.worldometers.info/coronavirus \n
Plot by philgR") +
  theme(
    legend.position = "none",
    axis.title = element_blank(),
    plot.title = element_text(hjust = 0, size = 18),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank(),
    # Wide left margin leaves room for the country names drawn outside the
    # panel (clipping is off).
    plot.margin = margin(2, 2, 2, 4, "cm")
  ) +
  transition_states(day, transition_length = 4, state_length = 1)

# NOTE(review): nframes, fps and duration are all supplied; confirm against the
# gganimate documentation which of the three takes precedence.
animate(p, nframes = 100, fps = 25, duration = 20, width = 800, height = 600)