Merge pull request #201 from CDU-data-science-team/fix-data-upload
Temporarily fix data upload
asegun-cod authored Nov 16, 2023
2 parents bdc8b0b + 89a8a20 commit 3c0f1c3
Showing 10 changed files with 175 additions and 22 deletions.
11 changes: 9 additions & 2 deletions API_url_tracker.Rmd
@@ -1,6 +1,6 @@
---
title: "API URL tracker"
author: "Oluwasegun Apejoye"
author: "Experiences Dashboard"
date: "2023-09-04"
output: html_document
---
@@ -25,6 +25,12 @@ conn <- odbc::dbConnect(
Port = 3306
)
# connect to a pin board to save the prediction in case database writing fails.
board <- pins::board_connect()
# OR
# # Set board to NULL if database writing is no longer an issue
# board = NULL
pending_jobs <- dplyr::tbl(
conn,
dbplyr::in_schema(
@@ -44,7 +50,8 @@ Sys.sleep(2) # Sleep for 2 seconds to allow any pending tasks to start in the API
if (nrow(pending_jobs) > 0) {
pending_jobs |>
apply(1, track_api_job,
conn = conn, write_db = TRUE
conn = conn, write_db = TRUE,
board = board
)
} else {
paste("No pending job")
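Taken together, the hunks above make the scheduled tracker open a pin board and thread it through to `track_api_job()`. A minimal sketch of the resulting flow (the status filter shown is illustrative, since the real query is elided in the diff; `board = NULL` disables the pin fallback):

```r
# Sketch of the updated tracker flow, assuming the experiencesdashboard
# package is loaded and Connect credentials are configured for pins.
board <- pins::board_connect() # fallback store in case the DB write fails
# board <- NULL                # set to NULL once the DB upload issue is fixed

pending_jobs <- dplyr::tbl(
  conn,
  dbplyr::in_schema("TEXT_MINING", "api_jobs")
) |>
  dplyr::filter(status == "submitted") |> # illustrative filter
  dplyr::collect()

if (nrow(pending_jobs) > 0) {
  pending_jobs |>
    apply(1, track_api_job, conn = conn, write_db = TRUE, board = board)
} else {
  paste("No pending job")
}
```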
91 changes: 91 additions & 0 deletions Local_API_url_tracker.qmd
@@ -0,0 +1,91 @@
---
title: "Write the predictions for all completed jobs to Database"
author: "Experiences dashboard"
date: 2023/11/14
format:
html:
embed-resources: true
---

```{r}
#| include: false
library(DBI)
library(odbc)
library(dplyr)
library(pins)
```



## Intro

Use this script to manually write the predictions for all completed jobs that couldn't be auto-written to the database by the scheduled API_url_tracker on Connect.
This script won't be needed once the [issue with the database upload](https://github.com/CDU-data-science-team/experiencesdashboard/issues/200) is resolved.

```{r}
#| message: false
conn <- odbc::dbConnect(
drv = odbc::odbc(),
driver = Sys.getenv("odbc_driver"),
server = Sys.getenv("HOST_NAME"),
UID = Sys.getenv("DB_USER"),
PWD = Sys.getenv("MYSQL_PASSWORD"),
database = "TEXT_MINING",
Port = 3306,
encoding = "UTF-8"
)
# Connect to the Strategy Unit Connect server
board <- pins::board_connect()
pending_jobs <- dplyr::tbl(
conn,
dbplyr::in_schema(
"TEXT_MINING",
"api_jobs"
)
) |>
dplyr::filter(status == "completed") |>
dplyr::collect()
```


```{r}
if (nrow(pending_jobs) > 0) {
for (i in 1:nrow(pending_jobs)) {
job <- pending_jobs[i, ]
job_id <- as.character(job["job_id"])
trust_id <- as.character(job["trust_id"])
board_path <- as.character(job["pin_path"])
# get the prediction from the board
prediction <- pins::pin_read(board, board_path)
# update the main table in the database
dplyr::rows_update(
dplyr::tbl(conn, trust_id),
prediction,
by = "comment_id",
unmatched = "ignore",
copy = TRUE,
in_place = TRUE
)
# update the job status to 'uploaded' (prediction successfully written to the main table)
DBI::dbExecute(conn, paste("UPDATE api_jobs SET status='uploaded' WHERE job_id =", job_id))
# delete the prediction from the board
pins::pin_delete(board, board_path)
DBI::dbExecute(
  conn,
  sprintf("UPDATE api_jobs SET pin_path = NULL WHERE job_id = %s", job_id)
)
cat("Job", job_id, "prediction has been successfully written to database \n")
}
} else {
cat("No uncompleted job")
}
```
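The workhorse in this chunk is `dplyr::rows_update()` with `copy = TRUE` and `in_place = TRUE`: the local `prediction` data frame is copied into a temporary table and the trust table is updated directly in the database, with `unmatched = "ignore"` silently dropping predictions whose `comment_id` no longer exists. A self-contained illustration of those semantics against an in-memory SQLite database (SQLite is an assumption for demo purposes; the dashboard itself runs on MySQL):

```r
library(DBI)
library(dplyr)

con <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
DBI::dbWriteTable(con, "comments",
                  data.frame(comment_id = 1:3, sentiment = NA_integer_))

fix <- data.frame(comment_id = c(2L, 99L), sentiment = c(1L, 5L))

dplyr::rows_update(
  dplyr::tbl(con, "comments"), fix,
  by = "comment_id",
  unmatched = "ignore", # comment_id 99 has no match and is dropped
  copy = TRUE,          # copy `fix` into a temporary table first
  in_place = TRUE       # modify the database table itself
)

dplyr::tbl(con, "comments") |> dplyr::collect() # row 2 now has sentiment 1
DBI::dbDisconnect(con)
```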
2 changes: 1 addition & 1 deletion R/app_server.R
@@ -283,7 +283,7 @@ app_server <- function(input, output, session) {
dplyr::arrange(date)
}

# Transform the sentiment column
# Transform the sentiment
return_data <- return_data %>%
transform_sentiment() %>%
drop_na_by_col(c('category', 'super_category', 'sentiment'))
28 changes: 25 additions & 3 deletions R/fct_api_pred.R
@@ -100,13 +100,16 @@ transform_prediction_for_database <- function(prediction) {
#' @param job a single row of the api_jobs table
#' @param conn database connection
#' @param write_db logical; should the prediction data be written to the database or returned as a dataframe?
#'
#' @param board a pin board on which to temporarily write the prediction in case database writing fails
#'
#' @return dataframe (if `write_db` is FALSE)
#' @export
track_api_job <- function(job, conn, write_db = TRUE) {
track_api_job <- function(job, conn, write_db = TRUE, board = NULL) {
job_id <- as.character(job["job_id"])
url <- as.character(job["url"])
trust_id <- as.character(job["trust_id"])
board_name <- paste0(trust_id, "_prediction")
write_to_board <- !is.null(board)

cat("Checking Job", job_id, "\n")
prediction <- NULL
@@ -136,6 +139,15 @@ track_api_job <- function(job, conn, write_db = TRUE) {

prediction <- prediction |>
transform_prediction_for_database()

# Write the prediction to a board in case the database write fails.
# The pin is deleted once the database write succeeds; otherwise it can
# be picked up later for a manual (local) database write.
if (write_to_board) {
board_path <- pins::pin_write(board, x = prediction, name = board_name,
type = "rds", versioned = FALSE)
DBI::dbExecute(conn, sprintf("UPDATE api_jobs SET pin_path ='%s' WHERE job_id = %s", board_path, job_id))
}

# update the main table
cat("Updating database with prediction \n")
@@ -151,8 +163,18 @@

# update the job status to 'uploaded' (prediction successfully written to the main table)
DBI::dbExecute(conn, paste("UPDATE api_jobs SET status='uploaded' WHERE job_id =", job_id))


# delete the trust's prediction from the board once it is successfully written to the database
if (write_to_board) {
pins::pin_delete(board, board_path)
DBI::dbExecute(
  conn,
  sprintf("UPDATE api_jobs SET pin_path = NULL WHERE job_id = %s", job_id)
)
}

cat("Job", job_id, "prediction has been successfully written to database \n")

} else if (is.character(prediction)) {
cat("Job", job_id, "is still busy \n")
} else {
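The fallback added here is a pins write/read/delete round-trip. Its mechanics can be seen without a Connect server by using a throwaway local board (`pins::board_temp()` below stands in for the `pins::board_connect()` the package actually uses, and `mtcars` stands in for a prediction):

```r
library(pins)

board <- pins::board_temp() # throwaway local board for illustration

# What track_api_job() does once a job completes:
board_path <- pins::pin_write(board, x = mtcars, name = "trust_x_prediction",
                              type = "rds", versioned = FALSE)

# What Local_API_url_tracker.qmd does during manual recovery:
prediction <- pins::pin_read(board, board_path)

# What both scripts do after a successful database write:
pins::pin_delete(board, board_path)
```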
8 changes: 4 additions & 4 deletions R/mod_data_management.R
@@ -13,10 +13,10 @@ mod_data_management_ui <- function(id) {
fluidPage(
tags$br(),
fluidRow(
p("
This page is for users who wants to upload new data or amend the
existing data in the dashboard
"),
strong("
This page is only for users who wants to upload new data or amend the
existing data in the dashboard.
") |> p(),
column(
width = 1,
actionButton(ns("upload_new_data"), "Upload new data",
10 changes: 6 additions & 4 deletions dev/create_trust_table.R
@@ -8,20 +8,21 @@
#' @param pool the database connection
#' @param set_trust_id the name of the trust table; should be the same as `get_golem_config('trust_name')`
#' @param drop_table if the trust table already exists, should it be dropped and recreated, or should an error be thrown?
#'
#' @param default_trust the trust table to use as a template
#'
#' @return zero if operation is successful
#' @examples create_trust_table(pool, set_trust_id = "trust_a_bk")
#' @noRd
create_trust_table <- function(pool, set_trust_id, drop_table = FALSE) {
create_trust_table <- function(pool, set_trust_id, default_trust = "phase_2_demo", drop_table = FALSE) {
tryCatch(
{
query <- paste0("CREATE TABLE ", set_trust_id, " AS (SELECT * FROM phase_2_demo WHERE 1=2)")
query <- sprintf("CREATE TABLE %s AS (SELECT * FROM %s WHERE 1=2)", set_trust_id, default_trust)
DBI::dbExecute(pool, query)
},
error = function(e) {
if (drop_table) {
DBI::dbExecute(pool, paste0("DROP TABLE IF EXISTS ", set_trust_id))
query <- paste0("CREATE TABLE ", set_trust_id, " AS (SELECT * FROM phase_2_demo WHERE 1=2)")
query <- sprintf("CREATE TABLE %s AS (SELECT * FROM %s WHERE 1=2)", set_trust_id, default_trust)
DBI::dbExecute(pool, query)
} else {
stop("Table already exist")
@@ -48,6 +49,7 @@ create_job_table <- function(conn) {
trust_id tinytext NOT NULL,
user tinytext NOT NULL,
email tinytext,
pin_path text,
status tinytext NOT NULL CHECK (status IN ('submitted', 'completed', 'failed', 'uploaded')),
PRIMARY KEY (job_id)
)"
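With the new `default_trust` argument, the helper can clone any existing trust table rather than only `phase_2_demo`. A usage sketch (table names are illustrative):

```r
# Clone the default template (phase_2_demo) into a new, empty trust table:
create_trust_table(pool, set_trust_id = "trust_a_bk")

# Clone a different template, dropping and recreating the table if it exists:
create_trust_table(pool, set_trust_id = "trust_b",
                   default_trust = "trust_a", drop_table = TRUE)
```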
4 changes: 3 additions & 1 deletion man/track_api_job.Rd

(Generated file; diff not rendered.)

33 changes: 30 additions & 3 deletions renv.lock
@@ -773,8 +773,8 @@
"RemoteHost": "api.github.com",
"RemoteRepo": "experiencesdashboard",
"RemoteUsername": "CDU-data-science-team",
"RemoteRef": "HEAD",
"RemoteSha": "cf5573fa7cfb4e022cb0365b8a9f065904edd832",
"RemoteRef": "fix-data-upload",
"RemoteSha": "934aac6c8e75c6bd4535059ee615a5c464dc3f48",
"Requirements": [
"ComplexUpset",
"DBI",
@@ -814,7 +814,7 @@
"writexl",
"xml2"
],
"Hash": "cd3886b31934799c6f136eefed593c44"
"Hash": "e1b46af7abfdfd8f5cfdb7cb55f893b7"
},
"fansi": {
"Package": "fansi",
@@ -1509,6 +1509,33 @@
],
"Hash": "15da5a8412f317beeee6175fbc76f4bb"
},
"pins": {
"Package": "pins",
"Version": "1.3.0",
"Source": "Repository",
"Repository": "RSPM",
"Requirements": [
"R",
"cli",
"digest",
"ellipsis",
"fs",
"generics",
"glue",
"httr",
"jsonlite",
"lifecycle",
"magrittr",
"purrr",
"rappdirs",
"rlang",
"tibble",
"whisker",
"withr",
"yaml"
],
"Hash": "e240e373ac8805080423d0fb985d87b0"
},
"pkgbuild": {
"Package": "pkgbuild",
"Version": "1.4.2",
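The new `pins` entry and the updated package hashes above are what renv records once the dependency is installed and the project is snapshotted; roughly (an assumption about the workflow, run from the project root):

```r
renv::install("pins") # install pins into the project library
renv::snapshot()      # record pins and its hash in renv.lock
```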
@@ -5,7 +5,7 @@ account: oluwasegun.apejoye
server: connect.strategyunitwm.nhs.uk
hostUrl: https://connect.strategyunitwm.nhs.uk/__api__
appId: 149
bundleId: 930
bundleId: 1092
url: https://connect.strategyunitwm.nhs.uk/api_tracker/
version: 1
asMultiple: FALSE
8 changes: 5 additions & 3 deletions tests/testthat/_snaps/app_ui.md
@@ -95,9 +95,11 @@
<br/>
<div class="row">
<p>
This page is for users who wants to upload new data or amend the
existing data in the dashboard
</p>
<strong>
This page is only for users who want to upload new data or amend the
existing data in the dashboard.
</strong>
</p>
<div class="col-sm-1">
<button id="id-upload_new_data" type="button" class="btn btn-default action-button">
<i class="fas fa-person-circle-plus" role="presentation" aria-label="person-circle-plus icon"></i>
