Skip to content

Commit

Permalink
NAs in sign.test, plus Nsigntest. Close #326
Browse files Browse the repository at this point in the history
  • Loading branch information
eheinzen committed May 7, 2021
1 parent a251211 commit 1f853e8
Show file tree
Hide file tree
Showing 6 changed files with 56 additions and 9 deletions.
5 changes: 4 additions & 1 deletion R/paired.R
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,8 @@ paired <- function(formula, data, id, na.action, subset=NULL, strata, control =
bystatlist <- do.call(statfun, list(currcol, levels = xlevels,
by = by.col, by.levels = by.levels, na.rm = TRUE))
bystatlist$Total <- NULL
} else if(statfun2 == "Nsigntest") {
bystatlist <- as.countpct(NA_real_)
} else
{
for(bylev in by.levels) {
Expand Down Expand Up @@ -346,7 +348,8 @@ paired <- function(formula, data, id, na.action, subset=NULL, strata, control =
currtest <- if(nchar(specialTests[eff]) > 0) specialTests[eff] else currtest
testout <- if(control$test) {
eval(call(currtest, TP1.eff, TP2.eff, mcnemar.correct=control$mcnemar.correct,
signed.rank.exact = control$signed.rank.exact, signed.rank.correct = control$signed.rank.correct))
signed.rank.exact = control$signed.rank.exact, signed.rank.correct = control$signed.rank.correct,
test.always=control$test.always))
} else notest()

xList[[eff]] <- list(stats=statList, test=testout, type=vartype)
Expand Down
28 changes: 24 additions & 4 deletions R/paired.stat.tests.R
Original file line number Diff line number Diff line change
@@ -1,29 +1,49 @@

paired.t <- function(x, y, ...) {
paired.t <- function(x, y, ..., na.rm = TRUE) {
if(is.Date(x) && is.Date(y))
{
x <- as.integer(x)
y <- as.integer(y)
}
if(na.rm) {
idx <- is.na(x) | is.na(y)
x <- x[!idx]
y <- y[!idx]
}
stats::t.test(x, y, paired = TRUE)
}

mcnemar <- function(x, y, mcnemar.correct = TRUE, ...)
mcnemar <- function(x, y, mcnemar.correct = TRUE, ..., na.rm = TRUE)
{
if(na.rm) {
idx <- is.na(x) | is.na(y)
x <- x[!idx]
y <- y[!idx]
}
stats::mcnemar.test(x, y, correct = mcnemar.correct)
}

signed.rank <- function(x, y, signed.rank.exact = NULL, signed.rank.correct = TRUE, ...)
signed.rank <- function(x, y, signed.rank.exact = NULL, signed.rank.correct = TRUE, ..., na.rm = TRUE)
{
if(is.ordered(x) && is.ordered(y))
{
x <- as.integer(x)
y <- as.integer(y)
}
if(na.rm) {
idx <- is.na(x) | is.na(y)
x <- x[!idx]
y <- y[!idx]
}
stats::wilcox.test(x, y, paired = TRUE, exact = signed.rank.exact, correct = signed.rank.correct)
}

sign.test <- function(x, y, ...)
sign.test <- function(x, y, ..., na.rm = TRUE)
{
if(na.rm) {
idx <- is.na(x) | is.na(y)
x <- x[!idx]
y <- y[!idx]
}
stats::binom.test(c(sum(x > y), sum(x < y)))
}
2 changes: 1 addition & 1 deletion R/tableby.control.R
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ add_tbc_stats_labels <- function(x) {
mean = "Mean", sd = "SD", var = "Var", max = "Max", min = "Min", meanCI = "Mean (CI)", sum = "Sum",
gmean = "Geom Mean", gsd = "Geom SD", gmeansd = "Geom Mean (Geom SD)", gmeanCI = "Geom Mean (CI)",
range="Range", Npct="N (Pct)", Nevents="Events", medSurv="Median Survival",
medTime = "Median Follow-Up", medianmad="Median (MAD)",
medTime = "Median Follow-Up", medianmad="Median (MAD)", Nsigntest = "N (sign test)",
overall = "Overall", total = "Total", difference = "Difference"
)
nms <- setdiff(names(x), "")
Expand Down
10 changes: 8 additions & 2 deletions R/tableby.stats.R
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,12 @@ gmeanCI <- function(x, na.rm=TRUE, weights = NULL, conf.level = 0.95, ...) {
as.tbstat(y, parens = c("(", ")"), sep2 = ", ")
}

#' @rdname tableby.stats
#' @export
Nsigntest <- function(x, na.rm = TRUE, weights = NULL, ...) {
if(is.null(weights)) weights <- rep(1, NROW(x))
as.countpct(sum(weights*(x != 0), na.rm = na.rm))
}

## survival stats
#' @rdname tableby.stats
Expand Down Expand Up @@ -379,7 +385,7 @@ iqr <- function(x, na.rm=TRUE, weights = NULL, ...) {
#' @export
Nmiss <- function(x, na.rm=TRUE, weights = NULL, ...) {
if(is.null(weights)) weights <- rep(1, NROW(x))
as.countpct(sum(weights[is.na(x)]))
as.countpct(sum(weights, na.rm = na.rm))
}

## Nmiss2 make similar, but in tableby, always keep nmiss,
Expand All @@ -393,7 +399,7 @@ Nmiss2 <- Nmiss
#' @export
N <- function(x, na.rm=TRUE, weights = NULL, ...) {
if(is.null(weights)) weights <- rep(1, NROW(x))
as.countpct(sum(weights[!is.na(x)]))
as.countpct(sum(weights, na.rm = na.rm))
}

#' @rdname tableby.stats
Expand Down
18 changes: 18 additions & 0 deletions tests/testthat/test_paired.R
Original file line number Diff line number Diff line change
Expand Up @@ -283,4 +283,22 @@ test_that("12/27/2019: informative error when no stats are computed (#273)", {
expect_error(summary(paired(tp ~ Cat, data = dat2, id = id, cat.stats = "Nmiss")), "Nothing to show for variable")
})

test_that("NAs in sign.test, plus Nsigntest (#326)", {
d <- data.frame(
tp = rep(c("Time 1", "Time 2"), times = 4),
id = c(1, 1, 2, 2, 3, 3, 4, 4),
a = c(1, 2, 2, 3, 3, 4, 5, NA)
)
expect_identical(
capture.kable(summary(paired(tp ~ sign.test(a), id = id, data = d, numeric.stats = c("Nmiss", "meansd", "range", "Nsigntest")), text = TRUE)),
c("| | Time 1 (N=4) | Time 2 (N=4) | Difference (N=4) | p value|",
"|:----------------|:-------------:|:-------------:|:----------------:|-------:|",
"|a | | | | 0.250|",
"|- N-Miss | 4 | 4 | 4 | |",
"|- Mean (SD) | 2.750 (1.708) | 3.000 (1.000) | 1.000 (0.000) | |",
"|- Range | 1.000 - 5.000 | 2.000 - 4.000 | 1.000 - 1.000 | |",
"|- N (sign test) | NA | NA | 3 | |"
)
)
})

2 changes: 1 addition & 1 deletion tests/testthat/test_tableby.R
Original file line number Diff line number Diff line change
Expand Up @@ -1707,7 +1707,7 @@ test_that("wt (#321)", {
})


test_that("wt (#327)", {
test_that("medtest (#327)", {
expect_identical(
capture.kable(summary(tableby(sex ~ medtest(age), data = mockstudy), text = TRUE)),
c("| | Male (N=916) | Female (N=583) | Total (N=1499) | p value|",
Expand Down

3 comments on commit 1f853e8

@alexflaris
Copy link

@alexflaris alexflaris commented on 1f853e8 May 8, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hello again,

Thank you for your prompt reply and for modifying the package to account for NAs. The sign.test is working correctly.

The bug now is:
-the "N" option displays always the number of subjects in the group originally present, instead of how many where actually used for the calculation of the summary statistic. Same bug with Nmiss. I went back to the previous code for "N" and "Nmiss" and the problem was solved.

What I mean is that previously the table would show:

 "|                        | Time 1 (N=50)  | Time 2 (N=50)  | Difference (N=50) | p value|",
 "|:----------------|:-----------------|:------------------|:--------------------:|--------:|",
 "|                         |                         |                           |                               |   0.250|",
 "|:  N                   |        47              |        45               |        42                   |            |",

and now it shows:

 "|                        | Time 1 (N=50)  | Time 2 (N=50)  | Difference (N=50) | p value|",
 "|:----------------|:-----------------|:------------------|:--------------------:|--------:|",
 "|                         |                         |                           |                               |   0.250|",
 "|:  N                   |        50              |        50               |       50                   |            |",

Which is clearly wrong, since there are patients missing.

Also for Nsigntest, I believe that the most informative position would be next to the p-value instead of a separate line as in the following table (possibly with a footnote?)

 "|                        | Time 1 (N=50)  | Time 2 (N=50)  | Difference (N=50) | p value               |",
 "|:----------------|:-----------------|:------------------|:--------------------:|------------------:|",
 "|                         |                         |                           |                               |   0.250 (N = 32)|",
 "|:  N                   |        47              |        45               |        42                   |                          |",

Finally, for the difference column, especially when the summary statistic displayed is "medianmad", it would be very informative to show how many differences were negative, how many were 0 and how many were positive in order to have a better idea of the underlying observations (the string could be something like N/Z/P or -/0+ with or without the percentages of the observations depending on how crowded the lines are; or the percentages could be on a separate line):

 "|                        | Time 1 (N=50)  | Time 2 (N=50)  | Difference (N=50) | p value               |",
 "|:-------------|:-----------------|:------------------|:--------------------:|------------------:|",
 "|                         |                         |                           |                               |   0.250 (N = 32)|",
 "|:  N                    |       47              |        45               |        42                   |                          |",
 "|  Median(MAD) | XX (XX)            | XX (XX)              | XX (XX)                  |                           |",
 "|  -/0/+ (%)         |                         |                           |  5/10/20 (A%/B%/C%)|                           |",

Thank you again for a great package.
Alex

@eheinzen
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed. Dunno what I was thinking there. Chalk it up to a Friday afternoon, I guess? Sorry about that.

I'll track the other two requests in separate issues (#328 and #329)

@alexflaris
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's exactly what I thought! :)
Thanx again for taking care of it fast.

Please sign in to comment.