Skip to content

Commit

Permalink
Merge pull request #17 from bitarellolab/daphne-dev
Browse files Browse the repository at this point in the history
Daphne dev
  • Loading branch information
bbitarello authored Apr 18, 2024
2 parents 5ca793c + d60c89f commit 8e8fa71
Show file tree
Hide file tree
Showing 5 changed files with 119 additions and 50 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Imports:
stringr
Depends:
R (>= 2.10)
Config/testthat/edition: 3
Suggests:
knitr,
rmarkdown,
Expand Down
157 changes: 117 additions & 40 deletions R/parse_vcf.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,45 +6,122 @@
#' @param type Which input format will be generated. Options: ncd1, ncd2.
#' @return Returns a data table object with columns CHR POS REF ALT tx_1 tn_1 (if type is ncd1) or CHR POS REF ALT tx_1 tn_1 tx_2 tn_2 (if type is ncd2)
#' @export
#' @examples parse_vcf(infile=system.file(package="balselr", "example.vcf"), n0=108, type="ncd1")
#' @examples parse_vcf(infile=system.file(package="balselr", "example.vcf"), n0=108, n1=1, type="ncd2")
#' @param verbose Logical. Printed updates. Default is T.

# to do: test that outputs of both commands in the examples have the same values for tx_1 and tn_1
parse_vcf <- function(infile = NULL,
n0 = NULL,
n1 = NULL,
type = NULL,
verbose=F) {

assertthat::assert_that(file.exists(infile),
msg = glue::glue("VCF file {infile} does not exist.\n"))
#to do: check that n0 and n1 meet requirements outlined above.
#to do: check that vcf file has at least the number of individuals provided in n0+n1 (i.e., if users says n0=10, n1=1 but vcf only has 8 individuals, this should return an error as early as possible)
type <- tolower(type)
assertthat::assert_that(type %in% c("ncd1", "ncd2"), msg = "ncd_type must be either ncd1 or ncd2.\n")
if (type == "ncd2") {
assertthat::assert_that(is.null(n1) == FALSE, msg = "n1 cannot be 'NULL'. NCD2 requires an outgroup at least one individual as an outgroup.")
#'
#' @examples parse_vcf(infile="inst/example.vcf", outfile="inst/example_parse_ncd1.out", n0=108, type="ncd1")
#'parse_vcf(infile="inst/example.vcf",n0=108, n1=1, type="ncd2", outfile="example_parse_ncd2.out")

parse_vcf <-
function(infile = "*.vcf",
outfile = NULL,
n0= NULL,
n1= NULL,
type = NULL,
fold = T,
intern = T,
verbose = T) {
tictoc::tic("Total runtime")
assertthat::assert_that(file.exists(infile),
msg = glue::glue("VCF file {infile} does not exist.\n")
)
assertthat::assert_that(is.numeric(n0)
)
format_type <- function(type) {
incorrect_types_ncd1 <- c("ncd1", "ned1", "ncd_1", "ned_1")
incorrect_types_ncd2 <- c("ncd2", "ned2", "ncd_2", "ned_2")

type <- tolower(type)
if (type %in% incorrect_types_ncd1) {
type <- "ncd1"
} else if (type %in% incorrect_types_ncd2) {
type <- "ncd2"
}

return(type)

}
inp <- read_vcf(x = infile)
nind<-c(n0,n1)
index.col <- which(colnames(inp) == "FORMAT") + 1
if (type == "ncd2") {
res <-
.vcf_ncd2(
x = inp,
nind = nind,
index.col = index.col,
verbose = verbose
)
} else if (type == "ncd1") {
res <-
.vcf_ncd1(
x = inp,
nind = nind,
index.col = index.col,
verbose = verbose
)
if (is.null(type)) {
print("You must choose either 'ncd1' or 'ncd2'")
return(NULL)
}
return(res)
}
# Index No. of the individual to use as ``ancestral'' sequence
if (fold == F & type == "ncd2") {
outseq <- (index.col) + (sum(nind) - 1)
if (verbose == T) {
cat(
glue::glue(
"You asked for the unfolded version. You need a third species. I will use column {outseq} for this."
),
"\n"
)
}
}else if (type == "ncd2"){
assertthat::assert_that(is.null(n1)==FALSE, msg="n1 cannot be NULL. NCD2 requires an outgroup.")
}
else if (type == "ncd1") {
assertthat::assert_that(fold == T, msg = "Only the folded option is available when Ancestral/Derived states are unknown.\n")
}
#
if (is.null(outfile)) {
outfile = outfile_path(glue::glue("{infile}"))
if (verbose == T) {
cat(
glue::glue(
"No outfile provided. Will write this into tmp file {outfile}_{type}.out"
),
"\n"
)
}
}else{
outfile <- glue::glue("{outfile}_{type}.out")
}

nind<-c(n0,n1)
pref <- gsub(".vcf", "", infile)
inp <- read_vcf(x = infile, only.bi = T)

index.col <- which(colnames(inp) == "FORMAT") + 1
if (verbose == T) {
cat(
glue::glue(
"First genotype column is {index.col}"
), "\n"
)
}
if (verbose == T) {
cat(
glue::glue("Creating input file for {type} from {infile}..."),
"\n"
)
}


#
if (type == "ncd2") {
res <-
.vcf_ncd2(
x = inp,
outfile = outfile,
nind = nind,
index.col = index.col,
fold = fold,
verbose = verbose
)
} else if (type == "ncd1") {
res <-
.vcf_ncd1(
x = inp,
outfile = outfile,
nind = nind,
index.col = index.col,
verbose = verbose
)
}
if (verbose == T) {
cat(
glue::glue("Finished making input file for {type}. File written to: {outfile}"),
"\n"
)
tictoc::toc()
} else {
cat(glue::glue("File written to: {outfile}"), "\n")
}
3 changes: 1 addition & 2 deletions R/read_vcf.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,5 @@ read_vcf <- function(x = "inst/example.vcf") {
inp <- data.table::fread(x, skip = "##", header = T)
data.table::setnames(inp, "#CHROM", "CHR")
#inp <-
inp[REF %in% c("A", "C", "T", "G")][ALT %in% c("A", "C", "T", "G")]
#return(inp)
inp[REF %in% c("A", "C", "T", "G")][ALT %in% c("A", "C", "T", "G")]
}
3 changes: 0 additions & 3 deletions data-raw/DATASET.R
Original file line number Diff line number Diff line change
@@ -1,3 +0,0 @@
## code to prepare `DATASET` dataset goes here

usethis::use_data(DATASET, overwrite = TRUE)
5 changes: 0 additions & 5 deletions tests/testthat.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,3 @@
# Learn more about the roles of various files in:
# * https://r-pkgs.org/tests.html
# * https://testthat.r-lib.org/reference/test_package.html#special-files

library(testthat)
library(balselr)

test_check("balselr")

0 comments on commit 8e8fa71

Please sign in to comment.