Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Daphne dev #17

Merged
merged 24 commits into from
Apr 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Imports:
stringr
Depends:
R (>= 2.10)
Config/testthat/edition: 3
Suggests:
knitr,
rmarkdown,
Expand Down
157 changes: 117 additions & 40 deletions R/parse_vcf.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,45 +6,122 @@
#' @param type Which input format will be generated. Options: ncd1, ncd2.
#' @return Returns a data table object with columns CHR POS REF ALT tx_1 tn_1 (if type is ncd1) or CHR POS REF ALT tx_1 tn_1 tx_2 tn_2 (if type is ncd2)
#' @export
#' @examples parse_vcf(infile=system.file(package="balselr", "example.vcf"), n0=108, type="ncd1")
#' @examples parse_vcf(infile=system.file(package="balselr", "example.vcf"), n0=108, n1=1, type="ncd2")
#' @param verbose Logical. Printed updates. Default is T.

# to do: test that outputs of both commands in the examples have the same values for tx_1 and tn_1
parse_vcf <- function(infile = NULL,
n0 = NULL,
n1 = NULL,
type = NULL,
verbose=F) {

assertthat::assert_that(file.exists(infile),
msg = glue::glue("VCF file {infile} does not exist.\n"))
#to do: check that n0 and n1 meet requirements outlined above.
#to do: check that vcf file has at least the number of individuals provided in n0+n1 (i.e., if users says n0=10, n1=1 but vcf only has 8 individuals, this should return an error as early as possible)
type <- tolower(type)
assertthat::assert_that(type %in% c("ncd1", "ncd2"), msg = "ncd_type must be either ncd1 or ncd2.\n")
if (type == "ncd2") {
assertthat::assert_that(is.null(n1) == FALSE, msg = "n1 cannot be 'NULL'. NCD2 requires an outgroup at least one individual as an outgroup.")
#'
#' @examples parse_vcf(infile="inst/example.vcf", outfile="inst/example_parse_ncd1.out", n0=108, type="ncd1")
#'parse_vcf(infile="inst/example.vcf",n0=108, n1=1, type="ncd2", outfile="example_parse_ncd2.out")

parse_vcf <-
function(infile = "*.vcf",
outfile = NULL,
n0= NULL,
n1= NULL,
type = NULL,
fold = T,
intern = T,
verbose = T) {
tictoc::tic("Total runtime")
assertthat::assert_that(file.exists(infile),
msg = glue::glue("VCF file {infile} does not exist.\n")
)
assertthat::assert_that(is.numeric(n0)
)
format_type <- function(type) {
incorrect_types_ncd1 <- c("ncd1", "ned1", "ncd_1", "ned_1")
incorrect_types_ncd2 <- c("ncd2", "ned2", "ncd_2", "ned_2")

type <- tolower(type)
if (type %in% incorrect_types_ncd1) {
type <- "ncd1"
} else if (type %in% incorrect_types_ncd2) {
type <- "ncd2"
}

return(type)

}
inp <- read_vcf(x = infile)
nind<-c(n0,n1)
index.col <- which(colnames(inp) == "FORMAT") + 1
if (type == "ncd2") {
res <-
.vcf_ncd2(
x = inp,
nind = nind,
index.col = index.col,
verbose = verbose
)
} else if (type == "ncd1") {
res <-
.vcf_ncd1(
x = inp,
nind = nind,
index.col = index.col,
verbose = verbose
)
if (is.null(type)) {
print("You must choose either 'ncd1' or 'ncd2'")
return(NULL)
}
return(res)
}
# Index No. of the individual to use as ``ancestral'' sequence
if (fold == F & type == "ncd2") {
outseq <- (index.col) + (sum(nind) - 1)
if (verbose == T) {
cat(
glue::glue(
"You asked for the unfolded version. You need a third species. I will use column {outseq} for this."
),
"\n"
)
}
}else if (type == "ncd2"){
assertthat::assert_that(is.null(n1)==FALSE, msg="n1 cannot be NULL. NCD2 requires an outgroup.")
}
else if (type == "ncd1") {
assertthat::assert_that(fold == T, msg = "Only the folded option is available when Ancestral/Derived states are unknown.\n")
}
#
if (is.null(outfile)) {
outfile = outfile_path(glue::glue("{infile}"))
if (verbose == T) {
cat(
glue::glue(
"No outfile provided. Will write this into tmp file {outfile}_{type}.out"
),
"\n"
)
}
}else{
outfile <- glue::glue("{outfile}_{type}.out")
}

nind<-c(n0,n1)
pref <- gsub(".vcf", "", infile)
inp <- read_vcf(x = infile, only.bi = T)

index.col <- which(colnames(inp) == "FORMAT") + 1
if (verbose == T) {
cat(
glue::glue(
"First genotype column is {index.col}"
), "\n"
)
}
if (verbose == T) {
cat(
glue::glue("Creating input file for {type} from {infile}..."),
"\n"
)
}


#
if (type == "ncd2") {
res <-
.vcf_ncd2(
x = inp,
outfile = outfile,
nind = nind,
index.col = index.col,
fold = fold,
verbose = verbose
)
} else if (type == "ncd1") {
res <-
.vcf_ncd1(
x = inp,
outfile = outfile,
nind = nind,
index.col = index.col,
verbose = verbose
)
}
if (verbose == T) {
cat(
glue::glue("Finished making input file for {type}. File written to: {outfile}"),
"\n"
)
tictoc::toc()
} else {
cat(glue::glue("File written to: {outfile}"), "\n")
}
3 changes: 1 addition & 2 deletions R/read_vcf.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,5 @@ read_vcf <- function(x = "inst/example.vcf") {
inp <- data.table::fread(x, skip = "##", header = T)
data.table::setnames(inp, "#CHROM", "CHR")
#inp <-
inp[REF %in% c("A", "C", "T", "G")][ALT %in% c("A", "C", "T", "G")]
#return(inp)
inp[REF %in% c("A", "C", "T", "G")][ALT %in% c("A", "C", "T", "G")]
}
3 changes: 0 additions & 3 deletions data-raw/DATASET.R
Original file line number Diff line number Diff line change
@@ -1,3 +0,0 @@
## code to prepare `DATASET` dataset goes here

usethis::use_data(DATASET, overwrite = TRUE)
5 changes: 0 additions & 5 deletions tests/testthat.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,3 @@
# Learn more about the roles of various files in:
# * https://r-pkgs.org/tests.html
# * https://testthat.r-lib.org/reference/test_package.html#special-files

library(testthat)
library(balselr)

test_check("balselr")
Loading