Skip to content

Commit

Permalink
Allow for using subset of variants for CV
Browse files Browse the repository at this point in the history
  • Loading branch information
gaow committed Feb 4, 2024
1 parent e0e50bb commit 1d6014e
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 11 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Encoding: UTF-8
Type: Package
Package: pecotmr
Version: 0.1.26
Date: 2024-01-31
Version: 0.1.27
Date: 2024-02-03
Title: Pair-wise enrichment, colocalization, TWAS and Mendelian Randomization to integrate molecular QTL and GWAS.
Description: The majority of the statistical models in pecotmr are based on fine-mapped single effects described in Wang G et al (2020) JRSS-B. It also incoporates several other TWAS methods as well as utility functions for QTL and GWAS integration.
URL: https://github.com/cumc/pecotmr
Expand Down
12 changes: 4 additions & 8 deletions R/encoloc.R
Original file line number Diff line number Diff line change
Expand Up @@ -110,12 +110,6 @@ calculate_purity <- function(variants, ext_ld, squared) {
purity
}

#' Function to convert region df to str
convert_to_string <- function(df) {
result <- paste0("chr", df$chrom, ":", df$start, "-", df$end)
return(result)
}

#' Main processing function
#' This function is designed to summarize coloc results based on the following criteria:
#' 1. Among the colocalized variant pairs, PPH4 has the highest value compared to PPH0-PPH3.
Expand Down Expand Up @@ -225,6 +219,7 @@ process_coloc_results <- function(coloc_result, LD_meta_file_path,analysis_scrip
coloc_wrapper <- function(xqtl_file, gwas_files, xqtl_condition = NULL, gwas_condition = NULL,
finemapping_obj = "susie_result_trimmed", varname_obj = "variant_names", xqtl_region_obj = "region_info",
prior_tol = 1e-9, p1=1e-4, p2=1e-4, p12=5e-6, ...) {

# Load and process GWAS data
gwas_lbf_matrices <- lapply(gwas_files, function(file) {
raw_data <- readRDS(file)
Expand Down Expand Up @@ -275,8 +270,9 @@ coloc_wrapper <- function(xqtl_file, gwas_files, xqtl_condition = NULL, gwas_con
# Report the number of dropped columns from xQTL matrix
num_dropped_cols <- length(setdiff(colnames(xqtl_lbf_matrix), common_colnames))
message("Number of columns dropped from xQTL matrix: ", num_dropped_cols)



# Function to convert region df to str
convert_to_string <- function(df) paste0("chr", df$chrom, ":", df$start, "-", df$end)
region <- if (!is.null(xqtl_region_obj)) get_nested_element(xqtl_raw_data, xqtl_region_obj)$region %>% convert_to_string else NULL

# COLOC function
Expand Down
10 changes: 9 additions & 1 deletion R/twas.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ twas_z <- function(weights, z, R=NULL, X=NULL) {
#' @param weight_methods A list of methods and their specific arguments, formatted as list(method1 = method1_args, method2 = method2_args).
#' methods in the list can be either univariate (applied to each column of Y) or multivariate (applied to the entire Y matrix).
#' @param seed An optional integer to set the random seed for reproducibility of sample splitting.
#' @param max_num_variants An optional integer to set the randomly selected maximum number of variants to use for CV purpose, to save computing time.
#' @param num_threads The number of threads to use for parallel processing.
#' If set to -1, the function uses all available cores.
#' If set to 0 or 1, no parallel processing is performed.
Expand All @@ -65,7 +66,7 @@ twas_z <- function(weights, z, R=NULL, X=NULL) {
#' @importFrom foreach %dopar%
#' @importFrom doParallel registerDoParallel
#' @export
twas_weights_cv <- function(X, Y, fold = NULL, sample_partitions = NULL, weight_methods = NULL, seed = NULL, num_threads = 1, ...) {
twas_weights_cv <- function(X, Y, fold = NULL, sample_partitions = NULL, weight_methods = NULL, seed = NULL, max_num_variants = NULL, num_threads = 1, ...) {
split_data <- function(X, Y, sample_partition, fold){
if (is.null(rownames(X))) {
warning("Row names in X are missing. Using row indices.")
Expand Down Expand Up @@ -111,6 +112,13 @@ twas_weights_cv <- function(X, Y, fold = NULL, sample_partitions = NULL, weight_
} else {
sample_names <- 1:nrow(X)
}

# Select variants if necessary
if (!is.null(max_num_variants) && ncol(X)> max_num_variants) {
selected_columns <- sort(sample(ncol(X), max_num_variants, replace = FALSE))
message(paste("Randomly selecting", ncol(X[, selected_columns]), "out of", ncol(X), "variants for cross validation purpose."))
X <- X[, selected_columns]
}

arg <-list(...)

Expand Down
3 changes: 3 additions & 0 deletions man/twas_weights_cv.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 1d6014e

Please sign in to comment.