-
Notifications
You must be signed in to change notification settings - Fork 0
/
est_alpha_ind.R
97 lines (76 loc) · 2.79 KB
/
est_alpha_ind.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
##### Estimation of alpha using individual-level data#####
# Vary N=5000, 10000,
# h2=0.2, 0.4, 0.6, 0.8 to get
# Figures S40 in Supplementary Document
library(LSMM)
library(MASS)
# function to generate data
generate_data_ind <- function(M, N, L, K, Z.perc, A.perc, beta0, b, omega, sigma2, h2){
# genotype data
X <- matrix(1, N, M)
X1 <- matrix(runif(M*N), N, M)
f <- runif(M, 0.05, 0.5)
f2 <- matrix(rep(f^2, each = N), N, M)
f1_2 <- matrix(rep((1 - f)^2, each = N), N, M)
X[X1 < f2] <- 2
X[(1 - X1) < f1_2] <- 0
# design matrix of fixed effects
Z <- rep(0, M*L)
indexZ <- sample(M*L, M*L*Z.perc)
Z[indexZ] <- 1
Z <- matrix(Z, M, L)
# design matrix of random effects
A <- rep(0, M*K)
indexA <- sample(M*K, M*K*A.perc)
A[indexA] <- 1
A <- matrix(A, M, K)
# eta (latent variable which indicate whether the annotation is relevant to the phenotype)
eta <- rep(0, K)
indexeta <- sample(K, K*omega)
eta[indexeta] <- 1
# beta (random effects)
beta <- rep(0, K)
beta[indexeta] <- rnorm(K*omega, 0, sqrt(sigma2))
# gamma (latent variable which indicate whether the SNP is associated with the phenotype)
pi1 <- sigma(beta0 + Z %*% b + A %*% beta)
gamma <- rep(0, M)
indexgamma <- (runif(M) < pi1)
gamma[indexgamma] <- 1
# effect size
beta_SNP <- numeric(M)
beta_SNP[indexgamma] <- rnorm(sum(indexgamma), 0, 1)
# environment effect
e <- rnorm(N, 0, sqrt((1/h2-1)*var(X%*%beta_SNP)))
# phenotype
y <- X%*%beta_SNP + e
# p-value
Pvalue <- t(apply(X, 2, function(X.col) summary(lm(y ~ X.col))$coefficients[2,4]))
return( list(Pvalue = Pvalue, Z = Z, A = A, beta = beta, eta = eta, gamma = gamma))
}
# sigmoid function
sigma <- function(x){
y <- 1/(1+exp(-x))
return (y)
}
M <- 20000 # No. of SNPs
N <- 10000 # No. of individuals
L <- 10 # No. of fixed effects
K <- 100 # No. of random effects
Z.perc <- 0.1 # the proportion the entries in Z is 1
A.perc <- 0.1 # the proportion the entries in A is 1
beta0 <- -2 # intercept of the logistic model
set.seed(1)
b <- rnorm(L) # fixed effects
omega <- 0.1 # proportion of relevant annotations
sigma2 <- 1 # parameter in the spike-slab prior
h2 <- 0.8 # heritability
rep <- 50 # repeat times
result <- numeric(rep)
for (i in 1:rep){
cat(i, "out of", rep, "\n")
data <- generate_data_ind(M, N, L, K, Z.perc, A.perc, beta0, b, omega, sigma2, h2)
fit <- LSMM(data$Pvalue, data$Z, data$A)
result[i] <- fit$alpha
}
result <- as.data.frame(result)
names(result) <- c("est.alpha.ind")