initialize from offline dev repo

bdilday · Jan 25, 2018 · 4df22b7 · 4df22b7
1 parent 1750a91
commit 4df22b7
Show file tree

Hide file tree

Showing 19 changed files with 1,533 additions and 1 deletion.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -0,0 +1,2 @@
+^.*\.Rproj$
+^\.Rproj\.user$
diff --git a/.gitignore b/.gitignore
@@ -31,3 +31,4 @@ vignettes/*.pdf
 # Temporary files created by R markdown
 *.utf8.md
 *.knit.md
+.Rproj.user
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -0,0 +1,14 @@
+Package: mnre
+Title: Multinomial Random Effects Models
+Version: 0.1
+Authors@R: person("Ben", "Dilday", email = "[email protected]", role = c("aut", "cre"))
+Description: Multinomial Random Effects Models
+Depends: R (>= 3.4.1)
+License: MIT
+Encoding: UTF-8
+LazyData: true
+Imports: Rcpp (>= 0.12.12)
+LinkingTo: Rcpp,
+  RcppArmadillo,
+  RcppEigen
+RoxygenNote: 6.0.1
diff --git a/NAMESPACE b/NAMESPACE
@@ -0,0 +1,13 @@
+# Generated by roxygen2: do not edit by hand
+
+export(mnre_dim_and_class_to_index)
+export(mnre_expand_matrix)
+export(mnre_fit_sparse)
+export(mnre_make_covar)
+export(mnre_simulate_ev_data)
+export(mnre_simulate_multinomial_data_factors)
+export(nd_min_fun)
+import(Matrix)
+importFrom(Rcpp,sourceCpp)
+importFrom(magrittr,"%>%")
+useDynLib(mnre)
diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -0,0 +1,90 @@
+# Generated by using Rcpp::compileAttributes() -> do not edit by hand
+# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
+
+#' Index from dimension and class indicies
+#' 
+#' @param i_dim integer
+#' @param i_class integer
+#' @param n_dim integer
+#' @examples
+#' mnre_dim_and_class_to_index(1, 1, 1)
+#' mnre_dim_and_class_to_index(2, 3, 5)
+#' @return The integer index corresponding to the dimension and class indicies
+#' @export
+mnre_dim_and_class_to_index <- function(i_dim, i_class, n_dim) {
+    .Call('_mnre_mnre_dim_and_class_to_index', PACKAGE = 'mnre', i_dim, i_class, n_dim)
+}
+
+#' Make covariance matrix
+#' 
+#' @param theta_mat numeric matrix
+#' @param Lind unsigned integer matrix
+#' @param off_diagonal numeric
+#' @examples
+#' \dontrun{
+#' theta_mat <- matrix(1:4, ncol=2)
+#' Lind <- matrix(c(rep(1, 10), rep(2, 5)), ncol=1)
+#' covar_mat <- mnre_make_covar(theta_mat, Lind)
+#' }
+#' @return Covariance matrix for random effects
+#' @export
+mnre_make_covar <- function(theta_mat, Lind, off_diagonal = 0.0) {
+    .Call('_mnre_mnre_make_covar', PACKAGE = 'mnre', theta_mat, Lind, off_diagonal)
+}
+
+#' Expand matrix
+#' 
+#' Used to duplicate the random effects coefficient matrix before applying the left factor
+#' @param x1 sparse matrix
+#' @param k_class integer
+#' @param direction integer
+#' @return Matrix expanded in the The integer index corresponding to the dimension and class indicies
+#' @export
+mnre_expand_matrix <- function(x1, k_class, direction) {
+    .Call('_mnre_mnre_expand_matrix', PACKAGE = 'mnre', x1, k_class, direction)
+}
+
+mnre_left_covar_factor <- function(x1) {
+    .Call('_mnre_mnre_left_covar_factor', PACKAGE = 'mnre', x1)
+}
+
+#' Multinomial mixed effects fit
+#' 
+#' Used to duplicate the random effects coefficient matrix before applying the left factor
+#' @param x1 sparse matrix
+#' @param k_class integer
+#' @param direction integer
+#' @return Matrix expanded in the The integer index corresponding to the dimension and class indicies
+#' @export
+mnre_fit_sparse <- function(fixed_effects, random_effects, y, theta_mat, Lind, beta_fixed, beta_random, verbose = 1L) {
+    .Call('_mnre_mnre_fit_sparse', PACKAGE = 'mnre', fixed_effects, random_effects, y, theta_mat, Lind, beta_fixed, beta_random, verbose)
+}
+
+mnre_lk_penalty <- function(beta_random, theta_norm, Lind) {
+    .Call('_mnre_mnre_lk_penalty', PACKAGE = 'mnre', beta_random, theta_norm, Lind)
+}
+
+mnre_lk_glm <- function(fixed_effects, random_effects, beta_fixed, beta_random, y, Lind) {
+    .Call('_mnre_mnre_lk_glm', PACKAGE = 'mnre', fixed_effects, random_effects, beta_fixed, beta_random, y, Lind)
+}
+
+mnre_lk <- function(fixed_effects, random_effects, beta_fixed, beta_random, y, theta_norm, Lind) {
+    .Call('_mnre_mnre_lk', PACKAGE = 'mnre', fixed_effects, random_effects, beta_fixed, beta_random, y, theta_norm, Lind)
+}
+
+mnre_mu <- function(fixed_effects, random_effects, beta_fixed, beta_random) {
+    .Call('_mnre_mnre_mu', PACKAGE = 'mnre', fixed_effects, random_effects, beta_fixed, beta_random)
+}
+
+mnre_mu_x <- function(fe_x, re_x, beta_fixed, beta_random) {
+    .Call('_mnre_mnre_mu_x', PACKAGE = 'mnre', fe_x, re_x, beta_fixed, beta_random)
+}
+
+mnre_step_sparse <- function(fixed_effects, random_effects, y, beta_fixed, beta_random, lambda_norm, Lind) {
+    .Call('_mnre_mnre_step_sparse', PACKAGE = 'mnre', fixed_effects, random_effects, y, beta_fixed, beta_random, lambda_norm, Lind)
+}
+
+fill_mtwm_x <- function(x1, x2, mu) {
+    .Call('_mnre_fill_mtwm_x', PACKAGE = 'mnre', x1, x2, mu)
+}
+
diff --git a/R/main.R b/R/main.R
@@ -0,0 +1,243 @@
+
+#' @import Matrix
+#' @importFrom magrittr %>%
+#' @export
+mnre_simulate_ev_data <- function(nlim=1000, year=2016, OBP=FALSE) {
+  ev_obj <- BProDRA:::generate_model_df(nlim=nlim, year=year)
+  dfX <- ev_obj$ev
+
+  # the BPRO code codes it like this
+  # ev[cc_bip0,]$outcome <- 1
+  # ev[cc_bip1,]$outcome <- 2
+  # ev[cc_bip2,]$outcome <- 3
+  # ev[cc_hr,]$outcome <- 4
+  # ev[cc_so,]$outcome <- 5
+  # ev[cc_bb,]$outcome <- 6
+
+  idx1 <- which(dfX$outcome == 1)
+  idx2 <- which(dfX$outcome == 2)
+  idx3 <- which(dfX$outcome == 3)
+  idx4 <- which(dfX$outcome == 4)
+  idx5 <- which(dfX$outcome == 5)
+  idx6 <- which(dfX$outcome == 6)
+
+  # make batted ball for out the reference
+  dfX[idx1,]$outcome <- 0
+
+  # only 1 kind of batted ball for hit
+  dfX[c(idx2, idx3),]$outcome <- 1
+
+  # shift the other categories up
+  # HR
+  dfX[idx4,]$outcome <- 2
+
+  # shift the other categories up
+  # SO
+  dfX[idx5,]$outcome <- 3
+
+  # shift the other categories up
+  # BB
+  dfX[idx6,]$outcome <- 4
+
+  # OBP
+  if (OBP) {
+    dfX[c(idx1, idx5),]$outcome <- 0
+    dfX[c(idx2, idx3, idx4, idx6),]$outcome <- 1
+  }
+
+  dfX$y <- dfX$outcome
+
+   glf <- lme4::glFormula(y ~ (1|BAT_ID) + (1|PIT_ID) + (1|HOME_TEAM_ID),
+                          data=dfX, family='binomial')
+  #glf <- lme4::glFormula(y ~ (1|PIT_ID) + (1|BAT_ID),
+   #                      data=dfX, family='binomial')
+
+  dfX <- dfX %>% dplyr::mutate(pid=as.integer(as.factor(PIT_ID)))
+  dfX <- dfX %>% dplyr::mutate(bid=as.integer(as.factor(BAT_ID)))
+
+  dfX <- dfX %>% dplyr::mutate(sid=as.integer(as.factor(HOME_TEAM_ID)))
+  dfX <- dfX %>% dplyr::mutate(bid = bid+max(pid))
+  dfX <- dfX %>% dplyr::mutate(sid = sid+max(bid))
+
+  re <- random_effects <- Matrix::t(glf$reTrms$Zt)
+  fe <- fixed_effects <- (glf$X)
+  k_class <- max(dfX$y)
+  beta_re <- matrix(rep(0, k_class * dim(re)[[2]]), ncol=k_class)
+  beta_fe <- matrix(rep(0, k_class * dim(fe)[[2]]), ncol=k_class)
+  xx <- model.matrix(y ~ BAT_ID + PIT_ID + HOME_TEAM_ID, data=dfX)
+#  xx <- model.matrix(y ~ BAT_ID + PIT_ID, data=dfX)
+  y <- matrix(dfX$y, ncol=1)
+  theta_init <- matrix(rep(glf$reTrms$theta, k_class), ncol=k_class)
+  list(fr=dfX, frm=glf$formula,
+       ev_obj=ev_obj, xx=xx, y=y,
+       re=re, fe=fe,
+       beta_re=beta_re, beta_fe=beta_fe,
+       theta_init=theta_init,
+       Lind=matrix(glf$reTrms$Lind, ncol=1))
+}
+
+
+
+#' @export
+mnre_simulate_multinomial_data_factors <- function(rseed=101,
+                                                           nfct=2,
+                                                           nlev=10,
+                                                           K_class=3,
+                                                           nobs=10000) {
+
+  set.seed(rseed)
+
+  k <- K_class - 1
+  fcts <- lapply(1:nfct, function(i) {sprintf("%s%03d", LETTERS[i], 1:nlev)})
+  sigmas <- matrix(rep(1, nfct * k), ncol=k)
+
+  coef_null_class <- matrix(rnorm(nfct * length(fcts[[1]])), ncol=nfct)
+
+  # the null class needs to be identically 0, otherwise there will be a
+  # spurious corelation between coefficients for the other classes.
+  coef_null_class = coef_null_class * 0
+
+  coefs <- lapply(1:nfct, function(i) {
+    tmp <- as.data.frame(
+      matrix(rnorm(k * length(fcts[[i]])), ncol=k) - coef_null_class[,i]
+      )
+    tmp$fct <- as.factor(fcts[[i]])
+    tmp
+  })
+
+  df_obs <- lapply(1:length(coefs), function(i) {
+    dplyr::data_frame(fct=sample(fcts[[i]], nobs, replace = TRUE))
+  })
+
+  df_obs <- dplyr::bind_cols(df_obs)
+
+  names(df_obs) <- sprintf("fct%02d", 1:nfct)
+
+  dfX <- df_obs
+  for (i in 1:length(coefs)) {
+    dfX <- dfX %>% merge(coefs[[i]], by.x=sprintf("fct%02d", i), by.y="fct")
+  }
+
+  # must be identically 0
+  icpt_null_class <- 0
+  icpts <- matrix(rnorm(k), ncol=k) - icpt_null_class
+
+  oc <- sapply(1:nrow(dfX), function(i) {
+
+    r <- dfX[i,]
+    seq(nfct+1,ncol(r),k)
+
+    lams <- sapply(1:k, function(j) {
+      noise <- rnorm(1, 0, 0.2)
+      ii=seq(nfct+j,ncol(r),k)
+      icpts[[j]] + sum(r[ii]) + noise
+    })
+
+    probs <- c(1, exp(lams))
+
+    tmp <- rmultinom(1, 1, probs)
+    as.integer(which(tmp > 0) - 1 )
+  })
+
+  dfX <- dfX %>% dplyr::mutate(y=oc)
+  fct_cut <- which(grepl('^fct|^y$', names(dfX)))
+
+  if (length(fct_cut) > 1) {
+    dfY <- dfX[,fct_cut]
+  } else {
+    dfY <- dplyr::data_frame(fct01=dfX[,fct_cut])
+  }
+
+  s = ' y ~ (1|fct01) '
+
+  if (nfct >= 2) {
+    for (i in 2:nfct) {
+      s <- sprintf("%s + (1|fct%02d) ", s, i)
+    }
+  }
+
+  frm <- as.formula(s)
+  glf <- lme4::glFormula(frm,
+                         data=dfY, family='binomial')
+  re <- random_effects <- Matrix::t(glf$reTrms$Zt)
+  fe <- fixed_effects <- (glf$X)
+  k_class <- max(dfY$y)
+  beta_re <- matrix(rep(0, k_class * dim(re)[[2]]), ncol=k_class)
+  beta_fe <- matrix(rep(0, k_class * dim(fe)[[2]]), ncol=k_class)
+  y <- matrix(dfY$y, ncol=1)
+  theta_init <- matrix(rep(glf$reTrms$theta, k_class), ncol=k_class)
+  ans <- list(true_pars=dfX,
+              fr=glf$fr, y=y,
+              re=re, fe=fe,
+              frm=frm,
+              beta_re=beta_re, beta_fe=beta_fe,
+              theta_init=theta_init,
+              Lind=matrix(glf$reTrms$Lind, ncol=1),
+              off_diagonal=0.0)
+
+  ans
+
+}
+
+
+#' N-dimensional function generator
+#' @param ev list
+#' @return deviance function
+#' @examples 
+#' \dontrun{
+#' ev = mnre_simulate_multinomial_data_factors(nfct=2, K_class = 2, nlev=50, nobs=20000)
+#' nf <- nd_min_fun(ev)
+#' nf(c(1,1))
+#' }
+#' @export
+nd_min_fun <- function(ev) {
+
+  frm <- ev$frm
+  if ("off_diagonal" %in% names(ev)) {
+    ev$off_diagonal <- ev$off_diagonal
+  } else {
+    ev$off_diagonal <- 0.0
+  }
+
+  if (!"verbose" %in% names(ev)) {
+    ev$verbose <- 1
+  }
+
+  function(mval) {
+    glf <- lme4::glFormula(ev$frm,
+                           data=ev$fr, family='binomial')
+    fe <- fixed_effects <- (glf$X)
+    re <- random_effects <- Matrix::t(glf$reTrms$Zt)
+    y <- matrix(ev$y, ncol=1)
+    k_class <- max(y)
+    k <- max(y)
+    Lind = ev$Lind
+
+    s = 'mval '
+    for (v in mval) {
+      s = sprintf("%s %.4e ", s, v)
+    }
+    message(s)
+
+    theta_mat <- matrix(mval, ncol=k_class)
+
+    fe2 <- Matrix::Matrix(fe, sparse = TRUE)
+
+    beta_re <- ev$beta_re
+    beta_fe <- ev$beta_fe
+
+    # beta_re <- matrix(rep(0, ncol(re) * k_class), ncol=k_class)
+    # beta_fe <- matrix(rep(0, ncol(fe) * k_class), ncol=k_class)
+    #
+    message("starting beta ", beta_fe[[1]], " ", beta_re[[1]])
+    zz <- mnre_fit_sparse(fe2, re, y, theta_mat, Lind, beta_fe, beta_re, verbose = ev$verbose)
+
+    ev$beta_fe <<- zz$beta_fixed
+    ev$beta_re <<- zz$beta_random
+
+    zz$loglk + zz$loglk_det
+
+  }
+}
+
+
diff --git a/R/mnre.R b/R/mnre.R
@@ -0,0 +1,4 @@
+#' @useDynLib mnre
+#' @importFrom Rcpp sourceCpp
+NULL
+