From 9bd236173422ace20afe0ad51f8ece08acbe6043 Mon Sep 17 00:00:00 2001
From: WANG Zhiwei <48282751+statwangz@users.noreply.github.com>
Date: Wed, 20 Sep 2023 02:34:14 +0800
Subject: [PATCH] Support the sparse mode for the main data matrix Y

---
 NAMESPACE                   |   5 +-
 R/mfairBackfitting.R        | 112 ++++++++++++------
 R/mfairELBO.R               |  37 ++++--
 R/mfairGreedy.R             |  38 ++++--
 R/mfairImportance.R         |   1 -
 R/mfairInitialization.R     |   6 +-
 R/mfairPredict.R            |  11 +-
 R/mfairSingleFactor.R       | 227 +++++++++++++++++++++++++++++++++---
 R/mfairUtils.R              |  22 ++++
 man/fitSFMissing.Rd         |   6 +-
 man/fitSFSparse.Rd          |  55 +++++++++
 man/getELBO.Rd              |   4 +-
 man/predict-MFAIR-method.Rd |   4 +-
 man/projSparse.Rd           |  19 +++
 14 files changed, 458 insertions(+), 89 deletions(-)
 create mode 100644 R/mfairUtils.R
 create mode 100644 man/fitSFSparse.Rd
 create mode 100644 man/projSparse.Rd

diff --git a/NAMESPACE b/NAMESPACE
index ee9f374..9f3f4e6 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -4,14 +4,11 @@ export(appendMFAIR)
 export(createMFAIR)
 export(fitBack)
 export(fitGreedy)
-export(fitSFFully)
-export(fitSFMissing)
 export(getELBO)
 export(getImportance)
-export(getImportanceSF)
 export(initSF)
 export(predictFX)
-export(predictFXSF)
+export(projSparse)
 export(updateMFAIR)
 exportClasses(MFAIR)
 exportClasses(MFAIRSingleFactor)
diff --git a/R/mfairBackfitting.R b/R/mfairBackfitting.R
index 4e44ea7..8e6d99d 100644
--- a/R/mfairBackfitting.R
+++ b/R/mfairBackfitting.R
@@ -1,5 +1,6 @@
 #' Fit the MFAI model using backfitting algorithm.
 #'
+#' @import Matrix
 #' @importFrom methods new
 #' @importFrom rpart rpart.control
 #'
@@ -27,14 +28,17 @@ fitBack <- function(object,
                     sf_para = list()) {
   # Check K
   if (object@K == 1) {
-    stop("The backfitting algorithm is equivalent to the greedy algorithm when rank K = 1!")
+    stop("The backfitting algorithm is equivalent to the greedy algorithm
+         when rank K = 1!")
   } # End
 
   # Check fitted functions F(), i.e., tree_0 and tree_lists
   if (length(object@tree_0) == 0) {
     object@tree_0 <- matrix(0, nrow = 1, ncol = object@K)
     warning("The previous tree_0 (i.e., fitted functions) may not be saved!\n")
-    warning("The new tree_lists obtained after the backfitting algorithm may not accurately predict the new sample with auxiliary covariates.!\n")
+    warning("The new tree_lists obtained after the backfitting algorithm
+            may not accurately predict the new sample
+            with auxiliary covariates.!\n")
   }
   if (length(object@tree_lists) == 0) {
     object@tree_lists <- lapply(1:object@K,
@@ -42,8 +46,11 @@ fitBack <- function(object,
         list()
       }
     )
-    warning("The previous tree_lists (i.e., fitted functions) may not be saved!\n")
-    warning("The new tree_lists obtained after the backfitting algorithm may not accurately predict the new sample with auxiliary covariates.!\n")
+    warning("The previous tree_lists (i.e., fitted functions)
+            may not be saved!\n")
+    warning("The new tree_lists obtained after the backfitting algorithm
+            may not accurately predict the new sample
+            with auxiliary covariates.!\n")
   }
 
   # Set up parameters for the gradient boosting part
@@ -67,7 +74,11 @@ fitBack <- function(object,
 
   # Will be used for the partially observed matrix fitting
   if (object@Y_missing) {
-    obs_indices <- !is.na(object@Y)
+    if (object@Y_sparse) {
+      obs_indices <- as.matrix(summary(Y)[, c(1, 2)])
+    } else {
+      obs_indices <- !is.na(object@Y)
+    }
   }
 
   tau <- object@tau
@@ -76,8 +87,7 @@ fitBack <- function(object,
   # Begin backfitting algorithm
   for (iter in 1:iter_max_bf) {
     for (k in 1:object@K) {
-      # The residual (low-rank approximation using all factors but k-th)
-      R <- object@Y + object@Y_mean - predict(object, which_factors = -k)
+      # Initialize
       mfair_sf <- new(
         Class = "MFAIRSingleFactor",
         Y_missing = object@Y_missing,
@@ -93,18 +103,20 @@ fitBack <- function(object,
         tree_list = object@tree_lists[[k]]
       )
 
-      if (object@Y_missing) {
-        # mfair_sf <- fitSFMissing(R, obs_indices, object@X, mfair_sf,
-        #   object@learning_rate,
-        #   tree_parameters = object@tree_parameters,
-        #   ...
-        # )
+      if (object@Y_sparse) {
+        # The residual (low-rank approximation using all factors but k-th)
+        R <- object@Y -
+          projSparse(
+            predict(object, which_factors = -k, add_mean = FALSE),
+            obs_indices
+          )
+
         mfair_sf <- do.call(
-          what = "fitSFMissing",
+          what = "fitSFSparse",
           args = append(
             list(
-              Y = R, obs_indices = obs_indices, X = object@X,
-              init = mfair_sf,
+              Y = R, X = object@X, init = mfair_sf,
+              obs_indices = obs_indices,
               stage1 = FALSE,
               learning_rate = object@learning_rate,
               tree_parameters = object@tree_parameters
@@ -113,25 +125,51 @@ fitBack <- function(object,
           )
         )
       } else {
-        # mfair_sf <- fitSFFully(R, object@X, mfair_sf,
-        #   object@learning_rate,
-        #   tree_parameters = object@tree_parameters,
-        #   ...
-        # )
-        mfair_sf <- do.call(
-          what = "fitSFFully",
-          args = append(
-            list(
-              Y = R, X = object@X,
-              init = mfair_sf,
-              stage1 = FALSE,
-              learning_rate = object@learning_rate,
-              tree_parameters = object@tree_parameters
-            ),
-            sf_para
+        # The residual (low-rank approximation using all factors but k-th)
+        R <- object@Y -
+          predict(object, which_factors = -k, add_mean = FALSE)
+
+        if (object@Y_missing) {
+          # mfair_sf <- fitSFMissing(R, obs_indices, object@X, mfair_sf,
+          #   object@learning_rate,
+          #   tree_parameters = object@tree_parameters,
+          #   ...
+          # )
+          mfair_sf <- do.call(
+            what = "fitSFMissing",
+            args = append(
+              list(
+                Y = R, X = object@X, init = mfair_sf,
+                obs_indices = obs_indices,
+                stage1 = FALSE,
+                learning_rate = object@learning_rate,
+                tree_parameters = object@tree_parameters
+              ),
+              sf_para
+            )
           )
-        )
+        } else {
+          # mfair_sf <- fitSFFully(R, object@X, mfair_sf,
+          #   object@learning_rate,
+          #   tree_parameters = object@tree_parameters,
+          #   ...
+          # )
+          mfair_sf <- do.call(
+            what = "fitSFFully",
+            args = append(
+              list(
+                Y = R, X = object@X,
+                init = mfair_sf,
+                stage1 = FALSE,
+                learning_rate = object@learning_rate,
+                tree_parameters = object@tree_parameters
+              ),
+              sf_para
+            )
+          )
+        }
       }
+
       object <- updateMFAIR(object, mfair_sf, k)
 
       if (verbose_bf_inner) {
@@ -145,9 +183,13 @@ fitBack <- function(object,
     tau_new <- object@tau
     beta_new <- object@beta
 
-    gap <- mean(abs(tau_new - tau) / abs(tau)) + mean(abs(beta_new - beta) / abs(beta))
+    gap <- mean(abs(tau_new - tau) / abs(tau)) +
+      mean(abs(beta_new - beta) / abs(beta))
     if (verbose_bf_outer) {
-      cat("Iteration: ", iter, ", relative difference of model parameters: ", gap, ".\n", sep = "")
+      cat("Iteration: ", iter,
+        ", relative difference of model parameters: ", gap, ".\n",
+        sep = ""
+      )
     }
     if (gap < tol_bf) {
       break
diff --git a/R/mfairELBO.R b/R/mfairELBO.R
index dd36598..f741b3b 100644
--- a/R/mfairELBO.R
+++ b/R/mfairELBO.R
@@ -2,21 +2,15 @@
 #'
 #' @param Y Observed main data matrix.
 #' @param object MFAIRSingleFactor object containing the information about the fitted single factor MFAI model.
+#' @param obs_indices Indices of the observed entries in the main data matrix Y. The default value is NULL and used only when Y is stored in the sparse mode.
 #'
 #' @return Numeric. The ELBO.
 #' @export
 #'
-getELBO <- function(Y, object) {
+getELBO <- function(Y, object, obs_indices) {
   N <- nrow(Y)
   M <- ncol(Y)
 
-  # n_missing <- sum(is.na(Y))
-  # if (n_missing >= 1) {
-  #   Y_missing <- TRUE
-  # } else {
-  #   Y_missing <- FALSE
-  # }
-
   mu <- object@mu
   mu_sq <- (object@mu)^2
   nu <- object@nu
@@ -33,15 +27,34 @@ getELBO <- function(Y, object) {
   if (object@Y_missing) {
     n_obs <- object@n_obs
     elbo1 <- -n_obs * log(2 * pi / tau) / 2
-    elbo2 <- -tau * sum((Y - as.matrix(mu) %*% t(nu))^2 + as.matrix(mu_sq + a_sq) %*% t(nu_sq + b_sq) - as.matrix(mu_sq) %*% t(nu_sq), na.rm = TRUE) / 2
-    elbo3 <- -N * log(2 * pi / beta) / 2 - beta * (sum(mu_sq) + sum(a_sq) - 2 * sum(mu * FX) + sum(FX^2)) / 2
+
+    if (!is.null(obs_indices)) { # Sparse mode
+      elbo2 <- -tau *
+        sum((Y - projSparse(as.matrix(mu) %*% t(nu), obs_indices))^2 +
+          projSparse(as.matrix(mu_sq + a_sq) %*% t(nu_sq + b_sq), obs_indices) -
+          projSparse(as.matrix(mu_sq) %*% t(nu_sq), obs_indices))
+    } else {
+      elbo2 <- -tau * sum(
+        (Y - as.matrix(mu) %*% t(nu))^2 +
+          as.matrix(mu_sq + a_sq) %*% t(nu_sq + b_sq) -
+          as.matrix(mu_sq) %*% t(nu_sq),
+        na.rm = TRUE
+      ) / 2
+    }
+    elbo3 <- -N * log(2 * pi / beta) / 2 -
+      beta * (sum(mu_sq) + sum(a_sq) -
+        2 * sum(mu * FX) + sum(FX^2)) / 2
     elbo4 <- -M * log(2 * pi) / 2 - (sum(nu_sq) + sum(b_sq)) / 2
     elbo5 <- sum(log(2 * pi * a_sq)) / 2 + N / 2
     elbo6 <- sum(log(2 * pi * b_sq)) / 2 + M / 2
   } else {
     elbo1 <- -N * M * log(2 * pi / tau) / 2
-    elbo2 <- -tau * sum((Y - as.matrix(mu) %*% t(nu))^2 + as.matrix(mu_sq + a_sq) %*% t(nu_sq + b_sq) - as.matrix(mu_sq) %*% t(nu_sq)) / 2
-    elbo3 <- -N * log(2 * pi / beta) / 2 - beta * (sum(mu_sq) + N * a_sq - 2 * sum(mu * FX) + sum(FX^2)) / 2
+    elbo2 <- -tau * sum((Y - as.matrix(mu) %*% t(nu))^2 +
+      as.matrix(mu_sq + a_sq) %*% t(nu_sq + b_sq) -
+      as.matrix(mu_sq) %*% t(nu_sq)) / 2
+    elbo3 <- -N * log(2 * pi / beta) / 2 -
+      beta * (sum(mu_sq) + N * a_sq -
+        2 * sum(mu * FX) + sum(FX^2)) / 2
     elbo4 <- -M * log(2 * pi) / 2 - (sum(nu_sq) + M * b_sq) / 2
     elbo5 <- N * log(2 * pi * a_sq) / 2 + N / 2
     elbo6 <- M * log(2 * pi * b_sq) / 2 + M / 2
diff --git a/R/mfairGreedy.R b/R/mfairGreedy.R
index 22b9e2e..ba124cb 100644
--- a/R/mfairGreedy.R
+++ b/R/mfairGreedy.R
@@ -25,7 +25,7 @@ fitGreedy <- function(object, K_max = NULL,
   # Check whether partially observed main data matrix and record the indices
   if (object@Y_missing) {
     if (object@Y_sparse) {
-      obs_indices <- NULL # Sparse mode does not need indices
+      obs_indices <- as.matrix(summary(Y)[, c(1, 2)])
     } else {
       obs_indices <- !is.na(object@Y)
     }
@@ -39,7 +39,8 @@ fitGreedy <- function(object, K_max = NULL,
 
   # Check K_max
   if (object@K_max > object@N || object@K_max > object@M) {
-    warning("The maximum rank allowed can not be larger than the rank of the main data matrix!\n")
+    warning("The maximum rank allowed can not
+            be larger than the rank of the main data matrix!\n")
     object@K_max <- min(object@N, object@M)
     warning("Reset K_max = ", object@K_max, "!\n")
   }
@@ -58,9 +59,11 @@ fitGreedy <- function(object, K_max = NULL,
   } else {
     need_init <- rep(FALSE, object@K_max)
     if (init_length < object@K_max) {
-      warning("Only the first ", init_length, " factors have been initialized, which is less than K_max!\n")
+      warning("Only the first ", init_length, " factors have been initialized,
+              which is less than K_max!\n")
       need_init[-(1:init_length)] <- TRUE
-      warning("The remaining factors will be initialized automatically if needed!\n")
+      warning("The remaining factors will be initialized automatically
+              if needed!\n")
     }
   }
 
@@ -102,13 +105,26 @@ fitGreedy <- function(object, K_max = NULL,
     }
 
     # Fit the single factor MFAI model
-    if (object@Y_missing) {
+    if (object@Y_sparse) { # The main data matrix is partially observed and stored in the sparse mode
+      mfair_sf <- do.call(
+        what = "fitSFSparse",
+        args = append(
+          list(
+            Y = R, X = object@X, init = init,
+            obs_indices = obs_indices,
+            learning_rate = object@learning_rate,
+            tree_parameters = object@tree_parameters
+          ),
+          sf_para
+        )
+      )
+    } else if (object@Y_missing) { # The main data matrix is partially observed but not stored in the sparse mode
       mfair_sf <- do.call(
         what = "fitSFMissing",
         args = append(
           list(
-            Y = R, obs_indices = obs_indices,
-            X = object@X, init = init,
+            Y = R, X = object@X, init = init,
+            obs_indices = obs_indices,
             learning_rate = object@learning_rate,
             tree_parameters = object@tree_parameters
           ),
@@ -120,7 +136,7 @@ fitGreedy <- function(object, K_max = NULL,
       #   tree_parameters = object@tree_parameters,
       #   ...
       # )
-    } else {
+    } else { # The main data matrix is fully observed
       mfair_sf <- do.call(
         what = "fitSFFully",
         args = append(
@@ -155,7 +171,11 @@ fitGreedy <- function(object, K_max = NULL,
     }
 
     # Prepare for the fitting for the next factor
-    R <- R - Y_k
+    if (object@Y_sparse) {
+      R <- R - projSparse(Y_k, obs_indices)
+    } else {
+      R <- R - Y_k
+    }
 
     # Save the information about the fitted single factor MFAI model
     object <- appendMFAIR(object, mfair_sf)
diff --git a/R/mfairImportance.R b/R/mfairImportance.R
index 9242ba6..d3879a6 100644
--- a/R/mfairImportance.R
+++ b/R/mfairImportance.R
@@ -37,7 +37,6 @@ getImportance <- function(object, which_factors = seq_len(object@K)) {
 #' @param variables_names The names of the auxiliary covariates.
 #'
 #' @return Importance score vector. Each entry is the importance score of one auxiliary covariate.
-#' @export
 #'
 getImportanceSF <- function(tree_list, variables_names) {
   importance_list <- lapply(tree_list,
diff --git a/R/mfairInitialization.R b/R/mfairInitialization.R
index ae202c1..e1ff7a7 100644
--- a/R/mfairInitialization.R
+++ b/R/mfairInitialization.R
@@ -35,10 +35,9 @@ createMFAIR <- function(Y, X,
     message("The main data matrix Y has been stored in the sparse mode!")
   } else if (Y_sparse == TRUE) { # Y is not in sparse mode, but we want it to be
     obs_tf <- !is.na(Y) # Indicates whether observed or missing
-    obs_idx <- which(obs_tf, arr.ind = TRUE) # Indices of observed entries
+    obs_indices <- which(obs_tf, arr.ind = TRUE) # Indices of observed entries
     Y <- Matrix::sparseMatrix(
-      i = obs_idx[, "row"],
-      j = obs_idx[, "col"],
+      i = obs_indices[, "row"], j = obs_indices[, "col"],
       x = Y[obs_tf],
       dims = c(N, M),
       symmetric = FALSE, triangular = FALSE,
@@ -121,6 +120,7 @@ createMFAIR <- function(Y, X,
 
 #' Initialize the parameters for the single factor MAFI model.
 #'
+#' @import Matrix
 #' @importFrom stats rnorm var
 #' @importFrom methods new
 #'
diff --git a/R/mfairPredict.R b/R/mfairPredict.R
index 3a61c22..c2ea26a 100644
--- a/R/mfairPredict.R
+++ b/R/mfairPredict.R
@@ -2,6 +2,7 @@
 #'
 #' @param object A model object for which prediction is desired.
 #' @param which_factors Which factors, i.e., which columns of Z and W, are used to make prediction. All K factors are used by default.
+#' @param add_mean Logical. Indicate whether to add the mean value. The default value is TRUE.
 #'
 #' @return Predicted matrix with the same dimension as that of Y.
 #' @export
@@ -9,7 +10,8 @@
 setMethod(
   f = "predict",
   signature = "MFAIR",
-  definition = function(object, which_factors = seq_len(object@K)) {
+  definition = function(object, which_factors = seq_len(object@K),
+                        add_mean = TRUE) {
     # Check Y
     if (object@Y_missing == FALSE) {
       message("The main data matrix Y has no missing entries!")
@@ -20,7 +22,10 @@ setMethod(
       stop("The model has not been fitted!")
     } # End
 
-    Y_hat <- object@Z[, which_factors] %*% t(object@W[, which_factors]) + object@Y_mean
+    Y_hat <- object@Z[, which_factors] %*% t(object@W[, which_factors])
+    if (add_mean) {
+      Y_hat <- Y_hat + object@Y_mean
+    }
 
     return(Y_hat)
   }
@@ -31,7 +36,6 @@ setMethod(
 #' @param object A model object for which prediction is desired.
 #'
 #' @return Predicted matrix with the same dimension as that of Y.
-#' @export
 #'
 setMethod(
   f = "predict",
@@ -87,7 +91,6 @@ predictFX <- function(object, newdata, which_factors = seq_len(object@K)) {
 #' @param learning_rate Numeric. The learning rate in the gradient boosting part.
 #'
 #' @return A vector containing predicted F(X). Each entry corresponds to a new sample.
-#' @export
 #'
 predictFXSF <- function(tree_list, newdata, learning_rate) {
   learning_rate * rowSums(sapply(tree_list, predict, newdata = newdata))
diff --git a/R/mfairSingleFactor.R b/R/mfairSingleFactor.R
index 3bb28be..e7b7dc1 100644
--- a/R/mfairSingleFactor.R
+++ b/R/mfairSingleFactor.R
@@ -1,5 +1,6 @@
-#' Fit the single factor MFAI model with partially observed main data matrix.
+#' Fit the single factor MFAI model with partially observed main data matrix stored in the sparse mode.
 #'
+#' @import Matrix
 #' @importFrom rpart rpart
 #'
 #' @param Y Main data matrix.
@@ -17,10 +18,175 @@
 #' @param save_tree_list Logical. Whether to save the tree list.
 #'
 #' @return A MFAIRSingleFactor object containing the information about the fitted single factor MFAI model.
-#' @export
 #'
+fitSFSparse <- function(Y, X, init,
+                        obs_indices,
+                        learning_rate, tree_parameters,
+                        stage1 = TRUE,
+                        iter_max = 5e+3, tol_stage1 = 0.1, tol_stage2 = 1e-5,
+                        verbose_sf = TRUE, verbose_loop = TRUE,
+                        save_tree_list = TRUE) {
+  N <- nrow(Y)
+  M <- ncol(Y)
+
+  # Represent the precision tau as matrix
+  tau_mat <- Matrix::sparseMatrix(
+    i = obs_indices[, 1], j = obs_indices[, 2],
+    x = init@tau,
+    dims = c(N, M),
+    symmetric = FALSE, triangular = FALSE,
+    index1 = TRUE,
+    repr = "C"
+  )
+
+  ELBO_old <- getELBO(Y, init, obs_indices)
+
+  if (stage1) {
+    # Stage 1
+    for (iter in 1:iter_max) {
+      # E-step
+      E_zsq <- (init@mu)^2 + init@a_sq
+      init@b_sq <- 1 / (1 + as.vector(t(tau_mat) %*% as.matrix(E_zsq)))
+      init@nu <- init@b_sq *
+        as.vector(init@tau * (t(Y) %*% as.matrix(init@mu)))
+
+      E_wsq <- (init@nu)^2 + init@b_sq
+      init@a_sq <- 1 / (init@beta + as.vector(tau_mat %*% as.matrix(E_wsq)))
+      init@mu <- init@a_sq *
+        (init@beta * init@FX +
+          as.vector(init@tau * (Y %*% as.matrix(init@nu))))
+
+      # M-step
+      mu_sq <- (init@mu)^2
+      nu_sq <- (init@nu)^2
+
+      init@tau <- init@n_obs /
+        sum((Y - projSparse(as.matrix(init@mu) %*% t(init@nu), obs_indices))^2 +
+          projSparse(as.matrix(mu_sq + init@a_sq) %*% t(nu_sq + init@b_sq), obs_indices) -
+          projSparse(as.matrix(mu_sq) %*% t(nu_sq), obs_indices))
+      tau_mat@x <- rep(init@tau, length(tau_mat@x))
+
+      init@beta <- N / (sum((init@mu - init@FX)^2) + sum(init@a_sq))
+      # init@beta <- ifelse(init@beta < 1e-8, 1e-8, init@beta)
+
+      ELBO_current <- getELBO(Y, init, obs_indices)
+      if (ELBO_current < ELBO_old) {
+        warning("ELBO decreasing!\n")
+      }
+
+      gap <- abs((ELBO_current - ELBO_old) / ELBO_old)
+      if (verbose_loop) {
+        cat("Iteration: ", iter, ", ELBO: ", ELBO_current, ", tau: ", init@tau,
+          ", beta: ", init@beta, ", relative difference: ", gap, ".\n",
+          sep = ""
+        )
+      }
+      if (gap < tol_stage1) {
+        break
+      }
+
+      ELBO_old <- ELBO_current
+    }
+
+    if (verbose_sf) {
+      cat("After ", iter, " iterations Stage 1 ends!\n", sep = "")
+    }
+
+    # Tree_0 is the mean of mu vector
+    init@tree_0 <- mean(init@mu)
+    init@FX <- init@FX + init@tree_0
+  } else {
+    if (verbose_sf) {
+      cat("Stage 1 skipped!\n")
+    }
+  }
+
+  # Stage 2
+  gb_data <- data.frame(r = NA, X)
+  for (iter in 1:iter_max) {
+    # E-step
+    E_zsq <- (init@mu)^2 + init@a_sq
+    init@b_sq <- 1 / (1 + as.vector(t(tau_mat) %*% as.matrix(E_zsq)))
+    init@nu <- init@b_sq *
+      as.vector(init@tau * (t(Y) %*% as.matrix(init@mu)))
+
+    E_wsq <- (init@nu)^2 + init@b_sq
+    init@a_sq <- 1 / (init@beta + as.vector(tau_mat %*% as.matrix(E_wsq)))
+    init@mu <- init@a_sq *
+      (init@beta * init@FX +
+        as.vector(init@tau * (Y %*% as.matrix(init@nu))))
 
-fitSFMissing <- function(Y, obs_indices, X, init,
+    # M-step
+    mu_sq <- (init@mu)^2
+    nu_sq <- (init@nu)^2
+
+    init@tau <- init@n_obs /
+      sum((Y - projSparse(as.matrix(init@mu) %*% t(init@nu), obs_indices))^2 +
+        projSparse(as.matrix(mu_sq + init@a_sq) %*% t(nu_sq + init@b_sq), obs_indices) -
+        projSparse(as.matrix(mu_sq) %*% t(nu_sq), obs_indices))
+    tau_mat@x <- rep(init@tau, length(tau_mat@x))
+
+    init@beta <- N / (sum((init@mu - init@FX)^2) + sum(init@a_sq))
+
+    # Gradient boosting
+    gb_data$r <- init@mu - init@FX
+    fitted_tree <- rpart(r ~ ., data = gb_data, control = tree_parameters)
+    init@FX <- init@FX + learning_rate * predict(fitted_tree, gb_data)
+
+    # save tree list
+    if (save_tree_list) {
+      init@tree_list <- append(init@tree_list, list(fitted_tree))
+    }
+
+    ELBO_current <- getELBO(Y, init, obs_indices)
+    if (ELBO_current < ELBO_old) {
+      warning("ELBO decreasing!\n")
+    }
+
+    gap <- abs((ELBO_current - ELBO_old) / ELBO_old)
+    if (verbose_loop) {
+      cat("Iteration: ", iter, ", ELBO: ", ELBO_current, ", tau: ", init@tau,
+        ", beta: ", init@beta, ", relative difference: ", gap, ".\n",
+        sep = ""
+      )
+    }
+    if (gap < tol_stage2) {
+      break
+    }
+
+    ELBO_old <- ELBO_current
+  }
+
+  if (verbose_sf) {
+    cat("After ", iter, " iterations Stage 2 ends!\n", sep = "")
+  }
+
+  return(init)
+}
+
+#' Fit the single factor MFAI model with partially observed main data matrix.
+#'
+#' @import Matrix
+#' @importFrom rpart rpart
+#'
+#' @param Y Main data matrix.
+#' @param X A data.frame containing the auxiliary information.
+#' @param init A MFAIRSingleFactor object containing the initial parameters for the single factor MAFI model.
+#' @param obs_indices Indices of the observed entries in the main data matrix.
+#' @param learning_rate Numeric. Parameter for the gradient boosting part.
+#' @param tree_parameters A list containing the parameters for the gradient boosting part.
+#' @param stage1 Logical. Whether to perform fitting algorithm stage1. The greedy algorithm needs while the backfitting algorithm does not need.
+#' @param iter_max Integer. Maximum iterations allowed.
+#' @param tol_stage1 Numeric. Convergence criterion in the first step.
+#' @param tol_stage2 Numeric. Convergence criterion in the first step.
+#' @param verbose_sf Logical. Whether to display the detailed information.
+#' @param verbose_loop Logical. Whether to display the detailed information when looping.
+#' @param save_tree_list Logical. Whether to save the tree list.
+#'
+#' @return A MFAIRSingleFactor object containing the information about the fitted single factor MFAI model.
+#'
+fitSFMissing <- function(Y, X, init,
+                         obs_indices,
                          learning_rate, tree_parameters,
                          stage1 = TRUE,
                          iter_max = 5e+3, tol_stage1 = 0.1, tol_stage2 = 1e-5,
@@ -45,17 +211,26 @@ fitSFMissing <- function(Y, obs_indices, X, init,
       # E-step
       E_zsq <- (init@mu)^2 + init@a_sq
       init@b_sq <- 1 / (1 + as.vector(t(tau_mat) %*% as.matrix(E_zsq)))
-      init@nu <- init@b_sq * as.vector(init@tau * (t(ProjY) %*% as.matrix(init@mu)))
+      init@nu <- init@b_sq *
+        as.vector(init@tau * (t(ProjY) %*% as.matrix(init@mu)))
 
       E_wsq <- (init@nu)^2 + init@b_sq
       init@a_sq <- 1 / (init@beta + as.vector(tau_mat %*% as.matrix(E_wsq)))
-      init@mu <- init@a_sq * (init@beta * init@FX + as.vector(init@tau * (ProjY %*% as.matrix(init@nu))))
+      init@mu <- init@a_sq *
+        (init@beta * init@FX +
+          as.vector(init@tau * (ProjY %*% as.matrix(init@nu))))
 
       # M-step
       mu_sq <- (init@mu)^2
       nu_sq <- (init@nu)^2
 
-      init@tau <- init@n_obs / sum(((Y - as.matrix(init@mu) %*% t(init@nu))^2 + as.matrix(mu_sq + init@a_sq) %*% t(nu_sq + init@b_sq) - as.matrix(mu_sq) %*% t(nu_sq)), na.rm = TRUE)
+      init@tau <- init@n_obs /
+        sum(
+          (Y - as.matrix(init@mu) %*% t(init@nu))^2 +
+            as.matrix(mu_sq + init@a_sq) %*% t(nu_sq + init@b_sq) -
+            as.matrix(mu_sq) %*% t(nu_sq),
+          na.rm = TRUE
+        )
       tau_mat <- matrix(init@tau, N, M)
       tau_mat[!obs_indices] <- 0
 
@@ -100,17 +275,26 @@ fitSFMissing <- function(Y, obs_indices, X, init,
     # E-step
     E_zsq <- (init@mu)^2 + init@a_sq
     init@b_sq <- 1 / (1 + as.vector(t(tau_mat) %*% as.matrix(E_zsq)))
-    init@nu <- init@b_sq * as.vector(init@tau * (t(ProjY) %*% as.matrix(init@mu)))
+    init@nu <- init@b_sq *
+      as.vector(init@tau * (t(ProjY) %*% as.matrix(init@mu)))
 
     E_wsq <- (init@nu)^2 + init@b_sq
     init@a_sq <- 1 / (init@beta + as.vector(tau_mat %*% as.matrix(E_wsq)))
-    init@mu <- init@a_sq * (init@beta * init@FX + as.vector(init@tau * (ProjY %*% as.matrix(init@nu))))
+    init@mu <- init@a_sq *
+      (init@beta * init@FX +
+        as.vector(init@tau * (ProjY %*% as.matrix(init@nu))))
 
     # M-step
     mu_sq <- (init@mu)^2
     nu_sq <- (init@nu)^2
 
-    init@tau <- init@n_obs / sum(((Y - as.matrix(init@mu) %*% t(init@nu))^2 + as.matrix(mu_sq + init@a_sq) %*% t(nu_sq + init@b_sq) - as.matrix(mu_sq) %*% t(nu_sq)), na.rm = TRUE)
+    init@tau <- init@n_obs /
+      sum(
+        (Y - as.matrix(init@mu) %*% t(init@nu))^2 +
+          as.matrix(mu_sq + init@a_sq) %*% t(nu_sq + init@b_sq) -
+          as.matrix(mu_sq) %*% t(nu_sq),
+        na.rm = TRUE
+      )
     tau_mat <- matrix(init@tau, N, M)
     tau_mat[!obs_indices] <- 0
 
@@ -170,7 +354,6 @@ fitSFMissing <- function(Y, obs_indices, X, init,
 #' @param save_tree_list Logical. Whether to save the tree list.
 #'
 #' @return A MFAIRSingleFactor object containing the information about the fitted single factor MFAI model.
-#' @export
 #'
 fitSFFully <- function(Y, X, init,
                        learning_rate, tree_parameters,
@@ -188,14 +371,20 @@ fitSFFully <- function(Y, X, init,
     for (iter in 1:iter_max) {
       # E-step
       init@b_sq <- 1 / (1 + init@tau * (sum((init@mu)^2) + N * init@a_sq))
-      init@nu <- init@b_sq * init@tau * as.vector(t(Y) %*% as.matrix(init@mu))
+      init@nu <- init@b_sq *
+        init@tau * as.vector(t(Y) %*% as.matrix(init@mu))
       init@a_sq <- 1 / (init@beta + init@tau * (sum((init@nu)^2) + M * init@b_sq))
-      init@mu <- init@a_sq * (init@beta * init@FX + init@tau * as.vector(Y %*% as.matrix(init@nu)))
+      init@mu <- init@a_sq *
+        (init@beta * init@FX +
+          init@tau * as.vector(Y %*% as.matrix(init@nu)))
 
       # M-step
       mu_sq <- (init@mu)^2
       nu_sq <- (init@nu)^2
-      init@tau <- N * M / sum(((Y - as.matrix(init@mu) %*% t(init@nu))^2 + as.matrix(mu_sq + init@a_sq) %*% t(nu_sq + init@b_sq) - as.matrix(mu_sq) %*% t(nu_sq)))
+      init@tau <- N * M /
+        sum((Y - as.matrix(init@mu) %*% t(init@nu))^2 +
+          as.matrix(mu_sq + init@a_sq) %*% t(nu_sq + init@b_sq) -
+          as.matrix(mu_sq) %*% t(nu_sq))
       init@beta <- N / (sum((init@mu - init@FX)^2) + N * init@a_sq)
       # init@beta <- ifelse(init@beta < 1e-8, 1e-8, init@beta)
 
@@ -236,14 +425,20 @@ fitSFFully <- function(Y, X, init,
   for (iter in 1:iter_max) {
     # E-step
     init@b_sq <- 1 / (1 + init@tau * (sum((init@mu)^2) + N * init@a_sq))
-    init@nu <- init@b_sq * init@tau * as.vector(t(Y) %*% as.matrix(init@mu))
+    init@nu <- init@b_sq *
+      init@tau * as.vector(t(Y) %*% as.matrix(init@mu))
     init@a_sq <- 1 / (init@beta + init@tau * (sum((init@nu)^2) + M * init@b_sq))
-    init@mu <- init@a_sq * (init@beta * init@FX + init@tau * as.vector(Y %*% as.matrix(init@nu)))
+    init@mu <- init@a_sq *
+      (init@beta * init@FX +
+        init@tau * as.vector(Y %*% as.matrix(init@nu)))
 
     # M-step
     mu_sq <- (init@mu)^2
     nu_sq <- (init@nu)^2
-    init@tau <- N * M / sum(((Y - as.matrix(init@mu) %*% t(init@nu))^2 + as.matrix(mu_sq + init@a_sq) %*% t(nu_sq + init@b_sq) - as.matrix(mu_sq) %*% t(nu_sq)))
+    init@tau <- N * M /
+      sum((Y - as.matrix(init@mu) %*% t(init@nu))^2 +
+        as.matrix(mu_sq + init@a_sq) %*% t(nu_sq + init@b_sq) -
+        as.matrix(mu_sq) %*% t(nu_sq))
     init@beta <- N / (sum((init@mu - init@FX)^2) + N * init@a_sq)
     # init@beta <- ifelse(init@beta < 1e-8, 1e-8, init@beta)
 
diff --git a/R/mfairUtils.R b/R/mfairUtils.R
new file mode 100644
index 0000000..8c7ec1b
--- /dev/null
+++ b/R/mfairUtils.R
@@ -0,0 +1,22 @@
+#' Project a matrix with given indices and store the result in the sparse mode.
+#'
+#' @import Matrix
+#'
+#' @param Y A matrix to be projected.
+#' @param obs_indices A matrix containing 1-based indices of the observed entries in the matrix Y. The first column represents row and the second column represents column.
+#'
+#' @return A dgCMatrix containing the projection result.
+#' @export
+#'
+projSparse <- function(Y, obs_indices) {
+  N <- nrow(Y)
+  M <- ncol(Y)
+  Y <- Matrix::sparseMatrix(
+    i = obs_indices[, 1], j = obs_indices[, 2],
+    x = Y[obs_indices],
+    dims = c(N, M),
+    symmetric = FALSE, triangular = FALSE,
+    index1 = TRUE,
+    repr = "C"
+  )
+}
diff --git a/man/fitSFMissing.Rd b/man/fitSFMissing.Rd
index dc850d0..ce16846 100644
--- a/man/fitSFMissing.Rd
+++ b/man/fitSFMissing.Rd
@@ -6,9 +6,9 @@
 \usage{
 fitSFMissing(
   Y,
-  obs_indices,
   X,
   init,
+  obs_indices,
   learning_rate,
   tree_parameters,
   stage1 = TRUE,
@@ -23,12 +23,12 @@ fitSFMissing(
 \arguments{
 \item{Y}{Main data matrix.}
 
-\item{obs_indices}{Indices of the observed entries in the main data matrix.}
-
 \item{X}{A data.frame containing the auxiliary information.}
 
 \item{init}{A MFAIRSingleFactor object containing the initial parameters for the single factor MAFI model.}
 
+\item{obs_indices}{Indices of the observed entries in the main data matrix.}
+
 \item{learning_rate}{Numeric. Parameter for the gradient boosting part.}
 
 \item{tree_parameters}{A list containing the parameters for the gradient boosting part.}
diff --git a/man/fitSFSparse.Rd b/man/fitSFSparse.Rd
new file mode 100644
index 0000000..224033f
--- /dev/null
+++ b/man/fitSFSparse.Rd
@@ -0,0 +1,55 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mfairSingleFactor.R
+\name{fitSFSparse}
+\alias{fitSFSparse}
+\title{Fit the single factor MFAI model with partially observed main data matrix stored in the sparse mode.}
+\usage{
+fitSFSparse(
+  Y,
+  X,
+  init,
+  obs_indices,
+  learning_rate,
+  tree_parameters,
+  stage1 = TRUE,
+  iter_max = 5000,
+  tol_stage1 = 0.1,
+  tol_stage2 = 1e-05,
+  verbose_sf = TRUE,
+  verbose_loop = TRUE,
+  save_tree_list = TRUE
+)
+}
+\arguments{
+\item{Y}{Main data matrix.}
+
+\item{X}{A data.frame containing the auxiliary information.}
+
+\item{init}{A MFAIRSingleFactor object containing the initial parameters for the single factor MAFI model.}
+
+\item{obs_indices}{Indices of the observed entries in the main data matrix.}
+
+\item{learning_rate}{Numeric. Parameter for the gradient boosting part.}
+
+\item{tree_parameters}{A list containing the parameters for the gradient boosting part.}
+
+\item{stage1}{Logical. Whether to perform fitting algorithm stage1. The greedy algorithm needs while the backfitting algorithm does not need.}
+
+\item{iter_max}{Integer. Maximum iterations allowed.}
+
+\item{tol_stage1}{Numeric. Convergence criterion in the first step.}
+
+\item{tol_stage2}{Numeric. Convergence criterion in the first step.}
+
+\item{verbose_sf}{Logical. Whether to display the detailed information.}
+
+\item{verbose_loop}{Logical. Whether to display the detailed information when looping.}
+
+\item{save_tree_list}{Logical. Whether to save the tree list.}
+}
+\value{
+A MFAIRSingleFactor object containing the information about the fitted single factor MFAI model.
+}
+\description{
+Fit the single factor MFAI model with partially observed main data matrix stored in the sparse mode.
+}
diff --git a/man/getELBO.Rd b/man/getELBO.Rd
index 730eda4..61f36fd 100644
--- a/man/getELBO.Rd
+++ b/man/getELBO.Rd
@@ -4,12 +4,14 @@
 \alias{getELBO}
 \title{Compute the evidence lower bound (ELBO) for fitted single factor MFAI model.}
 \usage{
-getELBO(Y, object)
+getELBO(Y, object, obs_indices)
 }
 \arguments{
 \item{Y}{Observed main data matrix.}
 
 \item{object}{MFAIRSingleFactor object containing the information about the fitted single factor MFAI model.}
+
+\item{obs_indices}{Indices of the observed entries in the main data matrix Y. The default value is NULL and used only when Y is stored in the sparse mode.}
 }
 \value{
 Numeric. The ELBO.
diff --git a/man/predict-MFAIR-method.Rd b/man/predict-MFAIR-method.Rd
index 6a00c90..5004233 100644
--- a/man/predict-MFAIR-method.Rd
+++ b/man/predict-MFAIR-method.Rd
@@ -4,12 +4,14 @@
 \alias{predict,MFAIR-method}
 \title{Prediction function for MFAIR object.}
 \usage{
-\S4method{predict}{MFAIR}(object, which_factors = seq_len(object@K))
+\S4method{predict}{MFAIR}(object, which_factors = seq_len(object@K), add_mean = TRUE)
 }
 \arguments{
 \item{object}{A model object for which prediction is desired.}
 
 \item{which_factors}{Which factors, i.e., which columns of Z and W, are used to make prediction. All K factors are used by default.}
+
+\item{add_mean}{Logical. Indicate whether to add the mean value. The default value is TRUE.}
 }
 \value{
 Predicted matrix with the same dimension as that of Y.
diff --git a/man/projSparse.Rd b/man/projSparse.Rd
new file mode 100644
index 0000000..5a5b76d
--- /dev/null
+++ b/man/projSparse.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mfairUtils.R
+\name{projSparse}
+\alias{projSparse}
+\title{Project a matrix with given indices and store the result in the sparse mode.}
+\usage{
+projSparse(Y, obs_indices)
+}
+\arguments{
+\item{Y}{A matrix to be projected.}
+
+\item{obs_indices}{A matrix containing 1-based indices of the observed entries in the matrix Y. The first column represents row and the second column represents column.}
+}
+\value{
+A dgCMatrix containing the projection result.
+}
+\description{
+Project a matrix with given indices and store the result in the sparse mode.
+}