diff --git a/pkgdown.yml b/pkgdown.yml index 67517db..e0e2a95 100644 --- a/pkgdown.yml +++ b/pkgdown.yml @@ -4,7 +4,7 @@ pkgdown_sha: ~ articles: ml100k: ml100k.html neocortex: neocortex.html -last_built: 2023-09-19T07:04Z +last_built: 2023-09-19T07:34Z urls: reference: https://yanglabhkust.github.io/mfair/reference article: https://yanglabhkust.github.io/mfair/articles diff --git a/reference/initSF.html b/reference/initSF.html index b3d5870..05995cb 100644 --- a/reference/initSF.html +++ b/reference/initSF.html @@ -55,7 +55,7 @@

Usage

-
initSF(Y, Y_missing = NULL, n_obs)
+
initSF(Y, Y_missing, Y_sparse, n_obs)
@@ -65,7 +65,11 @@

ArgumentsValue

MFAIRSingleFactor object containing the initial parameters for the single factor MFAI model.

-
-

Slots

- - -
Y_missing
-

Logical. Whether the main data matrix Y is partially observed.

- - -
n_obs
-

Integer. Total number of observed entries in Y.

- - -
diff --git a/search.json b/search.json index a4a2b30..ebbff04 100644 --- a/search.json +++ b/search.json @@ -1 +1 @@ -[{"path":"https://yanglabhkust.github.io/mfair/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) 2023 mfair authors Permission hereby granted, free charge, person obtaining copy software associated documentation files (“Software”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED “”, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":"https://yanglabhkust.github.io/mfair/articles/ml100k.html","id":"ml100k-data","dir":"Articles","previous_headings":"","what":"ml100k data","title":"Enrichment of movie genre information","text":"row represents user column represents movie rating matrix, row represents movie column represents genre genre matrix (use help(ml100k) details data).","code":""},{"path":"https://yanglabhkust.github.io/mfair/articles/ml100k.html","id":"fitting-the-mfai-model","dir":"Articles","previous_headings":"","what":"Fitting the MFAI model","title":"Enrichment of movie genre information","text":"use rating matrix main data matrix \\(Y\\), genre data frame auxiliary matrix \\(X\\). proceed fit MFAI model top three factors.","code":"# Create MFAIR object Y <- t(ml100k$rating) X <- ml100k$genre mfairObject <- createMFAIR(Y, X, K_max = 3) #> The main data matrix Y is partially observed! 
#> The main data matrix Y has been centered with mean = 3.52986! # Fit the MFAI model mfairObject <- fitGreedy(mfairObject, save_init = TRUE, sf_para = list(verbose_loop = FALSE) ) #> Set K_max = 3! #> Initialize the parameters of factor 1...... #> After 2 iterations Stage 1 ends! #> After 59 iterations Stage 2 ends! #> Factor 1 retained! #> Save the initializaiton information...... #> Initialize the parameters of factor 2...... #> After 2 iterations Stage 1 ends! #> After 77 iterations Stage 2 ends! #> Factor 2 retained! #> Save the initializaiton information...... #> Initialize the parameters of factor 3...... #> After 2 iterations Stage 1 ends! #> After 29 iterations Stage 2 ends! #> Factor 3 retained! #> Save the initializaiton information......"},{"path":"https://yanglabhkust.github.io/mfair/articles/ml100k.html","id":"importance-score","dir":"Articles","previous_headings":"","what":"Importance score","title":"Enrichment of movie genre information","text":"fitting MFAI model, can use getImportance() function obtain importance score genre within factor. 
higher importance score , specific movie genre contributes improving model.","code":"# Get importance score importance <- as.data.frame(getImportance(mfairObject, which_factors = 1:3)) importance$Genre <- rownames(importance) importance_long <- melt( data = importance, id.vars = \"Genre\", variable.name = \"Factor\", value.name = \"Importance\" ) importance_long$Genre <- factor(importance_long$Genre, levels = rev(colnames(X))) # head(importance_long) # Visualize the importance score p1 <- ggplot( data = importance_long, aes(x = Genre, y = Importance, fill = Genre) ) + geom_col() + coord_flip() + scale_y_continuous(labels = label_comma(accuracy = 1)) + xlab(NULL) + ylab(\"Importance score\") + guides(fill = \"none\") + theme_bw() + theme( text = element_text(size = 12), axis.title = element_text(size = 12), axis.text.x = element_text(size = 12), axis.text.y = element_text(size = 10), aspect.ratio = 2 ) + facet_grid(. ~ Factor, scales = \"free\") p1"},{"path":"https://yanglabhkust.github.io/mfair/articles/ml100k.html","id":"negative-control","dir":"Articles","previous_headings":"","what":"Negative control","title":"Enrichment of movie genre information","text":"Next, let’s create permuted movie genre matrix \\(X^{\\text{pmt}}\\), column \\(X^{\\text{pmt}}\\) obtained permuting entries corresponding column real genre data \\(X\\). fit MFAI model \\(Y\\) \\(X^{\\text{pmt}}\\) input. MFAI correctly assigns low importance scores permuted genres, suggesting MFAI avoids incorporating irrelevant auxiliary information. last, use \\(X^{\\text{}} = [X, X^{\\text{pmt}}]\\) input auxiliary information fit MFAI model. MFAI successfully distinguished useful movie genres irrelevant ones. 
Moreover, importance scores obtained using \\(X^{\\text{}}\\) highly consistent obtained using \\(X\\) \\(X^{\\text{pmt}}\\) separate inputs, indicating stability robustness MFAI.","code":"n_pmt <- dim(X)[2] X_pmt <- apply(X, MARGIN = 2, FUN = function(x) { N <- length(x) x[sample(x = c(1:N), size = N, replace = FALSE)] } ) X_pmt <- as.data.frame(X_pmt) colnames(X_pmt) <- paste0(colnames(X), \"_permuted\") # Create MFAIR object and use the same initialization mfairObject_pmt <- createMFAIR(Y, X_pmt, K_max = 3) #> The main data matrix Y is partially observed! #> The main data matrix Y has been centered with mean = 3.52986! mfairObject_pmt@initialization <- mfairObject@initialization # Fit the MFAI model mfairObject_pmt <- fitGreedy(mfairObject_pmt, sf_para = list(verbose_loop = FALSE) ) #> Set K_max = 3! #> Use the user-specific initialization for factor 1...... #> After 2 iterations Stage 1 ends! #> After 58 iterations Stage 2 ends! #> Factor 1 retained! #> Use the user-specific initialization for factor 2...... #> After 2 iterations Stage 1 ends! #> After 77 iterations Stage 2 ends! #> Factor 2 retained! #> Use the user-specific initialization for factor 3...... #> After 2 iterations Stage 1 ends! #> After 28 iterations Stage 2 ends! #> Factor 3 retained! 
# Get importance score importance_pmt <- as.data.frame(getImportance(mfairObject_pmt, which_factors = 1:3)) importance_pmt$Genre <- rownames(importance_pmt) importance_pmt_long <- melt( data = importance_pmt, id.vars = \"Genre\", variable.name = \"Factor\", value.name = \"Importance\" ) importance_pmt_long$Genre <- factor(importance_pmt_long$Genre, levels = rev(colnames(X_pmt))) # head(importance_pmt_long) # Visualize the importance score p2 <- ggplot( data = importance_pmt_long, aes(x = Genre, y = Importance, fill = Genre) ) + geom_col() + coord_flip() + scale_y_continuous(labels = label_comma(accuracy = 1)) + xlab(NULL) + ylab(\"Importance score\") + guides(fill = \"none\") + theme_bw() + theme( text = element_text(size = 12), axis.title = element_text(size = 12), axis.text.x = element_text(size = 12), axis.text.y = element_text(size = 10), aspect.ratio = 2 ) + facet_grid(. ~ Factor, scales = \"free\") p2 X_both <- cbind(X, X_pmt) # Create MFAIR object and use the same initialization mfairObject_both <- createMFAIR(Y, X_both, K_max = 3) #> The main data matrix Y is partially observed! #> The main data matrix Y has been centered with mean = 3.52986! mfairObject_both@initialization <- mfairObject@initialization # Fit the MFAI model mfairObject_both <- fitGreedy(mfairObject_both, sf_para = list(verbose_loop = FALSE) ) #> Set K_max = 3! #> Use the user-specific initialization for factor 1...... #> After 2 iterations Stage 1 ends! #> After 59 iterations Stage 2 ends! #> Factor 1 retained! #> Use the user-specific initialization for factor 2...... #> After 2 iterations Stage 1 ends! #> After 77 iterations Stage 2 ends! #> Factor 2 retained! #> Use the user-specific initialization for factor 3...... #> After 2 iterations Stage 1 ends! #> After 29 iterations Stage 2 ends! #> Factor 3 retained! 
# Get importance score importance_both <- as.data.frame(getImportance(mfairObject_both, which_factors = 1:3)) importance_both$Genre <- rownames(importance_both) importance_both_long <- melt( data = importance_both, id.vars = \"Genre\", variable.name = \"Factor\", value.name = \"Importance\" ) importance_both_long$Genre <- factor(importance_both_long$Genre, levels = rev(colnames(X_both))) # head(importance_both_long) # Visualize the importance score p3 <- ggplot( data = importance_both_long, aes(x = Genre, y = Importance, fill = Genre) ) + geom_col() + coord_flip() + scale_y_continuous(labels = label_comma(accuracy = 1)) + xlab(NULL) + ylab(\"Importance score\") + guides(fill = \"none\") + theme_bw() + theme( text = element_text(size = 12), axis.title = element_text(size = 12), axis.text.x = element_text(size = 12), axis.text.y = element_text(size = 8), aspect.ratio = 2 ) + facet_grid(. ~ Factor, scales = \"free\") p3"},{"path":"https://yanglabhkust.github.io/mfair/articles/neocortex.html","id":"neocortex-data","dir":"Articles","previous_headings":"","what":"neocortex data","title":"Spatial and temporal dynamics of gene regulation among brain tissues","text":"spatial temporal patterns gene regulation brain development attracted great deal attention neuroscience community. availability gene expression profiles collected multiple brain regions time periods provides unprecedented chance characterize human brain development. select genes consistent spatial patterns across individuals using concept differential stability (DS), defined tendency gene exhibit reproducible differential expression relationships across brain structures. include 2,000 genes highest DS get expression matrix, row represents sample tissue nercortex region column represents gene. 
sample_info data frame contains sample information, row represents sample tissue four columns respectively represent sample ID, neocortex area, hemisphere, time periods.","code":""},{"path":"https://yanglabhkust.github.io/mfair/articles/neocortex.html","id":"fitting-the-mfai-model","dir":"Articles","previous_headings":"","what":"Fitting the MFAI model","title":"Spatial and temporal dynamics of gene regulation among brain tissues","text":"use expression matrix main data matrix \\(Y\\), spatial temporal information contained sample_info data frame auxiliary matrix \\(X\\). proceed fit MFAI model top three factors.","code":"# Create MFAIR object Y <- neocortex$expression X <- neocortex$sample_info[, c(\"Region\", \"Stage\")] mfairObject <- createMFAIR(Y, X, K_max = 3) #> The main data matrix Y is completely observed! #> The main data matrix Y has been centered with mean = 7.64309222668172! # Fit the MFAI model mfairObject <- fitGreedy(mfairObject, sf_para = list(tol_stage2 = 1e-6, verbose_loop = FALSE) ) #> Set K_max = 3! #> Initialize the parameters of factor 1...... #> After 3 iterations Stage 1 ends! #> After 18 iterations Stage 2 ends! #> Factor 1 retained! #> Initialize the parameters of factor 2...... #> After 3 iterations Stage 1 ends! #> After 46 iterations Stage 2 ends! #> Factor 2 retained! #> Initialize the parameters of factor 3...... #> After 2 iterations Stage 1 ends! #> After 600 iterations Stage 2 ends! 
#> Factor 3 retained!"},{"path":"https://yanglabhkust.github.io/mfair/articles/neocortex.html","id":"spatial-and-temporal-dynamics","dir":"Articles","previous_headings":"","what":"Spatial and temporal dynamics","title":"Spatial and temporal dynamics of gene regulation among brain tissues","text":"gain insights, visualize dynamic patterns top three factors across different neocortex areas time periods, represented \\(\\{ F_1 (\\cdot) , F_2 (\\cdot) , F_3 (\\cdot) \\}\\).","code":"region <- c(\"OFC\", \"DFC\", \"VFC\", \"MFC\", \"M1C\", \"S1C\", \"IPC\", \"A1C\", \"STC\", \"ITC\", \"V1C\") stage <- c(3:15) X_new <- data.frame( Region = factor(rep(region, length(stage)), levels = region), Stage = rep(stage, each = length(region)) ) FX <- predictFX(mfairObject, newdata = X_new, which_factors = c(1:3) ) # Normalize each factor to have l2-norm equal one FX <- apply(FX, MARGIN = 2, FUN = function(x) { x / sqrt(sum(x^2)) } ) FX <- data.frame(X_new, FX) colnames(FX) <- c(\"Neocortex area\", \"Time period\", paste(\"Factor\", c(1:3))) FX[, \"Time period\"] <- factor(FX[, \"Time period\"], levels = stage) head(FX) #> Neocortex area Time period Factor 1 Factor 2 Factor 3 #> 1 OFC 3 -0.08530621 -0.1275302 0.1373010 #> 2 DFC 3 -0.08530621 -0.1281884 0.1720814 #> 3 VFC 3 -0.08530621 -0.1279504 0.1329457 #> 4 MFC 3 -0.08490720 -0.1272283 0.1652383 #> 5 M1C 3 -0.08530621 -0.1277465 0.1578517 #> 6 S1C 3 -0.08510786 -0.1277465 0.1462684 # Convert the wide table to the long table FX_long <- melt( data = FX, id.vars = c(\"Neocortex area\", \"Time period\"), variable.name = \"Factor\", value.name = \"F\" ) head(FX_long) #> Neocortex area Time period Factor F #> 1 OFC 3 Factor 1 -0.08530621 #> 2 DFC 3 Factor 1 -0.08530621 #> 3 VFC 3 Factor 1 -0.08530621 #> 4 MFC 3 Factor 1 -0.08490720 #> 5 M1C 3 Factor 1 -0.08530621 #> 6 S1C 3 Factor 1 -0.08510786 # Visualization of F(.) 
p <- ggplot( data = FX_long, aes(x = `Time period`, y = F, linetype = `Neocortex area`, colour = `Neocortex area`, group = `Neocortex area`) ) + geom_line(linewidth = 0.5) + ylab(NULL) + theme_bw() + scale_y_continuous(n.breaks = 4) + theme( text = element_text(size = 12), axis.text.y = element_text(size = 10), axis.title.x = element_text(size = 10, margin = margin(t = 3)), axis.text.x = element_text(size = 10), legend.title = element_text(size = 12), legend.text = element_text(size = 10), legend.key.size = unit(0.8, \"cm\"), legend.key.width = unit(0.8, \"cm\"), legend.position = \"right\", panel.spacing.y = unit(0.2, \"cm\"), # Space between panels aspect.ratio = 0.4 ) + facet_grid(Factor ~ ., scales = \"free_y\") p"},{"path":"https://yanglabhkust.github.io/mfair/articles/neocortex.html","id":"gene-set-enrichment-analysis","dir":"Articles","previous_headings":"","what":"Gene set enrichment analysis","title":"Spatial and temporal dynamics of gene regulation among brain tissues","text":"first calculated relative weight \\(k\\)-th factor \\(m\\)-th gene \\(\\left| W_{mk} \\right| / \\sum_{k^{\\prime}=1}^{3} \\left| W_{mk^{\\prime}} \\right|\\), \\(W_{m \\cdot} \\\\mathbb{R}^{3 \\times 1}\\) \\(m\\)-th row gene factors, selected top 300 weighted genes factor form gene sets. 
can conduct gene set enrichment analysis based Gene Ontology factor.","code":"# Inferred gene factors (corresponding to the W matrix in the MFAI paper) gene_factors <- mfairObject@W rownames(gene_factors) <- colnames(mfairObject@Y) # Assign gene symbols colnames(gene_factors) <- paste(\"Factor\", c(1:3)) head(gene_factors) #> Factor 1 Factor 2 Factor 3 #> DCUN1D2 0.07856363 0.06006969 0.16380535 #> ARRB1 -0.27132066 0.06770678 -0.12942443 #> PDE1B -0.01294115 0.39845756 0.20161848 #> PDE7B 0.01081660 0.41041139 0.52982200 #> TOX -0.11471109 -0.20478958 0.60887294 #> LOXHD1 0.24888992 0.01732711 -0.03634836 # Heatmap of the inferred gene factors pheatmap::pheatmap(t(gene_factors), scale = \"column\", clustering_method = \"complete\", cluster_row = FALSE, cluster_col = TRUE, treeheight_row = 0, treeheight_col = 0, border = FALSE, show_rownames = TRUE, show_colnames = FALSE, cellwidth = 0.2, cellheight = 40, fontsize = 12 ) # Normalize each factor to have l2-norm equal one gene_factors <- apply(gene_factors, MARGIN = 2, FUN = function(x) { x / sqrt(sum(x^2)) } ) # Relative weight gene_factors <- abs(gene_factors) gene_factors <- gene_factors / rowSums(gene_factors) M <- nrow(gene_factors)[1] # Total number of genes M = 2,000 ntop <- M * 0.15 # We use the top 300 weighted genes in each factor to form the gene sets # Index of top genes top_gene_idx <- apply(gene_factors, MARGIN = 2, FUN = function(x) { which(rank(-x) <= ntop) } ) top_genes <- apply(top_gene_idx, MARGIN = 2, FUN = function(x) { rownames(gene_factors)[x] } ) colnames(top_genes) <- paste(\"Factor\", c(1:3)) head(top_genes) #> Factor 1 Factor 2 Factor 3 #> [1,] \"ARRB1\" \"PDE1B\" \"AJAP1\" #> [2,] \"LOXHD1\" \"PDE7B\" \"KCNA3\" #> [3,] \"TYRP1\" \"KCNA2\" \"ASTN2\" #> [4,] \"PRKG1\" \"PMP22\" \"EMID1\" #> [5,] \"MS4A8B\" \"GPR155\" \"GPR52\" #> [6,] \"FAM131B\" \"SMAD2\" 
\"SEC24D\""},{"path":"https://yanglabhkust.github.io/mfair/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Zhiwei Wang. Author, maintainer.","code":""},{"path":"https://yanglabhkust.github.io/mfair/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Wang, Z., Zhang, F., Zheng, C., Hu, X., Cai, M., Yang, C. (2023). MFAI: scalable Bayesian matrix factorization approach leveraging auxiliary information. arXiv preprint arXiv:2303.02566. Wang, Z. (2023). mfair: Matrix Factorization Auxiliary Information R. R package version 0.0.0.9000. https://yanglabhkust.github.io/mfair/.","code":"@Article{, title = {MFAI: A Scalable Bayesian Matrix Factorization Approach to Leveraging Auxiliary Information}, author = {Zhiwei Wang and Fa Zhang and Cong Zheng and Xianghong Hu and Mingxuan Cai and Can Yang}, journal = {arXiv preprint arXiv:2303.02566}, year = {2023}, url = {https://doi.org/10.48550/arXiv.2303.02566}, } @Manual{, title = {mfair: Matrix Factorization with Auxiliary Information in R}, author = {Zhiwei Wang}, year = {2023}, note = {R package version 0.0.0.9000}, url = {https://yanglabhkust.github.io/mfair/}, }"},{"path":"https://yanglabhkust.github.io/mfair/index.html","id":"mfair-matrix-factorization-with-auxiliary-information-in-r","dir":"","previous_headings":"","what":"Matrix Factorization with Auxiliary Information in R","title":"Matrix Factorization with Auxiliary Information in R","text":"R package mfair implements methods based paper MFAI: scalable Bayesian matrix factorization approach leveraging auxiliary information. MFAI integrates gradient boosted trees probabilistic matrix factorization framework effectively leverage auxiliary information. 
parameters MAFI can automatically determined empirical Bayes framework, making adaptive utilization auxiliary information immune overfitting.","code":""},{"path":"https://yanglabhkust.github.io/mfair/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"Matrix Factorization with Auxiliary Information in R","text":"quick start, can install development version mfair GitHub : illustration examples, can alternatively use: build vignettes simultaneously. Please note can take minutes.","code":"# install.packages(\"devtools\") devtools::install_github(\"YangLabHKUST/mfair\") # install.packages(\"devtools\") devtools::install_github(\"YangLabHKUST/mfair\", build_vignettes = TRUE)"},{"path":"https://yanglabhkust.github.io/mfair/index.html","id":"examples","dir":"","previous_headings":"","what":"Examples","title":"Matrix Factorization with Auxiliary Information in R","text":"basic example shows solve common problem: mfair can also handle matrix missing entries: Empirically, backfitting algorithm can improve performance: Explore vignette illustrating enrichment movie genre information: Explore vignette illustrating spatial temporal dynamics gene regulation among brain tissues: documentation examples, please visit package website.","code":"set.seed(20230306) library(mfair) #> Loading required package: rpart # Simulate data # Set the data dimension and rank N <- 100 M <- 100 K_true <- 2L # Set the proportion of variance explained (PVE) PVE_Z <- 0.8 PVE_Y <- 0.5 # Generate auxiliary information X X1 <- runif(N, min = -10, max = 10) X2 <- runif(N, min = -10, max = 10) X <- cbind(X1, X2) # F(X) FX1 <- X1 / 2 - X2 FX2 <- (X1^2 - X2^2 + 2 * X1 * X2) / 10 FX <- cbind(FX1, FX2) # Generate loadings Z (= F(X) + noise) sig1_sq <- var(FX1) * (1 / PVE_Z - 1) Z1 <- FX1 + rnorm(n = N, mean = 0, sd = sqrt(sig1_sq)) sig2_sq <- var(FX2) * (1 / PVE_Z - 1) Z2 <- FX2 + rnorm(n = N, mean = 0, sd = sqrt(sig2_sq)) Z <- cbind(Z1, Z2) # Generate factors W W <- 
matrix(rnorm(M * K_true), nrow = M, ncol = K_true) # Generate the main data matrix Y_obs (= Y + noise) Y <- Z %*% t(W) Y_var <- var(as.vector(Y)) epsilon_sq <- Y_var * (1 / PVE_Y - 1) Y_obs <- Y + matrix( rnorm(N * M, mean = 0, sd = sqrt(epsilon_sq) ), nrow = N, ncol = M ) # Create MFAIR object mfairObject <- createMFAIR(Y_obs, X, K_max = K_true) # Fit the MFAI model mfairObject <- fitGreedy(mfairObject, sf_para = list(verbose_loop = FALSE)) #> Set K_max = 2! #> Initialize the parameters of factor 1...... #> After 1 iterations Stage 1 ends! #> After 43 iterations Stage 2 ends! #> Factor 1 retained! #> Initialize the parameters of factor 2...... #> After 1 iterations Stage 1 ends! #> After 40 iterations Stage 2 ends! #> Factor 2 retained! # Prediction based on the low-rank approximation Y_hat <- predict(mfairObject) #> The main data matrix Y has no missing entries! # Root-mean-square-error sqrt(mean((Y_obs - Y_hat)^2)) #> [1] 12.22526 # Predicted/true matrix variance ratio var(as.vector(Y_hat)) / var(as.vector(Y_obs)) #> [1] 0.471485 # Prediction/noise variance ratio var(as.vector(Y_hat)) / var(as.vector(Y_obs - Y_hat)) #> [1] 0.9884637 # Split the data into the training set and test set n_all <- N * M training_ratio <- 0.5 train_set <- sample(1:n_all, n_all * training_ratio, replace = FALSE) Y_train <- Y_test <- Y_obs Y_train[-train_set] <- NA Y_test[train_set] <- NA # Create MFAIR object mfairObject <- createMFAIR(Y_train, X, K_max = K_true) # Fit the MFAI model mfairObject <- fitGreedy(mfairObject, sf_para = list(verbose_loop = FALSE)) #> Set K_max = 2! #> Initialize the parameters of factor 1...... #> After 1 iterations Stage 1 ends! #> After 68 iterations Stage 2 ends! #> Factor 1 retained! #> Initialize the parameters of factor 2...... #> After 1 iterations Stage 1 ends! #> After 66 iterations Stage 2 ends! #> Factor 2 retained! 
# Prediction based on the low-rank approximation Y_hat <- predict(mfairObject) # Root-mean-square-error sqrt(mean((Y_test - Y_hat)^2, na.rm = TRUE)) #> [1] 12.88825 # Predicted/true matrix variance ratio var(as.vector(Y_hat), na.rm = TRUE) / var(as.vector(Y_obs), na.rm = TRUE) #> [1] 0.4311948 # Prediction/noise variance ratio var(as.vector(Y_hat), na.rm = TRUE) / var(as.vector(Y_obs - Y_hat), na.rm = TRUE) #> [1] 0.8554015 # Refine the MFAI model with the backfitting algorithm mfairObject <- fitBack(mfairObject, verbose_bf_inner = FALSE, sf_para = list(verbose_sf = FALSE, verbose_loop = FALSE) ) #> Iteration: 1, relative difference of model parameters: 0.2212487. #> Iteration: 2, relative difference of model parameters: 0.05861598. #> Iteration: 3, relative difference of model parameters: 0.01781071. #> Iteration: 4, relative difference of model parameters: 0.02649139. #> Iteration: 5, relative difference of model parameters: 0.01830385. #> Iteration: 6, relative difference of model parameters: 0.007152868. # Prediction based on the low-rank approximation Y_hat <- predict(mfairObject) # Root-mean-square-error sqrt(mean((Y_test - Y_hat)^2, na.rm = TRUE)) #> [1] 12.84545 # Predicted/true matrix variance ratio var(as.vector(Y_hat), na.rm = TRUE) / var(as.vector(Y_obs), na.rm = TRUE) #> [1] 0.4434191 # Prediction/noise variance ratio var(as.vector(Y_hat), na.rm = TRUE) / var(as.vector(Y_obs - Y_hat), na.rm = TRUE) #> [1] 0.8846744 vignette(\"ml100k\") vignette(\"neocortex\")"},{"path":"https://yanglabhkust.github.io/mfair/index.html","id":"citing-our-work","dir":"","previous_headings":"","what":"Citing our work","title":"Matrix Factorization with Auxiliary Information in R","text":"find mfair package source code repository useful work, please cite: Wang, Z., Zhang, F., Zheng, C., Hu, X., Cai, M., Yang, C. (2023). MFAI: scalable Bayesian matrix factorization approach leveraging auxiliary information. arXiv preprint arXiv:2303.02566. 
URL: https://doi.org/10.48550/arXiv.2303.02566.","code":""},{"path":"https://yanglabhkust.github.io/mfair/index.html","id":"development","dir":"","previous_headings":"","what":"Development","title":"Matrix Factorization with Auxiliary Information in R","text":"package developed Zhiwei Wang (zhiwei.wang@connect.ust.hk).","code":""},{"path":"https://yanglabhkust.github.io/mfair/index.html","id":"contact-information","dir":"","previous_headings":"","what":"Contact Information","title":"Matrix Factorization with Auxiliary Information in R","text":"Please feel free contact Zhiwei Wang (zhiwei.wang@connect.ust.hk), Prof. Mingxuan Cai (mingxcai@cityu.edu.hk), Prof. Can Yang (macyang@ust.hk) inquiries.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/MFAIR-class.html","id":null,"dir":"Reference","previous_headings":"","what":"Each MFAIR object has a number of slots which store information. Key slots to access are listed below. — MFAIR-class","title":"Each MFAIR object has a number of slots which store information. Key slots to access are listed below. — MFAIR-class","text":"MFAIR object number slots store information. Key slots access listed .","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/MFAIR-class.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Each MFAIR object has a number of slots which store information. Key slots to access are listed below. — MFAIR-class","text":"MFAIR class.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/MFAIR-class.html","id":"slots","dir":"Reference","previous_headings":"","what":"Slots","title":"Each MFAIR object has a number of slots which store information. Key slots to access are listed below. — MFAIR-class","text":"Y matrix Matrix::dgCMatrix. main data matrix N samples M features. X data.frame. auxiliary information data frame N samples C covariates. Y_sparse Logical. Whether main data matrix Y stored sparse mode. Y_center Logical. 
Whether main data matrix Y centered. Y_mean Numeric. Mean main data matrix Y centered. Zero . Y_missing Logical. Whether main data matrix Y partially observed. n_obs integer. Total number observed entries Y. N integer. Number rows (samples) Y, also number rows (samples) X. M integer. Number columns (features) Y. C integer. Number columns (auxiliary covariates) X. K_max integer. Note increasing K_max ensure actual K also increases, since K_max just upper bound model automatically infer K K_max. want inference larger rank K, please make sure K_max large enough tol_snr parameter fitting function fitGreedy() small enough, simultaneously. K integer. inferred rank Y. Z N * K matrix. Estimated loading matrix, corresponding inferred posterior mean Z MFAI model. a_sq matrix containing posterior variance Z k-th column corresponding k-th loading. fully observed Y, N elements one specific loading share posterior variance, a_sq 1 * K matrix. Y missing data, elements one specific loading different posterior variances, a_sq N * K matrix. W M * K matrix. Estimated factor matrix, corresponding inferred posterior mean W MFAI model. b_sq matrix containing posterior variance W k-th column corresponding k-th factor. fully observed Y, M elements one specific factor share posterior variance, b_sq 1 * K matrix. Y missing data, elements one specific factor different posterior variances, b_sq M * K matrix. tau Numeric. vector length K, containing precision parameter pair loading/factor. beta Numeric. vector length K, containing precision parameter loading Z_k. FX N * K matrix representing prior mean Z, corresponding F(X) MFAI model. tree_0 1 * K matrix containing tree_0 k-th column corresponding k-th factor. Tree_0 defined mean mu vector factor. tree_lists list length K, containing K fitted functions function represented list trees, .e., k-th list corresponds function F_k(.) MFAI model. initialization list. Initialization fitted model. learning_rate Numeric. 
learning rate gradient boosting part. tree_parameters list options control details rpart algorithm. project Character. Name project (record keeping).","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/MFAIRSingleFactor-class.html","id":null,"dir":"Reference","previous_headings":"","what":"MFAIRSingleFactor object contains the key information about the fitted single factor MFAI model. — MFAIRSingleFactor-class","title":"MFAIRSingleFactor object contains the key information about the fitted single factor MFAI model. — MFAIRSingleFactor-class","text":"MFAIRSingleFactor object contains key information fitted single factor MFAI model.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/MFAIRSingleFactor-class.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"MFAIRSingleFactor object contains the key information about the fitted single factor MFAI model. — MFAIRSingleFactor-class","text":"MFAIRSingleFactor class.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/MFAIRSingleFactor-class.html","id":"slots","dir":"Reference","previous_headings":"","what":"Slots","title":"MFAIRSingleFactor object contains the key information about the fitted single factor MFAI model. — MFAIRSingleFactor-class","text":"Y_missing Logical. Whether main data matrix Y partially observed. n_obs Integer. Total number observed entries Y. mu vector length N representing inferred loading, corresponding posterior mean z single factor MFAI model. a_sq Numeric. posterior variance loading z. fully observed Y, N elements loading share posterior variance, a_sq single number. Y missing data, elements different posterior variances, a_sq vector length N. nu vector length M representing inferred factor, corresponding posterior mean w single factor MFAI model. b_sq Numeric. posterior variance factor w. fully observed Y, M elements factor share posterior variance, b_sq single number. 
Y missing data, elements different posterior variances, b_sq vector length M. tau Numeric. Precision parameter pair loading/factor. beta Numeric. Precision parameter loading z. FX vector length N representing prior mean z, corresponding F(X) single factor MFAI model. tree_0 Numeric. Tree_0 defined mean mu vector. tree_list list containing multiple decision trees, corresponding function F(.) single factor MFAI model. project Character. Name project (record keeping).","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/appendMFAIR.html","id":null,"dir":"Reference","previous_headings":"","what":"Append the fitted factor to the MFAIR object in the greedy algorithm. — appendMFAIR","title":"Append the fitted factor to the MFAIR object in the greedy algorithm. — appendMFAIR","text":"Append fitted factor MFAIR object greedy algorithm.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/appendMFAIR.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Append the fitted factor to the MFAIR object in the greedy algorithm. — appendMFAIR","text":"","code":"appendMFAIR(object, object_sf)"},{"path":"https://yanglabhkust.github.io/mfair/reference/appendMFAIR.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Append the fitted factor to the MFAIR object in the greedy algorithm. — appendMFAIR","text":"object MFAIR object containing information first factors. object_sf MFAIRSingleFactor needed appended.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/appendMFAIR.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Append the fitted factor to the MFAIR object in the greedy algorithm. 
— appendMFAIR","text":"MFAIR object containing information new fitted single factor.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/createMFAIR.html","id":null,"dir":"Reference","previous_headings":"","what":"Create the MFAIR object with main data matrix and auxiliary information. — createMFAIR","title":"Create the MFAIR object with main data matrix and auxiliary information. — createMFAIR","text":"Create MFAIR object main data matrix auxiliary information.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/createMFAIR.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create the MFAIR object with main data matrix and auxiliary information. — createMFAIR","text":"","code":"createMFAIR( Y, X, Y_sparse = FALSE, Y_center = TRUE, K_max = 1L, project = \"MFAIR\" )"},{"path":"https://yanglabhkust.github.io/mfair/reference/createMFAIR.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create the MFAIR object with main data matrix and auxiliary information. — createMFAIR","text":"Y matrix Matrix::dgCMatrix. main data matrix N samples M features. X data.frame. auxiliary information data frame N samples C covariates. Y_sparse Logical. Determines whether use spase mode Y. Y_center Logical. Determines whether centering performed. K_max integer. maximum rank allowed model. project Character. Name project (record keeping).","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/createMFAIR.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create the MFAIR object with main data matrix and auxiliary information. — createMFAIR","text":"Returns MFAIR object, main data matrix auxiliary information.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/fitBack.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit the MFAI model using backfitting algorithm. 
— fitBack","title":"Fit the MFAI model using backfitting algorithm. — fitBack","text":"Fit MFAI model using backfitting algorithm.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/fitBack.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit the MFAI model using backfitting algorithm. — fitBack","text":"","code":"fitBack( object, learning_rate = 0.1, minsplit = 10, minbucket = round(minsplit/3), maxdepth = 2, other_tree_para = list(), iter_max_bf = 5000, tol_bf = 0.01, verbose_bf_inner = TRUE, verbose_bf_outer = TRUE, sf_para = list() )"},{"path":"https://yanglabhkust.github.io/mfair/reference/fitBack.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit the MFAI model using backfitting algorithm. — fitBack","text":"object MFAIR object. learning_rate Numeric. Parameter gradient boosting part. minsplit Numeric. Parameter gradient boosting part. minbucket Numeric. Parameter gradient boosting part. maxdepth Numeric. Parameter gradient boosting part. other_tree_para list containing parameters gradient boosting part. See rpart::rpart.control() details. iter_max_bf Integer. Maximum iterations allowed. tol_bf Numeric. convergence criterion. verbose_bf_inner Logical. Whether display detailed information inner loop. verbose_bf_outer Logical. Whether display detailed information outer loop. sf_para list containing parameters fitting single factor MFAI model. See fitSFFully() fitSFMissing() details.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/fitBack.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit the MFAI model using backfitting algorithm. 
— fitBack","text":"MFAIR object containing information fitted MFAI model using backfitting algorithm.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/fitGreedy.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit the MFAI model using greedy algorithm. — fitGreedy","title":"Fit the MFAI model using greedy algorithm. — fitGreedy","text":"Fit MFAI model using greedy algorithm.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/fitGreedy.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit the MFAI model using greedy algorithm. — fitGreedy","text":"","code":"fitGreedy( object, K_max = NULL, learning_rate = 0.1, minsplit = 10, minbucket = round(minsplit/3), maxdepth = 2, other_tree_para = list(), tol_snr = 0.002, verbose_greedy = TRUE, save_init = FALSE, sf_para = list() )"},{"path":"https://yanglabhkust.github.io/mfair/reference/fitGreedy.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit the MFAI model using greedy algorithm. — fitGreedy","text":"object MFAIR object. K_max Integer. maximum rank allowed MFAI model. learning_rate Numeric. Parameter gradient boosting part. minsplit Integer. Parameter gradient boosting part. minbucket Integer. Parameter gradient boosting part. maxdepth Integer. Parameter gradient boosting part. other_tree_para list containing parameters gradient boosting part. See rpart::rpart.control() details. tol_snr Numeric. convergence criterion determine inferred rank data. verbose_greedy Logical. Whether display detailed information fitting model. save_init Logical. Whether save initialization model. sf_para list containing parameters fitting single factor MFAI model. See fitSFFully() fitSFMissing() details.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/fitGreedy.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit the MFAI model using greedy algorithm. 
— fitGreedy","text":"MFAIR object containing information fitted MFAI model using greedy algorithm.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/fitSFFully.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit the single factor MFAI model with fully observed main data matrix. — fitSFFully","title":"Fit the single factor MFAI model with fully observed main data matrix. — fitSFFully","text":"Fit single factor MFAI model fully observed main data matrix.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/fitSFFully.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit the single factor MFAI model with fully observed main data matrix. — fitSFFully","text":"","code":"fitSFFully( Y, X, init, learning_rate, tree_parameters, stage1 = TRUE, iter_max = 5000, tol_stage1 = 0.1, tol_stage2 = 1e-05, verbose_sf = TRUE, verbose_loop = TRUE, save_tree_list = TRUE )"},{"path":"https://yanglabhkust.github.io/mfair/reference/fitSFFully.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit the single factor MFAI model with fully observed main data matrix. — fitSFFully","text":"Y Main data matrix. X data.frame containing auxiliary information. init MFAIRSingleFactor object containing initial parameters single factor MAFI model. learning_rate Numeric. Parameter gradient boosting part. tree_parameters list containing parameters gradient boosting part. stage1 Logical. Whether perform fitting algorithm stage1. greedy algorithm needs backfitting algorithm need. iter_max Integer. Maximum iterations allowed. tol_stage1 Numeric. Convergence criterion first step. tol_stage2 Numeric. Convergence criterion first step. verbose_sf Logical. Whether display detailed information. verbose_loop Logical. Whether display detailed information looping. save_tree_list Logical. 
Whether save tree list.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/fitSFFully.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit the single factor MFAI model with fully observed main data matrix. — fitSFFully","text":"MFAIRSingleFactor object containing information fitted single factor MFAI model.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/fitSFMissing.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit the single factor MFAI model with partially observed main data matrix. — fitSFMissing","title":"Fit the single factor MFAI model with partially observed main data matrix. — fitSFMissing","text":"Fit single factor MFAI model partially observed main data matrix.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/fitSFMissing.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit the single factor MFAI model with partially observed main data matrix. — fitSFMissing","text":"","code":"fitSFMissing( Y, obs_indices, X, init, learning_rate, tree_parameters, stage1 = TRUE, iter_max = 5000, tol_stage1 = 0.1, tol_stage2 = 1e-05, verbose_sf = TRUE, verbose_loop = TRUE, save_tree_list = TRUE )"},{"path":"https://yanglabhkust.github.io/mfair/reference/fitSFMissing.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit the single factor MFAI model with partially observed main data matrix. — fitSFMissing","text":"Y Main data matrix. obs_indices Indices observed entries main data matrix. X data.frame containing auxiliary information. init MFAIRSingleFactor object containing initial parameters single factor MAFI model. learning_rate Numeric. Parameter gradient boosting part. tree_parameters list containing parameters gradient boosting part. stage1 Logical. Whether perform fitting algorithm stage1. greedy algorithm needs backfitting algorithm need. iter_max Integer. 
Maximum iterations allowed. tol_stage1 Numeric. Convergence criterion first step. tol_stage2 Numeric. Convergence criterion first step. verbose_sf Logical. Whether display detailed information. verbose_loop Logical. Whether display detailed information looping. save_tree_list Logical. Whether save tree list.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/fitSFMissing.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit the single factor MFAI model with partially observed main data matrix. — fitSFMissing","text":"MFAIRSingleFactor object containing information fitted single factor MFAI model.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/getELBO.html","id":null,"dir":"Reference","previous_headings":"","what":"Compute the evidence lower bound (ELBO) for fitted single factor MFAI model. — getELBO","title":"Compute the evidence lower bound (ELBO) for fitted single factor MFAI model. — getELBO","text":"Compute evidence lower bound (ELBO) fitted single factor MFAI model.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/getELBO.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Compute the evidence lower bound (ELBO) for fitted single factor MFAI model. — getELBO","text":"","code":"getELBO(Y, object)"},{"path":"https://yanglabhkust.github.io/mfair/reference/getELBO.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Compute the evidence lower bound (ELBO) for fitted single factor MFAI model. — getELBO","text":"Y Observed main data matrix. object MFAIRSingleFactor object containing information fitted single factor MFAI model.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/getELBO.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Compute the evidence lower bound (ELBO) for fitted single factor MFAI model. — getELBO","text":"Numeric. 
ELBO.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/getImportance.html","id":null,"dir":"Reference","previous_headings":"","what":"Get importance measures of auxiliary covariates. — getImportance","title":"Get importance measures of auxiliary covariates. — getImportance","text":"Get importance measures auxiliary covariates.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/getImportance.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get importance measures of auxiliary covariates. — getImportance","text":"","code":"getImportance(object, which_factors = seq_len(object@K))"},{"path":"https://yanglabhkust.github.io/mfair/reference/getImportance.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Get importance measures of auxiliary covariates. — getImportance","text":"object MFAIR object. which_factors factors, .e., fitted functions evaluated. K factors evaluated default.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/getImportance.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get importance measures of auxiliary covariates. — getImportance","text":"Importance score matrix. row auxiliary covariate column factor.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/getImportance.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Get importance measures of auxiliary covariates. — getImportance","text":"rpart::rpart() function automatically change special characters variable names dot may cause inconsistency errors. Please ensure auxiliary covariates' names contain special characters want use function.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/getImportanceSF.html","id":null,"dir":"Reference","previous_headings":"","what":"Get importance measures of auxiliary covariates in a single factor. 
— getImportanceSF","title":"Get importance measures of auxiliary covariates in a single factor. — getImportanceSF","text":"Get importance measures auxiliary covariates single factor.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/getImportanceSF.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get importance measures of auxiliary covariates in a single factor. — getImportanceSF","text":"","code":"getImportanceSF(tree_list, variables_names)"},{"path":"https://yanglabhkust.github.io/mfair/reference/getImportanceSF.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Get importance measures of auxiliary covariates in a single factor. — getImportanceSF","text":"tree_list fitted function represented list trees. variables_names names auxiliary covariates.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/getImportanceSF.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get importance measures of auxiliary covariates in a single factor. — getImportanceSF","text":"Importance score vector. entry importance score one auxiliary covariate.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/initSF.html","id":null,"dir":"Reference","previous_headings":"","what":"Initialize the parameters for the single factor MAFI model. — initSF","title":"Initialize the parameters for the single factor MAFI model. — initSF","text":"Initialize parameters single factor MAFI model.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/initSF.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Initialize the parameters for the single factor MAFI model. 
— initSF","text":"","code":"initSF(Y, Y_missing = NULL, n_obs)"},{"path":"https://yanglabhkust.github.io/mfair/reference/initSF.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Initialize the parameters for the single factor MAFI model. — initSF","text":"Y Main data matrix. Y_missing Logical. Whether main data matrix partially observed. automatically judged specified (default value NULL). n_obs Integer. Total number observed entries.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/initSF.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Initialize the parameters for the single factor MAFI model. — initSF","text":"MFAIRSingleFactor object containing initial parameters single factor MAFI model.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/initSF.html","id":"slots","dir":"Reference","previous_headings":"","what":"Slots","title":"Initialize the parameters for the single factor MAFI model. — initSF","text":"Y_missing Logical. Whether main data matrix Y partially observed. n_obs Integer. Total number observed entries Y.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/matrixORdgCMatrix-class.html","id":null,"dir":"Reference","previous_headings":"","what":"Define the matrixORdgCMatrix class as the union of matrix and Matrix::dgCMatrix — matrixORdgCMatrix-class","title":"Define the matrixORdgCMatrix class as the union of matrix and Matrix::dgCMatrix — matrixORdgCMatrix-class","text":"Define matrixORdgCMatrix class union matrix Matrix::dgCMatrix","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/ml100k.html","id":null,"dir":"Reference","previous_headings":"","what":"MovieLens 100K data. — ml100k","title":"MovieLens 100K data. 
— ml100k","text":"list containing movie rating, user information, movie genres MovieLens 100K data.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/ml100k.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"MovieLens 100K data. — ml100k","text":"","code":"ml100k"},{"path":"https://yanglabhkust.github.io/mfair/reference/ml100k.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"MovieLens 100K data. — ml100k","text":"list containing movie rating matrix, user information data frame, movie genres data frame. rating Movie rating matrix 943 users 1,682 movies (0–5 star rating). user data frame containing information 943 users, three columns correspond age, gender, occupation respectively. genre binary data frame containing genre information 1,682 movies, column corresponds one specific genre.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/ml100k.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"MovieLens 100K data. — ml100k","text":"https://grouplens.org/datasets/movielens/100k/","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/neocortex.html","id":null,"dir":"Reference","previous_headings":"","what":"Human brain gene expression data. — neocortex","title":"Human brain gene expression data. — neocortex","text":"list containing bulk gene expression human brain tissue sample information.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/neocortex.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Human brain gene expression data. — neocortex","text":"","code":"neocortex"},{"path":"https://yanglabhkust.github.io/mfair/reference/neocortex.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Human brain gene expression data. — neocortex","text":"list containing gene expression data matrix tissue sample information data frame. 
expression Bulk gene expression matrix 886 tissue samples neocortex region 2,000 genes highest differential stability. sample_info data frame containing information 886 bulk tissue samples neocortex region. four columns correspond sample ID, neocortex area, hemisphere, time periods respectively.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/neocortex.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"Human brain gene expression data. — neocortex","text":"https://hbatlas.org/pages/data","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/predict-MFAIR-method.html","id":null,"dir":"Reference","previous_headings":"","what":"Prediction function for MFAIR object. — predict,MFAIR-method","title":"Prediction function for MFAIR object. — predict,MFAIR-method","text":"Prediction function MFAIR object.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/predict-MFAIR-method.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Prediction function for MFAIR object. — predict,MFAIR-method","text":"","code":"# S4 method for MFAIR predict(object, which_factors = seq_len(object@K))"},{"path":"https://yanglabhkust.github.io/mfair/reference/predict-MFAIR-method.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Prediction function for MFAIR object. — predict,MFAIR-method","text":"object model object prediction desired. which_factors factors, .e., columns Z W, used make prediction. K factors used default.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/predict-MFAIR-method.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Prediction function for MFAIR object. 
— predict,MFAIR-method","text":"Predicted matrix dimension Y.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/predict-MFAIRSingleFactor-method.html","id":null,"dir":"Reference","previous_headings":"","what":"Prediction function for MFAIRSingleFactor object. — predict,MFAIRSingleFactor-method","title":"Prediction function for MFAIRSingleFactor object. — predict,MFAIRSingleFactor-method","text":"Prediction function MFAIRSingleFactor object.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/predict-MFAIRSingleFactor-method.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Prediction function for MFAIRSingleFactor object. — predict,MFAIRSingleFactor-method","text":"","code":"# S4 method for MFAIRSingleFactor predict(object)"},{"path":"https://yanglabhkust.github.io/mfair/reference/predict-MFAIRSingleFactor-method.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Prediction function for MFAIRSingleFactor object. — predict,MFAIRSingleFactor-method","text":"object model object prediction desired.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/predict-MFAIRSingleFactor-method.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Prediction function for MFAIRSingleFactor object. — predict,MFAIRSingleFactor-method","text":"Predicted matrix dimension Y.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/predictFX.html","id":null,"dir":"Reference","previous_headings":"","what":"Prediction function for fitted functions. — predictFX","title":"Prediction function for fitted functions. — predictFX","text":"Prediction function fitted functions.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/predictFX.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Prediction function for fitted functions. 
— predictFX","text":"","code":"predictFX(object, newdata, which_factors = seq_len(object@K))"},{"path":"https://yanglabhkust.github.io/mfair/reference/predictFX.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Prediction function for fitted functions. — predictFX","text":"object MFAIR object. newdata Data frame containing values predictions required. which_factors factors, .e., fitted functions used. K factors used default.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/predictFX.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Prediction function for fitted functions. — predictFX","text":"matrix containing predicted F(X). row new sample column factor.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/predictFXSF.html","id":null,"dir":"Reference","previous_headings":"","what":"Prediction function for fitted function F() in single factor. — predictFXSF","title":"Prediction function for fitted function F() in single factor. — predictFXSF","text":"Prediction function fitted function F() single factor.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/predictFXSF.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Prediction function for fitted function F() in single factor. — predictFXSF","text":"","code":"predictFXSF(tree_list, newdata, learning_rate)"},{"path":"https://yanglabhkust.github.io/mfair/reference/predictFXSF.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Prediction function for fitted function F() in single factor. — predictFXSF","text":"tree_list fitted function represented list trees. newdata Data frame containing values predictions required. learning_rate Numeric. 
learning rate gradient boosting part.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/predictFXSF.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Prediction function for fitted function F() in single factor. — predictFXSF","text":"vector containing predicted F(X). entry corresponds new sample.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/updateMFAIR.html","id":null,"dir":"Reference","previous_headings":"","what":"Update the k-th factor of the MFAIR object in the backfitting algorithm. — updateMFAIR","title":"Update the k-th factor of the MFAIR object in the backfitting algorithm. — updateMFAIR","text":"Update k-th factor MFAIR object backfitting algorithm.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/updateMFAIR.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Update the k-th factor of the MFAIR object in the backfitting algorithm. — updateMFAIR","text":"","code":"updateMFAIR(object, object_sf, k)"},{"path":"https://yanglabhkust.github.io/mfair/reference/updateMFAIR.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Update the k-th factor of the MFAIR object in the backfitting algorithm. — updateMFAIR","text":"object MFAIR object containing initial information K-factor MFAI model. object_sf MFAIRSingleFactor containing information newly fitted single factor MFAI model. k Integer. fator updated.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/updateMFAIR.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Update the k-th factor of the MFAIR object in the backfitting algorithm. 
— updateMFAIR","text":"MFAIR object containing information new fitted single factor.","code":""}] +[{"path":"https://yanglabhkust.github.io/mfair/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) 2023 mfair authors Permission hereby granted, free charge, person obtaining copy software associated documentation files (“Software”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED “”, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":"https://yanglabhkust.github.io/mfair/articles/ml100k.html","id":"ml100k-data","dir":"Articles","previous_headings":"","what":"ml100k data","title":"Enrichment of movie genre information","text":"row represents user column represents movie rating matrix, row represents movie column represents genre genre matrix (use help(ml100k) details data).","code":""},{"path":"https://yanglabhkust.github.io/mfair/articles/ml100k.html","id":"fitting-the-mfai-model","dir":"Articles","previous_headings":"","what":"Fitting the MFAI model","title":"Enrichment of movie genre information","text":"use rating matrix main data matrix \\(Y\\), genre data frame auxiliary matrix \\(X\\). proceed fit MFAI model top three factors.","code":"# Create MFAIR object Y <- t(ml100k$rating) X <- ml100k$genre mfairObject <- createMFAIR(Y, X, K_max = 3) #> The main data matrix Y is partially observed! #> The main data matrix Y has been centered with mean = 3.52986! 
# Fit the MFAI model mfairObject <- fitGreedy(mfairObject, save_init = TRUE, sf_para = list(verbose_loop = FALSE) ) #> Set K_max = 3! #> Initialize the parameters of factor 1...... #> After 2 iterations Stage 1 ends! #> After 59 iterations Stage 2 ends! #> Factor 1 retained! #> Save the initializaiton information...... #> Initialize the parameters of factor 2...... #> After 2 iterations Stage 1 ends! #> After 77 iterations Stage 2 ends! #> Factor 2 retained! #> Save the initializaiton information...... #> Initialize the parameters of factor 3...... #> After 2 iterations Stage 1 ends! #> After 29 iterations Stage 2 ends! #> Factor 3 retained! #> Save the initializaiton information......"},{"path":"https://yanglabhkust.github.io/mfair/articles/ml100k.html","id":"importance-score","dir":"Articles","previous_headings":"","what":"Importance score","title":"Enrichment of movie genre information","text":"fitting MFAI model, can use getImportance() function obtain importance score genre within factor. higher importance score , specific movie genre contributes improving model.","code":"# Get importance score importance <- as.data.frame(getImportance(mfairObject, which_factors = 1:3)) importance$Genre <- rownames(importance) importance_long <- melt( data = importance, id.vars = \"Genre\", variable.name = \"Factor\", value.name = \"Importance\" ) importance_long$Genre <- factor(importance_long$Genre, levels = rev(colnames(X))) # head(importance_long) # Visualize the importance score p1 <- ggplot( data = importance_long, aes(x = Genre, y = Importance, fill = Genre) ) + geom_col() + coord_flip() + scale_y_continuous(labels = label_comma(accuracy = 1)) + xlab(NULL) + ylab(\"Importance score\") + guides(fill = \"none\") + theme_bw() + theme( text = element_text(size = 12), axis.title = element_text(size = 12), axis.text.x = element_text(size = 12), axis.text.y = element_text(size = 10), aspect.ratio = 2 ) + facet_grid(. 
~ Factor, scales = \"free\") p1"},{"path":"https://yanglabhkust.github.io/mfair/articles/ml100k.html","id":"negative-control","dir":"Articles","previous_headings":"","what":"Negative control","title":"Enrichment of movie genre information","text":"Next, let’s create permuted movie genre matrix \\(X^{\\text{pmt}}\\), column \\(X^{\\text{pmt}}\\) obtained permuting entries corresponding column real genre data \\(X\\). fit MFAI model \\(Y\\) \\(X^{\\text{pmt}}\\) input. MFAI correctly assigns low importance scores permuted genres, suggesting MFAI avoids incorporating irrelevant auxiliary information. last, use \\(X^{\\text{}} = [X, X^{\\text{pmt}}]\\) input auxiliary information fit MFAI model. MFAI successfully distinguished useful movie genres irrelevant ones. Moreover, importance scores obtained using \\(X^{\\text{}}\\) highly consistent obtained using \\(X\\) \\(X^{\\text{pmt}}\\) separate inputs, indicating stability robustness MFAI.","code":"n_pmt <- dim(X)[2] X_pmt <- apply(X, MARGIN = 2, FUN = function(x) { N <- length(x) x[sample(x = c(1:N), size = N, replace = FALSE)] } ) X_pmt <- as.data.frame(X_pmt) colnames(X_pmt) <- paste0(colnames(X), \"_permuted\") # Create MFAIR object and use the same initialization mfairObject_pmt <- createMFAIR(Y, X_pmt, K_max = 3) #> The main data matrix Y is partially observed! #> The main data matrix Y has been centered with mean = 3.52986! mfairObject_pmt@initialization <- mfairObject@initialization # Fit the MFAI model mfairObject_pmt <- fitGreedy(mfairObject_pmt, sf_para = list(verbose_loop = FALSE) ) #> Set K_max = 3! #> Use the user-specific initialization for factor 1...... #> After 2 iterations Stage 1 ends! #> After 58 iterations Stage 2 ends! #> Factor 1 retained! #> Use the user-specific initialization for factor 2...... #> After 2 iterations Stage 1 ends! #> After 77 iterations Stage 2 ends! #> Factor 2 retained! #> Use the user-specific initialization for factor 3...... #> After 2 iterations Stage 1 ends! 
#> After 28 iterations Stage 2 ends! #> Factor 3 retained! # Get importance score importance_pmt <- as.data.frame(getImportance(mfairObject_pmt, which_factors = 1:3)) importance_pmt$Genre <- rownames(importance_pmt) importance_pmt_long <- melt( data = importance_pmt, id.vars = \"Genre\", variable.name = \"Factor\", value.name = \"Importance\" ) importance_pmt_long$Genre <- factor(importance_pmt_long$Genre, levels = rev(colnames(X_pmt))) # head(importance_pmt_long) # Visualize the importance score p2 <- ggplot( data = importance_pmt_long, aes(x = Genre, y = Importance, fill = Genre) ) + geom_col() + coord_flip() + scale_y_continuous(labels = label_comma(accuracy = 1)) + xlab(NULL) + ylab(\"Importance score\") + guides(fill = \"none\") + theme_bw() + theme( text = element_text(size = 12), axis.title = element_text(size = 12), axis.text.x = element_text(size = 12), axis.text.y = element_text(size = 10), aspect.ratio = 2 ) + facet_grid(. ~ Factor, scales = \"free\") p2 X_both <- cbind(X, X_pmt) # Create MFAIR object and use the same initialization mfairObject_both <- createMFAIR(Y, X_both, K_max = 3) #> The main data matrix Y is partially observed! #> The main data matrix Y has been centered with mean = 3.52986! mfairObject_both@initialization <- mfairObject@initialization # Fit the MFAI model mfairObject_both <- fitGreedy(mfairObject_both, sf_para = list(verbose_loop = FALSE) ) #> Set K_max = 3! #> Use the user-specific initialization for factor 1...... #> After 2 iterations Stage 1 ends! #> After 59 iterations Stage 2 ends! #> Factor 1 retained! #> Use the user-specific initialization for factor 2...... #> After 2 iterations Stage 1 ends! #> After 77 iterations Stage 2 ends! #> Factor 2 retained! #> Use the user-specific initialization for factor 3...... #> After 2 iterations Stage 1 ends! #> After 29 iterations Stage 2 ends! #> Factor 3 retained! 
# Get importance score importance_both <- as.data.frame(getImportance(mfairObject_both, which_factors = 1:3)) importance_both$Genre <- rownames(importance_both) importance_both_long <- melt( data = importance_both, id.vars = \"Genre\", variable.name = \"Factor\", value.name = \"Importance\" ) importance_both_long$Genre <- factor(importance_both_long$Genre, levels = rev(colnames(X_both))) # head(importance_both_long) # Visualize the importance score p3 <- ggplot( data = importance_both_long, aes(x = Genre, y = Importance, fill = Genre) ) + geom_col() + coord_flip() + scale_y_continuous(labels = label_comma(accuracy = 1)) + xlab(NULL) + ylab(\"Importance score\") + guides(fill = \"none\") + theme_bw() + theme( text = element_text(size = 12), axis.title = element_text(size = 12), axis.text.x = element_text(size = 12), axis.text.y = element_text(size = 8), aspect.ratio = 2 ) + facet_grid(. ~ Factor, scales = \"free\") p3"},{"path":"https://yanglabhkust.github.io/mfair/articles/neocortex.html","id":"neocortex-data","dir":"Articles","previous_headings":"","what":"neocortex data","title":"Spatial and temporal dynamics of gene regulation among brain tissues","text":"spatial temporal patterns gene regulation brain development attracted great deal attention neuroscience community. availability gene expression profiles collected multiple brain regions time periods provides unprecedented chance characterize human brain development. select genes consistent spatial patterns across individuals using concept differential stability (DS), defined tendency gene exhibit reproducible differential expression relationships across brain structures. include 2,000 genes highest DS get expression matrix, row represents sample tissue nercortex region column represents gene. 
sample_info data frame contains sample information, row represents sample tissue four columns respectively represent sample ID, neocortex area, hemisphere, time periods.","code":""},{"path":"https://yanglabhkust.github.io/mfair/articles/neocortex.html","id":"fitting-the-mfai-model","dir":"Articles","previous_headings":"","what":"Fitting the MFAI model","title":"Spatial and temporal dynamics of gene regulation among brain tissues","text":"use expression matrix main data matrix \\(Y\\), spatial temporal information contained sample_info data frame auxiliary matrix \\(X\\). proceed fit MFAI model top three factors.","code":"# Create MFAIR object Y <- neocortex$expression X <- neocortex$sample_info[, c(\"Region\", \"Stage\")] mfairObject <- createMFAIR(Y, X, K_max = 3) #> The main data matrix Y is completely observed! #> The main data matrix Y has been centered with mean = 7.64309222668172! # Fit the MFAI model mfairObject <- fitGreedy(mfairObject, sf_para = list(tol_stage2 = 1e-6, verbose_loop = FALSE) ) #> Set K_max = 3! #> Initialize the parameters of factor 1...... #> After 3 iterations Stage 1 ends! #> After 18 iterations Stage 2 ends! #> Factor 1 retained! #> Initialize the parameters of factor 2...... #> After 3 iterations Stage 1 ends! #> After 46 iterations Stage 2 ends! #> Factor 2 retained! #> Initialize the parameters of factor 3...... #> After 2 iterations Stage 1 ends! #> After 600 iterations Stage 2 ends! 
#> Factor 3 retained!"},{"path":"https://yanglabhkust.github.io/mfair/articles/neocortex.html","id":"spatial-and-temporal-dynamics","dir":"Articles","previous_headings":"","what":"Spatial and temporal dynamics","title":"Spatial and temporal dynamics of gene regulation among brain tissues","text":"gain insights, visualize dynamic patterns top three factors across different neocortex areas time periods, represented \\(\\{ F_1 (\\cdot) , F_2 (\\cdot) , F_3 (\\cdot) \\}\\).","code":"region <- c(\"OFC\", \"DFC\", \"VFC\", \"MFC\", \"M1C\", \"S1C\", \"IPC\", \"A1C\", \"STC\", \"ITC\", \"V1C\") stage <- c(3:15) X_new <- data.frame( Region = factor(rep(region, length(stage)), levels = region), Stage = rep(stage, each = length(region)) ) FX <- predictFX(mfairObject, newdata = X_new, which_factors = c(1:3) ) # Normalize each factor to have l2-norm equal one FX <- apply(FX, MARGIN = 2, FUN = function(x) { x / sqrt(sum(x^2)) } ) FX <- data.frame(X_new, FX) colnames(FX) <- c(\"Neocortex area\", \"Time period\", paste(\"Factor\", c(1:3))) FX[, \"Time period\"] <- factor(FX[, \"Time period\"], levels = stage) head(FX) #> Neocortex area Time period Factor 1 Factor 2 Factor 3 #> 1 OFC 3 -0.08530621 -0.1275302 0.1373010 #> 2 DFC 3 -0.08530621 -0.1281884 0.1720814 #> 3 VFC 3 -0.08530621 -0.1279504 0.1329457 #> 4 MFC 3 -0.08490720 -0.1272283 0.1652383 #> 5 M1C 3 -0.08530621 -0.1277465 0.1578517 #> 6 S1C 3 -0.08510786 -0.1277465 0.1462684 # Convert the wide table to the long table FX_long <- melt( data = FX, id.vars = c(\"Neocortex area\", \"Time period\"), variable.name = \"Factor\", value.name = \"F\" ) head(FX_long) #> Neocortex area Time period Factor F #> 1 OFC 3 Factor 1 -0.08530621 #> 2 DFC 3 Factor 1 -0.08530621 #> 3 VFC 3 Factor 1 -0.08530621 #> 4 MFC 3 Factor 1 -0.08490720 #> 5 M1C 3 Factor 1 -0.08530621 #> 6 S1C 3 Factor 1 -0.08510786 # Visualization of F(.) 
p <- ggplot( data = FX_long, aes(x = `Time period`, y = F, linetype = `Neocortex area`, colour = `Neocortex area`, group = `Neocortex area`) ) + geom_line(linewidth = 0.5) + ylab(NULL) + theme_bw() + scale_y_continuous(n.breaks = 4) + theme( text = element_text(size = 12), axis.text.y = element_text(size = 10), axis.title.x = element_text(size = 10, margin = margin(t = 3)), axis.text.x = element_text(size = 10), legend.title = element_text(size = 12), legend.text = element_text(size = 10), legend.key.size = unit(0.8, \"cm\"), legend.key.width = unit(0.8, \"cm\"), legend.position = \"right\", panel.spacing.y = unit(0.2, \"cm\"), # Space between panels aspect.ratio = 0.4 ) + facet_grid(Factor ~ ., scales = \"free_y\") p"},{"path":"https://yanglabhkust.github.io/mfair/articles/neocortex.html","id":"gene-set-enrichment-analysis","dir":"Articles","previous_headings":"","what":"Gene set enrichment analysis","title":"Spatial and temporal dynamics of gene regulation among brain tissues","text":"first calculated relative weight \\(k\\)-th factor \\(m\\)-th gene \\(\\left| W_{mk} \\right| / \\sum_{k^{\\prime}=1}^{3} \\left| W_{mk^{\\prime}} \\right|\\), \\(W_{m \\cdot} \\\\mathbb{R}^{3 \\times 1}\\) \\(m\\)-th row gene factors, selected top 300 weighted genes factor form gene sets. 
can conduct gene set enrichment analysis based Gene Ontology factor.","code":"# Inferred gene factors (corresponding to the W matrix in the MFAI paper) gene_factors <- mfairObject@W rownames(gene_factors) <- colnames(mfairObject@Y) # Assign gene symbols colnames(gene_factors) <- paste(\"Factor\", c(1:3)) head(gene_factors) #> Factor 1 Factor 2 Factor 3 #> DCUN1D2 0.07856363 0.06006969 0.16380535 #> ARRB1 -0.27132066 0.06770678 -0.12942443 #> PDE1B -0.01294115 0.39845756 0.20161848 #> PDE7B 0.01081660 0.41041139 0.52982200 #> TOX -0.11471109 -0.20478958 0.60887294 #> LOXHD1 0.24888992 0.01732711 -0.03634836 # Heatmap of the inferred gene factors pheatmap::pheatmap(t(gene_factors), scale = \"column\", clustering_method = \"complete\", cluster_row = FALSE, cluster_col = TRUE, treeheight_row = 0, treeheight_col = 0, border = FALSE, show_rownames = TRUE, show_colnames = FALSE, cellwidth = 0.2, cellheight = 40, fontsize = 12 ) # Normalize each factor to have l2-norm equal one gene_factors <- apply(gene_factors, MARGIN = 2, FUN = function(x) { x / sqrt(sum(x^2)) } ) # Relative weight gene_factors <- abs(gene_factors) gene_factors <- gene_factors / rowSums(gene_factors) M <- nrow(gene_factors)[1] # Total number of genes M = 2,000 ntop <- M * 0.15 # We use the top 300 weighted genes in each factor to form the gene sets # Index of top genes top_gene_idx <- apply(gene_factors, MARGIN = 2, FUN = function(x) { which(rank(-x) <= ntop) } ) top_genes <- apply(top_gene_idx, MARGIN = 2, FUN = function(x) { rownames(gene_factors)[x] } ) colnames(top_genes) <- paste(\"Factor\", c(1:3)) head(top_genes) #> Factor 1 Factor 2 Factor 3 #> [1,] \"ARRB1\" \"PDE1B\" \"AJAP1\" #> [2,] \"LOXHD1\" \"PDE7B\" \"KCNA3\" #> [3,] \"TYRP1\" \"KCNA2\" \"ASTN2\" #> [4,] \"PRKG1\" \"PMP22\" \"EMID1\" #> [5,] \"MS4A8B\" \"GPR155\" \"GPR52\" #> [6,] \"FAM131B\" \"SMAD2\" 
\"SEC24D\""},{"path":"https://yanglabhkust.github.io/mfair/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Zhiwei Wang. Author, maintainer.","code":""},{"path":"https://yanglabhkust.github.io/mfair/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Wang, Z., Zhang, F., Zheng, C., Hu, X., Cai, M., Yang, C. (2023). MFAI: scalable Bayesian matrix factorization approach leveraging auxiliary information. arXiv preprint arXiv:2303.02566. Wang, Z. (2023). mfair: Matrix Factorization Auxiliary Information R. R package version 0.0.0.9000. https://yanglabhkust.github.io/mfair/.","code":"@Article{, title = {MFAI: A Scalable Bayesian Matrix Factorization Approach to Leveraging Auxiliary Information}, author = {Zhiwei Wang and Fa Zhang and Cong Zheng and Xianghong Hu and Mingxuan Cai and Can Yang}, journal = {arXiv preprint arXiv:2303.02566}, year = {2023}, url = {https://doi.org/10.48550/arXiv.2303.02566}, } @Manual{, title = {mfair: Matrix Factorization with Auxiliary Information in R}, author = {Zhiwei Wang}, year = {2023}, note = {R package version 0.0.0.9000}, url = {https://yanglabhkust.github.io/mfair/}, }"},{"path":"https://yanglabhkust.github.io/mfair/index.html","id":"mfair-matrix-factorization-with-auxiliary-information-in-r","dir":"","previous_headings":"","what":"Matrix Factorization with Auxiliary Information in R","title":"Matrix Factorization with Auxiliary Information in R","text":"R package mfair implements methods based paper MFAI: scalable Bayesian matrix factorization approach leveraging auxiliary information. MFAI integrates gradient boosted trees probabilistic matrix factorization framework effectively leverage auxiliary information. 
parameters MAFI can automatically determined empirical Bayes framework, making adaptive utilization auxiliary information immune overfitting.","code":""},{"path":"https://yanglabhkust.github.io/mfair/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"Matrix Factorization with Auxiliary Information in R","text":"quick start, can install development version mfair GitHub : illustration examples, can alternatively use: build vignettes simultaneously. Please note can take minutes.","code":"# install.packages(\"devtools\") devtools::install_github(\"YangLabHKUST/mfair\") # install.packages(\"devtools\") devtools::install_github(\"YangLabHKUST/mfair\", build_vignettes = TRUE)"},{"path":"https://yanglabhkust.github.io/mfair/index.html","id":"examples","dir":"","previous_headings":"","what":"Examples","title":"Matrix Factorization with Auxiliary Information in R","text":"basic example shows solve common problem: mfair can also handle matrix missing entries: Empirically, backfitting algorithm can improve performance: Explore vignette illustrating enrichment movie genre information: Explore vignette illustrating spatial temporal dynamics gene regulation among brain tissues: documentation examples, please visit package website.","code":"set.seed(20230306) library(mfair) #> Loading required package: rpart # Simulate data # Set the data dimension and rank N <- 100 M <- 100 K_true <- 2L # Set the proportion of variance explained (PVE) PVE_Z <- 0.8 PVE_Y <- 0.5 # Generate auxiliary information X X1 <- runif(N, min = -10, max = 10) X2 <- runif(N, min = -10, max = 10) X <- cbind(X1, X2) # F(X) FX1 <- X1 / 2 - X2 FX2 <- (X1^2 - X2^2 + 2 * X1 * X2) / 10 FX <- cbind(FX1, FX2) # Generate loadings Z (= F(X) + noise) sig1_sq <- var(FX1) * (1 / PVE_Z - 1) Z1 <- FX1 + rnorm(n = N, mean = 0, sd = sqrt(sig1_sq)) sig2_sq <- var(FX2) * (1 / PVE_Z - 1) Z2 <- FX2 + rnorm(n = N, mean = 0, sd = sqrt(sig2_sq)) Z <- cbind(Z1, Z2) # Generate factors W W <- 
matrix(rnorm(M * K_true), nrow = M, ncol = K_true) # Generate the main data matrix Y_obs (= Y + noise) Y <- Z %*% t(W) Y_var <- var(as.vector(Y)) epsilon_sq <- Y_var * (1 / PVE_Y - 1) Y_obs <- Y + matrix( rnorm(N * M, mean = 0, sd = sqrt(epsilon_sq) ), nrow = N, ncol = M ) # Create MFAIR object mfairObject <- createMFAIR(Y_obs, X, K_max = K_true) # Fit the MFAI model mfairObject <- fitGreedy(mfairObject, sf_para = list(verbose_loop = FALSE)) #> Set K_max = 2! #> Initialize the parameters of factor 1...... #> After 1 iterations Stage 1 ends! #> After 43 iterations Stage 2 ends! #> Factor 1 retained! #> Initialize the parameters of factor 2...... #> After 1 iterations Stage 1 ends! #> After 40 iterations Stage 2 ends! #> Factor 2 retained! # Prediction based on the low-rank approximation Y_hat <- predict(mfairObject) #> The main data matrix Y has no missing entries! # Root-mean-square-error sqrt(mean((Y_obs - Y_hat)^2)) #> [1] 12.22526 # Predicted/true matrix variance ratio var(as.vector(Y_hat)) / var(as.vector(Y_obs)) #> [1] 0.471485 # Prediction/noise variance ratio var(as.vector(Y_hat)) / var(as.vector(Y_obs - Y_hat)) #> [1] 0.9884637 # Split the data into the training set and test set n_all <- N * M training_ratio <- 0.5 train_set <- sample(1:n_all, n_all * training_ratio, replace = FALSE) Y_train <- Y_test <- Y_obs Y_train[-train_set] <- NA Y_test[train_set] <- NA # Create MFAIR object mfairObject <- createMFAIR(Y_train, X, K_max = K_true) # Fit the MFAI model mfairObject <- fitGreedy(mfairObject, sf_para = list(verbose_loop = FALSE)) #> Set K_max = 2! #> Initialize the parameters of factor 1...... #> After 1 iterations Stage 1 ends! #> After 68 iterations Stage 2 ends! #> Factor 1 retained! #> Initialize the parameters of factor 2...... #> After 1 iterations Stage 1 ends! #> After 66 iterations Stage 2 ends! #> Factor 2 retained! 
# Prediction based on the low-rank approximation Y_hat <- predict(mfairObject) # Root-mean-square-error sqrt(mean((Y_test - Y_hat)^2, na.rm = TRUE)) #> [1] 12.88825 # Predicted/true matrix variance ratio var(as.vector(Y_hat), na.rm = TRUE) / var(as.vector(Y_obs), na.rm = TRUE) #> [1] 0.4311948 # Prediction/noise variance ratio var(as.vector(Y_hat), na.rm = TRUE) / var(as.vector(Y_obs - Y_hat), na.rm = TRUE) #> [1] 0.8554015 # Refine the MFAI model with the backfitting algorithm mfairObject <- fitBack(mfairObject, verbose_bf_inner = FALSE, sf_para = list(verbose_sf = FALSE, verbose_loop = FALSE) ) #> Iteration: 1, relative difference of model parameters: 0.2212487. #> Iteration: 2, relative difference of model parameters: 0.05861598. #> Iteration: 3, relative difference of model parameters: 0.01781071. #> Iteration: 4, relative difference of model parameters: 0.02649139. #> Iteration: 5, relative difference of model parameters: 0.01830385. #> Iteration: 6, relative difference of model parameters: 0.007152868. # Prediction based on the low-rank approximation Y_hat <- predict(mfairObject) # Root-mean-square-error sqrt(mean((Y_test - Y_hat)^2, na.rm = TRUE)) #> [1] 12.84545 # Predicted/true matrix variance ratio var(as.vector(Y_hat), na.rm = TRUE) / var(as.vector(Y_obs), na.rm = TRUE) #> [1] 0.4434191 # Prediction/noise variance ratio var(as.vector(Y_hat), na.rm = TRUE) / var(as.vector(Y_obs - Y_hat), na.rm = TRUE) #> [1] 0.8846744 vignette(\"ml100k\") vignette(\"neocortex\")"},{"path":"https://yanglabhkust.github.io/mfair/index.html","id":"citing-our-work","dir":"","previous_headings":"","what":"Citing our work","title":"Matrix Factorization with Auxiliary Information in R","text":"find mfair package source code repository useful work, please cite: Wang, Z., Zhang, F., Zheng, C., Hu, X., Cai, M., Yang, C. (2023). MFAI: scalable Bayesian matrix factorization approach leveraging auxiliary information. arXiv preprint arXiv:2303.02566. 
URL: https://doi.org/10.48550/arXiv.2303.02566.","code":""},{"path":"https://yanglabhkust.github.io/mfair/index.html","id":"development","dir":"","previous_headings":"","what":"Development","title":"Matrix Factorization with Auxiliary Information in R","text":"package developed Zhiwei Wang (zhiwei.wang@connect.ust.hk).","code":""},{"path":"https://yanglabhkust.github.io/mfair/index.html","id":"contact-information","dir":"","previous_headings":"","what":"Contact Information","title":"Matrix Factorization with Auxiliary Information in R","text":"Please feel free contact Zhiwei Wang (zhiwei.wang@connect.ust.hk), Prof. Mingxuan Cai (mingxcai@cityu.edu.hk), Prof. Can Yang (macyang@ust.hk) inquiries.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/MFAIR-class.html","id":null,"dir":"Reference","previous_headings":"","what":"Each MFAIR object has a number of slots which store information. Key slots to access are listed below. — MFAIR-class","title":"Each MFAIR object has a number of slots which store information. Key slots to access are listed below. — MFAIR-class","text":"MFAIR object number slots store information. Key slots access listed .","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/MFAIR-class.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Each MFAIR object has a number of slots which store information. Key slots to access are listed below. — MFAIR-class","text":"MFAIR class.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/MFAIR-class.html","id":"slots","dir":"Reference","previous_headings":"","what":"Slots","title":"Each MFAIR object has a number of slots which store information. Key slots to access are listed below. — MFAIR-class","text":"Y matrix Matrix::dgCMatrix. main data matrix N samples M features. X data.frame. auxiliary information data frame N samples C covariates. Y_sparse Logical. Whether main data matrix Y stored sparse mode. Y_center Logical. 
Whether main data matrix Y centered. Y_mean Numeric. Mean main data matrix Y centered. Zero . Y_missing Logical. Whether main data matrix Y partially observed. n_obs integer. Total number observed entries Y. N integer. Number rows (samples) Y, also number rows (samples) X. M integer. Number columns (features) Y. C integer. Number columns (auxiliary covariates) X. K_max integer. Note increasing K_max ensure actual K also increases, since K_max just upper bound model automatically infer K K_max. want inference larger rank K, please make sure K_max large enough tol_snr parameter fitting function fitGreedy() small enough, simultaneously. K integer. inferred rank Y. Z N * K matrix. Estimated loading matrix, corresponding inferred posterior mean Z MFAI model. a_sq matrix containing posterior variance Z k-th column corresponding k-th loading. fully observed Y, N elements one specific loading share posterior variance, a_sq 1 * K matrix. Y missing data, elements one specific loading different posterior variances, a_sq N * K matrix. W M * K matrix. Estimated factor matrix, corresponding inferred posterior mean W MFAI model. b_sq matrix containing posterior variance W k-th column corresponding k-th factor. fully observed Y, M elements one specific factor share posterior variance, b_sq 1 * K matrix. Y missing data, elements one specific factor different posterior variances, b_sq M * K matrix. tau Numeric. vector length K, containing precision parameter pair loading/factor. beta Numeric. vector length K, containing precision parameter loading Z_k. FX N * K matrix representing prior mean Z, corresponding F(X) MFAI model. tree_0 1 * K matrix containing tree_0 k-th column corresponding k-th factor. Tree_0 defined mean mu vector factor. tree_lists list length K, containing K fitted functions function represented list trees, .e., k-th list corresponds function F_k(.) MFAI model. initialization list. Initialization fitted model. learning_rate Numeric. 
learning rate gradient boosting part. tree_parameters list options control details rpart algorithm. project Character. Name project (record keeping).","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/MFAIRSingleFactor-class.html","id":null,"dir":"Reference","previous_headings":"","what":"MFAIRSingleFactor object contains the key information about the fitted single factor MFAI model. — MFAIRSingleFactor-class","title":"MFAIRSingleFactor object contains the key information about the fitted single factor MFAI model. — MFAIRSingleFactor-class","text":"MFAIRSingleFactor object contains key information fitted single factor MFAI model.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/MFAIRSingleFactor-class.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"MFAIRSingleFactor object contains the key information about the fitted single factor MFAI model. — MFAIRSingleFactor-class","text":"MFAIRSingleFactor class.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/MFAIRSingleFactor-class.html","id":"slots","dir":"Reference","previous_headings":"","what":"Slots","title":"MFAIRSingleFactor object contains the key information about the fitted single factor MFAI model. — MFAIRSingleFactor-class","text":"Y_missing Logical. Whether main data matrix Y partially observed. n_obs Integer. Total number observed entries Y. mu vector length N representing inferred loading, corresponding posterior mean z single factor MFAI model. a_sq Numeric. posterior variance loading z. fully observed Y, N elements loading share posterior variance, a_sq single number. Y missing data, elements different posterior variances, a_sq vector length N. nu vector length M representing inferred factor, corresponding posterior mean w single factor MFAI model. b_sq Numeric. posterior variance factor w. fully observed Y, M elements factor share posterior variance, b_sq single number. 
Y missing data, elements different posterior variances, b_sq vector length M. tau Numeric. Precision parameter pair loading/factor. beta Numeric. Precision parameter loading z. FX vector length N representing prior mean z, corresponding F(X) single factor MFAI model. tree_0 Numeric. Tree_0 defined mean mu vector. tree_list list containing multiple decision trees, corresponding function F(.) single factor MFAI model. project Character. Name project (record keeping).","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/appendMFAIR.html","id":null,"dir":"Reference","previous_headings":"","what":"Append the fitted factor to the MFAIR object in the greedy algorithm. — appendMFAIR","title":"Append the fitted factor to the MFAIR object in the greedy algorithm. — appendMFAIR","text":"Append fitted factor MFAIR object greedy algorithm.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/appendMFAIR.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Append the fitted factor to the MFAIR object in the greedy algorithm. — appendMFAIR","text":"","code":"appendMFAIR(object, object_sf)"},{"path":"https://yanglabhkust.github.io/mfair/reference/appendMFAIR.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Append the fitted factor to the MFAIR object in the greedy algorithm. — appendMFAIR","text":"object MFAIR object containing information first factors. object_sf MFAIRSingleFactor needed appended.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/appendMFAIR.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Append the fitted factor to the MFAIR object in the greedy algorithm. 
— appendMFAIR","text":"MFAIR object containing information new fitted single factor.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/createMFAIR.html","id":null,"dir":"Reference","previous_headings":"","what":"Create the MFAIR object with main data matrix and auxiliary information. — createMFAIR","title":"Create the MFAIR object with main data matrix and auxiliary information. — createMFAIR","text":"Create MFAIR object main data matrix auxiliary information.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/createMFAIR.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create the MFAIR object with main data matrix and auxiliary information. — createMFAIR","text":"","code":"createMFAIR( Y, X, Y_sparse = FALSE, Y_center = TRUE, K_max = 1L, project = \"MFAIR\" )"},{"path":"https://yanglabhkust.github.io/mfair/reference/createMFAIR.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create the MFAIR object with main data matrix and auxiliary information. — createMFAIR","text":"Y matrix Matrix::dgCMatrix. main data matrix N samples M features. X data.frame. auxiliary information data frame N samples C covariates. Y_sparse Logical. Determines whether use spase mode Y. Y_center Logical. Determines whether centering performed. K_max integer. maximum rank allowed model. project Character. Name project (record keeping).","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/createMFAIR.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create the MFAIR object with main data matrix and auxiliary information. — createMFAIR","text":"Returns MFAIR object, main data matrix auxiliary information.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/fitBack.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit the MFAI model using backfitting algorithm. 
— fitBack","title":"Fit the MFAI model using backfitting algorithm. — fitBack","text":"Fit MFAI model using backfitting algorithm.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/fitBack.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit the MFAI model using backfitting algorithm. — fitBack","text":"","code":"fitBack( object, learning_rate = 0.1, minsplit = 10, minbucket = round(minsplit/3), maxdepth = 2, other_tree_para = list(), iter_max_bf = 5000, tol_bf = 0.01, verbose_bf_inner = TRUE, verbose_bf_outer = TRUE, sf_para = list() )"},{"path":"https://yanglabhkust.github.io/mfair/reference/fitBack.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit the MFAI model using backfitting algorithm. — fitBack","text":"object MFAIR object. learning_rate Numeric. Parameter gradient boosting part. minsplit Numeric. Parameter gradient boosting part. minbucket Numeric. Parameter gradient boosting part. maxdepth Numeric. Parameter gradient boosting part. other_tree_para list containing parameters gradient boosting part. See rpart::rpart.control() details. iter_max_bf Integer. Maximum iterations allowed. tol_bf Numeric. convergence criterion. verbose_bf_inner Logical. Whether display detailed information inner loop. verbose_bf_outer Logical. Whether display detailed information outer loop. sf_para list containing parameters fitting single factor MFAI model. See fitSFFully() fitSFMissing() details.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/fitBack.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit the MFAI model using backfitting algorithm. 
— fitBack","text":"MFAIR object containing information fitted MFAI model using backfitting algorithm.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/fitGreedy.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit the MFAI model using greedy algorithm. — fitGreedy","title":"Fit the MFAI model using greedy algorithm. — fitGreedy","text":"Fit MFAI model using greedy algorithm.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/fitGreedy.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit the MFAI model using greedy algorithm. — fitGreedy","text":"","code":"fitGreedy( object, K_max = NULL, learning_rate = 0.1, minsplit = 10, minbucket = round(minsplit/3), maxdepth = 2, other_tree_para = list(), tol_snr = 0.002, verbose_greedy = TRUE, save_init = FALSE, sf_para = list() )"},{"path":"https://yanglabhkust.github.io/mfair/reference/fitGreedy.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit the MFAI model using greedy algorithm. — fitGreedy","text":"object MFAIR object. K_max Integer. maximum rank allowed MFAI model. learning_rate Numeric. Parameter gradient boosting part. minsplit Integer. Parameter gradient boosting part. minbucket Integer. Parameter gradient boosting part. maxdepth Integer. Parameter gradient boosting part. other_tree_para list containing parameters gradient boosting part. See rpart::rpart.control() details. tol_snr Numeric. convergence criterion determine inferred rank data. verbose_greedy Logical. Whether display detailed information fitting model. save_init Logical. Whether save initialization model. sf_para list containing parameters fitting single factor MFAI model. See fitSFFully() fitSFMissing() details.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/fitGreedy.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit the MFAI model using greedy algorithm. 
— fitGreedy","text":"MFAIR object containing information fitted MFAI model using greedy algorithm.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/fitSFFully.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit the single factor MFAI model with fully observed main data matrix. — fitSFFully","title":"Fit the single factor MFAI model with fully observed main data matrix. — fitSFFully","text":"Fit single factor MFAI model fully observed main data matrix.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/fitSFFully.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit the single factor MFAI model with fully observed main data matrix. — fitSFFully","text":"","code":"fitSFFully( Y, X, init, learning_rate, tree_parameters, stage1 = TRUE, iter_max = 5000, tol_stage1 = 0.1, tol_stage2 = 1e-05, verbose_sf = TRUE, verbose_loop = TRUE, save_tree_list = TRUE )"},{"path":"https://yanglabhkust.github.io/mfair/reference/fitSFFully.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit the single factor MFAI model with fully observed main data matrix. — fitSFFully","text":"Y Main data matrix. X data.frame containing auxiliary information. init MFAIRSingleFactor object containing initial parameters single factor MAFI model. learning_rate Numeric. Parameter gradient boosting part. tree_parameters list containing parameters gradient boosting part. stage1 Logical. Whether perform fitting algorithm stage1. greedy algorithm needs backfitting algorithm need. iter_max Integer. Maximum iterations allowed. tol_stage1 Numeric. Convergence criterion first step. tol_stage2 Numeric. Convergence criterion first step. verbose_sf Logical. Whether display detailed information. verbose_loop Logical. Whether display detailed information looping. save_tree_list Logical. 
Whether save tree list.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/fitSFFully.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit the single factor MFAI model with fully observed main data matrix. — fitSFFully","text":"MFAIRSingleFactor object containing information fitted single factor MFAI model.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/fitSFMissing.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit the single factor MFAI model with partially observed main data matrix. — fitSFMissing","title":"Fit the single factor MFAI model with partially observed main data matrix. — fitSFMissing","text":"Fit single factor MFAI model partially observed main data matrix.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/fitSFMissing.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit the single factor MFAI model with partially observed main data matrix. — fitSFMissing","text":"","code":"fitSFMissing( Y, obs_indices, X, init, learning_rate, tree_parameters, stage1 = TRUE, iter_max = 5000, tol_stage1 = 0.1, tol_stage2 = 1e-05, verbose_sf = TRUE, verbose_loop = TRUE, save_tree_list = TRUE )"},{"path":"https://yanglabhkust.github.io/mfair/reference/fitSFMissing.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit the single factor MFAI model with partially observed main data matrix. — fitSFMissing","text":"Y Main data matrix. obs_indices Indices observed entries main data matrix. X data.frame containing auxiliary information. init MFAIRSingleFactor object containing initial parameters single factor MFAI model. learning_rate Numeric. Parameter gradient boosting part. tree_parameters list containing parameters gradient boosting part. stage1 Logical. Whether perform fitting algorithm stage1. greedy algorithm needs backfitting algorithm need. iter_max Integer. 
Maximum iterations allowed. tol_stage1 Numeric. Convergence criterion first step. tol_stage2 Numeric. Convergence criterion second step. verbose_sf Logical. Whether display detailed information. verbose_loop Logical. Whether display detailed information looping. save_tree_list Logical. Whether save tree list.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/fitSFMissing.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit the single factor MFAI model with partially observed main data matrix. — fitSFMissing","text":"MFAIRSingleFactor object containing information fitted single factor MFAI model.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/getELBO.html","id":null,"dir":"Reference","previous_headings":"","what":"Compute the evidence lower bound (ELBO) for fitted single factor MFAI model. — getELBO","title":"Compute the evidence lower bound (ELBO) for fitted single factor MFAI model. — getELBO","text":"Compute evidence lower bound (ELBO) fitted single factor MFAI model.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/getELBO.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Compute the evidence lower bound (ELBO) for fitted single factor MFAI model. — getELBO","text":"","code":"getELBO(Y, object)"},{"path":"https://yanglabhkust.github.io/mfair/reference/getELBO.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Compute the evidence lower bound (ELBO) for fitted single factor MFAI model. — getELBO","text":"Y Observed main data matrix. object MFAIRSingleFactor object containing information fitted single factor MFAI model.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/getELBO.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Compute the evidence lower bound (ELBO) for fitted single factor MFAI model. — getELBO","text":"Numeric. 
ELBO.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/getImportance.html","id":null,"dir":"Reference","previous_headings":"","what":"Get importance measures of auxiliary covariates. — getImportance","title":"Get importance measures of auxiliary covariates. — getImportance","text":"Get importance measures auxiliary covariates.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/getImportance.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get importance measures of auxiliary covariates. — getImportance","text":"","code":"getImportance(object, which_factors = seq_len(object@K))"},{"path":"https://yanglabhkust.github.io/mfair/reference/getImportance.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Get importance measures of auxiliary covariates. — getImportance","text":"object MFAIR object. which_factors factors, .e., fitted functions evaluated. K factors evaluated default.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/getImportance.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get importance measures of auxiliary covariates. — getImportance","text":"Importance score matrix. row auxiliary covariate column factor.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/getImportance.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Get importance measures of auxiliary covariates. — getImportance","text":"rpart::rpart() function automatically change special characters variable names dot may cause inconsistency errors. Please ensure auxiliary covariates' names contain special characters want use function.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/getImportanceSF.html","id":null,"dir":"Reference","previous_headings":"","what":"Get importance measures of auxiliary covariates in a single factor. 
— getImportanceSF","title":"Get importance measures of auxiliary covariates in a single factor. — getImportanceSF","text":"Get importance measures auxiliary covariates single factor.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/getImportanceSF.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get importance measures of auxiliary covariates in a single factor. — getImportanceSF","text":"","code":"getImportanceSF(tree_list, variables_names)"},{"path":"https://yanglabhkust.github.io/mfair/reference/getImportanceSF.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Get importance measures of auxiliary covariates in a single factor. — getImportanceSF","text":"tree_list fitted function represented list trees. variables_names names auxiliary covariates.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/getImportanceSF.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Get importance measures of auxiliary covariates in a single factor. — getImportanceSF","text":"Importance score vector. entry importance score one auxiliary covariate.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/initSF.html","id":null,"dir":"Reference","previous_headings":"","what":"Initialize the parameters for the single factor MFAI model. — initSF","title":"Initialize the parameters for the single factor MFAI model. — initSF","text":"Initialize parameters single factor MFAI model.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/initSF.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Initialize the parameters for the single factor MFAI model. 
— initSF","text":"","code":"initSF(Y, Y_missing, Y_sparse, n_obs)"},{"path":"https://yanglabhkust.github.io/mfair/reference/initSF.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Initialize the parameters for the single factor MFAI model. — initSF","text":"Y Main data matrix. Y_missing Logical. Whether main data matrix partially observed. Y_sparse Logical. Whether main data matrix sparse mode. n_obs Integer. Total number observed entries.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/initSF.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Initialize the parameters for the single factor MFAI model. — initSF","text":"MFAIRSingleFactor object containing initial parameters single factor MFAI model.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/matrixORdgCMatrix-class.html","id":null,"dir":"Reference","previous_headings":"","what":"Define the matrixORdgCMatrix class as the union of matrix and Matrix::dgCMatrix — matrixORdgCMatrix-class","title":"Define the matrixORdgCMatrix class as the union of matrix and Matrix::dgCMatrix — matrixORdgCMatrix-class","text":"Define matrixORdgCMatrix class union matrix Matrix::dgCMatrix","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/ml100k.html","id":null,"dir":"Reference","previous_headings":"","what":"MovieLens 100K data. — ml100k","title":"MovieLens 100K data. — ml100k","text":"list containing movie rating, user information, movie genres MovieLens 100K data.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/ml100k.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"MovieLens 100K data. — ml100k","text":"","code":"ml100k"},{"path":"https://yanglabhkust.github.io/mfair/reference/ml100k.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"MovieLens 100K data. 
— ml100k","text":"list containing movie rating matrix, user information data frame, movie genres data frame. rating Movie rating matrix 943 users 1,682 movies (0–5 star rating). user data frame containing information 943 users, three columns correspond age, gender, occupation respectively. genre binary data frame containing genre information 1,682 movies, column corresponds one specific genre.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/ml100k.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"MovieLens 100K data. — ml100k","text":"https://grouplens.org/datasets/movielens/100k/","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/neocortex.html","id":null,"dir":"Reference","previous_headings":"","what":"Human brain gene expression data. — neocortex","title":"Human brain gene expression data. — neocortex","text":"list containing bulk gene expression human brain tissue sample information.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/neocortex.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Human brain gene expression data. — neocortex","text":"","code":"neocortex"},{"path":"https://yanglabhkust.github.io/mfair/reference/neocortex.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Human brain gene expression data. — neocortex","text":"list containing gene expression data matrix tissue sample information data frame. expression Bulk gene expression matrix 886 tissue samples neocortex region 2,000 genes highest differential stability. sample_info data frame containing information 886 bulk tissue samples neocortex region. 
four columns correspond sample ID, neocortex area, hemisphere, time periods respectively.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/neocortex.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"Human brain gene expression data. — neocortex","text":"https://hbatlas.org/pages/data","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/predict-MFAIR-method.html","id":null,"dir":"Reference","previous_headings":"","what":"Prediction function for MFAIR object. — predict,MFAIR-method","title":"Prediction function for MFAIR object. — predict,MFAIR-method","text":"Prediction function MFAIR object.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/predict-MFAIR-method.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Prediction function for MFAIR object. — predict,MFAIR-method","text":"","code":"# S4 method for MFAIR predict(object, which_factors = seq_len(object@K))"},{"path":"https://yanglabhkust.github.io/mfair/reference/predict-MFAIR-method.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Prediction function for MFAIR object. — predict,MFAIR-method","text":"object model object prediction desired. which_factors factors, .e., columns Z W, used make prediction. K factors used default.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/predict-MFAIR-method.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Prediction function for MFAIR object. — predict,MFAIR-method","text":"Predicted matrix dimension Y.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/predict-MFAIRSingleFactor-method.html","id":null,"dir":"Reference","previous_headings":"","what":"Prediction function for MFAIRSingleFactor object. — predict,MFAIRSingleFactor-method","title":"Prediction function for MFAIRSingleFactor object. 
— predict,MFAIRSingleFactor-method","text":"Prediction function MFAIRSingleFactor object.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/predict-MFAIRSingleFactor-method.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Prediction function for MFAIRSingleFactor object. — predict,MFAIRSingleFactor-method","text":"","code":"# S4 method for MFAIRSingleFactor predict(object)"},{"path":"https://yanglabhkust.github.io/mfair/reference/predict-MFAIRSingleFactor-method.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Prediction function for MFAIRSingleFactor object. — predict,MFAIRSingleFactor-method","text":"object model object prediction desired.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/predict-MFAIRSingleFactor-method.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Prediction function for MFAIRSingleFactor object. — predict,MFAIRSingleFactor-method","text":"Predicted matrix dimension Y.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/predictFX.html","id":null,"dir":"Reference","previous_headings":"","what":"Prediction function for fitted functions. — predictFX","title":"Prediction function for fitted functions. — predictFX","text":"Prediction function fitted functions.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/predictFX.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Prediction function for fitted functions. — predictFX","text":"","code":"predictFX(object, newdata, which_factors = seq_len(object@K))"},{"path":"https://yanglabhkust.github.io/mfair/reference/predictFX.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Prediction function for fitted functions. — predictFX","text":"object MFAIR object. newdata Data frame containing values predictions required. 
which_factors factors, .e., fitted functions used. K factors used default.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/predictFX.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Prediction function for fitted functions. — predictFX","text":"matrix containing predicted F(X). row new sample column factor.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/predictFXSF.html","id":null,"dir":"Reference","previous_headings":"","what":"Prediction function for fitted function F() in single factor. — predictFXSF","title":"Prediction function for fitted function F() in single factor. — predictFXSF","text":"Prediction function fitted function F() single factor.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/predictFXSF.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Prediction function for fitted function F() in single factor. — predictFXSF","text":"","code":"predictFXSF(tree_list, newdata, learning_rate)"},{"path":"https://yanglabhkust.github.io/mfair/reference/predictFXSF.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Prediction function for fitted function F() in single factor. — predictFXSF","text":"tree_list fitted function represented list trees. newdata Data frame containing values predictions required. learning_rate Numeric. learning rate gradient boosting part.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/predictFXSF.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Prediction function for fitted function F() in single factor. — predictFXSF","text":"vector containing predicted F(X). entry corresponds new sample.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/updateMFAIR.html","id":null,"dir":"Reference","previous_headings":"","what":"Update the k-th factor of the MFAIR object in the backfitting algorithm. 
— updateMFAIR","title":"Update the k-th factor of the MFAIR object in the backfitting algorithm. — updateMFAIR","text":"Update k-th factor MFAIR object backfitting algorithm.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/updateMFAIR.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Update the k-th factor of the MFAIR object in the backfitting algorithm. — updateMFAIR","text":"","code":"updateMFAIR(object, object_sf, k)"},{"path":"https://yanglabhkust.github.io/mfair/reference/updateMFAIR.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Update the k-th factor of the MFAIR object in the backfitting algorithm. — updateMFAIR","text":"object MFAIR object containing initial information K-factor MFAI model. object_sf MFAIRSingleFactor containing information newly fitted single factor MFAI model. k Integer. factor updated.","code":""},{"path":"https://yanglabhkust.github.io/mfair/reference/updateMFAIR.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Update the k-th factor of the MFAIR object in the backfitting algorithm. — updateMFAIR","text":"MFAIR object containing information new fitted single factor.","code":""}]