We provide one example for using FLASH, and implementing the greedy and backfitting algorithms. For the speed, we will just just run the backfitting algorithm with a few runs of FLASH during each iteration. We demonstrate that FLASH performs competitively against a positive matrix decomposition (PMD).
library("flashr") sim_K = function(K, N, P, SF, SL, signal,noise){ E = matrix(rnorm(N*P,0,noise),nrow=N) Y = E L_true = array(0, dim = c(N,K)) F_true = array(0, dim = c(P,K)) for(k in 1:K){ lstart = rnorm(N, 0, signal*(k/K)) fstart = rnorm(P, 0, signal*(k/K)) index = sample(seq(1:N),(N*SL)) lstart[index] = 0 index = sample(seq(1:P),(P*SF)) fstart[index] = 0 L_true[,k] = lstart F_true[,k] = fstart Y = Y + lstart %*% t(fstart) } return(list(Y = Y, L_true = L_true, F_true = F_true, Error = E)) } sim_hd = function(N, P, SF, SL, signal, a = rchisq(N,3),b = rchisq(P,1),mu = 0){ E = matrix(rep(0,N*P),nrow=N) sig2_true = matrix(rep(0,N*P),nrow=N) for(i in 1:N){ for(j in 1:P){ sig2_true[i,j] = mu + a[i] * b[j] E[i,j] = rnorm(1,0,sqrt(mu + a[i] * b[j])) } } K=1 lstart = rnorm(N, 0, signal) fstart = rnorm(P, 0, signal) index = sample(seq(1:N),(N*SL)) lstart[index] = 0 index = sample(seq(1:P),(P*SF)) fstart[index] = 0 Y = lstart %*% t(fstart) + E return(list(Y = Y, L_true = lstart, F_true = fstart, Error = E,sig2_true = sig2_true)) }
We have several choices for the variance structure and the factors as well.
Here we provide some example:
set.seed(99) N = 100 P = 200 data = sim_K(K=1,N, P, SF = 0.8, SL = 0.5, signal = 1,noise = 0.5) Y = data$Y E = data$Error gf_1 = flash(Y,objtype = "l") gf_1$sigmae2 # fixed factor gf_2 = flash(Y,factor_value = data$F_true, fix_factor = TRUE) gf_2$sigmae2 sum(gf_2$f != data$F_true) # known variance gf_3 = flash(Y,objtype = "l", sigmae2_true = 0.25,partype = "known") gf_3$sigmae2
Missing value case
set.seed(99) N = 100 P = 200 data = sim_K(K=1,N, P, SF = 0.8, SL = 0.5, signal = 1,noise = 0.5) Y = data$Y Y[1:10,1:10] = NA E = data$Error gf = flash(Y,objtype = "l") gf$sigmae2
set.seed(99) N = 100 P = 200 data = sim_hd(N, P, SF = 0.5, SL=0.5, signal =1, a = rep(1,N), b = rchisq(P,1), mu = 0) Y = data$Y E = data$Error gf_1 = flash(Y,objtype = "l", partype = "var_col") plot(data$sig2_true[1,], gf_1$sigmae2) # fixed factor gf_2 = flash(Y, partype = "var_col", factor_value = data$F_true, fix_factor = TRUE) sum(gf_2$f != data$F_true) plot(data$sig2_true[1,], gf_2$sigmae2) # known variance gf_3 = flash(Y,objtype = "l", partype = "known",sigmae2_true = data$sig2_true[1,]) plot(data$sig2_true[1,], gf_3$sigmae2)
set.seed(99) N = 100 P = 200 data = sim_hd(N, P, SF = 0.5, SL=0.5, signal =1, a = rchisq(N,3),b = rchisq(P,1),mu = 0) Y = data$Y E = data$Error gf = flash(Y,objtype = "l", partype = "B") plot(data$sig2_true,gf$sigmae2)
set.seed(99) N = 100 P = 200 data = sim_hd(N, P, SF = 0.5, SL=0.5, signal =1, a = rchisq(N,3),b = rchisq(P,1),mu = 0) Y = data$Y E = data$Error gf = flash(Y,objtype = "l", partype = "known",sigmae2_true = data$sig2_true) plot(data$sig2_true,gf$sigmae2)
set.seed(99) N = 100 P = 200 data = sim_hd(N, P, SF = 0.5, SL=0.5, signal =1, a = rchisq(N,3),b = rchisq(P,1),mu = 0.9) Y = data$Y E = data$Error gf = flash(Y,objtype = "l", partype = "noisy",sigmae2_true = (data$sig2_true - 0.9)) (gf$sigmae2 - (data$sig2_true - 0.9))[1,1] # missing value Y = data$Y Y[1:10,1:20] = NA gf = flash(Y,objtype = "l", partype = "noisy",sigmae2_true = (data$sig2_true - 0.9)) (gf$sigmae2 - (data$sig2_true - 0.9))[1,1]
set.seed(99) N = 100 P = 600 data = sim_K(K=10,N, P, SF = 0.9, SL = 0.8, signal = 1,noise = 1) Y = data$Y E = data$Error plot(svd(Y)$d) ggd = greedy(Y,K = 100) #eigen_plot(ggd,sum(Y^2)) gbf <- backfitting(Y, ggd, maxiter_bf = 50)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.