require(MASS) require(lars) ######################################################################### # Reference: Leng and Wang (2009). On General Adaptive Sparse Principal # Component Analysis. JCGS. 18, 201-215. # # Written by Chenlei Leng (stalc@nus.edu.sg), this version May 6, 2009. # Questions and comments are welcome. # # Input: x - either the data matrix (when type='predictor') or # the gram matrix (when type='Gram') # n - sample size, needed especially when x is the gram matrix # for computing BIC # d - the number of PCs to retain # method - GAS, SPCA or VAST, see the paper for details # adaptive - whether to use adaptive penalty # n.BT - number of bootstrap samples # corr - whether to use correlation matrix # seed - random seed used for booststrap # para - a vector of integers specifying how many zero coefs # should be retained for each loading, inherited from # function spca in R library elasticnet # # Output: loadings - the estimated loadings # B0 - the estimated loadings of PCA ######################################################################### gas.pca <- function(x, n, d=1, type=c("Gram","predictor"), max.iter=10, method=c('GAS','SPCA','VAST'), adaptive=TRUE, para=NULL, corr=FALSE, seed=NULL, n.BT = 2000) { eps.conv <- 1e-3 call <- match.call() type <- match.arg(type) method <- match.arg(method) vn <- dimnames(x)[[2]] p <- dim(x)[2] xtx <- switch(type, predictor = { x <- scale(x, center = TRUE, scale = FALSE) xtx <- t(x)%*%x }, Gram = { xtx <- x }) svdobj <- svd(xtx) v <- svdobj$v A <- as.matrix(v[, 1:d, drop = FALSE]) for(i in 1:d) A[,i] <- A[,i]*sign(A[1,i]) B <- A; temp <- B; A0 <- B if(method!='SPCA') { if(!is.null(seed)) { set.seed(seed) } if(is.null(n.BT)) n.BT <- 2000 boot.ev <- array(0, dim=c(p,n.BT,d)) for(ii in 1:n.BT) { newx <- switch(type, Gram={ tmp <- mvrnorm(n, mu=rep(0,p), xtx) newx <- t(tmp)%*%tmp }, predictor={ idx <- sample(1:n,n,replace=T) newx <- t(x[idx,])%*%x[idx,] }) tmp.svd <- svd(newx/n) tmp <- tmp.svd$v[,1:d,drop=FALSE] t2 <- acos(apply(tmp*A0,2,sum)); t3 <- t2>pi/2 tmp[,t3] <- -tmp[,t3] boot.ev[,ii,] <- tmp } boot.cov <- array(0, dim=c(p,p,d)) for(ii in 1:d) boot.cov[,,ii] <- cov(t(boot.ev[,,ii])) boot.incov <- array(0, dim=c(p,p,d)) for(ii in 1:d) boot.incov[,,ii] <- solve(boot.cov[,,ii]) } xtx.1 <- xtx if(corr) xtx.1 <- xtx.1*n II <- diag(p)*n k <- 0 diff <- 1 tr.x <- tr(xtx) while ((k < max.iter) & (diff > eps.conv)) { k <- k + 1 for (i in 1:d) { xtx1 <- switch(method, GAS = {xtx1 <- boot.incov[,,i,drop=TRUE]}, SPCA = { xtx1<- xtx.1}, VAST = { xtx1 <- diag(diag(boot.incov[,,i,drop=TRUE]))}) lfit <- lars.lsa(xtx1,A[,i],n=n,adaptive=adaptive,para=para[i]) B[,i] <- lfit$beta.bic } normB <- sqrt(apply(B^2, 2, sum)) normB[normB == 0] <- 1 B2 <- t(t(B)/normB) diff <- max(abs(B2 - temp)) temp <- B A <- xtx %*% B; z <- svd(A) A <- (z$u) %*% t(z$v) } B <- normalize(B, d) dimnames(B) <- list(vn, paste("PC", 1:d, sep = "")) dimnames(A0) <- list(vn, paste("PC", 1:d, sep = "")) obj <- list(loadings=B, B0=A0) obj } tr <- function(x) sum(diag(x)) rootmatrix <- function(x) { x.eigen <- eigen(x) d <- x.eigen$values d <- (d + abs(d))/2 v <- x.eigen$vectors return(v %*% diag(sqrt(d)) %*% t(v)) } normalize <- function(B,d) { normB <- sqrt(apply(B^2, 2, sum)) normB[normB == 0] <- 1 B <- t(t(B)/normB) B[abs(B)<1e-6] <- 0 for (i in 1:d) { tmp <- abs(B[,i])>1e-6; if(sum(tmp)!=0) { tmp1 <- B[tmp,i] B[,i] <- sign(tmp1[1])*B[,i] } } B } lars.lsa <- function (Sigma0, b0, n, adaptive=TRUE, type = c("lar","lasso"), eps = .Machine$double.eps, max.steps, para=NULL) { type <- match.arg(type) TYPE <- switch(type, lasso = "LASSO", lar = "LAR") n1 <- dim(Sigma0)[1] Sigma <- Sigma0 b <- b0 if(adaptive) { Sigma <- diag(abs(b))%*%Sigma%*%diag(abs(b)) b <- sign(b) } nm <- dim(Sigma) m <- nm[2] im <- inactive <- seq(m) Cvec <- drop(t(b)%*%Sigma) ssy <- sum(Cvec*b) if (missing(max.steps)) max.steps <- 8 * m beta <- matrix(0, max.steps + 1, m) Gamrat <- NULL arc.length <- NULL R2 <- 1 RSS <- ssy first.in <- integer(m) active <- NULL actions <- as.list(seq(max.steps)) drops <- FALSE Sign <- NULL R <- NULL k <- 0 ignores <- NULL while ((k < max.steps) & (length(active) < m)) { action <- NULL k <- k + 1 C <- Cvec[inactive] Cmax <- max(abs(C)) if (!any(drops)) { new <- abs(C) >= Cmax - eps C <- C[!new] new <- inactive[new] for (inew in new) { R <- updateR(Sigma[inew, inew], R, drop(Sigma[inew, active]), Gram = TRUE,eps=eps) if(attr(R, "rank") == length(active)) { ##singularity; back out nR <- seq(length(active)) R <- R[nR, nR, drop = FALSE] attr(R, "rank") <- length(active) ignores <- c(ignores, inew) action <- c(action, - inew) } else { if(first.in[inew] == 0) first.in[inew] <- k active <- c(active, inew) Sign <- c(Sign, sign(Cvec[inew])) action <- c(action, inew) } } } else action <- -dropid Gi1 <- backsolve(R, backsolvet(R, Sign)) dropouts <- NULL A <- 1/sqrt(sum(Gi1 * Sign)) w <- A * Gi1 if (length(active) >= m) { gamhat <- Cmax/A } else { a <- drop(w %*% Sigma[active, -c(active,ignores), drop = FALSE]) gam <- c((Cmax - C)/(A - a), (Cmax + C)/(A + a)) gamhat <- min(gam[gam > eps], Cmax/A) } if (type == "lasso") { dropid <- NULL b1 <- beta[k, active] z1 <- -b1/w zmin <- min(z1[z1 > eps], gamhat) if (zmin < gamhat) { gamhat <- zmin drops <- z1 == zmin } else drops <- FALSE } beta[k + 1, ] <- beta[k, ] beta[k + 1, active] <- beta[k + 1, active] + gamhat * w Cvec <- Cvec - gamhat * Sigma[, active, drop = FALSE] %*% w Gamrat <- c(Gamrat, gamhat/(Cmax/A)) arc.length <- c(arc.length, gamhat) if (type == "lasso" && any(drops)) { dropid <- seq(drops)[drops] for (id in rev(dropid)) { R <- downdateR(R,id) } dropid <- active[drops] beta[k + 1, dropid] <- 0 active <- active[!drops] Sign <- Sign[!drops] } actions[[k]] <- action inactive <- im[-c(active)] } beta <- beta[seq(k + 1), ] dff <- b-t(beta) RSS <- diag(t(dff)%*%Sigma%*%dff) if(adaptive) beta <- t(abs(b0)*t(beta)) dof <- apply(abs(beta)>eps,1,sum) BIC <- RSS+log(n)*dof AIC <- RSS+2*dof if(is.null(para)) { tmp <- sort(BIC,ind=TRUE) beta.bic <- beta[tmp$ix[1],] } else { tmp1 <- dof==para beta.bic <- beta[tmp1,] } list(beta.bic=beta.bic, BIC=BIC, beta=beta) }