library(ape) # All routines written by Pantelis Z. Hadjipantelis and David Springate (30-Jul-2012); while still experimental this code is due to be appear under some GPL-related license #Ancestral Node Reconstruction ANR <-function( phy , y , theta ){ #phy : the phylogenetic tree in ape format. #y : the value at the tips of the phylogeny #theta : the theta vector of hyperparameters (sigma_f, lambda, sigma_n) #returns three element list with the Mean and Variance of predictions plus the LogLikelikelihood associated with this predictions. #initial check if all tips are known if ( dim(cophenetic(phy))[1] != length(y) ) { print ("Not all points at the tips are known / Insufficient number of known points. Aborting"); return();} #initial check if all thetas are valid if (length(which(theta < (0)))) { print("Not all thetas are positives / Invalid thetas. Aborting"); return();} if (length(theta) != 3) { print("Theta vector is not fully populated. Aborting"); return();} #calculate full tree distance matrix x = dist.nodes(phy); #calculate predictions (posterior estimates) for each internal node in the tree p_Preds <- Predictions_ForAwholeTree(Y=y, X= x, Theta= theta) Predictions = list(); #Parse the predicted posteriors Predictions$Means= p_Preds$Means Predictions$Vars = p_Preds$Vars #Get marginal logLikelihood for the prediction model Predictions$logLik = ModelLogLik( Y = y, X = x, Theta = theta) return(Predictions) } Predictions_ForAwholeTree <- function(Y,X,Theta ){ #Theta: OU hyperparameters #X : Pairwise distances of all points in the phylogeny #Y : Values at the tips # returns two-element list of size equal to the number of internal nodes in the tree. 1st element : Means, 2nd element Variance N <- length(Y); #Number of tips N_w <- dim(X)[1] #Number of nodes in the whole tree M <- N_w - N #Number of internal nodes in the tree Means=c(); Vars=c(); #Predict for each individual internal point of the phylogeny for (i in 1:M){ P1 = Predictions_ForAspecificNode ( Y= Y, X= X[1:N,1:N], Theta= Theta, new_X= X[1:N,i+N]) Means[i] = P1$Means; Vars[i] = P1$Vars; } return(list(Means=Means,Vars=Vars)) } Predictions_ForAspecificNode <- function(Y,X,Theta,new_X){ #Theta: OU hyperparameters #X : Pairwise distances of known points in the phylogeny #Y : Values at the tips #new_X : Pairwise distances of estimation point in the phylogeny with the respect to the existing ones. # returns two-element list. 1st element : Means, 2nd element Variances N <- length(Y);#Number of tips K <- matrix( rep(0, N^2) , ncol= N)#covariance matrix s_f = ( Theta[1] ) #function's amplitude l = ( Theta[2] ) #characteristic lengh scale s_n = ( Theta[3] ) #noise's amplitude for ( i in 1:N){ for ( j in i:N){ K[i,j] = s_f^2 * exp(-(abs( X[i,j] ))/(l)) } } K = K + t(K) K = K + diag(N) * (s_n^2 - s_f^2) #Calculate the COVAR from each data point to the new one: K_x_xs = rep( 0,N) K_xs_xs = s_f^2 + s_n^2 # + s_c #Set Means and Vars Means = 0; Vars = 0 xs_coord = new_X for (i in 1:N){ K_x_xs[i] = s_f^2 * exp( -(abs( xs_coord[i]))/(l) ) } #Get the posterior estimate Means = K_x_xs %*% solve(K ,Y) Vars = K_xs_xs - K_x_xs %*% solve( K , K_x_xs) return(list(Means=Means,Vars=Vars)) } ModelLogLik <- function(Y,X,Theta ){ #Theta: OU hyperparameters #X : Pairwise distances of all points in the phylogeny #Y : Values at the tips # returns models marginal log-likelihood N <- length(Y); #Number of tips N_w <- dim(X)[1] #Number of nodes in the whole tree M <- N_w - N #Number of internal nodes in the tree N <- length(Y);#Number of tips K <- matrix( rep(0, N^2) , ncol= N)#covariance matrix s_f = ( Theta[1] ) #function's amplitude l = ( Theta[2] ) #characteristic lengh scale s_n = ( Theta[3] ) #noise's amplitude for ( i in 1:N){ for ( j in i:N){ K[i,j] = s_f^2 * exp(-(abs( X[i,j] ))/(l)) } } K = K + t(K) K = K + diag(N) * (s_n^2 - s_f^2 + 0.00001) Chol_K <- try(chol((K)), silent=TRUE) if( is.double(Chol_K)) { #if the Cholesky decomposition exists. alpha = solve(K,Y); A1 <- .5 %*% t(Y) %*% alpha; #"Fit term" A2 <- sum(log(diag(Chol_K))); #"Complexity term" L = A1 + A2 + (N*.5)*log(2*pi) return(-L)} else { print("Matrix is not PSD!"); return(0); } }