## PeakANOVA: Stronger findings from mass spectral data through multi-peak modeling
## An example script for inferring the clusters and covariate effects

## Copyright 2013 Tommi Suvitaival
# Email: tommi.suvitaival@aalto.fi

# This file is part of PeakANOVA.

# PeakANOVA is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# PeakANOVA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.

# You should have received a copy of the GNU Lesser General Public License
# along with PeakANOVA.  If not, see <http://www.gnu.org/licenses/>.


## Description
# An example script for running the PeakANOVA analysis.


## 1) Load functions

# Directory that holds the PeakANOVA source files. The current working
# directory is used as-is; edit this line if the sources live elsewhere.
path.source = getwd() # Set this to match the path of the source code files of the package.

# Source the loader script first, then call the loader with the same directory.
# NOTE(review): loadSourcePeakAnova() presumably sources the remaining package
# files found under `path` -- confirm in peakANOVA-loadSource.R.
source(file.path(path.source, "peakANOVA-loadSource.R"))
loadSourcePeakAnova(path=path.source)

## 2) Generate the default model parameters.

# `param` is adjusted further below (sampler lengths in steps 5 and 6, the
# fixed clustering in step 6) before each inference call.
param = getDefaultParamPeakAnova()

## 3) Generate simulated data.

# All settings of the simulation are passed by name, one per line, so that they are easy to edit.
# NOTE(review): the seven entries of effects.covariate.a are presumably one covariate effect per simulated cluster -- confirm in generateSimulatedDataPeakAnova().
data.generated = generateSimulatedDataPeakAnova(
	effects.covariate.a=c(0.5,-1,2,0,0,0,0),
	N.variables.per.cluster=7,
	N.samples.per.category=7,
	sigma=1,
	p.spike.gen.inside=0.01,
	p.spike.gen.outside=0.99,
	shapes.beta.gen.inside=c(2,1),
	shapes.beta.gen.outside=c(1,1)
)

# Collect the inputs of the analysis into one list. The intensity matrix X is added in the normalization step.
data = list()
data$covariates = data.generated$covariates
# Likelihood of the peak shape correlation values
data$Q.dbeta.log = data.generated$Q.dbeta.log

## 4) Normalize the intensity data.

# The normalization helper returns both the normalized intensities and the normalization it applied.
# The former goes into the analysis input, the latter is kept as a separate object.
normalized = normalizeDataByControlPopulation(
	X=data.generated$X,
	covariates=data.generated$covariates
)
normalization = normalized$normalization
data$X = normalized$X
rm(normalized) # The intermediate result is not needed any more.

## 5) Infer the clusters.

# Sampler settings for the clustering run. They are overwritten with longer
# settings before the effect inference in step 6.
param$Nburnin = 100 # number of burn-in iterations
param$Npsamples = 100 # number of iterations after burn-in
param$Npsaved = 100 # number of Gibbs samples saved after burn-in

# Only the peak shape likelihoods are passed to the clustering; the intensity
# data in data$X is not an argument of this call. The element V.ls of the
# result is used in step 6 as the fixed clustering.
result.clustering = clusterPeakAnova(Q.dbeta.log=data$Q.dbeta.log, param=param)

## 6) Infer the covariate effects on clusters.

# Sampler settings for the effect inference: ten times more iterations than in
# the clustering run, with the same number of saved samples.
param$Nburnin = 1000 # number of burn-in iterations
param$Npsamples = 1000 # number of iterations after burn-in
param$Npsaved = 100 # number of Gibbs samples saved after burn-in

# NOTE(review): V.ls is presumably a variables-by-clusters assignment matrix
# (its column sums are reported as cluster sizes in step 8) -- confirm in
# clusterPeakAnova().
param$fixed$V = result.clustering$V.ls # Fix the clustering.

# The peak shape matrices should not be given to the effect inference function. Otherwise the model will continue inferring clusters.
# data$Q is never set in this script, so the first assignment only matters if
# the script is adapted to data that carries a Q element.
data$Q = NULL
data$Q.dbeta.log = NULL

# At this point `data` holds the covariates and the normalized intensities X.
result.effects = multiWayDR(data=data, param=param)

## 7) Alternative approach

# Baseline estimate of the covariate effect, computed directly from the data:
# for each ground-truth cluster, pick the peak with the highest standard
# deviation among the samples with covariate a == 1 (the "strongest peak" in
# the log-space) and take the mean of that peak over the samples with a == 2.
# NOTE(review): this reads as an effect only if the normalization of step 4
# centres the a == 1 samples at zero -- confirm in normalizeDataByControlPopulation().
# The result is a numeric vector with one element per column of V.true.
effects.data.strongest.peak = vapply(
	X=seq_len(ncol(data.generated$V.true)), # Go through all ground-truth clusters; seq_len() also copes with zero clusters.
	FUN=function(ki) {
		variables.ki = which(data.generated$V.true[,ki]==1) # Find peaks of the ground-truth cluster.
		# drop=FALSE keeps the subset a matrix even when the cluster has a single peak
		# or there is a single a == 1 sample; apply() fails on a plain vector.
		sd.ki = apply(X=data$X[variables.ki,which(data$covariates$a==1),drop=FALSE], MARGIN=1, FUN=sd)
		strongest.ki = variables.ki[which.max(sd.ki)] # Find the "strongest peak" of the ground-truth cluster.
		mean(data$X[strongest.ki,which(data$covariates$a==2)]) # Compute the effect based on the "strongest peak" of the ground-truth cluster.
	},
	FUN.VALUE=numeric(1)
)

## 8) Print the results

# Start with one row per approach (model posterior mean, strongest-peak baseline) and one column per cluster.
# NOTE(review): assumes result.effects$posterior$eff$A is indexed [posterior sample, cluster, covariate level], so that dropping the first level and averaging over the samples leaves one effect per cluster -- confirm in multiWayDR().
results.pm = rbind(colMeans(result.effects$posterior$eff$A[,,-1]), effects.data.strongest.peak)

# Append a column with the root-mean-square error (RMSE) of each of the two approaches.
# The true effects are replicated on every row before taking the difference.
results.pm = cbind(
	results.pm,
	sqrt(rowMeans((results.pm-array(data=rep(x=data.generated$effects.a.true,each=nrow(results.pm)),dim=dim(results.pm)))^2))
)

# Put the cluster sizes and the true effects on top; an RMSE is not defined for these two rows.
# NOTE(review): the columns follow the inferred clusters for the sizes but the ground-truth clusters for the true effects -- verify that both have the same number and order.
results.pm = rbind(c(colSums(param$fixed$V),NA), c(data.generated$effects.a.true,NA), results.pm)

rownames(results.pm) = c("Number of variables in the cluster", "True effect", "*Posterior mean of the covariate effect*", "Effect of the strongest peak in the data")
# Every column except the last (RMSE) is a cluster. Numbering from the table itself replaces the undefined object `effects.data`.
colnames(results.pm) = c(paste0("Cl.", seq_len(ncol(results.pm)-1)), "*RMSE*")
print(signif(x=results.pm, digits=2))
