generateSimulatedDataPeakAnova = function(effects.covariate.a=c(0.5,-1,2,0,0,0,0), gamma.df=NULL, gamma.threshold=NULL, N.samples.per.category=7, N.variables.per.cluster=7, peaks.relative.to.strongest.peak=TRUE, p.spike.gen.inside=0.01, p.spike.gen.outside=0.99, p.spike.like.inside=0.01, p.spike.like.outside=0.99, psi=0.1, shapes.beta.gen.inside=c(2,1), shapes.beta.gen.outside=c(1,1), shapes.beta.like.inside=c(2,1), shapes.beta.like.outside=c(1,1), shapes.beta.relative.to.strongest.peak=NULL, sigma=1) {


	## PeakANOVA: Stronger findings from mass spectral data through multi-peak modeling
	## Function for generating simulated data

	## Copyright 2013 Tommi Suvitaival
	# Email: tommi.suvitaival@aalto.fi

	# This file is part of PeakANOVA.

	# PeakANOVA is free software: you can redistribute it and/or modify
	# it under the terms of the GNU Lesser General Public License as published by
	# the Free Software Foundation, either version 3 of the License, or
	# (at your option) any later version.

	# PeakANOVA is distributed in the hope that it will be useful,
	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	# GNU Lesser General Public License for more details.

	# You should have received a copy of the GNU Lesser General Public License
	# along with PeakANOVA.  If not, see <http://www.gnu.org/licenses/>.


	## Description
	# -A function for generating simulated data for the PeakANOVA model.
	# -Two sample categories are generated (levels 1 and 2 of covariate 'a'); the effects in 'effects.covariate.a' shift the latent cluster-level signal of the second category only.

	## Arguments
	# -effects.covariate.a: Effects of covariate 'a'. A vector of length K with real values, where K is the number of clusters (compounds). The length will determine the number of clusters in the generated data. Must not be empty.
	# -gamma.df, gamma.threshold, shapes.beta.relative.to.strongest.peak: Either all NULL or all given. When given, the scale of the strongest peak in each cluster is drawn as 1/chi-square with 'gamma.df' degrees of freedom (redrawn until it does not exceed 'gamma.threshold'), and the remaining peaks of the cluster are scaled by beta-distributed factors with the two shape parameters in 'shapes.beta.relative.to.strongest.peak'. When all are NULL, every peak has scale 1.
	# -N.variables.per.cluster: Number of variables (peaks) in each cluster. A positive integer value.
	# -N.samples.per.category: Number of samples in each ANOVA category (i.e., the number of samples that share the same level of covariate 'a'). A positive integer value.
	# -peaks.relative.to.strongest.peak: The supplied value is not used; the flag is derived from whether the three arguments 'gamma.df', 'gamma.threshold' and 'shapes.beta.relative.to.strongest.peak' are given.
	# -psi: Variance of the latent cluster-level variable (its square root is used as the standard deviation). If NULL, unit standard deviation is used.
	# -sigma: Noise level of the observed intensities. A positive real value, used as the standard deviation of the Gaussian noise.
	# -p.spike.gen.inside: Likelihood of a missing value in a peak shape correlation matrix, for a pair of peaks in the same cluster. A real value between 0 and 1.
	# -p.spike.gen.outside: Likelihood of a missing value in a peak shape correlation matrix, for a pair of peaks in different clusters. A real value between 0 and 1.
	# -shapes.beta.gen.inside: Parameters of the beta distribution from which an observed value in a peak shape correlation matrix is drawn, for a pair of peaks in the same cluster. A vector of length 2 with non-negative real values.
	# -shapes.beta.gen.outside: Parameters of the beta distribution from which an observed value in a peak shape correlation matrix is drawn, for a pair of peaks in different clusters. A vector of length 2 with non-negative real values.
	# -p.spike.like.inside, p.spike.like.outside, shapes.beta.like.inside, shapes.beta.like.outside: Either all NULL or all given. When given, they are the missing-value probabilities and beta shape parameters under which the log-likelihood of the generated correlations is evaluated ('Q.dbeta.log' is returned). When all are NULL, the generated correlations themselves are returned ('Q').

	## Value
	# -covariates: Covariate levels of samples. A list of vectors 'a', 'b' and 'c' corresponding to covariates with the same names. Each vector is of length N with positive integer values, where N is the total number of samples. 'b' and 'c' are constant 1.
	# -V.true: Ground-truth clustering of variables (peaks). A matrix with values 0 and 1 with one non-zero value on each row indicating the cluster assignment. The dimensionality of the matrix is PxK, where P is the total number of variables and K is the number of clusters.
	# -effects.a.true: The vector 'effects.covariate.a' that was used for generating the data.
	# -X: The intensity (peak height) data. A matrix with dimensions PxN, where P is the total number of variables and N is the total number of samples.
	# -Q: Peak shape correlations data (returned only when the '*.like.*' arguments are NULL). An array of real values between 0 and 1 (beta-distributed draws) or missing values (NA). The array has dimensions NxPxP, where N is the total number of samples and P is the total number of variables (peaks). Entries pairing a peak with itself are left NA.
	# -Q.dbeta.log: Logarithmic likelihood of observed peak shape correlations data (returned only when the '*.like.*' arguments are given). A list of two matrices 'inside' and 'outside' containing the log-likelihoods of each pair's correlations evaluated under the same-cluster and the different-cluster parameters, respectively, summed over all samples. Both matrices are of dimensionality PxP, where P is the total number of variables (peaks); the diagonal is left NA.


	## Source code

	N.clusters = length(effects.covariate.a)
	if (N.clusters<1) {
		stop("effects.covariate.a must contain at least one effect")
	}
	N.samples = 2*N.samples.per.category # Two categories of covariate 'a'.
	N.variables = N.variables.per.cluster*N.clusters

	## Check that each group of linked arguments is either fully given or fully omitted.

	relative.given = !c(is.null(gamma.df), is.null(gamma.threshold), is.null(shapes.beta.relative.to.strongest.peak))
	if (all(relative.given) || !any(relative.given)) {
		peaks.relative.to.strongest.peak = all(relative.given) # If these arguments are given, the heights of the peaks in a cluster are generated relative to the highest peak.
	} else {
		stop("Missing arguments: gamma.df, gamma.threshold or shapes.beta.relative.to.strongest.peak")
	}
	like.given = !c(is.null(shapes.beta.like.inside), is.null(shapes.beta.like.outside), is.null(p.spike.like.inside), is.null(p.spike.like.outside))
	if (all(like.given) || !any(like.given)) {
		q.like.sum = all(like.given) # If parameters of the model are given, the likelihood values will be computed and returned instead of the actual observed shape correlation values.
	} else {
		stop("Missing arguments: shapes.beta.like.inside or shapes.beta.like.outside")
	}
	if (is.null(p.spike.gen.inside) || is.null(p.spike.gen.outside) || is.null(shapes.beta.gen.inside) || is.null(shapes.beta.gen.outside)) {
		stop("Missing arguments: p.spike.gen.inside, p.spike.gen.outside, shapes.beta.gen.inside or shapes.beta.gen.outside")
	}

	## Local helpers.

	# Draw a samples x pairs matrix of shape correlations: each entry is missing (NA)
	# with probability 'p.spike' and otherwise a draw from a beta distribution.
	drawShapeCorrelations = function(n.pairs, p.spike, shapes) {
		q = array(data=NA_real_, dim=c(N.samples,n.pairs))
		if (p.spike>0) {
			ind.slab = which(runif(n=length(q))>p.spike)
			if (length(ind.slab)>0) {
				q[ind.slab] = rbeta(n=length(ind.slab), shape1=shapes[1], shape2=shapes[2])
			}
		} else {
			# No missing values at all: every entry is observed.
			q[] = rbeta(n=length(q), shape1=shapes[1], shape2=shapes[2])
		}
		return(q)
	}

	# Log-likelihood of a samples x pairs matrix of correlations, summed over samples:
	# a missing entry contributes log(p.spike), an observed entry log(1-p.spike) plus the beta log-density.
	sumLogLikelihood = function(q, p.spike, shapes) {
		observed = !is.na(q)
		log.lik = array(data=log(p.spike), dim=dim(q))
		log.lik[observed] = dbeta(x=q[observed], shape1=shapes[1], shape2=shapes[2], log=TRUE) + log(1-p.spike)
		return(colSums(log.lik))
	}

	## Create the covariate vectors.

	data = list()
	data$covariates = list()
	data$covariates$a = rep(x=1:2, each=N.samples.per.category)
	data$covariates$b = rep(x=1, length.out=length(data$covariates$a))
	data$covariates$c = data$covariates$b

	## Generate latent variable given the covariate effects.

	sd.latent = 1
	if (!is.null(psi)) {
		sd.latent = sqrt(psi)
	}
	xlat = array(data=0, dim=c(N.clusters,N.samples)) # xlat is a K x N matrix.
	for (ki in seq_len(N.clusters)) { # Go through all clusters.
		# The first category has mean zero, the second one is shifted by the effect of the cluster.
		xlat[ki,] = c(rnorm(n=N.samples.per.category, sd=sd.latent), rnorm(n=N.samples.per.category, mean=effects.covariate.a[ki], sd=sd.latent))
	}

	## Generate scales of the variables.

	V = array(data=0, dim=c(N.variables,N.clusters)) # Row block 'li' holds the variables of cluster 'li'.
	for (li in seq_len(N.clusters)) { # Go through all clusters.
		rows.li = (li-1)*N.variables.per.cluster + seq_len(N.variables.per.cluster)
		if (peaks.relative.to.strongest.peak) {
			# Scale of the strongest peak; redraw until it does not exceed the threshold.
			gamma.li = 1/rchisq(n=1, df=gamma.df)
			while (gamma.li>gamma.threshold) {
				gamma.li = 1/rchisq(n=1, df=gamma.df)
			}
			if (N.variables.per.cluster>1) {
				# The other peaks are beta-distributed fractions of the strongest one; the order within the cluster is randomized.
				# Base sample() performs the random permutation, so no add-on package providing permute() is required.
				gamma.li = sample(c(1, rbeta(n=N.variables.per.cluster-1, shape1=shapes.beta.relative.to.strongest.peak[1], shape2=shapes.beta.relative.to.strongest.peak[2]))*gamma.li)
			}
			V[rows.li,li] = gamma.li
		} else {
			V[rows.li,li] = 1
		}
	}
	data$V.true = (V>0)*1
	data$effects.a.true = effects.covariate.a

	## Generate the observed data.

	data$X = V%*%xlat+array(data=rnorm(n=N.variables*N.samples, sd=sigma), dim=c(N.variables,N.samples))

	## Generate the observed sample shape correlation matrices.

	if (q.like.sum) {
		data$Q.dbeta.log = list()
		data$Q.dbeta.log$inside = array(dim=c(N.variables,N.variables)) # A matrix with dimensions variables x variables.
		data$Q.dbeta.log$outside = array(dim=c(N.variables,N.variables))
	} else {
		data$Q = array(dim=c(N.samples,N.variables,N.variables)) # An array with dimensions samples x variables x variables.
	}

	q.tmp = array(data=NA_real_, dim=c(N.samples,N.variables)) # A matrix with dimensions samples x variables.
	for (li in seq_len(N.clusters)) { # Go through all clusters.
		ind.i = which(V[,li]>0) # Select variables of cluster 'li'.
		for (pj in seq_along(ind.i)) { # Go through the variables of cluster 'li'.
			ind.this = ind.i[pj]
			q.tmp[] = NA
			# Sample correlations between different clusters.
			# NOTE(review): the comparison uses the position within the cluster ('pj'), not the global row index,
			# so some cross-cluster pairs are drawn again when a later cluster is processed (the later draw
			# overwrites the earlier one, and the result stays symmetric). Kept as is to leave the
			# random-number stream of the generator unchanged.
			ind.o = which(V[,li]==0 & seq_len(N.variables)>pj)
			if (length(ind.o)>0 && p.spike.gen.outside<1) { # With probability 1 of a missing value there is nothing to draw.
				q.tmp[,ind.o] = drawShapeCorrelations(n.pairs=length(ind.o), p.spike=p.spike.gen.outside, shapes=shapes.beta.gen.outside)
			}
			ind.filled = ind.o
			# Sample correlations within the cluster (only against the later variables of the cluster).
			if (pj<length(ind.i)) {
				ind.i.rest = ind.i[(pj+1):length(ind.i)]
				q.tmp[,ind.i.rest] = drawShapeCorrelations(n.pairs=length(ind.i.rest), p.spike=p.spike.gen.inside, shapes=shapes.beta.gen.inside)
				ind.filled = c(ind.i.rest, ind.o)
			}
			if (length(ind.filled)==0) {
				next # No partner variables for this one; nothing to store.
			}
			if (q.like.sum) {
				# Compute the likelihood of the observed shape correlations under both parameter sets.
				q.filled = q.tmp[,ind.filled,drop=FALSE]
				log.lik.inside = sumLogLikelihood(q=q.filled, p.spike=p.spike.like.inside, shapes=shapes.beta.like.inside)
				log.lik.outside = sumLogLikelihood(q=q.filled, p.spike=p.spike.like.outside, shapes=shapes.beta.like.outside)
				# Store in both triangles to produce symmetric matrices.
				data$Q.dbeta.log$inside[ind.this,ind.filled] = log.lik.inside
				data$Q.dbeta.log$inside[ind.filled,ind.this] = log.lik.inside
				data$Q.dbeta.log$outside[ind.this,ind.filled] = log.lik.outside
				data$Q.dbeta.log$outside[ind.filled,ind.this] = log.lik.outside
			} else {
				# Store the raw correlations symmetrically.
				data$Q[,ind.filled,ind.this] = q.tmp[,ind.filled]
				data$Q[,ind.this,ind.filled] = q.tmp[,ind.filled]
			}
		}
	}

	return(data)

}