package jmprojection;

import java.util.Random;

/**
 * Curvilinear Component Analysis algorithm.
 * Algorithm by Pierre Demartines and Jeanny Hrault<br>
 * in a article named "CCA: "Curvilinear Component Analysis"<br>
 * Published in Proc. of 15th workshop GRETSI sep. 1995<br>
 * The algorithm first quantizes the data-vectors. Then it calculates distance
 * between points in N-space so that all routes from a non-centroid vector to a
 * non-centroid vector must go through some centroid. That distance is called
 * curvilinear distance. Then it projects centroids to two dimensions and after
 * that quantized vectors to same two dimensions trying to preserve euclidean
 * distance in two dimensions near curvilinear distance in N-space.
 *
 * @author Jarkko Miettinen<br>
 * TODO How to choose parameters in a automatic way, especially MAX_LOSS?
 */

public class CCA extends AbstractMapping implements
Curvilinear
{

	private int pDimSize, trainLen, centroidCount, nDimSize, vectorCount;
	private double maxLoss, startLambda, startAlpha, endLambda = 0.02, maxDistance;
	private double[][] nSpaceVectors, nSpaceCentroids, pSpaceVectors,
	pSpaceCentroids;

	private static final double MIN_ALPHA = 0.02;
	public static final double DEFAULT_LAMBDA = 1.0;
	public static final double DEFAULT_ALPHA = 0.5;
	public static final double DEFAULT_MAX_LOSS = 0.10;
	//default number of iterations in every iterative process:
	//neural water, centroid and data-vector projection.
	public static final int DEFAULT_TRAINING_LENGTH = 100;
	public static final int DEFAULT_NUMBER_OF_PROJECTION_DIMENSIONS = 2;
	public static final String NAME_OF_MAPPING =
		"Curvilinear Component Analysis";

	/**
	 * @param data N x D matrix, where N is number of points and D the
	 * dimensionality of those points
	 * @param startingAlpha starting value of learning factor [0,1]
	 * @param startingLambda starting value of neighbourhoodfactor [0,1]
	 * @param maximumLoss maximum allowed loss of accuracy in vector quantization
	 * @param trainingLength number of iteration-steps this algorithm is going
	 * to take
	 * @param projDim number of dimensions where to project the data
	 */
	public CCA(double[][] data, double startingAlpha, double startingLambda,
			double maximumLoss, int trainingLength, int projDim)
	{
		super(data);
		if (trainingLength > 0)
		{
			this.trainLen = trainingLength;
		} else
		{
			this.trainLen = DEFAULT_TRAINING_LENGTH;
		}
		if (startingAlpha > 0.0 && startingAlpha <= 1.0)
		{
			this.startAlpha = startingAlpha;
		} else
		{
			this.startAlpha = DEFAULT_ALPHA;
		}
		if (startingLambda > 0.0 && startingLambda <= 1.0)
		{
			this.startLambda = startingLambda;
		} else
		{
			this.startLambda = DEFAULT_LAMBDA;
		}
		if (maximumLoss >= 0.0 && maximumLoss < 1.0)
		{
			this.maxLoss = maximumLoss;
		} else
		{
			this.maxLoss = DEFAULT_MAX_LOSS;
		}
		if (projDim > 0)
		{
			this.pDimSize = projDim;
		} else
		{
			this.pDimSize = DEFAULT_NUMBER_OF_PROJECTION_DIMENSIONS;
		}
		nDimSize = data[0].length;
		vectorCount = data.length;
		System.out.println("inputdata is " + vectorCount + " x " + nDimSize);
		createVectors(data);
	}

	/**
	 * @param data N x D matrix, where N is number of points and D the
	 * dimensionality of those points
	 * Creates new CCA mapping with default options.
	 */
	public CCA(double[][] data)
	{
		this(data, DEFAULT_ALPHA, DEFAULT_LAMBDA, DEFAULT_MAX_LOSS,
				DEFAULT_TRAINING_LENGTH, DEFAULT_NUMBER_OF_PROJECTION_DIMENSIONS);
	}

	/**
	 * Return name of this mapping
	 */
	public String getName()
	{
		return NAME_OF_MAPPING;
	}

	/**
	 * Returns coordinates in projection space.
	 * @return <code>double[D][N]</code> D x N matrix, where N is the number of
	 * data-vectors originally given to algorithm as the input and D is the
	 * dimensionality of projection space.
	 */
	public double[][] getState()
	{
		double[][] state = new double[pDimSize][vectorCount];
		for (int i = 0; i < vectorCount; i++)
		{
			for (int j = 0; j < pDimSize; j++)
			{
				state[j][i] = pSpaceVectors[i][j];
			}
		}
		return state;
	}

	/**
	 * Calculates and returns mapping error after current number of iterations.
	 * <pre>
	 * Error = 0.5 * Sum((D(i,j) - d(i,j))^2 * F(d(i,j)), i,j = 1..N, i != j)
	 * where
	 * D(i,j) = Euclidean distance between points i and j in N-space
	 * d(i,j) = Euclidean distance between points i and j in projection space
	 * F(x)   = Neighbourhood function, in this case unitstep function
	 * </pre>
	 * @return mapping error, not the most informative error in the world
	 */
	public double mappingError()
	{
		double lambda = calcLambda(iterations);
		double error = 0.0;
		for (int i = 0; i < vectorCount; i++){
			for (int j = 0; j < centroidCount; j++){
				if (i != j){
					double dOrig = dist(nSpaceVectors[i], nSpaceCentroids[j]);
					double dMap = dist(pSpaceVectors[i], pSpaceCentroids[j]);
					double temp =  dOrig - dMap;
					//two rows below are removed because we use unitstep
					//function as a scaling function
//					temp *= temp*scalingFunction(dMap, lambda);
//					error += temp;
					if (dMap <= lambda)	error += temp*temp;
				}
			}
		}
		return 0.5*error;
	}

	/**
	 * Iterates one step. If iteration count < 1, initializes the algorithm.
	 */
	public void iterate()
	{
		if (iterations  < 1)
		{
			neuralWater(maxLoss, (int) (30 + Math.ceil(Math.log(vectorCount))));
			int numberOfCentroidIterations = 10 * centroidCount;
			for (int i = 0; i < numberOfCentroidIterations; i++)
			{
				projectCentroids(i);
			}
		}
		if (iterations < trainLen)
		{
			projectVectors(iterations++);
		}
	}

	/**
	 * Calculates Sammon's Stress
	 * @return Sammon's stress
	 * @see Curvilinear#sammonsStress()
	 */
	public double sammonsStress()
	{
		double error = 0;
		double temp;
		double dOrig;
		double c = 0.0;

		for (int i = 0; i < vectorCount;i++){
			for (int j = i + 1 ; j < vectorCount;j++){
				dOrig = dist(nSpaceVectors[i], nSpaceVectors[j]);
				c += dOrig;
				temp = dist(pSpaceVectors[i], pSpaceVectors[j]);
				temp *= temp;
				temp /= dOrig;
				error += temp;
			}
		}
		return error/c;
	}

	/**
	 * Return parameters of this mapping
	 * @see AbstractMapping#getParameters()
	 * @return Parameters on which this mapping is run.
	 */
	public String getParameters()
	{
		String st = "Alpha: " + startAlpha + " Lambda: " + startLambda
		+ " Training Length: " + trainLen+ " maximum loss: "
		+ maxLoss;
		return st;
	}

	/**
	 * calculates distances between every centroid pair (c[i],c[j]) in original
	 * space and projection space and returns K x 2 array, where array[m][1] is
	 * the distance in original space ("dx") and array[m][0] the distance in
	 * projection space ("dy") for every m=0..K-1. K is n*(n+1)/2 where n is
	 * number of centroids.
	 * @return pair-wise distance-matrix
	 */
	public double[][] getDyDxError()
	{
		int n = centroidCount;
		int arrayLength = n*(n+1)/2;
		double[][] dxdyArray = new double[arrayLength][2];
		n = 0;
		for (int i = 0; i < centroidCount; i++){
			for (int j = i + 1; j < centroidCount;j++){
				double dx = dist(nSpaceCentroids[i], nSpaceCentroids[j]);
				double dy = dist(pSpaceCentroids[i], pSpaceCentroids[j]);
				dxdyArray[n][1] = dx;
				dxdyArray[n][0] = dy;
				n++;
			}
		}
		return dxdyArray;
	}

	/**
	 * Creates vectors from given data-set. Data is presumed to be of form
	 * <pre>
	 *   case1 case2 case3...
	 * x1  a     b     c ...
	 * x2  d     e     f ...
	 * x3  g     h     i ...
	 * .   .     .     . .
	 * .   .     .     .  .
	 * .   .     .     .   .
	 * @param data
	 */
	private void createVectors(double[][] data)
	{
		nSpaceVectors = new double[vectorCount][nDimSize];
		pSpaceVectors = new double[vectorCount][];
		for (int i = 0; i < vectorCount; i++)
		{
			System.arraycopy(data[i], 0, nSpaceVectors[i], 0, nDimSize);
		}
	}

	/**
	 * Performs a vector quantization by creating a centroid every time no
	 * centroid is found sufficiently close and by moving sufficiently close
	 * centroid towards some vector.
	 * In pseudocode:<pre>
	 * r = tolerableLoss * max(Euclidean distance between every vector pair(i,j))
	 * FOR t = 1 .. maxIterations
	 * 	alpha = alpha(t)
	 * 	FOR each vector x[i]
	 * 		Find nearest centroid y[j]
	 * 		IF y[j] != <code>nul</code> and d(x[i],y[j]) <= r
	 * 			delta = alpha * (x[i] - y[j]);
	 * 			y[j] = y[j] + delta;
	 * 		ELSE
	 * 			create new centroid y[k] at the same location as x[i]
	 * 		END IF
	 * 	END FOR
	 * END FOR
	 * </pre>
	 * @param tolerableLoss maximum acceptable loss in vector quantization
	 * @param maxIterations maximum number of iterations the algorithm will
	 * try to find a better quantization for vectors
	 */
	private void neuralWater(double tolerableLoss, int maxIterations)
	{
		nSpaceCentroids = new double[vectorCount][];
		centroidCount = 0;
		maxDistance = Double.MIN_VALUE;
		for (int i = 0; i < vectorCount; i++)
		{
			for (int j = i + 1; j < vectorCount; j++)
			{
				double temp = dist(nSpaceVectors[i], nSpaceVectors[j]);
				if (temp > this.maxDistance)
				{
					this.maxDistance = temp;
				}
			}
		}
		double radiusOfInfluence = this.maxDistance * tolerableLoss;
		for(int t = 0 ; t < maxIterations ; t++){
			double alpha = calcAlpha(t, maxIterations);

			/*
			 * We try to find, for each vector, the nearest centroid whose
			 * radiusOfInfluence includes this vector and if we find it, we move
			 * the centroid towards the vector. If no sufficiently close
			 * centroid is found, we create a new centroid at the vectors
			 * position.
			 */

			for (int i = 0; i < vectorCount; i++)
			{
				double[] vector = nSpaceVectors[i];
				double[] nearestCentroid = null;
				double closestDistance = Double.MAX_VALUE;
				for (int j = 0; j < centroidCount; j++)
				{
					double[] tempCentroid = nSpaceCentroids[j];
					double tempDistance = dist(vector, tempCentroid);
					if (nearestCentroid == null || tempDistance <
							closestDistance)
					{
						nearestCentroid = tempCentroid;
						closestDistance = tempDistance;
					}
				}

				//If no centroid fulfills the requirements, create a new
				//centroid positioned in N-space at the same location as the
				//vector.

				if (nearestCentroid == null || closestDistance >
				radiusOfInfluence)
				{
					double[] newCentroid = new double[nDimSize];

					System.arraycopy(vector, 0, newCentroid, 0, nDimSize);
					nSpaceCentroids[centroidCount++] = newCentroid;
				}
				else
				{
					moveTowards(nearestCentroid, vector, alpha);
				}
			}
		}
		//Finished iterating centroid locations
		double[][] newCentroidArray = new double[centroidCount][nDimSize];
		for (int i = 0; i < centroidCount; i++)
		{
			System.arraycopy(nSpaceCentroids[i], 0, newCentroidArray[i], 0, nDimSize);
		}
		nSpaceCentroids = newCentroidArray;
	}

	/**
	 * This method projects <code>Centroids</code> to projection space and tries
	 * to get them to such a configuration that distances in projection space
	 * are near to curvilinear distances in N-space. This is done by first
	 * projecting each <code>Centroid</code> to a random point in projection
	 * space by formula:<br>
	 * <br><code>
	 * x = this.maxDistance * this.random.nextDouble();<br>
	 * y = this.maxDistance * this.random.nextDouble();
	 * </code><br>
	 * Then through every iteration round:
	 * <pre>
	 * FOR each Centroid x[j]
	 * 	FOR each Centroid x[i] != x[j]
	 * 		delta(x[i]) = alpha(t) * (D(i,j)/d(i,j) - 1) *
	 * 		F(lambda(t) - d(i,j) * (x[i] - x[j]);
	 * 		x[i] = x[i] + delta(x[i]);
	 * 	END FOR
	 * END FOR
	 * </pre><br>
	 * Where D(i,j) is Euclidean distance between <code>Centroids</code>
	 * i and j in N-space and d euclidean distance between same points in
	 * projection space.
	 * F is scaling function used to prefer shorter distances over longer. In
	 * this algorithm we use Heaviside unit-step function as F. x[i] - x[j] is
	 * the vector-difference in projection space between two centroids.<br>
	 *
	 * @param t <code>int</code> stating which iteration round is going on. If
	 * t < 1 then we project Centroids to some random points in projection
	 * space. This parameter is used to calculate alpha and lambda.
	 * @see CCA#alpha(int, int)
	 * @see CCA#lambda(int)
	 */
	private void projectCentroids(int epoch)
	{
		if (epoch == 0) //Initialize the projection with random values
		{
			pSpaceCentroids = new double[centroidCount][pDimSize];
			for (int i = 0; i < centroidCount; i++)
			{
				double[] coords = pSpaceCentroids[i];
				for (int j = 0; j < pDimSize; j++)
				{
					coords[j] = maxDistance * R.nextDouble();
				}
			}
		}
		double[] movingCP, movingCN, staticCP, staticCN;
		double alpha = calcAlpha(epoch, trainLen);
		double lambda = calcLambda(epoch);
		int[] permArray = genRandPermutation(centroidCount);
		for (int i = 0; i < centroidCount; i++)
		{
			staticCP = pSpaceCentroids[permArray[i]];
			staticCN = nSpaceCentroids[permArray[i]];
			for (int j = 0; j < centroidCount; j++)
			{
				if (i != j)
				{
					movingCP = pSpaceCentroids[j];
					movingCN = nSpaceCentroids[j];
					double dMap = dist(staticCP, movingCP);
					if (dMap == 0.0 || dMap == -0.0) dMap = 1.0;
					if (dMap <= lambda)
					{
						double dOrig = dist(movingCN, staticCN);
						double coeff = alpha*((dOrig/dMap - 1.0));
						applyDelta(movingCP, staticCP, coeff);
					}
				}
			}
		}
	}

	/**
	 * Projection algorithm for the <code>QuantizableVectors</code> themselves.
	 * On first iteration round, we first project <code>QuantizableVectors</code>
	 * to random positions in projection space determined by this formula:
	 * <br><code>
	 * x = this.maxDistance * this.random.nextDouble();<br>
	 * y = this.maxDistance * this.random.nextDouble();
	 * </code><br>
	 * Then we go through every <code>Centroid</code> and apply stochastic
	 * gradient descent.
	 * This is done so that we, for each <code>Centroid c</code>,
	 * go through every <code>QuantizableVector v</code>
	 * and calculate a <code>double</code> coefficient by formula:<br>
	 * <code> coeff = alpha(t) * (D(c,v)/d(c,v) - 1)*unitstep(lambda(t) -
	 * d(c,v))</code>
	 * <pre>
	 * where
	 * D(c,v)    = Euclidean distance in N-space between points c and v
	 * d(c,v)    = Euclidean distance in projection space between c and v.
	 * alpha(t)  = learning coefficient, monotonically decreasing in time
	 * lambda(t) = neighbourhood coefficient, monotonically decreasing in time
	 * </pre>
	 * In this algorithm we use unitstep-function as
	 * function favoring smaller distances. Other easy choice would be an
	 * decreasing exponential function.
	 *
	 * Same in pseudocode:
	 * <pre>
	 * alpha = alpha(t)
	 * lambda = lambda(t)
	 * FOR each Centroid x[j]
	 * 	FOR each Vector x[i]
	 * 		delta(x[i]) = alpha(t) * (D(i,j)/d(i,j) - 1) *
	 * 		F(lambda(t) - d(i,j) * (x[i] - x[j]);
	 * 		x[i] = x[i] + delta(x[i]);
	 * 	END FOR
	 * END FOR
	 * </pre><br>
	 * @param t number of the ongoing iteration round, used to calculate alpha
	 * and lambda and to determine whether or not we should randomly project
	 * <code>QuantizableVectors</code> to projection space (which is the case
	 * if t < 1 aka on the first iteration round).<br>
	 *
	 */
	private void projectVectors(int epoch)
	{
		if (vectorCount == centroidCount)
		{
			pSpaceVectors = pSpaceCentroids;
			return;
		}
		if (epoch == 0) //Initialize the projection with random values
		{
			pSpaceVectors = new double[vectorCount][pDimSize];
			for (int i = 0; i < vectorCount; i++)
			{
				double[] coords = pSpaceVectors[i];
				for (int j = 0; j < pDimSize; j++)
				{
					coords[j] = maxDistance * R.nextDouble();
				}
			}
		}
		double[] vectorP, vectorN, centroidP, centroidN;
		double alpha = calcAlpha(epoch, trainLen);
		double lambda = calcLambda(epoch);
		int permArray[] = genRandPermutation(centroidCount);
		for (int i = 0; i < centroidCount; i++)
		{
			centroidP = pSpaceCentroids[permArray[i]];
			centroidN = nSpaceCentroids[permArray[i]];
			for (int j = 0; j < vectorCount; j++)
			{
				if (i != j)
				{
					vectorP = pSpaceVectors[j];
					vectorN = nSpaceVectors[j];
					double dMap = dist(centroidP, vectorP);
					if (dMap == 0.0 || dMap == -0.0) dMap = 1.0;
					if (dMap <= lambda)
					{
						double dOrig = dist(centroidN, vectorN);
						double coeff = alpha*((dOrig/dMap - 1.0));
						applyDelta(vectorP, centroidP, coeff);
					}
				}
			}
		}
	}

	/**
	 * Moves given centroid towards the given vector. Used in neuralWater-method
	 * to iteratively move centroids in such way that they represent vectors
	 * in the best way
	 * @param movingCentroid centroid that is moved towards the vector
	 * @param staticVector vector which we are moven towards
	 * @param alpha current learning factor, decides how far we move towards
	 * the vectors position.
	 */
	private void moveTowards(double[] movingCentroid, double[] staticVector, double alpha)
	{
		for (int i = 0; i < movingCentroid.length; i++)
		{
			double x = movingCentroid[i];
			double y = staticVector[i];
			movingCentroid[i] += alpha * (y - x);
		}
	}

	/**
	 * Moves given point (vector or centroid) towards another point
	 * @param moving the vector or centroid that is moved
	 * @param towards towards which centroid it is moved
	 * @param alpha current learning factor, decides how far we move the <code>
	 * moving</code>vector towards given centroid.
	 */
	private void applyDelta(double[] moving, double[] towards, double alpha)
	{
		for (int i = 0; i < moving.length; i++)
		{
			double x = moving[i];
			double y = towards[i];
			moving[i] += alpha * (x - y);
		}
	}

	/**
	 * Calculates alpha (the learning coefficient) with given epoch and total
	 * training length. Function of alpha is f(epoch) = alpha_max *
	 * (alpha_min / alpha_max)^(epoch / (training length - 1)
	 * @param epoch current epoch
	 * @param tlen total length of training
	 * @return
	 */
	private double calcAlpha(int epoch, int tlen)
	{
		double alpha;
		alpha = this.startAlpha
		* Math.pow((MIN_ALPHA / this.startAlpha), epoch
				/ (tlen - 1.0));
		return alpha;
	}

	/**
	 * Calculates current radius of neighbourhood. The neighbourhood function is
	 * f(epoch) = max(d[i,j]) * lambda_max *
	 * (lambda_min / lambda_max)^(epoch/(training length - 1))
	 * where max(d[i,j]) is the maximum distance between two point in n-space.
	 * @param epoch current epoch
	 * @return
	 */
	private double calcLambda(int epoch)
	{
		double lambda;
		lambda = this.startLambda * Math.pow((this.endLambda / this.startLambda), epoch/
				(this.trainLen - 1.0));
		//To scale neighbourhood
		lambda *= 3*this.maxDistance;
		return lambda;
	}
}
