
"""
Learn a Discriminative Component Analysis (DCA) projection for a
given set of labeled data.
Usage:

    python learn_dca.py input_datafile n_input_dim n_classes output_filename n_output_dim random_seed n_of_iterations n_of_secants pretransformation init_projection sigma use_angle_reparameterization

Here the parameters are:
    input_datafile    A text file containing the data points as a matrix
                      of space separated values. Each row is one data
                      point. The first columns are the input dimensions
                      (n_input_dim columns); the last columns are binary
                      class indicators (n_classes columns) where each column
                      indicates whether the data point belongs to that class.
    n_input_dim       How many dimensions the data has, not counting the
                      class indicator columns.
    n_classes         How many different classes there are in the data.
    output_filename   The projection matrix will be written to this text file.
                      The output will be a matrix of size (n_input_dim rows,
                      n_output_dim columns), with space separated values.
    n_output_dim      How many dimensions the projected data should have.
    random_seed       Value of a random seed. Currently not used.
    n_of_iterations   Number of conjugate gradient iterations to use in the
                      optimization.
    n_of_secants      Number of secant steps to use in each conjugate gradient
                      iteration.
    pretransformation Initialization file containing an initial transformation
                      matrix applied to the data: this must be a full square
                      (n_input_dim by n_input_dim) matrix.
    init_projection   The initial projection matrix which is used as the 
                      starting point of the optimization. The matrix should 
                      have n_input_dim rows and n_output_dim columns, and 
                      should have space separated values.
    sigma             Sigma (Gaussian standard deviation) used in the
                      nonparametric class density estimate. This should be
                      a positive value: roughly speaking, larger values yield
                      'softer' estimates that change more slowly between
                      points. See the Peltonen and Kaski 2005 paper.
    use_angle_reparameterization   Whether to learn an orthogonal projection 
                      (give value 1 for this parameter) or an unrestricted 
                      projection (give value 0).
"""

import sys, time, os
from Numeric import * 
from string import atof
import dca

# Shorthand used below for writing progress messages to standard error.
note=sys.stderr.write

# Read arguments
def argmap(types, args):
    """Convert a sequence of strings according to a string of type codes.

    Each character of ``types`` selects the converter applied to the value
    at the same position in ``args``: 'i' -> int, 'f' -> float, 's' -> str.

    Returns a list with the converted values, in order.

    Raises ValueError if ``types`` and ``args`` differ in length, KeyError
    for an unknown type code, and whatever the converter itself raises
    (e.g. ValueError for a non-numeric string passed to int/float).
    """
    converters = {'i': int, 'f': float, 's': str}
    types = list(types)
    args = list(args)
    # Checking the lengths up front gives a clear error instead of padding
    # with None or silently truncating, depending on how map() behaves.
    if len(types) != len(args):
        raise ValueError('expected %d arguments, got %d'
                         % (len(types), len(args)))
    return [converters[code](value) for code, value in zip(types, args)]
# The script needs 12 positional arguments (see the module docstring for
# their meaning). Check the count up front so a missing argument produces a
# usage message instead of an unpacking/conversion traceback. Arguments
# beyond the twelfth are ignored.
if len(sys.argv) < 13:
    sys.exit('Usage: python %s input_datafile n_input_dim n_classes '
             'output_filename n_output_dim random_seed n_of_iterations '
             'n_of_secants pretransformation init_projection sigma '
             'use_angle_reparameterization\n'
             '(expected 12 arguments, got %d)'
             % (sys.argv[0], len(sys.argv) - 1))
iFilename, n_input_dim, nclasses, oFilename, n_output_dim, rseed, nIt, nSecants, init_transformation_name, init_projection_name, sigma, use_angle_reparameterization =\
           argmap('siisiiiissfi', sys.argv[1:13])


# Read data
# Each non-blank line of the input file holds n_input_dim feature values
# followed by nclasses class-indicator values, separated by whitespace.
# Any further columns on a line are ignored.
note('Python: Reading data\n')
data=[]
dataclasses=[]
n_columns = n_input_dim + nclasses
iStream=open(iFilename, 'r')
# try/finally guarantees the file is closed even when a row fails to parse.
try:
    for line_number, line in enumerate(iStream):
        # str.split()/float() are used instead of string.splitfields/
        # string.atof because this file never imports the `string` module
        # by name (only `atof` from it).
        fields = line.split()
        if not fields:
            # Tolerate blank lines, e.g. a trailing newline at end of file.
            continue
        if len(fields) < n_columns:
            raise ValueError('%s line %d: expected at least %d columns, got %d'
                             % (iFilename, line_number + 1, n_columns,
                                len(fields)))
        data.append([float(value) for value in fields[:n_input_dim]])
        dataclasses.append([float(value)
                            for value in fields[n_input_dim:n_columns]])
finally:
    iStream.close()
data=array(data).astype(Float)
dataclasses=array(dataclasses).astype(Float)


def _read_matrix(filename, n_rows):
    """Read the first n_rows lines of a whitespace-separated text matrix.

    Every value on each of those lines is converted to float; the number of
    columns is taken from the file itself. Returns a Numeric array of type
    Float.

    Raises ValueError if one of the first n_rows lines is missing or blank,
    or if a value cannot be parsed as a float.
    """
    stream = open(filename, 'r')
    # try/finally guarantees the file is closed even when parsing fails.
    try:
        rows = []
        for row_index in xrange(n_rows):
            # readline() returns '' past end of file, which splits to [].
            fields = stream.readline().split()
            if not fields:
                raise ValueError('%s: row %d of %d is missing or empty'
                                 % (filename, row_index + 1, n_rows))
            rows.append([float(value) for value in fields])
    finally:
        stream.close()
    return array(rows).astype(Float)


# Read pretransformation
note('Python: Reading model\n')
A = _read_matrix(init_transformation_name, n_input_dim)


# Read initial projection
initial_W = _read_matrix(init_projection_name, n_input_dim)


# Initialize model
# Build the DCA object from the loaded data, class indicators, the
# pretransformation matrix A, the initial projection and the sigma /
# reparameterization settings given on the command line.
note('Python: Initializing model\n')
sim = dca.dca(data, dataclasses, nclasses, A, initial_W, sigma, use_angle_reparameterization)


# Train model
# nIt and nSecants are the n_of_iterations and n_of_secants command-line
# arguments. The returned value is kept in `costs` but not used further in
# this script.
note('Python: Training model\n')
costs=sim.optimize(nIt,nSecants)


# Save model
# Write the result to the output_filename given on the command line.
sim.write_model(oFilename)

