#include "Classifier.hpp"
#include <algorithm>
#include <stdio.h>

using std::vector;
using std::string;
using std::cerr;
using std::cout;
using std::endl;
using cv::Mat;
using cv::Range;
using cv::Point;
using cv::Vec3b;
using cv::Rect;
using cv::Scalar;

// Typed convenience wrapper around malloc(): reserves uninitialised storage
// for n objects of type T and returns malloc()'s result converted to T*.
// No constructors are run.
template<typename T>
static T* Malloc(size_t n) {
  void* const raw = malloc(sizeof(T) * n);
  return static_cast<T*>(raw);
}

#if !__APPLE__
#include <cblas.h>
#include <cblas_f77.h>
#include <atlas/clapack.h>
#endif // !__APPLE__

extern "C" {
  // Hand-written prototypes for the BLAS/LAPACK routines this file links
  // against (presumably the Fortran symbols, given the trailing underscore).
  // Scalar arguments are declared as C++ references so that call sites can
  // pass plain variables.
  // NOTE(review, originally raised by Matti): declaring these by hand is
  // fragile; they should preferably come from proper headers.
  int ilaenv_(int&, char*, char*, int&, int&, int&, int&);
  void ssyevx_(char*, char*, char*, int&, float*, int&, float&,
	       float&, int&, int&, float&, int&, float*, float*,
	       int&, float*, int&, int*, int*, int&);
  // LU factorisation and inversion from LU factors; used by
  // invertmatrixinplace() and rcond() below.
  void sgetrf_(int&, int&, float*, int&, int*, int&);
  void sgetri_(int&, float*, int&, int*, float*, int&, int&);
  // Singular value decomposition; only referenced from commented-out code in
  // the visible part of this file.
   void sgesvd_(char* /* JOBU */, char* /* JOBVT */, int& /* M*/, 
 	       int & /* N*/,
 	       float* /* A*/, int&, /* LDA*/
 	       float* /* S*/,
 	       float* /* U*/, int&, /* LDU*/
 	       float* /* VT*/, int&, /* LDVT*/
 	       float* /* WORK*/, int&, /* LWORK*/
 	       int & /* INFO */);

  // General matrix-matrix product; used by multiplymat() below.
  void sgemm_ (char *transa, char *transb, int &m, int  &n,  int  &k,
               float &alpha, float *a, int &lda, float *b, int &ldb,
               float &beta, float *c, int &ldc);


  // Double-precision counterpart of sgecon_, kept for reference only:
  //extern int dgecon_(const char *norm, const int *n, double *a, const int *lda, const double *anorm, double *rcond, double *work, int *iwork, int *info, int len);

  // Reciprocal condition number estimate from LU factors; used by rcond().
  void sgecon_(char *norm, int &n, float *a, int &lda, float &anorm, 
	       float &rcond, float *work, int *iwork, int &info);


  // Matrix norm; used by rcond() to obtain the norm of the original matrix.
  float slange_(char *norm, int &m, int &n, float *a, int &lda, float *work);


  // Alternative pointer-based prototype for sgesvd_, kept for reference only:
  //  void sgesvd_( char* jobu, char* jobvt, int* m, int* n, float* a,
  //	       int* lda, float* s, float* u, int* ldu, float* vt, int* ldvt,
  //	       float* work, int* lwork, int* info );
}

 

namespace slmotion {
  // Declaration only; the variable is defined elsewhere. Presumably a debug
  // verbosity flag (TODO confirm) - it is not referenced in the visible part
  // of this file.
  extern int debug;
  /**
   * K-means clustering by alternating mean estimation and re-labelling.
   *
   * Labels are initialised randomly with rand(). Each round recomputes the
   * cluster means from the current labels and then reassigns every sample to
   * the cluster with the nearest mean (squared Euclidean distance, ties go to
   * the lower cluster index). Rounds repeat until at most `tolerance` labels
   * change in a round. A cluster that currently has no samples keeps an
   * all-zero mean. The number of changed labels is logged to stderr after
   * every round.
   *
   * @param samples   feature vectors; every vector must have at least as many
   *                  elements as samples[0]
   * @param dimMask   optional mask selecting which dimensions are used; NULL
   *                  selects all. When given it is indexed for every
   *                  dimension of samples[0], so it must be at least that long
   * @param labels    output: cluster index in [0,K-1] for every sample
   * @param K         number of clusters, must be positive
   * @param tolerance stop once a round changes at most this many labels
   * @param m         optional output: the K cluster means from the last round,
   *                  restricted to the active (unmasked) dimensions
   * @throws string   if K is zero
   */
  void kmeans(const vector<vector<float> > &samples,const vector<bool> *dimMask,vector<size_t> &labels,size_t K,size_t tolerance,vector<vector<float> > *m){

    // K==0 would divide by zero when the random label divisor is computed.
    if(K==0)
      throw string("kmeans(): K must be positive");

    // Nothing to cluster; samples[0] below would be out of bounds.
    if(samples.empty()){
      labels.clear();
      if(m)
        m->assign(K,vector<float>());
      return;
    }

    // collect the indices of the dimensions that take part in the clustering

    const size_t fulldim=samples[0].size();
    vector<size_t> activeDim;

    for(size_t d=0;d<fulldim;d++)
      if(dimMask==NULL || (*dimMask)[d]) activeDim.push_back(d);

    const size_t dim=activeDim.size();

    // initialise labels randomly

    // The divisor stays in size_t: for K==1 it equals RAND_MAX+1, which does
    // not fit in an int when RAND_MAX==INT_MAX.
    const size_t div=RAND_MAX/K+1;

    labels.assign(samples.size(),0);

    for(size_t i=0;i<labels.size();i++)
      labels[i]=static_cast<size_t>(rand())/div;

    vector<vector<float> > means(K);
    vector<size_t> counts;

    size_t changecount;

    do{
      // determine the means according to labels

      means.assign(K,vector<float>(dim,0.0));
      counts.assign(K,0);

      for(size_t i=0;i<samples.size();i++){
        for(size_t d=0;d<dim;d++)
          means[labels[i]][d] += samples[i][activeDim[d]];
        counts[labels[i]]++;
      }

      for(size_t k=0;k<K;k++)
        if(counts[k])
          for(size_t d=0;d<dim;d++)
            means[k][d] /= counts[k];

      // set labels according to means

      changecount=0;

      for(size_t i=0;i<samples.size();i++){

        float mindist=0,dist;
        size_t newlabel=0;

        for(size_t d=0;d<dim;d++){
          float diff=means[0][d]-samples[i][activeDim[d]];
          mindist += diff*diff;
        }

        for(size_t k=1;k<K;k++){
          dist=0;
          // stop accumulating as soon as this cluster can no longer win
          for(size_t d=0;dist<mindist && d<dim;d++){
            float diff=means[k][d]-samples[i][activeDim[d]];
            dist += diff*diff;
          }
          if(dist<mindist){
            mindist=dist;
            newlabel=k;
          }
        }

        if(newlabel != labels[i]){
          labels[i]=newlabel;
          changecount++;
        }

      }

      cerr << "Kmeans: changecount="<<changecount<<endl;

    } while (changecount>tolerance);

    if(m)
      *m=means;

  }

  // Squared Euclidean distance between v1 and v2 over the first v1.size()
  // elements (v2 is indexed with the same range). A non-negative mindist acts
  // as a cut-off: summation stops as soon as the running sum is no longer
  // below it, and the partial sum is returned. A negative mindist disables
  // the cut-off.
  float sqrdist(const vector<float> &v1,const vector<float> &v2,float mindist){

    const size_t n=v1.size();
    const bool unbounded=(mindist<0);

    float total=0;

    for(size_t k=0;k<n;++k){
      if(!unbounded && !(total<mindist))
        break;
      const float delta=v1[k]-v2[k];
      total += delta*delta;
    }

    return total;

  }



  // Squared Euclidean distance between two equally sized vectors with an
  // optional upper bound. If the running sum exceeds min, the sentinel value
  // 99999999 is returned instead of the distance; a negative min means the
  // sentinel itself is used as the bound. A size mismatch is reported on
  // stderr and terminates the process with exit(-1).
  float vectorsqrdist(const vector<float> &v1,
                      const vector<float> &v2,float min)
  {
    const float sentinel=99999999;
    const size_t len=v1.size();

    if(len != v2.size()){
      cerr << "vectorsqrdist(): v1.size() != v2.size()"<<endl;
      exit(-1);
    }

    const float limit = (min<0) ? sentinel : min;

    float acc=0;
    size_t idx=0;
    while(idx<len){
      const float delta=v1[idx]-v2[idx];
      acc += delta*delta;
      if(acc>limit)
        return sentinel;
      ++idx;
    }
    return acc;
  }

  // Draws an integer from [0,n-1] by scaling a rand()-based fraction.
  // Draws for which rand() returns exactly RAND_MAX are rejected and
  // repeated so that the fraction stays strictly below 1.
  int randomint(int n){

    for(;;){
      const double fraction=rand()/((double)RAND_MAX);
      if(fraction<1.0)
        return (int)(fraction*n);
    }
  }

  // Counts the positions at which both 3x3 boolean masks are set.
  // As a side effect both masks are dumped to stderr (m1 first, then m2),
  // one row per line with every element followed by a space.
  int countnbroverlap(bool m1[3][3],bool m2[3][3]){

    // dump both masks, m1 before m2
    bool (*const masks[2])[3] = {m1, m2};
    for(int which=0;which<2;++which){
      for(int row=0;row<3;++row){
        for(int col=0;col<3;++col)
          cerr << (int)masks[which][row][col] << " ";
        cerr << endl;
      }
    }

    // count the cells that are set in both masks
    int overlap=0;
    for(int cell=0;cell<9;++cell){
      const int row=cell/3;
      const int col=cell%3;
      if(m1[row][col]&&m2[row][col])
        ++overlap;
    }

    return overlap;

  }

  // Examines the 8 neighbours of element (y,x) of a float matrix. m1 and m2
  // each point to 8 flags selecting neighbours, in the order given by the
  // (column,row) offsets (1,0),(1,-1),(0,-1),(-1,-1),(-1,0),(-1,1),(0,1),(1,1).
  // For each mask the largest selected value and the largest selected value
  // at any other position are determined (both default to -1 when nothing
  // qualifies). The result is min(min(largest1,largest2),
  // max(second1,second2)). Neighbour positions are not bounds-checked here.
  float determineStrengthUnderMasks(Mat &mat, size_t y, size_t x, bool *m1, bool *m2){

    const int colStep[]={1,1,0,-1,-1,-1,0,1};
    const int rowStep[]={0,-1,-1,-1,0,1,1,1};

    float top1=-1,top2=-1;
    int topdir1=-1,topdir2=-1;

    // pass 1: largest value (and its direction) under each mask
    for(int k=0;k<8;k++){
      if(!m1[k] && !m2[k])
        continue;
      const float val=mat.at<float>(y+rowStep[k],x+colStep[k]);
      if(m1[k] && val>top1){
        top1=val;
        topdir1=k;
      }
      if(m2[k] && val>top2){
        top2=val;
        topdir2=k;
      }
    }

    float runnerup1=-1,runnerup2=-1;

    // pass 2: largest value under each mask outside the winning direction
    for(int k=0;k<8;k++){
      const bool use1 = m1[k] && k != topdir1;
      const bool use2 = m2[k] && k != topdir2;
      if(!use1 && !use2)
        continue;
      const float val=mat.at<float>(y+rowStep[k],x+colStep[k]);
      if(use1 && val>runnerup1)
        runnerup1=val;
      if(use2 && val>runnerup2)
        runnerup2=val;
    }

    return fmin(fmin(top1,top2),fmax(runnerup1,runnerup2));

  }


  /**
   * Converts a three-component vector whose first two components are
   * presumably (saturation*cos(hue), saturation*sin(hue)) - TODO confirm
   * against the producer of this encoding - back to an angle/magnitude pair.
   *
   * @param src at least three elements; src[2] is copied through unchanged
   * @return {angle, magnitude, src[2]}
   *
   * When src[0] is non-zero the angle is atan(src[1]/src[0]), i.e. it lies in
   * (-pi/2, pi/2), and the magnitude src[0]/cos(angle) carries the sign of
   * src[0]. When src[0] is zero the angle is pi/2 for positive src[1] and
   * 3*pi/2 for negative src[1]; a zero vector yields angle 0, magnitude 0.
   */
  vector<float> hsvtrig2hsv(const vector<float> &src){
    vector<float> ret(3);

    ret[2]=src[2]; // v
    if(src[0]!=0){
      ret[0]=atan(src[1]/src[0]);
      ret[1]=src[0]/(cos(ret[0]));
    } else{
      if(src[1]==0){
        ret[0]=0;
        ret[1]=0;
      } else{

        float pii=3.141593;

        // The conditional must be parenthesised: without the parentheses
        // "0.5+(src[1]>0)?0:1" parses as "(0.5+(src[1]>0))?0:1", which is
        // always 0.
        ret[0]=pii*(0.5+((src[1]>0)?0:1));
        ret[1]=fabs(src[1]);

      }
    }

    return ret;

  }



  // Computes the pseudoinverse of the m x n matrix a (leading dimension lda)
  // via the normal equations:
  //   m >= n :  inv(a^T a) a^T
  //   m <  n :  a^T inv(a a^T)
  // The result storage is allocated by multiplymat() with new[]; its
  // dimensions and leading dimension are returned in resm, resn and ldres and
  // the caller releases it with delete[].
  void pseudoinverse(int m, int n, float *a, int lda, int &resm, int &resn,
                     float** res, int &ldres){

    // more rows than columns (or square) -> work with a^T a, else with a a^T
    const bool tall = (m>=n);

    resm=n;
    resn=m;

    float *gram;
    int gm,gn,ldg;

    // Gram matrix: a^T a when tall, a a^T otherwise
    multiplymat(m,n,a,lda,m,n,a,lda,tall,!tall,gm,gn,&gram,ldg);

    invertmatrixinplace(gm,gn,gram,ldg);

    if(tall)
      multiplymat(gm,gn,gram,ldg,m,n,a,lda,false,true,resm,resn,res,ldres);
    else
      multiplymat(m,n,a,lda,gm,gn,gram,ldg,true,false,resm,resn,res,ldres);

    delete []gram;
  }

  // Multiplies two matrices with sgemm_, optionally transposing either
  // factor first. The factors are m1 x n1 and m2 x n2 as stored, with leading
  // dimensions lda1 and lda2. The product is written to a freshly allocated
  // new[] array returned through *res (the caller releases it with
  // delete[]); its dimensions are returned in resm and resn and its leading
  // dimension, always equal to resm, in ldres. The inner dimensions must
  // agree (checked with assert).
  void multiplymat(int m1, int n1, float *a1,int lda1,
                   int m2, int n2, float *a2,int lda2,
                   bool transpose1, bool transpose2,
                   int &resm, int &resn, float** res,int &ldres){

    int inner, innerOther;

    if(transpose1){
      resm=n1;
      inner=m1;
    } else{
      resm=m1;
      inner=n1;
    }

    if(transpose2){
      resn=m2;
      innerOther=n2;
    } else{
      resn=n2;
      innerOther=m2;
    }

    assert(inner==innerOther);

    ldres=resm;
    *res = new float[resm*resn];

    char op1[2]={ transpose1 ? 'T' : 'N', '\0' };
    char op2[2]={ transpose2 ? 'T' : 'N', '\0' };
    float one=1,zero=0;

    // *res = 1 * op1(a1) * op2(a2) + 0 * *res
    sgemm_(op1,op2,resm,resn,inner,one,a1,lda1,a2,lda2,zero,*res,ldres);
  }
    
  /**
   * Inverts the n x n matrix a (leading dimension lda) in place by LU
   * factorisation (sgetrf_) followed by inversion from the factors
   * (sgetri_). The first parameter is unused.
   *
   * A non-zero info from either routine is only reported on stderr; the
   * function returns normally and the contents of a are then whatever the
   * routines left behind.
   */
  void invertmatrixinplace(int /*m*/, int n, float *a,int lda){

    int col = n, info = -1;
    int *ipiv = new int[n];

    sgetrf_(col, col, (float*)(void*)a, lda, ipiv, info);
    if (info) {
      cerr << "invertmatrixinplace() sgetrf_() failed." << endl;
    }
    info = -1;
    int lwork = col*col;
    float *work = new float[lwork];

    sgetri_(col, (float*)(void*)a, lda, ipiv, work, lwork, info);
    if (info) {
      cerr << "invertmatrixinplace() sgetri_() failed." << endl;
    }

    // Release the pivot and workspace buffers; without this every call
    // leaks both of them.
    delete[] work;
    delete[] ipiv;

  }


  /**
   * Estimates the reciprocal condition number of the matrix a (m x n,
   * leading dimension lda) using norm mode "1" for both slange_ and sgecon_.
   *
   * The matrix is overwritten: a is replaced by its LU factorisation.
   * sgecon_ is called with order n only, so the matrix is presumably expected
   * to be square (m == n) - TODO confirm with callers.
   *
   * @return the reciprocal condition number reported by sgecon_
   * @throws string if sgetrf_ or sgecon_ reports a non-zero info
   */
  float rcond(int m, int n, float *a, int lda){

    int info;

    float anorm, rcond;

    int *iw=new int[n];
    float *w=new float[(int)fmax(4*n,m)];

    /* Computes the norm of a */

    char mode[]="1";

    anorm = slange_(mode, m, n, a, lda, w);

    /* Modifies a in place with a LU decomposition */
    sgetrf_(m, n, a, lda, iw, info);
    if (info != 0) {
      // free the scratch buffers before leaving via the exception
      delete[] w; delete[] iw;
      throw string("rcond(): sgetrf failed");
    }

    /* Computes the reciprocal norm */

    sgecon_(mode, n, a, lda, anorm, rcond, w, iw, info);

    // the scratch buffers are no longer needed on either path
    delete[] w; delete[] iw;

    if (info != 0) throw string("rcond(): sgecon failed");

    return rcond;

  }

 

  /* Auxiliary routine: prints the m x n matrix a to stdout, one matrix row
     per line, preceded by the description. Element (i,j) is read from
     a[i+j*lda]. */
  void print_matrix(const char* desc, int m, int n, float* a, int lda ) {
    printf( "\n %s\n", desc );
    for( int row = 0; row < m; ++row ) {
      int col = 0;
      while( col < n ) {
        printf( " %6.3f", a[row+col*lda] );
        ++col;
      }
      printf( "\n" );
    }
  }

  /**
   * Manual smoke test for the BLAS/LAPACK wrappers in this file. Progress
   * messages go to stderr/stdout and matrices are printed with
   * print_matrix(); nothing is checked automatically, the output is meant to
   * be inspected by eye.
   *
   * Exercises, in order: multiplymat() (transposed 3x2 times 3x2),
   * pseudoinverse() of a 3x2 matrix together with the product
   * pseudoinverse * matrix, and rcond() of a random 4x4 matrix. An exception
   * thrown by rcond() propagates to the caller.
   */
  void lapacktests(){

    cerr << "Testing the interface to blas/lapack" << endl;

    cerr << "Testing matrix multiplication" << endl;

    int m1=3,m2=3,n1=2,n2=2;

    float a1[]={1,2,3,4,5,6};
    float a2[]={1,2,3,4,5,6};

    int rm,rn,ldr;
    float *r;

    multiplymat(m1,n1,a1,m1,
                m2,n2,a2,m2,
                true,false,
                rm,rn,&r,ldr);

    print_matrix( "(transposed) left factor",m1,n1,a1,m1);
    print_matrix( "right factor",m2,n2,a2,m2);
    print_matrix( "product",rm,rn,r,ldr);

    delete[] r;

    cerr << "testing pseudoinverse of a matrix" << endl;

    print_matrix( "pseudoinverseinverse of",m1,n1,a1,m1);
    pseudoinverse(m1,n1,a1,m1,rm,rn,&r,ldr);
    print_matrix( "is",rm,rn,r,ldr);

    float *r2;
    int rm2,rn2,ldr2;

    // pseudoinverse * matrix, printed for visual inspection
    multiplymat(rm,rn,r,ldr,
                m1,n1,a1,m1,
                false,false,
                rm2,rn2,&r2,ldr2);

    print_matrix( "check by multiplication:",rm,rn,r,ldr);
    print_matrix( "times",m1,n1,a1,m1);
    print_matrix( "is",rm2,rn2,r2,ldr2);

    delete[] r;
    delete[] r2;

    cout << "checking the condition number calculation" << endl;

    // generate a random 4x4 matrix

    int mmm=4;

    // Automatic storage: nothing to release, even if rcond() throws.
    float mm[4*4];

    // Fill exactly the 16 elements of the matrix; writing more would run
    // past the end of the array.
    for(int i=0;i<mmm*mmm;i++){
      mm[i]=rand();
      mm[i]/=RAND_MAX;
    }

    cout << "the reciprocal condition number of a random 4x4 matrix " << endl;
    print_matrix( "mm=",mmm,mmm,mm,mmm);

    float rc=rcond(mmm,mmm,mm,mmm);

    cout << "is " << rc << endl;

  }

  // Hyperbolic-tangent transfer function: returns tanh(x).
  float tanhtransfer(float x){

    const float activation=tanh(x);
    return activation;

  }

  // Writes the matrix as text: a header line "rows cols" followed, unless
  // either dimension is zero, by one line per matrix row with every element
  // printed as " %f". Element (r,c) is read from dptr[r+c*rows], so the file
  // is in row-major order.
  void floatMatrix::writeToFile(FILE *f) const{
    fprintf(f,"%d %d\n",rows,cols);

    const bool empty = (rows==0) || (cols==0);

    if(!empty){
      for(int row=0;row<rows;++row){
        for(int col=0;col<cols;++col){
          const int offset=row+col*rows;
          fprintf(f," %f",dptr[offset]);
        }
        fprintf(f,"\n");
      }
    }

  }

  /**
   * Replaces the contents of this matrix with data read from a text stream
   * in the layout produced by writeToFile(): the two dimensions "rows cols"
   * followed by rows*cols values in row-major order.
   *
   * Any previous storage is released first. On failure the matrix is left in
   * a partially read state.
   *
   * @return true when the header and all elements were read; false when the
   *         header cannot be parsed, a dimension is negative, or an element
   *         is missing or malformed
   */
  bool floatMatrix::readFromFile(FILE *f){
    delete[] dptr; dptr=NULL;
    rows=cols=-1;
    if(fscanf(f,"%d%d",&rows,&cols)!=2)
      return false;

    // refuse nonsensical dimensions before allocating storage for them
    if(rows<0 || cols<0){
      rows=cols=-1;
      return false;
    }

    allocate();

    for(int r=0;r<rows;r++)
      // storage in file in row major order
      for(int c=0;c<cols;c++)
        // Read (not write) one element: calling fprintf here would write to
        // the input stream and never return 1.
        if(fscanf(f,"%f",&dptr[r+c*rows])!=1)
          return false;

    return true;
  }

  // Returns a deep copy: a new matrix with the same dimensions, its own
  // storage obtained through allocate(), and the element data copied with
  // memcpy.
  floatMatrix floatMatrix::clone() const{
    floatMatrix copy;

    copy.cols=cols;
    copy.rows=rows;
    copy.allocate();

    const size_t bytes=rows*cols*sizeof(float);
    memcpy(copy.dptr,dptr,bytes);

    return copy;

  }


}
