

initialize.H <- function(inputs) {
  
  # informative initialization
  
  parameters <- list()
  Y <- inputs$Y
  K.2way <- inputs$K.2way
  
  if (inputs$init.mode == 1) {
    # random initialization
    
    corr.coef.2way <- (inputs$var.geno + inputs$var.env)*inputs$PTVE.2.way / (inputs$var.geno * inputs$var.env)
    # both weighths for the 2-way effects are given a correction 
    # coefficient -> 1st sqrt(); var(aX) = a^2 var(X) -> 2nd sqrt()
    corr.coef.2way <- sqrt(sqrt(corr.coef.2way))
    
    Nx <- nrow(Y)
    Nz <- ncol(Y)
    
    parameters$Hx.2way <- matrix(rnorm(K.2way*Nx, sd=sqrt(inputs$var.geno)*corr.coef.2way), nrow=K.2way, ncol=Nx)
    
    parameters$Hz.2way <- matrix(rnorm(K.2way*Nz, sd=sqrt(inputs$var.env)*corr.coef.2way), nrow=K.2way, ncol=Nz)
    
    if (!is.null(inputs$fixed.rows)) {
      parameters$Hx.1way <- rnorm(Nx, sd=sqrt(inputs$var.geno))
    }
    
    if (!is.null(inputs$fixed.cols)) {
      parameters$Hz.1way <- rnorm(Nz, sd=sqrt(inputs$var.env))
    }
    
  }
  
  else if (inputs$init.mode == 2) {
    
    
    # svd -initialization for 2-way
    # GBLUP -initialization for 1-way (genotype)
    # column mean -initialization for 1-way (environment)
    
    
    var.ratio <- rep(NA, 100)
    
    # learn a regularized svd
    tmp <- softImpute(Y, rank.max=K.2way, type='svd',lambda=5, maxit=500)
    tmp2 <- tmp
    
    
    
    # divide the variance asymmetrically so that it corresponds to
    # the var_geno/var_env ratio
    for (iter in 1:100) {
      
      tmp2$u <- t(t(tmp$u) * (tmp$d^((iter)/100)))
      tmp2$v <- t(t(tmp$v) * (tmp$d^((100-iter)/100)))
      
      var.ratio[iter] <- mean(apply(tmp2$u,2,var))/mean(apply(tmp2$v,2,var))
    }
    to.use <- which.min(abs(var.ratio - (inputs$var.geno / inputs$var.env)))
    to.use <- (1:100)[to.use]
    
    tmp$u <- t(t(tmp$u) * (tmp$d^(to.use/100)))
    tmp$v <- t(t(tmp$v) * (tmp$d^((100-to.use)/100)))
    
    
    # now the variance has been devided between Hx and Hz
    # next, scale the average variances of the components 
    # to var.geno and var.env
    
    # OLD: average sd's of components
    #Hx.coef <- mean(sqrt(apply(tmp$u,2,var)))
    #Hz.coef <- mean(sqrt(apply(tmp$v,2,var)))
    #variance.correction <- sqrt(mean(c(sqrt(inputs$var.env)/Hz.coef, sqrt(inputs$var.geno)/Hx.coef)))
    
    # global correction
    target.variance <- inputs$var.geno*inputs$PTVE.2.way
    variance.correction <- sqrt(sqrt(target.variance/var(c(tmp$u %*% t(tmp$v)))))
    
    
    # the initializations for 2-way components
    parameters$Hx.2way <- t(tmp$u*variance.correction)
    parameters$Hz.2way <- t(tmp$v*variance.correction)
    
    # check
    # print(var(c(t(parameters$Hx.2way) %*% parameters$Hz.2way)))
    # ok
    
    # next 1-way components
    if (!is.null(inputs$fixed.rows)) {
      
      
      if (is.null(inputs$init.rownames[[1]])) {
        
        print('rowMeans initialization for 1-way effects')
        # cannot use e.g. softImpute's 2-way mean as there are
        # so many missing values
        
        tmp.vals <- rowMeans(Y, na.rm=T)
        tmp.vals[is.nan(tmp.vals)] <- 0
        parameters$Hx.1way <- tmp.vals
        
      } else {
        
        load(inputs$init.paths$one_way_g)
        
        var.orig <- var(c(learnt.params$GBLUP.preds[inputs$init.rownames[[1]],]), na.rm=T)  
        
        
        parameters$Hx.1way <- learnt.params$GBLUP.preds[inputs$init.rownames[[1]],]
        parameters$Hx.1way[which(is.na(parameters$Hx.1way))] <- 0
        
        # normalize the variance of the 1-way (X) components to the
        # variance in that direction
        parameters$Hx.1way <- parameters$Hx.1way * sqrt(inputs$var.geno)/sqrt(var.orig)
        
      }
      
    }
    
    
    # initialize 1-way components to zero
    if (!is.null(inputs$fixed.cols)) {
      
      parameters$Hz.1way <- rep(0, ncol(Y))
    }
    
  } else if (inputs$init.mode == 3) {
    
    
    # next 1-way components
    if (!is.null(inputs$fixed.rows)) {
      
      
      if (is.null(inputs$init.rownames[[1]])) {
        
        print('rowMeans initialization for 1-way effects')
        # cannot use e.g. softImpute's 2-way mean as there are
        # so many missing values
        
        tmp.vals <- rowMeans(Y, na.rm=T)
        tmp.vals[is.nan(tmp.vals)] <- 0
        parameters$Hx.1way <- tmp.vals
        
      } else {
        
        #load(experiment$paths$GBLUP.solutions)
        load(inputs$init.paths$one_way_g)
        
        #if (sum(abs(learnt.params$GBLUP.preds-GBLUP.solutions$ncv[[test.fold]][[validation.fold]])) > 1e-10) print('check me')
        
        var.orig <- var(c(gblup.preds$GBLUP.preds[inputs$init.rownames[[1]],]), na.rm=T)  
        
        
        parameters$Hx.1way <- gblup.preds$GBLUP.preds[inputs$init.rownames[[1]],]
        parameters$Hx.1way[which(is.na(parameters$Hx.1way))] <- 0
        
        parameters$Ax.1way.GBLUP <- gblup.preds$Kbeta
        
      }
    
      # deregress 1-way g effects from Y
      to.deregress <- outer(parameters$Hx.1way, rep(1, ncol(Y)))  
      Y <- Y-to.deregress
    }
    
    
    
    
    
    # svd -initialization for 2-way
    # GBLUP -initialization for 1-way (genotype)
    # column mean -initialization for 1-way (environment)
    
    
    var.ratio <- rep(NA, 100)
    
    # learn a regularized svd
    tmp <- softImpute(Y, rank.max=K.2way, type='svd',lambda=5, maxit=500)
    tmp2 <- tmp
    
    
    
    # divide the variance asymmetrically so that it corresponds to
    # the var_geno/var_env ratio
    for (iter in 1:100) {
      
      tmp2$u <- t(t(tmp$u) * (tmp$d^((iter)/100)))
      tmp2$v <- t(t(tmp$v) * (tmp$d^((100-iter)/100)))
      
      var.ratio[iter] <- mean(apply(tmp2$u,2,var))/mean(apply(tmp2$v,2,var))
    }
    to.use <- which.min(abs(var.ratio - (inputs$var.geno / inputs$var.env)))
    to.use <- (1:100)[to.use]
    
    tmp$u <- t(t(tmp$u) * (tmp$d^(to.use/100)))
    tmp$v <- t(t(tmp$v) * (tmp$d^((100-to.use)/100)))
    
    
    # now the variance has been devided between Hx and Hz
    # next, scale the average variances of the components 
    # to var.geno and var.env
    
    # OLD: average sd's of components
    #Hx.coef <- mean(sqrt(apply(tmp$u,2,var)))
    #Hz.coef <- mean(sqrt(apply(tmp$v,2,var)))
    #variance.correction <- sqrt(mean(c(sqrt(inputs$var.env)/Hz.coef, sqrt(inputs$var.geno)/Hx.coef)))
    
    # global correction
    target.variance <- inputs$var.geno*inputs$PTVE.2.way
    variance.correction <- sqrt(sqrt(target.variance/var(c(tmp$u %*% t(tmp$v)))))
    
    
    # the initializations for 2-way components
    parameters$Hx.2way <- t(tmp$u*variance.correction)
    parameters$Hz.2way <- t(tmp$v*variance.correction)
    
    # check
    # print(var(c(t(parameters$Hx.2way) %*% parameters$Hz.2way)))
    # ok
    

    
    # initialize 1-way components to zero
    if (!is.null(inputs$fixed.cols)) {
      
      parameters$Hz.1way <- rep(0, ncol(Y))
    }
    
  }
  
  return(parameters)
  
  
}