# Build the prior hyperparameters used to initialise the asymmetric KBMF model.
#
# Arguments:
#   input  list. Fields read by this function:
#     Kx             matrix; its row count (Dx) and mean column variance are used
#     Kz             3-way array Dz x Nz x Pz of kernels. The simulation step
#                    multiplies Kz[, , k] by a length-Dz vector, so each
#                    kernel has to be square (Nz == Dz)
#     fix.ez         logical; TRUE -> kernel weights are input$inits$ez$mu,
#                    FALSE -> input$alpha_eta * input$beta_eta for every kernel
#     init.paths$one_way_g
#                    optional path to a file readable by load() that holds an
#                    object `learnt.params` with feno$Vg and feno$Ve
#     var.geno, var.env, PTVE.1.way.X, PTVE.1.way.Z, PTVE.2.way
#                    scalars entering the variance formulas below
#
# Returns:
#   list with var.Ax.tmp, var.Az.tmp, alpha_lambdax_1w, beta_lambdax_1w,
#   alpha_lambdaz_1w and beta_lambdaz_1w. With init.mode == 1 the list also
#   holds sigma_hx, sigma_hz and the *_2w hyperparameters; with the currently
#   hard-coded init.mode == 2 no 2-way entries are produced.
#
# Side effects:
#   prints progress notes, and (init.mode 2) draws random numbers with rnorm(),
#   so var.Az.tmp depends on the RNG state; call set.seed() for reproducibility.
initialize.KBMF.asymmetric <- function(input) {

  parameters <- list()

  Dx <- dim(input$Kx)[1]
  Dz <- dim(input$Kz)[1]
  Pz <- dim(input$Kz)[3]

  # 1: closed-form PTVE based initialisation, 2: initialisation that also
  # accounts for the kernel means and rescales var.Az by simulation
  init.mode <- 2

  # 1 in 1*Dx*mean.var.Kx is because the PTVE assumption is for 1 1-way component
  print('TODO: in variance calculations, assuming diagonal last kernel...')

  # ---- 1-way, X side ----
  mean.var.Kx <- mean(apply(input$Kx, 2, var))

  print('assuming target data has been normalized to have sd 1')

  one.way.path <- input$init.paths$one_way_g
  if (!is.null(one.way.path)) {

    # Load into a scratch environment: loading straight into this frame could
    # overwrite locals such as `parameters`, and the file has to be read for
    # every init.mode (it used to be read for init.mode 1 only, leaving
    # `learnt.params` undefined otherwise).
    loaded <- new.env(parent = emptyenv())
    load(one.way.path, envir = loaded)
    if (!exists("learnt.params", envir = loaded, inherits = FALSE)) {
      stop("file '", one.way.path, "' does not contain 'learnt.params'",
           call. = FALSE)
    }
    learnt.params <- get("learnt.params", envir = loaded, inherits = FALSE)

    if (init.mode == 1) {
      # no need to account for kernel weights
      print('is the genetic variability accounted for twice accidently?')
    }
    parameters$var.Ax.tmp <-
      input$var.geno * learnt.params$feno$Vg / learnt.params$feno$Ve

  } else {

    print('proportion of genetic variance is ad hoc...')
    parameters$var.Ax.tmp <- 0.1
  }

  # ---- 1-way, Z side ----
  # expected weight of every kernel
  if (input$fix.ez) {
    k.weights <- input$inits$ez$mu
  } else {
    k.weights <- rep(input$alpha_eta * input$beta_eta, Pz)
  }

  # "average variance of the features in each kernel" (one value per kernel),
  # -need to take into account Dz after this!
  # Multiplying a kernel by a scalar scales its variance by the squared scalar.
  mean.var.Kz <- apply(apply(input$Kz, c(2, 3), var), 2, mean) * k.weights^2

  # "summation over kernels", leaving out the last kernel (see the TODO above).
  # NOTE: with a single kernel the index 1:0 still selects that kernel.
  mean.var.Kz <- sum(mean.var.Kz[1:(length(mean.var.Kz) - 1)])

  if (init.mode == 1) {

    parameters$var.Az.tmp <-
      input$PTVE.1.way.Z * input$var.env / (Dz * mean.var.Kz)

  } else {

    # weighted sum of the per-kernel means
    Kz.means <- sum(apply(input$Kz, 3, mean) * k.weights)

    # the approximate variance that Az should have
    var.Az.tmp <- input$PTVE.1.way.Z * input$var.env /
      (Dz * mean.var.Kz + Dz * Kz.means)
    parameters$var.Az.tmp <- var.Az.tmp

    SIMULATE.VARIANCE <- TRUE
    if (SIMULATE.VARIANCE) {

      # Rescale `var.Az` so that the summed variance of Kz[, , k] %*% a,
      # a ~ N(0, scale^2 * var.Az), is closest to `target.var`.
      # Returns the rescaled variance.
      simulate.variance <- function(Kz, var.Az, target.var) {
        n.scale.vals <- 150
        n.reps <- 50
        n.rows <- dim(Kz)[1]
        n.kernels <- dim(Kz)[3]

        # candidate multipliers on the standard-deviation scale
        sd.scales <- sqrt(seq(from = 0.1, to = 15, length.out = n.scale.vals))

        vars.tmp <- array(NA_real_, dim = c(n.kernels, n.reps, n.scale.vals))
        for (s in seq_len(n.scale.vals)) {

          # one column of weights per replicate
          weights <- matrix(
            rnorm(n.rows * n.reps, sd = sd.scales[s] * sqrt(var.Az)),
            nrow = n.rows, ncol = n.reps
          )

          for (k in seq_len(n.kernels)) {
            proj <- matrix(Kz[, , k], nrow = n.rows) %*% weights
            # sample variance of every column of proj
            centred <- sweep(proj, 2, colMeans(proj))
            vars.tmp[k, , s] <- colSums(centred^2) / (nrow(proj) - 1)
          }
        }

        # median over replicates of the variance summed over kernels
        medians.tmp <- apply(apply(vars.tmp, c(2, 3), sum), 2, median)

        # fit a polynomial regression to smooth away random variation
        fit.coef <- unname(coef(lm(medians.tmp ~ 0 + sd.scales + I(sd.scales^2))))
        fitted.variance <- fit.coef[1] * sd.scales + fit.coef[2] * sd.scales^2

        # NOTE(review): the target is input$var.env, whereas the check that
        # used to be commented out here expected
        # input$PTVE.1.way.Z * input$var.env -- confirm which one is intended.
        best <- which.min(abs(fitted.variance - target.var))
        if (length(best) != 1) {
          stop("variance simulation produced no usable fit", call. = FALSE)
        }

        # sd.scales multiplies the standard deviation of the weights, hence
        # the variance is multiplied by its square
        var.Az * sd.scales[best]^2
      }

      parameters$var.Az.tmp <-
        simulate.variance(input$Kz, var.Az.tmp, input$var.env)
    }
  }

  # earlier tweeked the variance of the distribution but nothing
  # consistently useful was achieved
  parameters$alpha_lambdax_1w <- 1 / (1 * parameters$var.Ax.tmp^2)
  parameters$beta_lambdax_1w <- 1 * parameters$var.Ax.tmp

  parameters$alpha_lambdaz_1w <- 1 / (1 * parameters$var.Az.tmp^2)
  parameters$beta_lambdaz_1w <- 1 * parameters$var.Az.tmp

  # ---- 2-way ----
  if (init.mode == 1) {

    var.Ax.2w.tmp <- input$var.geno * input$PTVE.2.way * input$PTVE.1.way.X /
      (Dx * mean.var.Kx)

    # weight every kernel by its expected weight and sum the kernels up
    Kz.weights <- array(
      rep(input$inits$ez$mu, each = prod(dim(input$Kz)[1:2])),
      dim = dim(input$Kz)
    )
    Kz.combined <- apply(input$Kz * Kz.weights, c(1, 2), sum)
    mean.var.Kz.2w <- mean(apply(Kz.combined, 2, var))

    var.Az.2w.tmp <- input$var.env * input$PTVE.2.way * input$PTVE.1.way.Z /
      (Dz * mean.var.Kz.2w)

    parameters$sigma_hx <-
      input$var.geno * input$PTVE.2.way * (1 - input$PTVE.1.way.X)
    parameters$sigma_hz <-
      input$var.env * input$PTVE.2.way * (1 - input$PTVE.1.way.Z)

    parameters$alpha_lambdax_2w <- 1 / (1 * var.Ax.2w.tmp^2)
    parameters$beta_lambdax_2w <- 1 * var.Ax.2w.tmp

    parameters$alpha_lambdaz_2w <- 1 / (1 * var.Az.2w.tmp^2)
    parameters$beta_lambdaz_2w <- 1 * var.Az.2w.tmp

  } else {
    # in principle, Dx has been taken into account by summation
    # NOTE(review): this value is not stored in `parameters` yet, so no 2-way
    # hyperparameters are returned for init.mode 2.
    var.Ax.2w.tmp <- input$var.geno * input$PTVE.2.way * input$PTVE.1.way.X /
      (Dx * mean.var.Kx)
  }

  return(parameters)
}