# Functions for probabilistic drug connectivity mapping

# Copyright (C) 2013-2014 Juuso Parkkinen.
# All rights reserved.
# 
# This program is open source software; you can redistribute it and/or modify it under the terms of the FreeBSD License (keep this notice): http://en.wikipedia.org/wiki/BSD_licenses
# 
# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.


## Function for running single drug retrieval
#
# Computes distances between drugs, defined as one minus the Pearson
# correlation between their factor vectors (rows of the Z matrices).
#
# Arguments:
#   model.Z: factor matrix with one row per drug and one column per factor.
#            Required; an error is raised when it is NULL.
#   query.Z: optional query factors. Either a matrix with one row per query
#            and the same columns as 'model.Z', or a plain numeric vector
#            holding the factors of a single query.
#
# Returns:
#   If 'query.Z' is NULL, the drugs x drugs distance matrix within 'model.Z'.
#   Otherwise a queries x drugs matrix of distances from each query to each
#   drug in 'model.Z'.
compute_probcmap_single <- function(model.Z=NULL, query.Z=NULL) {
  
  if (is.null(model.Z))
    stop("Specify the GFA factor matrix 'model.Z' to be used!")
  
  # Distance between single drugs is defined as one minus the Pearson correlation between the factors Z
  message("Computing probabilistic connectivity mapping (single)...", appendLF=TRUE)
  
  # If no query given, return distance matrix within the Connectivity Map data
  if (is.null(query.Z)) {
    distmat <- 1 - cor(t(model.Z))
    
    # If queries are given, compute distances from the queries to the CMap drugs
  } else {
    # A single query given as a plain vector has no dimensions; t() would turn
    # it into a 1 x K matrix and cor() would fail with incompatible dimensions.
    # Treat it as a one-row query matrix instead.
    if (is.null(dim(query.Z)))
      query.Z <- matrix(query.Z, nrow=1, dimnames=list(NULL, names(query.Z)))
    
    # cor() correlates columns, so both matrices are transposed to factors x drugs
    distmat <- t(1 - cor(t(model.Z), t(query.Z)))
  }
  
  message("DONE")
  # Return resulting distances  
  return(distmat)
}


# Function for running combinatorial drug retrieval
#
# For every query and every unordered pair of drugs (i, j) with i < j, a
# combined factor vector is built by picking, factor by factor, the value of
# the drug whose standardised factor agrees best with the standardised query
# (largest product of standard scores; drug i wins ties). The distance is one
# minus the Pearson correlation between the query and this combined vector.
#
# Arguments:
#   model.Z: factor matrix with one row per drug and one column per factor.
#            Required. Rownames, when present, label the result.
#   query.Z: query factors. Either a matrix with one row per query and the
#            same number of columns as 'model.Z', or a plain numeric vector
#            holding the factors of a single query. Required.
#
# Returns:
#   An array of dimension drugs x drugs x queries. Only entries [i, j, q]
#   with i < j are filled; the diagonal and lower triangle stay NA.
compute_probcmap_combinatorial <- function(model.Z=NULL, query.Z=NULL) {
  
  if (is.null(model.Z))
    stop("Specify the GFA factor matrix 'model.Z' to be used!")

  if (is.null(query.Z))
    stop("Specify query factor to be used!")
  
  if (is.null(dim(model.Z)))
    stop("'model.Z' must be a matrix with one row per drug!")
  
  # A single query given as a plain vector is treated as a one-row matrix
  if (is.null(dim(query.Z)))
    query.Z <- matrix(query.Z, nrow=1, dimnames=list(NULL, names(query.Z)))
  
  if (ncol(query.Z) != ncol(model.Z))
    stop("'model.Z' and 'query.Z' must have the same number of factors (columns)!")
  
  message("Computing probabilistic connectivity mapping (combinatorial)...", appendLF=TRUE)
  message("Warning! This is slow!", appendLF=TRUE)
  
  # Standard score of a factor vector (centred, scaled by the sample standard deviation)
  standard_score <- function(z) {
    (z-mean(z))/sqrt(1/(length(z)-1)*sum((z-mean(z))^2))
  }
  
  # Initialize result array; rows are addressed by position below, so missing
  # or duplicated rownames only affect the labels, never the computation
  n.all <- nrow(model.Z)
  n.query <- nrow(query.Z)
  drugs.all <- rownames(model.Z)
  drugs.query <- rownames(query.Z)
  comb.dist <- array(NA, dim=c(n.all, n.all, n.query), dimnames=list(drugs.all, drugs.all, drugs.query))
  
  # The factor vector and standard score of a drug do not depend on the query
  # or on the other drug of the pair, so compute them once up front
  model.rows <- lapply(seq_len(n.all), function(k) model.Z[k, ])
  model.scores <- lapply(model.rows, standard_score)
  
  # Go through all query drugs
  for (q in seq_len(n.query)) {
    message("\nq: ", q, ": ", appendLF=FALSE)
    # Get z and compute standard score for query q
    q.Z <- query.Z[q, ]
    str.score.q <- standard_score(q.Z)
    
    # Go through all drugs i (no iterations when there are fewer than two drugs)
    for (i in seq_len(max(n.all-1, 0))) {
      if (i %% 10 == 0)
        message(i, " ", appendLF=FALSE)
      i.Z <- model.rows[[i]]
      # Component-wise agreement between query q and drug i
      qi.dist <- str.score.q * model.scores[[i]]
      
      for (j in (i+1):n.all) {
        j.Z <- model.rows[[j]]
        
        # Component-wise agreement of query q with i (row 1) and with j (row 2)
        ij.dist <- rbind(qi.dist, str.score.q * model.scores[[j]])
        # Combine i and j: start from i and take j's value wherever j agrees better
        ij.Z.max <- i.Z
        j.inds <- which(apply(ij.dist, 2, which.max)==2)
        ij.Z.max[j.inds] <- j.Z[j.inds]
        
        # Compute final correlation
        comb.dist[i, j, q] <- 1 - cor(q.Z, ij.Z.max)
      }
    }
  }
  message("DONE")
  return(comb.dist)
}


