# SCRIPT FILE TO REGENERATE THE RESULTS FOR Faisal et al., 11 NIPS PM

# Author: Ali Faisal 
# Dated: Jun. 2011

# Start R from inside the unpacked camda_daSAr folder (all script paths below are resolved from getwd()).
# Start from an empty workspace so objects left over from a previous session
# cannot leak into this run.
rm(list = ls())

# ---- Run configuration ----

# Data pair to run the experiments on: "mthynexp" or "cghexp".
# datapair <- "cghexp"
datapair <- "mthynexp"

# FALSE: use the preprocessed data.
# TRUE:  perform preprocessing, starting from the raw data. Preprocessing is
#        time consuming because the CGH data is huge.
preprocessing <- FALSE

# FALSE: use pre-calculated dependency scores from the constrained canonical
#        correlation analysis model simCCA (implemented in the R "pint"
#        package).
# TRUE:  model and run simCCA to calculate the dependency scores from the
#        preprocessed data.
simcca <- FALSE

# FALSE: use precomputed dependency scores obtained on permuted (randomized)
#        data; these are used to calculate p-values.
# TRUE:  recompute them. This can be time consuming because simCCA has to be
#        run on the permuted data sets. Only consulted when simcca is TRUE.
sigsimcca <- FALSE

# TRUE: run the KM survival association for potential survival-associated
# regions.
kmanalysis <- FALSE

# TRUE: read confidential clinical information (requires approval from NIH)
# and compute enrichments. If you have permission, download
# "clinical_patient_all_GBM.txt" and "intgen.org_clinical_drug_all_GBM.txt"
# from NIH and store them in the data directory.
# NOTE(review): the original note spelled the second file "...GBM,txt";
# presumably a typo for ".txt" -- verify against the actual download.
cenrichments <- FALSE

# The directory R was started in is used as the root for every helper script.
path <- getwd()

# Directory paths used by the rest of the pipeline.
source(file.path(path, "setpaths.R"))
# Preprocessing helpers and many different analysis functions.
source(file.path(path, "functionz.R"))
# KM analysis and survival learning functions.
source(file.path(path, "analysis.R"))

# Optional step: rebuild the preprocessed data from the raw data.
if (preprocessing) {
  library(biomaRt)
  # Loads the data and maps features to Ensembl ids.
  source(file.path(path, "loaddata.R"))
}

# Optional step: recompute the simCCA dependency scores.
if (simcca) {
  library(pint)

  if (sigsimcca) {
    # Dependency scores on permuted chromosomal regions.
    source(file.path(path, "calc_sig.R"))
  }

  # Calculates the significant, highly dependent chromosomal regions.
  source(file.path(path, "2viewsimcca.R"))
}

# Directory aliases: the project root is the start-up directory, and the
# current run directory is res_dir (not defined in this file; presumably set
# by setpaths.R -- verify).
proj_dir <- path
current_rundir <- res_dir

# Optional step: KM survival association analysis.
if (kmanalysis) {
  # Transforms the data for the Anduril KM survival association tests.
  # Resolved against `path`, like every other helper script in this file, so
  # it is still found if an earlier script changed the working directory.
  source(file.path(path, "km_input.R"))
  library(biomaRt)
  # Annotates and writes the significant survival-associated regions in two
  # files: (1) with p-values and (2) based on q-values.
  summerize.km()
}

# Optional step: enrichment analysis on the (confidential) clinical data.
if (cenrichments) {
  # Disabled: preprocesses the clinical and drug information.
  # source(file.path(path, "load_clinicalinfo.R"))

  # Checks for enrichment of clinical factors such as age, gender and race,
  # and writes the output.
  clin_enrichment()
}


#source(file.path(path,"getROI_genenames.R")) # Annotates the Survival associated ROI with gene names.
