# this script contains source code used to generate enrichment scores
# Pull in the enrichment-score routines this script depends on.

source(file = "EnrichmentScoreCalc.R")

# Load the normalized, filtered expression matrix into the workspace.
# Per the original author, the matrix used here was filtered to remove
# probes absent in all samples and probes expressed below log2(100)
# across all samples.

load(file = "BA1_GCRMA_Normalized_Filtered.Rbin")

# `data` holds the reference data set (in this case normal cells/tissues)
# together with the tumors and cell lines, one sample per column.

# Fail early with a readable message if `data` is not the expected object.
# Without this guard, a workspace in which the .Rbin file did not provide
# `data` typically resolves the name to the function utils::data, and the
# subsetting below dies with "object of type 'closure' is not subsettable";
# a matrix that is too narrow dies with a bare "subscript out of bounds".
if (length(dim(data)) != 2L || ncol(data) < 4475L) {
  stop(
    "`data` must be a two-dimensional object with at least 4475 columns; ",
    "check the object stored in BA1_GCRMA_Normalized_Filtered.Rbin",
    call. = FALSE
  )
}

# Columns 2-1503 are the primary cells/tissues that serve as the reference
# against which enrichment scores are computed.
# Column 1 is skipped because it contains fetal spine - which is normal
# but has no replicates (you must have replicates for some stats).
primary <- data[, 2:1503]

# Columns 1504-4475 are the samples to be scored.
cancer <- data[, 1504:4475]

# Load the file that provides the grouping of the normal reference. Samples
# in the same group will be treated as replicates. The grouping used here
# can be found in the sample annotation Excel file in the download section.
# Fetal spine is omitted from the file because it is excluded from the
# reference subset.
groups <- as.character(
  read.table("Groups.reference.txt", header = FALSE)[, 1]
)

# The labels are expected to line up one-to-one with the columns of
# `primary`. Check this now so that a stale or mis-edited grouping file is
# reported immediately rather than inside the long-running scoring step.
if (!identical(length(groups), ncol(primary))) {
  stop(
    "Groups.reference.txt provides ", length(groups),
    " group labels but the reference has ", ncol(primary), " samples",
    call. = FALSE
  )
}

# Make sure sample names and group labels are syntactically valid R names.
colnames(cancer) <- make.names(colnames(cancer))
colnames(primary) <- make.names(colnames(primary))
groups <- make.names(groups)

# Compute enrichment scores for every cancer sample, passing the primary
# cells as the reference together with their group labels.
# This step is slow - expect a long wait.
enrich.cancer <- RunIndividualsAgainstRefForScore(cancer, primary, groups)

# Write the resulting object to disk as an R binary file.
save(list = "enrich.cancer", file = "enrichment.cancer.Rbin")