# This script contains the source code used to generate enrichment scores.

# Load the required functions.
source("EnrichmentScoreCalc.R")

# Load a matrix of normalized and filtered expression data.
# The matrix used here was filtered to remove probes absent in all samples
# and probes that are expressed below log2(100) across all samples.
# load() returns the names of the objects it restored. The rest of the script
# needs one of them to be called `data`; without this check a missing object
# would make `data` resolve to a function and fail later with a confusing
# subsetting error.
loaded.objects <- load("BA1_GCRMA_Normalized_Filtered.Rbin")
if (!"data" %in% loaded.objects) {
  stop(
    "BA1_GCRMA_Normalized_Filtered.Rbin did not provide an object named ",
    "'data'; it contains: ", paste(loaded.objects, collapse = ", "),
    call. = FALSE
  )
}

# The matrix contains both the reference data set (in this case normal
# cells/tissues), tumors and cell lines.
# First define the primary cells/tissues as the reference against which
# enrichment scores will be computed.
# The first column is skipped since it contains fetal spine - which is normal
# but has no replicates (you must have replicates for some stats).
primary.cols <- 2:1503
cancer.cols <- 1504:4475

# Fail with a clear message if the loaded matrix is narrower than the
# hard-coded column layout above expects.
if (ncol(data) < max(cancer.cols)) {
  stop(
    "Expression matrix has ", ncol(data), " columns but at least ",
    max(cancer.cols), " are required by the primary/cancer column ranges.",
    call. = FALSE
  )
}

primary <- data[, primary.cols]
cancer <- data[, cancer.cols]

# Now load a file that provides the grouping of the normal reference. Samples
# in the same group will be treated as replicates. The grouping used here can
# be found in the sample annotation excel file in the download section.
# Fetal spine is omitted from this file since it was excluded above.
groups <- as.character(
  read.table("Groups.reference.txt", header = FALSE)[, 1]
)

# Each reference column needs exactly one group label; catch a mismatched
# grouping file here instead of part-way through the long scoring run.
if (length(groups) != ncol(primary)) {
  stop(
    "Groups.reference.txt provides ", length(groups), " group labels but ",
    "the reference matrix has ", ncol(primary), " samples.",
    call. = FALSE
  )
}

# Make sure column names are valid R names.
colnames(cancer) <- make.names(colnames(cancer))
colnames(primary) <- make.names(colnames(primary))
groups <- make.names(groups)

# Replicates are required for some of the statistics (see the fetal spine
# note above), so point out any reference group that has a single sample.
# NOTE(review): this only warns; confirm how the scoring function treats
# single-sample groups.
group.sizes <- table(groups)
single.sample.groups <- names(group.sizes)[group.sizes < 2L]
if (length(single.sample.groups) > 0) {
  warning(
    "Reference groups without replicates: ",
    paste(single.sample.groups, collapse = ", "),
    call. = FALSE
  )
}

# Compute enrichment scores for each cancer sample.
# Primary cells are used as the reference.
# Be patient - it takes time to run this.
enrich.cancer <- RunIndividualsAgainstRefForScore(cancer, primary, groups)

# Save the matrix as an R binary object.
save(enrich.cancer, file = "enrichment.cancer.Rbin")