## ----example1tcgastep1,include=TRUE,results="hide",message=FALSE,warning=FALSE---- library(curatedOvarianData) library(sva) library(logging) ## ----example1tcgastep2_list, eval=FALSE--------------------------------------- # data(package="curatedOvarianData") ## ----example1tcgastep2-------------------------------------------------------- data(TCGA_eset) TCGA_eset ## ----example2loadstep1-------------------------------------------------------- source(system.file("extdata", "patientselection.config",package="curatedOvarianData")) ls() ## ----showls------------------------------------------------------------------- #remove.samples and duplicates are too voluminous: sapply( ls(), function(x) if(!x %in% c("remove.samples", "duplicates")) print(get(x)) ) ## ----example2loadstep2-------------------------------------------------------- source(system.file("extdata", "createEsetList.R", package = "curatedOvarianData")) ## ----example2loadstep3-------------------------------------------------------- names(esets) ## ----example3prepare---------------------------------------------------------- esets[[1]]$y forestplot <- function(esets, y="y", probeset, formula=y~probeset, mlab="Overall", rma.method="FE", at=NULL,xlab="Hazard Ratio",...) { require(metafor) esets <- esets[sapply(esets, function(x) probeset %in% featureNames(x))] coefs <- sapply(1:length(esets), function(i) { tmp <- as(phenoData(esets[[i]]), "data.frame") tmp$y <- esets[[i]][[y]] tmp$probeset <- exprs(esets[[i]])[probeset,] summary(coxph(formula,data=tmp))$coefficients[1,c(1,3)] }) res.rma <- metafor::rma(yi = coefs[1,], sei = coefs[2,], method=rma.method) if (is.null(at)) at <- log(c(0.25,1,4,20)) forest.rma(res.rma, xlab=xlab, slab=gsub("_eset$","",names(esets)), atransf=exp, at=at, mlab=mlab,...) return(res.rma) } ## ----example3plot, fig.width=8, fig.height=6---------------------------------- res <- forestplot(esets=esets,probeset="CXCL12",at=log(c(0.5,1,2,4))) ## ----filterdatasets----------------------------------------------------------- idx.tumorstage <- sapply(esets, function(X) sum(!is.na(X$tumorstage)) > 0 & length(unique(X$tumorstage)) > 1) idx.debulking <- sapply(esets, function(X) sum(X$debulking=="suboptimal",na.rm=TRUE)) > 0 ## ----example4plot, fig.width=8, fig.height=6---------------------------------- res <- forestplot(esets=esets[idx.debulking & idx.tumorstage], probeset="CXCL12",formula=y~probeset+debulking+tumorstage, at=log(c(0.5,1,2,4))) ## ----example5plot, fig.width=8, fig.height=6---------------------------------- res <- forestplot(esets=esets,probeset="CXCR4",at=log(c(0.5,1,2,4))) ## ----combine2----------------------------------------------------------------- combine2 <- function(X1, X2) { fids <- intersect(featureNames(X1), featureNames(X2)) X1 <- X1[fids,] X2 <- X2[fids,] ExpressionSet(cbind(exprs(X1),exprs(X2)), AnnotatedDataFrame(rbind(as(phenoData(X1),"data.frame"), as(phenoData(X2),"data.frame"))) ) } ## ----boxplot1, fig.width=8, fig.height=6-------------------------------------- data(E.MTAB.386_eset) data(GSE30161_eset) X <- combine2(E.MTAB.386_eset, GSE30161_eset) boxplot(exprs(X)) ## ----combat------------------------------------------------------------------- mod <- model.matrix(~as.factor(tumorstage), data=X) batch <- as.factor(grepl("DFCI",sampleNames(X))) combat_edata <- ComBat(dat=exprs(X), batch=batch, mod=mod) ## ----boxplot2, fig.width=8, fig.height=6-------------------------------------- boxplot(combat_edata) ## ----expand------------------------------------------------------------------- expandProbesets <- function (eset, sep = "///") { x <- lapply(featureNames(eset), function(x) strsplit(x, sep)[[1]]) eset <- eset[order(sapply(x, length)), ] x <- lapply(featureNames(eset), function(x) strsplit(x, sep)[[1]]) idx <- unlist(sapply(1:length(x), function(i) rep(i, length(x[[i]])))) xx <- !duplicated(unlist(x)) idx <- idx[xx] x <- unlist(x)[xx] eset <- eset[idx, ] featureNames(eset) <- x eset } X <- TCGA_eset[head(grep("///", featureNames(TCGA_eset))),] exprs(X)[,1:3] exprs(expandProbesets(X))[,1:3] ## ----featureData-------------------------------------------------------------- head(pData(featureData(GSE18520_eset))) ## ----annotationeg------------------------------------------------------------- annotation(GSE18520_eset) ## ----loadallsamples, echo=FALSE, results='hide'------------------------------- rm(list=ls()) source(system.file("extdata", "patientselection_all.config",package="curatedOvarianData")) source(system.file("extdata", "createEsetList.R", package = "curatedOvarianData")) ## ----heatmap, echo=FALSE, fig.width=10, fig.height=8-------------------------- .esetsStats <- function(esets) { res <- lapply(varLabels(esets[[1]]), function(covar) unlist(sapply(esets, function(X) sum(!is.na(X[[covar]]))>0))) names(res) <- varLabels(esets[[1]]) do.call(rbind, res) } df.r <- .esetsStats(esets) M <- as.matrix(apply(df.r,c(1,2),ifelse,0,1)) colnames(M) <- gsub("_eset$", "", colnames(M)) # no need to show the sample ids M <- M[-(1:2),] heatmap(M[nrow(M):1,],scale="none",margins=c(8,10),Rowv=NA) ## ----esetToTableFuns---------------------------------------------------------- source(system.file("extdata", "summarizeEsets.R", package = "curatedOvarianData")) ## ----esettable, echo=FALSE---------------------------------------------------- summary.table <- t(sapply(esets, getEsetData)) rownames(summary.table) <- sub("_eset", "", rownames(summary.table)) ## ----writeesettable----------------------------------------------------------- (myfile <- tempfile()) write.table(summary.table, file=myfile, row.names=FALSE, quote=TRUE, sep=",") ## ----display_table, echo=FALSE------------------------------------------------ # Display a subset of the table for the R markdown document knitr::kable( summary.table[, c(2, 3, 4, 5, 7)], caption="Datasets provided by curatedOvarianData. This is an abbreviated version of Table 1 of the manuscript; the full version is written by the write.table command above. Stage column is early/late/unknown, histology column is ser/clearcell/endo/mucinous/other/unknown." ) ## ----simplygetdata, eval=FALSE------------------------------------------------ # library(curatedOvarianData) # data(GSE30161_eset) # write.csv(exprs(GSE30161_eset), file="GSE30161_eset_exprs.csv") # write.csv(pData(GSE30161_eset), file="GSE30161_eset_clindata.csv") ## ----simplyseveraldatasets, eval=FALSE---------------------------------------- # data.to.fetch <- c("GSE30161_eset", "E.MTAB.386_eset") # for (onedata in data.to.fetch){ # print(paste("Fetching", onedata)) # data(list=onedata) # write.csv(exprs(get(onedata)), file=paste(onedata, "_exprs.csv", sep="")) # write.csv(pData(get(onedata)), file=paste(onedata, "_clindata.csv", sep="")) # } ## ----sessioninfo-------------------------------------------------------------- sessionInfo()