## ----echo=FALSE, include=FALSE------------------------------------------------
library("Rsamtools")
## ----ScanBamParam-------------------------------------------------------------
which <- GRanges(c(
"seq1:1000-2000",
"seq2:100-1000",
"seq2:1000-2000"
))
## equivalent:
## GRanges(
## seqnames = c("seq1", "seq2", "seq2"),
## ranges = IRanges(
## start = c(1000, 100, 1000),
## end = c(2000, 1000, 2000)
## )
## )
what <- c("rname", "strand", "pos", "qwidth", "seq")
param <- ScanBamParam(which=which, what=what)
## ----scanBam, keep.source=TRUE------------------------------------------------
bamFile <- system.file("extdata", "ex1.bam", package="Rsamtools")
bam <- scanBam(bamFile, param=param)
## ----scanBam-elts-1-----------------------------------------------------------
class(bam)
names(bam)
## ----scanBam-elts-2-----------------------------------------------------------
class(bam[[1]])
names(bam[[1]])
## ----scanBam-elts-class-------------------------------------------------------
sapply(bam[[1]], class)
## ----scanBam-to-IRanges-------------------------------------------------------
.unlist <- function (x)
{
## do.call(c, ...) coerces factor to integer, which is undesired
x1 <- x[[1L]]
if (is.factor(x1)) {
structure(unlist(x), class = "factor", levels = levels(x1))
} else {
do.call(c, x)
}
}
bam <- unname(bam) # names not useful in unlisted result
elts <- setNames(bamWhat(param), bamWhat(param))
lst <- lapply(elts, function(elt) .unlist(lapply(bam, "[[", elt)))
## ----lst-to-DataFrame---------------------------------------------------------
head(do.call("DataFrame", lst))
## ----indexed-file-------------------------------------------------------------
list.files(dirname(bamFile), pattern="ex1.bam(.bai)?")
## ----bam-remote, eval=FALSE---------------------------------------------------
# which <- GRanges("6:100000-110000")
# param <- ScanBamParam(which=which, what=scanBamWhat())
# na19240bam <- scanBam(na19240url, param=param)
## ----summaryFunction----------------------------------------------------------
summaryFunction <-
function(seqname, bamFile, ...)
{
param <- ScanBamParam(
what=c('pos', 'qwidth'),
which=GRanges(seqname, IRanges(1, 1e7)),
flag=scanBamFlag(isUnmappedQuery=FALSE)
)
x <- scanBam(bamFile, ..., param=param)[[1]]
coverage(IRanges(x[["pos"]], width=x[["qwidth"]]))
}
## ----summaryByChromosome------------------------------------------------------
seqnames <- paste("seq", 1:2, sep="")
cvg <- lapply(seqnames, summaryFunction, bamFile)
names(cvg) <- seqnames
cvg
## ----keggrest, eval = FALSE---------------------------------------------------
# ## uses KEGGREST, dplyr, and tibble packages
# org <- "hsa"
# caffeine_pathway <-
# KEGGREST::keggList("pathway", org)
# tibble::enframe(name = "pathway_id", value = "pathway")
# dplyr::filter(startsWith(.data$pathway, "Caffeine metabolism"))
#
# egid <-
# KEGGREST::keggLink(org, "pathway") %>%
# tibble::enframe(name = "pathway_id", value = "gene_id")
# dplyr::left_join(x = caffeine_pathway, by = "pathway_id")
# dplyr::mutate(gene_id = sub("hsa:", "", gene_id))
# pull(gene_id)
## ----caffeine-kegg------------------------------------------------------------
egid <- c("10", "1544", "1548", "1549", "7498", "9")
## ----txdb-transcripts, message=FALSE------------------------------------------
library(TxDb.Hsapiens.UCSC.hg18.knownGene)
bamRanges <- transcripts(
TxDb.Hsapiens.UCSC.hg18.knownGene,
filter=list(gene_id=egid)
)
seqlevels(bamRanges) <- # translate seqlevels
sub("chr", "", seqlevels(bamRanges))
lvls <- seqlevels(bamRanges) # drop unused levels
seqlevels(bamRanges) <- lvls[lvls %in% as.character(seqnames(bamRanges))]
bamRanges
## ----bamRanges-genome---------------------------------------------------------
unique(genome(bamRanges))
## ----BamViews-parts-----------------------------------------------------------
slxMaq09 <- local({
fl <- system.file("extdata", "slxMaq09_urls.txt", package="Rsamtools")
readLines(fl)
})
## ----BamViews-construct, keep.source=TRUE-------------------------------------
bamExperiment <-
list(description="Caffeine metabolism views on 1000 genomes samples",
created=date())
bv <- BamViews(
slxMaq09, bamRanges=bamRanges, bamExperiment=bamExperiment
)
metadata(bamSamples(bv)) <-
list(description="Solexa/MAQ samples, August 2009",
created="Thu Mar 25 14:08:42 2010")
bv
## ----BamViews-query-----------------------------------------------------------
bamExperiment(bv)
## ----bamIndicies,eval=FALSE---------------------------------------------------
# bamIndexDir <- tempfile()
# indexFiles <- paste(bamPaths(bv), "bai", sep=".")
# dir.create(bamIndexDir)
# bv <- BamViews(
# slxMaq09,
# file.path(bamIndexDir, basename(indexFiles)), # index file location
# bamRanges=bamRanges,
# bamExperiment=bamExperiment
# )
#
# idxFiles <- mapply(
# download.file, indexFiles,
# bamIndicies(bv),
# MoreArgs=list(method="curl")
# )
## ----readGAlignments, eval=FALSE----------------------------------------------
# library(GenomicAlignments)
# olaps <- readGAlignments(bv)
## ----olaps, message=FALSE-----------------------------------------------------
library(GenomicAlignments)
load(system.file("extdata", "olaps.Rda", package="Rsamtools"))
olaps
head(olaps[[1]])
## ----read-lengths-------------------------------------------------------------
head(t(sapply(olaps, function(elt) range(qwidth(elt)))))
## ----focal--------------------------------------------------------------------
rng <- bamRanges(bv)[1]
strand(rng) <- "*"
olap1 <- endoapply(olaps, subsetByOverlaps, rng)
olap1 <- lapply(olap1, "seqlevels<-", value=as.character(seqnames(rng)))
head(olap1[[24]])
## ----olap-cvg, keep.source=TRUE-----------------------------------------------
minw <- min(sapply(olap1, function(elt) min(start(elt))))
maxw <- max(sapply(olap1, function(elt) max(end(elt))))
cvg <- endoapply(
olap1, coverage,
shift=-start(ranges(bamRanges[1])),
width=width(ranges(bamRanges[1]))
)
cvg[[1]]
## ----olap-cvg-as-m------------------------------------------------------------
m <- matrix(unlist(lapply(cvg, lapply, as.vector)), ncol=length(cvg))
summary(rowSums(m))
summary(colSums(m))
## ----sessionInfo--------------------------------------------------------------
packageDescription("Rsamtools")
sessionInfo()
## ----eval=FALSE---------------------------------------------------------------
# library(RCurl)
# ftpBase <-
# "ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/"
# indivDirs <-
# strsplit(getURL(ftpBase, ftplistonly=TRUE), "\n")[[1]]
# alnDirs <-
# paste(ftpBase, indivDirs, "/alignment/", sep="")
# urls0 <-
# strsplit(getURL(alnDirs, dirlistonly=TRUE), "\n")
## ----bam-index, eval=FALSE----------------------------------------------------
# urls <- urls[sapply(urls0, length) != 0]
# fls0 <- unlist(unname(urls0))
# fls1 <- fls0[grepl("bai$", fls0)]
# fls <- fls1[sapply(strsplit(fls1, "\\."), length)==7]
## ----slxMaq09, eval=FALSE-----------------------------------------------------
# urls1 <- Filter(
# function(x) length(x) != 0,
# lapply(urls, function(x) x[grepl("SLX.maq.*2009_08.*bai$", x)])
# )
# slxMaq09.bai <-
# mapply(paste, names(urls1), urls1, sep="", USE.NAMES=FALSE)
# slxMaq09 <- sub(".bai$", "", slxMaq09.bai)
## ----bam-Indicies, eval=FALSE-------------------------------------------------
# bamIndexDir <- tempfile()
# dir.create(bamIndexDir)
# idxFiles <- mapply(
# download.file, slxMaq09.bai,
# file.path(bamIndexDir, basename(slxMaq09.bai)),
# MoreArgs=list(method="curl")
# )