Skip to content
Prev 6546 / 21307 Next

[Bioc-devel] GenomicFiles: chunking

Looks like the test code didn't make it through. Attaching again ...
On 10/27/2014 11:35 AM, Valerie Obenchain wrote:
-------------- next part --------------
library(GenomicFiles)
library(microbenchmark)

## 5 bam files ranging from 4e+08 to 8e+08 records:
fls <- BamFileList(c("exp_srx036692.bam", "exp_srx036695.bam",
                     "exp_srx036696.bam", "exp_srx036697.bam",
                     "exp_srx036692.bam")) ## re-use one file

## GRanges with 100 ranges and total width 1e+06:
starts <- sample(1:1e7, 100)
chr <- paste0(rep("chr", 100), 1:22)
grs <- GRanges(chr,  IRanges(starts, width=1e4))


## By hand:
FUN1 <- function(ranges, files) {
    ## equivalent to MAP step
    cvg <- lapply(files,
        FUN = function(file, range) {
            param = ScanBamParam(which=range)
            coverage(file, param=param)[range]
        }, range=ranges)
    ## equivalent to REDUCE step
    do.call(cbind, lapply(cvg, mean))
}

microbenchmark(FUN1(grs[1:10], fls), FUN1(grs[1:100], fls), times=10)
## GenomicFiles:
MAP = function(range, file, ...) {
    param = ScanBamParam(which=range)
    coverage(file, param=param)[range]
}
REDUCE <- function(mapped, ...) do.call(cbind, lapply(mapped, mean))

FUN2 <- function(ranges, files) {
    reduceFiles(ranges, files, MAP, REDUCE, BPPARAM=SerialParam())
}

microbenchmark(FUN2(grs[1:10], fls), FUN2(grs[1:100], fls), times=10)