Skip to content
Prev 386618 / 398502 Next

making code (loop) more efficient

Indeed, the issue was with data.table. I converted the loaded object to a
data.frame and it worked like a charm.
Thank you so much for your insight!

This is the code that worked:

library(parallel)
library(data.table)
library(doSNOW)

n <-  parallel::detectCores()
cl <- parallel::makeCluster(n, type = "SOCK")
doSNOW::registerDoSNOW(cl)
files <- list.files("/WEIGHTS1/Retina", pattern=".RDat", ignore.case=T)

lst_out <- foreach::foreach(i = seq_along(files),
                  .packages = c("data.table") ) %dopar% {

   tmp <- get(load(files[i]))
   a <- data.table::copy(tmp)
   a=as.data.frame(a)
   rm(tmp)
   gc()

   names <- rownames(a)
   if("blup" %in% colnames(a)) {
     data <- data.table(names, a["blup"])
     nm1 <- c("rsid", "ref_allele", "eff_allele")
     data[,  (nm1) := tstrsplit(names, ":")[-2]]
     out <- data[, .(rsid, weight = blup, ref_allele, eff_allele)][,
               WGT := files[i]][]
    } else {

     data <- data.table(names)
     nm1 <- c("rsid", "ref_allele", "eff_allele")
     data[,  (nm1) := tstrsplit(names, ":")[-2]]
     out <- data[, .(rsid,  ref_allele, eff_allele)][,
               WGT := files[i]][]
   }

    return(out)
   rm(data)
   gc()
 }
parallel::stopCluster(cl)

big_data <- rbindlist(lst_out, fill = TRUE)
On Wed, Dec 16, 2020 at 9:31 AM Ana Marija <sokovic.anamarija at gmail.com> wrote: