https://github.com/averissimo/notes-on-r

General notes and tips on R
https://github.com/averissimo/notes-on-r

Last synced: 6 months ago
JSON representation

General notes and tips on R

Host: GitHub
URL: https://github.com/averissimo/notes-on-r
Owner: averissimo
License: gpl-3.0
Created: 2016-12-13T16:59:39.000Z (over 9 years ago)
Default Branch: master
Last Pushed: 2016-12-13T21:18:11.000Z (over 9 years ago)
Last Synced: 2025-06-08T11:48:25.662Z (about 1 year ago)
Size: 19.5 KB
Stars: 0
Watchers: 2
Forks: 0
Open Issues: 0
Metadata Files:
- Readme: README.Rmd
- License: LICENSE

Awesome Lists containing this project

README

---

title: "Notes on R"

author: "André Veríssimo"

output: github_document

---

```{r setup, include=FALSE}

knitr::opts_chunk$set(echo = TRUE)

```

## Benchmarking parallel methods with `cor`

### Functions to build dataset

```{r fun_aux}

library(futile.logger)

# Generate a random template vector plus a matrix of noisy copies of it.
#
# Arguments:
#   reps        - number of noisy copies (columns) to generate; 0 is allowed
#                 and yields a matrix with zero columns.
#   sample.size - number of observations in the template and in each copy.
#
# Returns a list with two elements:
#   dat      - numeric matrix with `sample.size` rows and `reps` columns
#              (always a matrix, including when either dimension is 0 or 1).
#   template - the uniform(0, 1) vector every column was derived from.
#
# Each column draws its own threshold from [0.3, 1.3). Template entries below
# the threshold receive small uniform noise (scale 0.0005) and therefore stay
# close to the template; the remaining entries receive Gaussian noise
# (sd 0.05).
genData <- function(reps = 100, sample.size = 1000) {
  # Build one noisy copy of `template`; entries below `correlation` are the
  # ones that get the small uniform perturbation.
  genDataAux <- function(template = runif(1000), correlation = 1) {
    xdata <- numeric(length(template))
    correlated.ixs <- template < correlation
    n.correlated <- sum(correlated.ixs)
    xdata[correlated.ixs] <- template[correlated.ixs] +
      .0005 * runif(n.correlated)
    xdata[!correlated.ixs] <- template[!correlated.ixs] +
      .05 * rnorm(length(template) - n.correlated)
    xdata
  }

  template <- runif(sample.size)
  # seq_len() (unlike seq()) gives an empty sequence for reps = 0.
  col.ixs <- seq_len(reps)
  dat <- vapply(
    col.ixs,
    function(e) genDataAux(template, correlation = abs(.3 + runif(1))),
    numeric(length(template))
  )
  # vapply() returns a plain vector when each result has length one; force
  # the matrix shape so callers can always index columns with dat[, j].
  dim(dat) <- c(length(template), length(col.ixs))

  flog.info('Size of generated data:', dim(dat), capture = TRUE)
  list(dat = dat, template = template)
}

# Benchmark workload: Pearson correlation between the template and the first
# generated column of the global `dat` list.
#
# `dat.ix` is accepted so the function can be handed to the apply-style
# callers in this file, but it is never read: every call performs the same
# computation on column 1.
my.fun <- function(dat.ix) {
  first.column <- dat$dat[, 1]
  stats::cor(x = dat$template, y = first.column, method = "pearson")
}

```

### Setting up common benchmark parameters

```{r setup_params}

library(microbenchmark)

# Value passed as `times` to every microbenchmark() call in this file.
ntimes <- 10

# Benchmark dataset: a template of 1000 observations and one noisy column.
dat <- genData(reps = 1, sample.size = 1000)

# Length of the index vector each benchmarked expression iterates over,
# i.e. how many correlations a single evaluation computes.
reps <- 1e5

# Log the benchmark configuration, framed by empty log lines.
flog.info("")
flog.info("")
flog.info("Repeating benchmarks %d times to find statistics", ntimes)
flog.info("  -> %d samples per variables", length(dat$template))
flog.info("  -> %d correlations being calculated", reps)
flog.info("")

```

### Benchmark Xapply functions

```{r xapply}

# Sequential baseline: time lapply, sapply and vapply, each calling my.fun
# once per element of 1:reps. `times = ntimes` sets how many times each
# expression is evaluated.
# The vapply FUN.VALUE template, array(double(0), 1), is a one-element double
# array, i.e. each call is expected to return a single double.
microbenchmark(

    lapply(1:reps, my.fun)

  , sapply(1:reps, my.fun)

  , vapply(1:reps, my.fun, array(double(0), 1))

  , times = ntimes)

```

### Benchmark different combinations of mclapply

```{r mclapply}

library(parallel)

# Same workload through parallel::mclapply, comparing mc.cores settings of
# 16, 15 and 14. Each expression is evaluated `ntimes` times.
# NOTE(review): the core counts are hard-coded; presumably they were chosen
# for the author's machine. Confirm before comparing results across hosts.
microbenchmark(

    mclapply(1:reps, my.fun, mc.cores = 16)

  , mclapply(1:reps, my.fun, mc.cores = 15)

  , mclapply(1:reps, my.fun, mc.cores = 14)

  , times = ntimes)

```

### Benchmark different combinations of foreach

```{r foreach}

library(parallel)

library(foreach)

library(doMC) # parallel computing

# Register the doMC backend with 16 cores so that %dopar% runs in parallel.
registerDoMC(cores = 16)
flog.info('16 cores')

# Time the same workload (my.fun over 1:reps) under different foreach result
# handling options; each expression is evaluated `ntimes` times:
#   - default: results returned as a list, in input order
#   - .combine = 'c': results concatenated into a vector
#   - .combine = 'cbind': results bound together as columns
#   - .inorder = FALSE: results may be combined out of input order
# FALSE is spelled out because `F` is an ordinary variable that can be
# reassigned.
microbenchmark(
    foreach(el = 1:reps) %dopar% { my.fun(el) }
  , foreach(el = 1:reps, .inorder = FALSE) %dopar% { my.fun(el) }
  , foreach(el = 1:reps, .combine = 'c') %dopar% { my.fun(el) }
  , foreach(el = 1:reps, .combine = 'c', .inorder = FALSE) %dopar% { my.fun(el) }
  , foreach(el = 1:reps, .combine = 'cbind') %dopar% { my.fun(el) }
  , foreach(el = 1:reps, .combine = 'cbind', .inorder = FALSE) %dopar% { my.fun(el) }
  , times = ntimes)

```

### Benchmark combinations of BiocParallel  

```{r bioc_parallel}

library(BiocParallel)

# Same workload through BiocParallel::bplapply, comparing MulticoreParam
# back-ends built with 16, 15 and 14 as their first argument.
# Each MulticoreParam() is constructed inside the timed expression, so its
# construction cost is part of the measurement.

microbenchmark(

    bplapply(1:reps, my.fun, BPPARAM = MulticoreParam(16))

  , bplapply(1:reps, my.fun, BPPARAM = MulticoreParam(15))

  , bplapply(1:reps, my.fun, BPPARAM = MulticoreParam(14))

  , times = ntimes)

```

ecosyste.ms

Data

Tools

Indexes

Applications

Experiments

Awesome

https://github.com/averissimo/notes-on-r

Awesome Lists containing this project

README