An open API service indexing awesome lists of open source software.

https://github.com/averissimo/notes-on-r

General notes and tips on R
https://github.com/averissimo/notes-on-r

Last synced: 4 months ago
JSON representation

General notes and tips on R

Awesome Lists containing this project

README

          

---
title: "Notes on R"
author: "André Veríssimo"
output: github_document
---

```{r setup, include=FALSE}
# Set the knitr chunk default `echo = TRUE` for the rest of the document.
knitr::opts_chunk$set(echo = TRUE)
```

## Benchmarking parallel methods with `cor`

### Functions to build dataset

```{r fun_aux}
library(futile.logger)

# Generate a uniform template vector plus `reps` noisy copies of it.
#
# Args:
#   reps:        number of noisy columns to generate (0 gives an empty matrix).
#   sample.size: number of observations (rows) in the template and each column.
#
# Returns:
#   A list with two elements:
#     dat:      numeric matrix with `sample.size` rows and `reps` columns.
#     template: the uniform random vector every column is derived from.
#
# Side effects:
#   Logs the dimensions of the generated matrix through flog.info().
genData <- function(reps = 100, sample.size = 1000) {
  # Build one noisy copy of `template`: entries below `correlation` receive a
  # tiny uniform jitter, all other entries receive larger Gaussian noise.
  genDataAux <- function(template = runif(1000), correlation = 1) {
    correlated.ixs <- template < correlation
    n.correlated <- sum(correlated.ixs)
    xdata <- numeric(length(template))
    xdata[correlated.ixs] <- template[correlated.ixs] +
      .0005 * runif(n.correlated)
    xdata[!correlated.ixs] <- template[!correlated.ixs] +
      .05 * rnorm(length(template) - n.correlated)
    xdata
  }

  template <- runif(sample.size)

  # seq_len() gives an empty sequence for reps = 0, whereas seq(0) is c(1, 0).
  columns <- lapply(seq_len(reps), function(e) {
    genDataAux(template, correlation = abs(.3 + runif(1)))
  })

  # Assemble the matrix explicitly: sapply() would collapse the result to a
  # plain vector when sample.size is 1, which breaks `dat[, 1]` in callers.
  dat <- matrix(as.numeric(unlist(columns)), nrow = sample.size, ncol = reps)

  flog.info('Size of generated data:', dim(dat), capture = TRUE)
  list(dat = dat, template = template)
}

# Benchmark payload: Pearson correlation between the template and the first
# generated column of the global `dat` list.
#
# Args:
#   dat.ix: iteration index supplied by the apply-style caller; it is not
#           used, so every call computes the same correlation.
#
# Returns:
#   The value of cor() for the two vectors.
my.fun <- function(dat.ix) {
  first.column <- dat$dat[, 1]
  cor(dat$template, first.column, method = 'pearson')
}
```

### Setting up common benchmark parameters

```{r setup_params}
library(microbenchmark)

# Number of times each benchmarked expression is repeated (passed as `times`).
ntimes <- 10
# Number of correlations computed inside each benchmarked expression.
reps <- 100000

# One template and a single generated column of 1000 samples; every benchmark
# below reads this global object through my.fun().
dat <- genData(reps = 1, sample.size = 1000)

# Log a short summary of the benchmark configuration.
flog.info('')
flog.info('')
flog.info('Repeating benchmarks %d times to find statistics', ntimes)
flog.info(' -> %d samples per variables', length(dat$template))
flog.info(' -> %d correlations being calculated', reps)
flog.info('')

```

### Benchmark Xapply functions

```{r xapply}
# Time the sequential apply family on the same workload: `reps` calls to
# my.fun(), with each expression repeated `ntimes` times.
microbenchmark(
lapply(1:reps, my.fun)
, sapply(1:reps, my.fun)
# FUN.VALUE is a length-one double template; because it is built with array()
# rather than numeric(1), it carries a dim attribute.
, vapply(1:reps, my.fun, array(double(0), 1))
, times = ntimes)

```

### Benchmark different combinations of mclapply

```{r mclapply}
library(parallel)
# Time mclapply() on the same workload (`reps` calls to my.fun()) with
# mc.cores set to 16, 15 and 14; each expression is repeated `ntimes` times.
# NOTE(review): the core counts are hard-coded, presumably for a 16-core
# machine - confirm before running on different hardware.
microbenchmark(
mclapply(1:reps, my.fun, mc.cores = 16)
, mclapply(1:reps, my.fun, mc.cores = 15)
, mclapply(1:reps, my.fun, mc.cores = 14)
, times = ntimes)
```

### Benchmark different combinations of foreach

```{r foreach}
library(parallel)
library(foreach)
library(doMC) # parallel computing
# Register the doMC backend with 16 cores for the %dopar% loops below.
# NOTE(review): the core count is hard-coded, presumably for a 16-core
# machine - confirm before running on different hardware.
registerDoMC(cores = 16)
flog.info('16 cores')
# Time foreach over the same workload (`reps` calls to my.fun()) for every
# combination of result combiner (default, 'c', 'cbind') and ordering
# (.inorder left at its default vs. FALSE); each expression runs `ntimes` times.
# FALSE is spelled out because `F` is an ordinary variable that can be
# reassigned.
microbenchmark(
  foreach(el = 1:reps) %dopar% { my.fun(el) }
  , foreach(el = 1:reps, .inorder = FALSE) %dopar% { my.fun(el) }
  , foreach(el = 1:reps, .combine = 'c') %dopar% { my.fun(el) }
  , foreach(el = 1:reps, .combine = 'c', .inorder = FALSE) %dopar% { my.fun(el) }
  , foreach(el = 1:reps, .combine = 'cbind') %dopar% { my.fun(el) }
  , foreach(el = 1:reps, .combine = 'cbind', .inorder = FALSE) %dopar% { my.fun(el) }
  , times = ntimes)
```

### Benchmark combinations of BiocParallel

```{r bioc_parallel}
library(BiocParallel)
#
# Time bplapply() on the same workload (`reps` calls to my.fun()) using a
# MulticoreParam built with 16, 15 and 14; each expression is repeated
# `ntimes` times.
# NOTE(review): the worker counts are hard-coded, presumably for a 16-core
# machine - confirm before running on different hardware.
microbenchmark(
bplapply(1:reps, my.fun, BPPARAM = MulticoreParam(16))
, bplapply(1:reps, my.fun, BPPARAM = MulticoreParam(15))
, bplapply(1:reps, my.fun, BPPARAM = MulticoreParam(14))
, times = ntimes)
```