https://github.com/averissimo/notes-on-r
General notes and tips on R
https://github.com/averissimo/notes-on-r
Last synced: 4 months ago
JSON representation
General notes and tips on R
- Host: GitHub
- URL: https://github.com/averissimo/notes-on-r
- Owner: averissimo
- License: gpl-3.0
- Created: 2016-12-13T16:59:39.000Z (over 9 years ago)
- Default Branch: master
- Last Pushed: 2016-12-13T21:18:11.000Z (over 9 years ago)
- Last Synced: 2025-06-08T11:48:25.662Z (12 months ago)
- Size: 19.5 KB
- Stars: 0
- Watchers: 2
- Forks: 0
- Open Issues: 0
-
Metadata Files:
- Readme: README.Rmd
- License: LICENSE
Awesome Lists containing this project
README
---
title: "Notes on R"
author: "André Veríssimo"
output: github_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
## Benchmarking parallel methods with `cor`
### Functions to build dataset
```{r fun_aux}
library(futile.logger)
#' Generate a random template vector and noisy copies of it
#'
#' Draws a uniform template of length `sample.size`, then builds `reps`
#' columns from it. For each column a threshold is drawn as
#' `abs(.3 + runif(1))`; template entries below the threshold receive small
#' uniform noise, the remaining entries receive larger Gaussian noise.
#'
#' @param reps Number of columns to generate (non-negative number).
#' @param sample.size Number of observations per column (non-negative number).
#' @return A list with `dat`, a numeric matrix with one row per observation
#'   and one column per repetition, and `template`, the vector the columns
#'   were derived from.
genData <- function(reps = 100, sample.size = 1000) {
  stopifnot(
    is.numeric(reps), length(reps) == 1, reps >= 0,
    is.numeric(sample.size), length(sample.size) == 1, sample.size >= 0
  )

  # Perturb one template: entries below `correlation` stay close to the
  # template, the others are blurred with wider Gaussian noise.
  genDataAux <- function(template = runif(1000), correlation = 1) {
    xdata <- numeric(length(template))
    correlated.ixs <- template < correlation
    n.correlated <- sum(correlated.ixs)
    xdata[correlated.ixs] <- template[correlated.ixs] +
      .0005 * runif(n.correlated)
    xdata[!correlated.ixs] <- template[!correlated.ixs] +
      .05 * rnorm(length(template) - n.correlated)
    xdata
  }

  template <- runif(sample.size)
  # seq_len() performs no iterations for reps = 0 (seq(0) would give c(1, 0));
  # vapply() fixes the type and length of every generated column.
  dat <- vapply(
    seq_len(reps),
    function(e) genDataAux(template, correlation = abs(.3 + runif(1))),
    numeric(length(template))
  )
  # vapply() returns a plain vector when each column has length one, so the
  # matrix shape is set explicitly to keep `dat[, i]` indexing valid.
  dim(dat) <- c(length(template), reps)

  flog.info('Size of generated data:', dim(dat), capture = TRUE)
  list(dat = dat, template = template)
}
#' Benchmark workload: correlate the template with the first data column
#'
#' Reads the global `dat` list and computes the Pearson correlation between
#' `dat$template` and column 1 of `dat$dat`.
#'
#' @param dat.ix Index supplied by the calling apply/foreach construct. It is
#'   not used in the body, so every call performs the same computation.
#' @return The Pearson correlation coefficient as a single number.
my.fun <- function(dat.ix) {
  reference <- dat$template
  first.column <- dat$dat[, 1]
  cor(x = reference, y = first.column, method = "pearson")
}
```
### Setting up common benchmark parameters
```{r setup_params}
library(microbenchmark)
# Number of timing repetitions microbenchmark runs for each expression.
ntimes <- 10
# Number of correlations every benchmarked expression computes.
reps <- 100000

# One generated column is enough: my.fun() only ever reads column 1.
dat <- genData(reps = 1, sample.size = 1000)

# Log the benchmark configuration, framed by empty log lines.
flog.info("")
flog.info("")
flog.info("Repeating benchmarks %d times to find statistics", ntimes)
flog.info(" -> %d samples per variables", length(dat$template))
flog.info(" -> %d correlations being calculated", reps)
flog.info("")
```
### Benchmark Xapply functions
```{r xapply}
# Time the sequential apply-family variants on the same index vector.
# The third vapply() argument is its FUN.VALUE template for each result.
microbenchmark(
  lapply(1:reps, my.fun),
  sapply(1:reps, my.fun),
  vapply(1:reps, my.fun, array(double(0), 1)),
  times = ntimes
)
```
### Benchmark different combinations of mclapply
```{r mclapply}
library(parallel)
# Time mclapply() on the same workload with 16, 15 and 14 worker cores.
microbenchmark(
  mclapply(1:reps, my.fun, mc.cores = 16),
  mclapply(1:reps, my.fun, mc.cores = 15),
  mclapply(1:reps, my.fun, mc.cores = 14),
  times = ntimes
)
```
### Benchmark different combinations of foreach
```{r foreach}
library(parallel)
library(foreach)
library(doMC) # parallel computing
# Register the doMC backend that %dopar% dispatches to.
registerDoMC(cores = 16)
flog.info('16 cores')
# Compare the ways foreach can collect results (default, `c`, `cbind`), each
# with and without ordered collection. `.inorder` is spelled FALSE rather
# than F because F is an ordinary variable that can be reassigned.
microbenchmark(
  foreach(el = 1:reps) %dopar% { my.fun(el) },
  foreach(el = 1:reps, .inorder = FALSE) %dopar% { my.fun(el) },
  foreach(el = 1:reps, .combine = 'c') %dopar% { my.fun(el) },
  foreach(el = 1:reps, .combine = 'c', .inorder = FALSE) %dopar% { my.fun(el) },
  foreach(el = 1:reps, .combine = 'cbind') %dopar% { my.fun(el) },
  foreach(el = 1:reps, .combine = 'cbind', .inorder = FALSE) %dopar% { my.fun(el) },
  times = ntimes
)
```
### Benchmark combinations of BiocParallel
```{r bioc_parallel}
library(BiocParallel)
#
# Time bplapply() on the same workload with MulticoreParam(16), (15) and (14).
microbenchmark(
  bplapply(1:reps, my.fun, BPPARAM = MulticoreParam(16)),
  bplapply(1:reps, my.fun, BPPARAM = MulticoreParam(15)),
  bplapply(1:reps, my.fun, BPPARAM = MulticoreParam(14)),
  times = ntimes
)
```