Ecosyste.ms: Awesome

An open API service indexing awesome lists of open source software.

Awesome Lists | Featured Topics | Projects

https://github.com/thiyangt/explainer

Tools to interpret random forest models :evergreen_tree::deciduous_tree::evergreen_tree::deciduous_tree::evergreen_tree::deciduous_tree::evergreen_tree::deciduous_tree::deciduous_tree::evergreen_tree::deciduous_tree::evergreen_tree::deciduous_tree::evergreen_tree::deciduous_tree:
https://github.com/thiyangt/explainer

Last synced: 20 days ago
JSON representation

Tools to interpret random forest models :evergreen_tree::deciduous_tree::evergreen_tree::deciduous_tree::evergreen_tree::deciduous_tree::evergreen_tree::deciduous_tree::deciduous_tree::evergreen_tree::deciduous_tree::evergreen_tree::deciduous_tree::evergreen_tree::deciduous_tree:

Awesome Lists containing this project

README

        

---
output: github_document
---

```{r setup, include = FALSE}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
fig.path = "man/figures/README-",
out.width = "100%"
)
```
# explainer

[![Build Status](https://travis-ci.org/thiyangt/explainer.svg?branch=master)](https://travis-ci.org/thiyangt/explainer)

## Installation

```{r, message=FALSE, eval=FALSE}
# install.packages("devtools")
devtools::install_github("thiyangt/explainer")
library(explainer)
```
## Usage

### Number of times each variable appear in each tree in the forest (`forest_info`).

```{r, message=FALSE}
library(explainer)
library(randomForest)
library(ggplot2)
set.seed(2018)
forest <- randomForest(Species ~ ., data = iris, ntree=50)
forest_details <- forest_info(forest, 50)
table_count_treeLevel <- lapply(forest_details,function(temp){as.data.frame(table(temp))})
table_count_treeLevel[1:2] ## show results corresponds to first two trees

```

### Total number of times each variable appear in the forest

```{r}
library(data.table)
full_forestLevel <- rbindlist(table_count_treeLevel, use.names=TRUE)
fullforest <- split(full_forestLevel, full_forestLevel$temp)
variable_freq <- sapply(fullforest , function(temp){sum(temp$Freq)})
variable_freq
variable_percent <- variable_freq/sum(variable_freq)*100
variable_percent
```

## ICE curves using a subset of data `ice`

```{r, message=FALSE}
subset_iris <- iris[10:15, -5]
ice_SW <- ice_cal(forest, Sepal.Width, iris, subset_iris, grid.resolution=10, trim.outliers=FALSE)

ice_SW$variable <- rep(1:6,10)
f1 <- ggplot(data = ice_SW, aes_string(x = ice_SW$Sepal.Width, y = ice_SW$setosa)) +
stat_summary(fun.y = mean, geom = "line", col = "red", size = 1) + xlab("Sepal.Width") +
stat_summary(fun.data = mean_cl_normal, geom = "ribbon", fun.args = list(mult = 1), alpha = 0.3) +
theme(legend.position = "none") + ylab("setosa")

f2 <- ggplot(data = ice_SW, aes_string(x = ice_SW$Sepal.Width, y = ice_SW$versicolor)) +
stat_summary(fun.y = mean, geom = "line", col = "red", size = 1) + xlab("Sepal.Width") +
stat_summary(fun.data = mean_cl_normal, geom = "ribbon", fun.args = list(mult = 1), alpha = 0.3) +
theme(legend.position = "none") + ylab("versicolor")

f3 <- ggplot(data = ice_SW, aes_string(x = ice_SW$Sepal.Width, y = ice_SW$virginica)) +
stat_summary(fun.y = mean, geom = "line", col = "red", size = 1) + xlab("Sepal.Width") +
stat_summary(fun.data = mean_cl_normal, geom = "ribbon", fun.args = list(mult = 1), alpha = 0.3) +
theme(legend.position = "none") + ylab("virginica")

gridExtra::grid.arrange(f1, f2, f3, ncol=3)
```

## Two-way partial dependency plots

Create two-way partial dependency plots based on **subset of training data**
used to train the random forest: `twowayinteraction`.

```{r, message=FALSE}

SL_PW_interaction <- twowayinteraction(forest, Sepal.Length, Petal.Width, iris, iris, grid.resolution=10)
head(SL_PW_interaction)
```

Graphical representation of **SL_PW_interaction**

```{r}

p1 <- ggplot(
data = SL_PW_interaction, aes_string(x = SL_PW_interaction$Sepal.Length,y = SL_PW_interaction$Petal.Width, z = SL_PW_interaction$setosa, fill = SL_PW_interaction$setosa
))+geom_tile() +
scale_fill_viridis_c(limits = c(0, 1), breaks = seq(0, 1, 100),option = "A", direction = -1)+
xlab("Sepal Length") + ylab("Petal Width") + theme(legend.position="none", aspect.ratio=1)+ggtitle("setosa")

p2 <- ggplot(
data = SL_PW_interaction, aes_string(x = SL_PW_interaction$Sepal.Length,y = SL_PW_interaction$Petal.Width, z = SL_PW_interaction$versicolor, fill = SL_PW_interaction$versicolor
))+geom_tile() +
scale_fill_viridis_c(limits = c(0, 1), breaks = seq(0, 1, 100),option = "A", direction = -1)+
xlab("Sepal Length") + ylab("Petal Width") + theme(legend.position="none", aspect.ratio=1)+ggtitle("versicolor")

p3 <- ggplot(
data = SL_PW_interaction, aes_string(x = SL_PW_interaction$Sepal.Length,y = SL_PW_interaction$Petal.Width, z = SL_PW_interaction$virginica, fill = SL_PW_interaction$virginica
))+geom_tile() +
scale_fill_viridis_c(limits = c(0, 1), breaks = seq(0, 1, 100),option = "A", direction = -1)+
xlab("Sepal Length") + ylab("Petal Width") + theme(legend.position="none", aspect.ratio=1)+ggtitle("virginica")

gridExtra::grid.arrange(p1, p2, p3, ncol=3)
```

```{r}
SL_SW_interaction <- twowayinteraction(forest, Sepal.Length, Sepal.Width, iris, iris, grid.resolution=10)
head(SL_SW_interaction)

f1 <- ggplot(
data = SL_SW_interaction, aes_string(x = SL_SW_interaction$Sepal.Length,y = SL_SW_interaction$Sepal.Width, z = SL_SW_interaction$setosa, fill = SL_SW_interaction$setosa
))+geom_tile() +
scale_fill_viridis_c(limits = c(0, 1), breaks = seq(0, 1, 100),option = "A", direction = -1)+
xlab("Sepal Length") + ylab("Sepal Width") + theme( aspect.ratio=1)+ggtitle("setosa")

f2 <- ggplot(
data = SL_SW_interaction, aes_string(x = SL_SW_interaction$Sepal.Length,y = SL_SW_interaction$Sepal.Width, z = SL_SW_interaction$versicolor, fill = SL_SW_interaction$versicolor
))+geom_tile() +
scale_fill_viridis_c(limits = c(0, 1), breaks = seq(0, 1, 100),option = "A", direction = -1)+
xlab("Sepal Length") + ylab("Sepal Width") + theme( aspect.ratio=1)+ggtitle("versicolor")

f3 <- ggplot(
data = SL_SW_interaction, aes_string(x = SL_SW_interaction$Sepal.Length,y = SL_SW_interaction$Sepal.Width, z = SL_SW_interaction$virginica, fill = SL_SW_interaction$virginica
))+geom_tile() +
scale_fill_viridis_c(limits = c(0, 1), breaks = seq(0, 1, 100),option = "A", direction = -1)+
xlab("Sepal Length") + ylab("Sepal Width") + theme( aspect.ratio=1)+ggtitle("virginica")

gridExtra::grid.arrange(f1, f2, f3, ncol=3)

```