Ecosyste.ms: Awesome
An open API service indexing awesome lists of open source software.
https://github.com/ajtulloch/dnngraph
A DSL for deep neural networks, supporting Caffe and Torch
https://github.com/ajtulloch/dnngraph
Last synced: about 1 month ago
JSON representation
A DSL for deep neural networks, supporting Caffe and Torch
- Host: GitHub
- URL: https://github.com/ajtulloch/dnngraph
- Owner: ajtulloch
- License: other
- Created: 2015-02-15T03:11:12.000Z (almost 10 years ago)
- Default Branch: master
- Last Pushed: 2015-12-07T04:39:34.000Z (about 9 years ago)
- Last Synced: 2024-10-28T14:05:24.946Z (about 2 months ago)
- Language: Haskell
- Homepage: http://ajtulloch.github.io/dnngraph
- Size: 500 KB
- Stars: 699
- Watchers: 40
- Forks: 59
- Open Issues: 9
-
Metadata Files:
- Readme: README.org
- License: LICENSE
Awesome Lists containing this project
- my-awesome - ajtulloch/dnngraph - 12 star:0.7k fork:0.1k A DSL for deep neural networks, supporting Caffe and Torch (Haskell)
- awesome-haskell-deep-learning - dnngraph
README
* DNNGraph - A deep neural network model generation DSL in Haskell
It consists of several parts:- A DSL for specifying the model. This uses the [[http://lens.github.io/][lens]] library for
elegant, composable constructions, and the [[http://hackage.haskell.org/package/fgl-5.5.0.1][fgl]] graph library for
specifying the network layout.
- A set of optimization passes that run over the graph representation
to improve the performance of the model. For example, we can take
advantage of the fact that several layers types (=ReLU=, =Dropout=)
can operate in-place.
- A set of backends to generate code for the platform. Currently, we
generate
- Caffe (by generating model =prototxt= files)
- Torch (by generating Lua scripts)
- A set of useful CLI tools for exporting, visualizing and
understanding a model (visualization of network structure, parameter
density)For a guided example, see a [[http://bit.ly/17kDYze][demonstration IHaskell Notebook]].
** Building
Make sure that you have Python 2 and =protoc= from [[https://developers.google.com/protocol-buffers/][Protocol Buffers]] installed. Then run
#+BEGIN_SRC
$ cabal install hprotoc
$ ./lens_proto.sh # generate code from protocol buffers
$ cabal install
#+END_SRC** DSL Examples
The following script generates a replica of
https://github.com/BVLC/caffe/blob/master/models/bvlc_alexnet/train_val.prototxt.*** AlexNet
#+begin_src haskell
import Control.Lens
import Control.Monadimport NN.DSL
import NN.Examples.ImageNet
import NN.GraphalexTrain = train & cropSize' 227 & batchSize' 256 & mirror' True
alexTest = test & cropSize' 227 & batchSize' 50 & mirror' FalsealexLrn = lrn & localSize' 5 & alphaLRN' 0.0001 & betaLRN' 0.75
alexConv = conv & param' alexMult & weightFillerC' (gaussian 0.01) & biasFillerC' zero
alexIP n = ip n & param' alexMult & weightFillerIP' (gaussian 0.005) & biasFillerIP' (constant 0.1)
alexPool = maxPool & sizeP' 3alexMult = [def & lrMult' 1 & decayMult' 1, -- weights
def & lrMult' 2 & decayMult' 0] -- biases-- |Model
conv1 = alexConv & numOutputC' 96 & kernelSizeC' 11 & strideC' 4
conv2 = alexConv & numOutputC' 256 & padC' 2 & kernelSizeC' 5 & groupC' 2
conv3 = alexConv & numOutputC' 384 & padC' 1 & kernelSizeC' 3
conv4 = alexConv & numOutputC' 384 & padC' 1 & kernelSizeC' 3 & groupC' 2 & biasFillerC' (constant 0.1)
conv5 = alexConv & numOutputC' 256 & padC' 1 & kernelSizeC' 3 & groupC' 2 & biasFillerC' (constant 0.1)alexNet = do
-- Set up the model
(input', representation) <-
sequential [
-- Convolutional Layers
conv1, relu, alexLrn, alexPool & strideP' 3,
conv2, relu, alexLrn, alexPool & strideP' 2,
conv3, relu,
conv4, relu,
conv5, relu, alexPool & strideP' 2,
-- FC Layers
alexIP 4096, relu, dropout 0.5,
alexIP 4096, relu, dropout 0.5,
alexIP 1000 & weightFillerIP' (gaussian 0.01) & biasFillerIP' zero]forM_ [alexTrain, alexTest] $ attach (To input')
forM_ [accuracy 1, accuracy 5, softmax] $ attach (From representation)
#+end_srcor visually, using =NN.Visualize=,
#+ATTR_HTML: :height 600px
[[http://i.imgur.com/1hKlPdA.png]]*** GoogLeNet
The following script generates a replica of
https://github.com/BVLC/caffe/blob/master/models/bvlc_googlenet/train_val.prototxt#+begin_src haskell
module NN.Examples.GoogLeNet whereimport Gen.Caffe.FillerParameter as FP
import Gen.Caffe.InnerProductParameter as IP
import Gen.Caffe.LayerParameter as LPimport Control.Lens
import Control.Monad
import Data.Sequence (singleton)
import Data.Wordimport NN
import NN.Examples.ImageNetgoogleTrain = train & mirror' True & batchSize' 32 & cropSize' 224
googleTest = test & mirror' False & batchSize' 50 & cropSize' 224googleMult = [def & lrMult' 1 & decayMult' 1, -- weights
def & lrMult' 2 & decayMult' 0] -- biases
googleConv = conv & param' googleMult & biasFillerC' (constant 0.2)
googleLRN = lrn & localSize' 5 & alphaLRN' 0.0001 & betaLRN' 0.75
googlePool = maxPool & sizeP' 3 & strideP' 2
googleIP n = ip n & param' googleMultconv1 = googleConv & numOutputC' 64 & padC' 3 & kernelSizeC' 7 & strideC' 2 & weightFillerC' (xavier 0.1)
conv2 = googleConv & numOutputC' 192 & padC' 1 & kernelSizeC' 3 & weightFillerC' (xavier 0.03)topPool = avgPool & sizeP' 7 & strideP' 1
topFc = googleIP 1000 & biasFillerIP' (constant 0) & weightFillerIP' (xavier 0.0)
-- Weird, but in Caffe replication
& _inner_product_param._Just.IP._weight_filler._Just._std .~ Nothingdata Inception = Inception {_1x1, _3x3reduce, _3x3, _5x5reduce, _5x5, _poolProj :: Word32}
inception :: Node -> Inception -> NetBuilder Node
inception input Inception{..} = do
columns' <- mapM sequential columns
concat'' <- layer' concat'
forM_ columns' $ \(bottom, top) -> do
input >-> bottom
top >-> concat''
return concat''
where
columns = [
[googleConv & numOutputC' _1x1 & kernelSizeC' 1 & weightFillerC' (xavier 0.03), relu],
[googleConv & numOutputC' _3x3reduce & kernelSizeC' 1 & weightFillerC' (xavier 0.09), relu, googleConv & numOutputC' _3x3 & kernelSizeC' 3 & weightFillerC' (xavier 0.03) & padC' 1, relu],
[googleConv & numOutputC' _5x5reduce & kernelSizeC' 1 & weightFillerC' (xavier 0.2), relu, googleConv & numOutputC' _5x5 & kernelSizeC' 5 & weightFillerC' (xavier 0.03) & padC' 2, relu],
[maxPool& sizeP' 3 & strideP' 3 & padP' 1, googleConv & numOutputC' _poolProj & kernelSizeC' 1 & weightFillerC' (xavier 0.1), relu]]intermediateClassifier :: Node -> NetBuilder ()
intermediateClassifier source = do
(input, representation) <- sequential [pool1, conv1', relu, fc1, relu, dropout 0.7, fc2]
source >-> inputforM_ [accuracy 1, accuracy 5, softmax & _loss_weight <>~ singleton 0.3] $ attach (From representation)
where
pool1 = avgPool & sizeP' 5 & strideP' 3
conv1' = googleConv & numOutputC' 128 & kernelSizeC' 1 & weightFillerC' (xavier 0.08)
fc1 = googleIP 1024 & weightFillerIP' (xavier 0.02) & biasFillerIP' (constant 0.2)
fc2 = googleIP 1000 & weightFillerIP' (xavier 0.0009765625) & biasFillerIP' (constant 0)-- What to do at each row in the inner column?
data Row = I Inception | Classifier | MaxPoolinsertRow :: Node -> Row -> NetBuilder Node
insertRow input (I inceptor) = inception input inceptor
insertRow input Classifier = do
intermediateClassifier input
return input
insertRow input MaxPool = do
node <- layer' googlePool
input >-> node
return nodegoogLeNet :: NetBuilder ()
googLeNet = do
(input, initial) <- sequential [conv1, relu, googlePool, googleLRN, conv2, relu, googleLRN, googlePool]top <- foldM insertRow initial [
I $ Inception 64 96 128 16 32 32,
I $ Inception 128 128 192 32 96 64,
MaxPool,
I $ Inception 192 96 208 16 48 64,
Classifier,
I $ Inception 150 112 224 24 64 64,
I $ Inception 128 128 256 24 64 64,
I $ Inception 112 144 288 32 64 64,
Classifier,
I $ Inception 256 160 320 32 128 128,
MaxPool,
I $ Inception 256 160 320 32 128 128,
I $ Inception 384 192 384 48 128 128](_, representation) <- with top >- sequential [topPool, dropout 0.4, topFc]
forM_ [accuracy 1, accuracy 5, softmax] $ attach (From representation)
forM_ [googleTrain, googleTest] $ attach (To input)main :: IO ()
main = cli googLeNet
#+end_src** CLI Usage
In the GoogLeNet example, above, we included the line =main = cli
googLeNet=. This generates a CLI for our model that can be accessed
with =runhaskell /path/to/our/model.hs=. Currently, we can- export to Caffe
- export to Torch
- visualize the network structure.For example:
#+BEGIN_SRC
$ runhaskell NN/Examples/GoogLeNet.hs --help
Usage: GoogLeNet.hs COMMANDAvailable options:
-h,--help Show this help textAvailable commands:
caffe Generate a Caffe .prototxt to run with `caffe train
--model=<>
torch Generate Lua code to be `require`'d into an existing
Torch script
visualize Generate an image visualizing the model's connectivity$ runhaskell NN/Examples/GoogLeNet.hs caffe --output /tmp/x.prototxt
$ runhaskell NN/Examples/GoogLeNet.hs visualize --format pdf --output /tmp/x.pdf
#+END_SRC** Caffe Backend
The Caffe backend generates a Caffe =.prototxt= that can be run with
=caffe train --model=<>=, without any modification necessary.** Torch Backend
The Torch backend generates Lua code that can be imported directly
into an existing Torch script.Anything network that can be expressed as a nested combination of
computational layers, combined with =nn.Sequential=, =nn.Concat=,
=nn.ModelParallel=, =nn.DataParallel= etc can be generated under this framework.For an example output, the model specified as
#+begin_src haskell
alexTrain = train & cropSize' 227 & batchSize' 256 & mirror' True
alexTest = test & cropSize' 227 & batchSize' 50 & mirror' FalsealexConv = conv & param' alexMult & weightFillerC' (gaussian 0.01) & biasFillerC' zero
alexPool = maxPool & sizeP' 3conv1 = alexConv & numOutputC' 96 & kernelSizeC' 11 & strideC' 4
pool1 = alexPool & strideP' 3model = do
(input', representation) <- sequential [conv1, relu, pool1]
forM_ [alexTrain, alexTest] $ attach (To input')
forM_ [accuracy 1, accuracy 5, softmax] $ attach (From representation)
#+end_srcgenerates the following code:
#+begin_src lua
require("nn")
require("cunn")
local seq0 = nn.Sequential()
seq0:add(nn.SpatialConvolutionMM(nil, 96, 11, 11, 4, 4, 0))
seq0:add(nn.Threshold())
seq0:add(nn.SpatialMaxPooling(3, 3, 3, 3))
seq0:add(nn.LogSoftMax())
local criterion1 = nn.ClassNLLCriterion()
return seq0, criterion1
#+end_srcFor a more complicated example, the network specified as
#+begin_src haskell
do
x <- layer' relu
(_, y) <- with x >- sequential [conv, relu, maxPool, conv, relu]
(_, z) <- with x >- sequential [conv, relu, maxPool, conv, relu]
concat'' <- layer' concat'y >-> concat''
z >-> concat''
_ <- with concat'' >- sequential [ip 4096, relu, dropout 0.5, ip 1000, softmax]
return ()
#+end_srcthat looks like
#+ATTR_HTML: :height 600px
[[http://i.imgur.com/dsqgYna.png][http://i.imgur.com/dsqgYna.png]]will generate
#+begin_src lua
require("nn")
local seq0 = nn.Sequential()
local mod1 = nn.Threshold()
seq0:add(mod1)
local concat2 = nn.DepthConcat()
local seq3 = nn.Sequential()
local mod4 = nn.SpatialConvolutionMM(nil, nil, nil, nil, 1, 1, 0)
seq3:add(mod4)
local mod5 = nn.Threshold()
seq3:add(mod5)
local mod6 = nn.SpatialMaxPooling(nil, nil, 1, 1)
seq3:add(mod6)
local mod7 = nn.SpatialConvolutionMM(nil, nil, nil, nil, 1, 1, 0)
seq3:add(mod7)
local mod8 = nn.Threshold()
seq3:add(mod8)
concat2:add(seq3)
local seq9 = nn.Sequential()
local mod10 = nn.SpatialConvolutionMM(nil, nil, nil, nil, 1, 1, 0)
seq9:add(mod10)
local mod11 = nn.Threshold()
seq9:add(mod11)
local mod12 = nn.SpatialMaxPooling(nil, nil, 1, 1)
seq9:add(mod12)
local mod13 = nn.SpatialConvolutionMM(nil, nil, nil, nil, 1, 1, 0)
seq9:add(mod13)
local mod14 = nn.Threshold()
seq9:add(mod14)
concat2:add(seq9)
seq0:add(concat2)
local mod15 = nn.Linear(nil, 4096)
seq0:add(mod15)
local mod16 = nn.Threshold()
seq0:add(mod16)
local mod17 = nn.Dropout(0.5)
seq0:add(mod17)
local mod18 = nn.Linear(nil, 1000)
seq0:add(mod18)
local mod19 = nn.LogSoftMax()
seq0:add(mod19)
local criteria20 = nn.ClassNLLCriterion()
return seq0, criteria20
#+end_src** Visualization Examples
The =NN.Visualize= module provides some plotting tools. To use these,#+begin_src haskell
import NN.Visualizevisualize :: Net -> DotGraph Node
png :: FilePath -> DotGraph Node -> IO FilePath-- For example, to visualize GoogLeNet to a file
file :: FilePath
(frontend googLeNet & visualize & png file) :: IO FilePath
#+end_srcAn example output is (click for higher resolution):
#+ATTR_HTML: :height 600px
[[http://i.imgur.com/ScvjNmT.jpg]]
** Parameter Sweeps
To use this, write your model generation script as a Haskell file, and
then (for example)
#+begin_src sh
caffe train --model <(runhaskell Model.hs) --solver=solver.prototxt
#+end_srcTo perform a parameter sweep, use the parameterizing
#+begin_src sh
for model in $(runhaskell Model.hs); do
caffe train --model=$model --solver=solver.prototxt
done
#+end_src