https://github.com/dhruvsrikanth/cudann

A distributed implementation of a deep learning framework in CUDA.
https://github.com/dhruvsrikanth/cudann

cpp cuda deep-learning deep-learning-framework gpu-programming high-performance-computing hpc parallel-programming

Last synced: 2 months ago
JSON representation

A distributed implementation of a deep learning framework in CUDA.

Host: GitHub
URL: https://github.com/dhruvsrikanth/cudann
Owner: DhruvSrikanth
License: mit
Created: 2022-12-12T17:52:58.000Z (over 3 years ago)
Default Branch: master
Last Pushed: 2023-01-06T03:46:43.000Z (over 3 years ago)
Last Synced: 2025-05-29T20:12:45.479Z (about 1 year ago)
Topics: cpp, cuda, deep-learning, deep-learning-framework, gpu-programming, high-performance-computing, hpc, parallel-programming
Language: C++
Homepage:
Size: 186 KB
Stars: 2
Watchers: 1
Forks: 0
Open Issues: 0
Metadata Files:
- Readme: README.md
- License: LICENSE

Awesome Lists containing this project

README

# CUDANN

A distributed implementation of a deep learning framework in CUDA.

# Serial Implementation

The serial implementation can be tested with the following command:

```shell

make test_random

```

This command runs the following example, which shows how to use the framework:

```c++

#include 

#include 

#include 

#include 

#include 

#include  

#include 

#include "../serial/cudann.h"

// One training minibatch: an input tensor paired with its target tensor.
struct MiniBatch {

    // Source tensor for the model input; train() copies it before calling model->forward().
    Tensor *input;

    // Source tensor for the loss target; train() copies it before calling criterion->forward().
    Tensor *target;

};

// A collection of minibatches that train() walks through once per epoch.
struct Dataloader {

    // Array of minibatches; main() allocates it with n_batches entries.
    MiniBatch *minibatches;

    // Number of entries in minibatches; used as the loop bound in train().
    int n_batches;

};

/**
 * Trains `model` on every minibatch of `dataloader` for `epochs` epochs and
 * prints the average per-batch loss after each epoch.
 *
 * For each minibatch the function copies the input and target tensors, runs
 * model->forward(), evaluates `criterion`, feeds the gradient returned by
 * criterion->backward() into model->backward(), and finally calls
 * model->update_weights() with `learning_rate`.
 *
 * If a per-batch tensor cannot be allocated, an error is written to stderr and
 * the function returns early, leaving the model in whatever state it reached.
 *
 * @param n_classes     Not used by the loop; kept for interface compatibility.
 * @param n_features    Not used by the loop; kept for interface compatibility.
 * @param learning_rate Value passed to model->update_weights().
 * @param epochs        Number of passes over the dataloader.
 * @param dataloader    Source of the minibatches.
 * @param model         Network whose forward/backward/update methods are driven.
 * @param criterion     Loss object providing the loss value and its gradient.
 */
void train(const int n_classes, const int n_features, const double learning_rate, const int epochs, Dataloader *dataloader, NN *model, CrossEntropy *criterion) {
    // Part of the public signature, but nothing below needs them.
    (void) n_classes;
    (void) n_features;

    // Print model summary
    model->summary();
    printf("Loss function: ");
    criterion->show();
    printf("Training model for %d epochs with learning rate %f.\n", epochs, learning_rate);

    // Train the model
    for (int epoch = 0; epoch < epochs; epoch++) {
        // Accumulates the per-batch mean loss; normalised by the batch count below.
        double avg_loss = 0.0;

        for (int mb = 0; mb < dataloader->n_batches; mb++) {
            // Get minibatch and copy it to the appropriate device
            Tensor *input = (Tensor*) malloc(sizeof(Tensor));
            Tensor *target = (Tensor*) malloc(sizeof(Tensor));
            if (!input || !target) {
                // Nothing has been copied into either shell yet, so a plain free() is enough.
                fprintf(stderr, "train: out of memory while preparing minibatch %d\n", mb);
                free(input);
                free(target);
                return;
            }
            copy_tensor(input, dataloader->minibatches[mb].input);
            copy_tensor(target, dataloader->minibatches[mb].target);

            // Forward pass
            Tensor *output = model->forward(input);

            // Compute loss
            Tensor *loss = criterion->forward(output, target);

            // Compute the loss gradient
            Tensor *downstream_grad = criterion->backward();

            // Backward pass
            model->backward(downstream_grad);

            // Update weights
            model->update_weights(learning_rate);

            // Add this batch's mean loss to the running total
            avg_loss += loss->sum() / loss->batch_size;

            // NOTE(review): `input` and `target` (and anything copy_tensor allocated
            // for them) are never released here. Whether they can be freed at this
            // point depends on whether model/criterion keep the pointers and on how
            // Tensor owns its storage - confirm against cudann.h before adding a free.
        }

        // An empty dataloader would otherwise turn the average into NaN.
        if (dataloader->n_batches > 0) {
            avg_loss /= dataloader->n_batches;
        }

        printf("Epoch %d: Average loss: %f\n", epoch + 1, avg_loss);
    }
}

/**
 * Example driver: builds a four-layer model (linear1, relu1, linear2, softmax),
 * fills a Dataloader with randomly initialised minibatches, and trains the
 * model with a CrossEntropy criterion.
 *
 * Command-line arguments are ignored.
 *
 * @return 0 on success, 1 if any host allocation fails.
 */
int main(int argc, char *argv[]) {
    (void) argc;
    (void) argv;

    const int n_classes = 10;
    const int n_features = 28*28;
    const int batch_size = 64;
    const double learning_rate = 0.01;
    const int n_batches = 1000;
    const int epochs = 10;

    // Add layers
    Linear linear1(n_features, 128, true, "random", "linear1");
    ReLU relu1(128, "relu1");
    Linear linear2(128, n_classes, true, "random", "linear2");
    Softmax softmax(n_classes, "softmax");

    // Create model and add layers (in the order they are registered here)
    NN model;
    model.add_layer(&linear1);
    model.add_layer(&relu1);
    model.add_layer(&linear2);
    model.add_layer(&softmax);

    // Get loss function
    CrossEntropy criterion("cross_entropy");

    // Create dataloader
    Dataloader dataloader;
    dataloader.n_batches = n_batches;
    dataloader.minibatches = (MiniBatch*) malloc(n_batches*sizeof(MiniBatch));
    if (!dataloader.minibatches) {
        fprintf(stderr, "main: out of memory while allocating %d minibatches\n", n_batches);
        return 1;
    }

    for (int i = 0; i < n_batches; i++) {
        // Create random input tensor
        double *data = (double*) malloc(batch_size*n_features*sizeof(double));
        if (!data) {
            fprintf(stderr, "main: out of memory while allocating input data for minibatch %d\n", i);
            return 1;
        }
        initialize_random(data, batch_size*n_features);
        Tensor input(batch_size, n_features, data);

        // Create the random target tensor
        double *target_data = (double*) malloc(batch_size*n_classes*sizeof(double));
        if (!target_data) {
            fprintf(stderr, "main: out of memory while allocating target data for minibatch %d\n", i);
            return 1;
        }
        initialize_salt_and_pepper(target_data, batch_size*n_classes);
        Tensor target(batch_size, n_classes, target_data);

        // Create minibatch
        MiniBatch minibatch;
        minibatch.input = (Tensor*) malloc(sizeof(Tensor));
        minibatch.target = (Tensor*) malloc(sizeof(Tensor));
        if (!minibatch.input || !minibatch.target) {
            fprintf(stderr, "main: out of memory while allocating tensors for minibatch %d\n", i);
            return 1;
        }
        copy_tensor(minibatch.input, &input);
        copy_tensor(minibatch.target, &target);

        // NOTE(review): `data` and `target_data` are not freed here. Whether Tensor
        // takes ownership of the buffer, and whether copy_tensor duplicates it, is
        // not visible from this file - confirm against cudann.h.

        // Add minibatch to dataloader
        dataloader.minibatches[i] = minibatch;
    }

    // Train the model
    train(n_classes, n_features, learning_rate, epochs, &dataloader, &model, &criterion);

    return 0;
}

```

ecosyste.ms

Data

Tools

Indexes

Applications

Experiments

Awesome

https://github.com/dhruvsrikanth/cudann

Awesome Lists containing this project

README