Ecosyste.ms: Awesome

An open API service indexing awesome lists of open source software.

Awesome Lists | Featured Topics | Projects

https://github.com/adwaith-rajesh/ziframe

A minimal DataFrame library in zig
https://github.com/adwaith-rajesh/ziframe

dataframe zig ziglang

Last synced: about 1 month ago
JSON representation

A minimal DataFrame library in zig

Host: GitHub
URL: https://github.com/adwaith-rajesh/ziframe
Owner: Adwaith-Rajesh
License: mit
Created: 2024-06-15T11:18:27.000Z (5 months ago)
Default Branch: master
Last Pushed: 2024-06-17T17:10:55.000Z (5 months ago)
Last Synced: 2024-10-02T07:04:43.203Z (about 1 month ago)
Topics: dataframe, zig, ziglang
Language: Zig
Homepage:
Size: 15.6 KB
Stars: 1
Watchers: 1
Forks: 0
Open Issues: 0
Metadata Files:
- Readme: README.md
- License: LICENSE

Awesome Lists containing this project

README

# Ziframe

A minimal 'DataFrame' library in zig.

In its current form it can only perform basic operations such as

- add rows

- add columns

- read from CSV file

- removing rows/cols (using fromDF())

- apply a function over the DataFrame

- get shape

---

### Why

- It's part of something big.

- I wanted a proper way to read CSV files in Zig.

---

### Usage

#### Building

- `build.zig.zon`

```zig

.{

    ...

    .dependencies = .{

        .ziframe = .{

            .url = "https://github.com/Adwaith-Rajesh/ziframe/archive/refs/tags/v0.1.0.tar.gz",

            .hash = "you know how to get this :)",

        }

    }

    ...

}

```

- `build.zig`

```zig

// Build-script excerpt showing how to wire the ziframe dependency into an
// executable. The `...` lines stand for the rest of your own build script;
// `target` and `optimize` are assumed to be defined there.
pub fn build(b: *std.Build) void {

    ...

    // Look up the dependency by name ("ziframe" matches the `.ziframe` entry
    // under `.dependencies` in build.zig.zon), forwarding the same
    // optimize/target settings used for the executable below.
    const ziframe = b.dependency("ziframe", .{

        .optimize = optimize,

        .target = target,

    });

    const your_exe = b.addExecutable(.{

        // placeholder: replace with the name of your executable
        .name = "you_exe",

        // placeholder: replace with the path to your root source file
        .root_source_file = b.path("path/to/source_file.zig"),

        .target = target,

        .optimize = optimize,

    });

    // Register the dependency's "ziframe" module on the executable under the
    // import name "ziframe", so source files can use `@import("ziframe")`.

    your_exe.root_module.addImport("ziframe", ziframe.module("ziframe"));

    ...

}

```

### Using Ziframe

- `test.csv`

```csv

id,marks1,marks2

1,10.23,20.45

2,12.90,33.45

3,46,50

```

- `main.zig`

```zig

const std = @import("std");

const debug = std.debug;

const zf = @import("ziframe");

/// Example program: loads `test.csv` into a DataFrame, appends a row, builds
/// derived and filtered DataFrames with `fromDF`, prints their shapes, and
/// finally updates rows in place with `map`.
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    // `deinit()` returns a leak-check result. Zig refuses to compile when a
    // non-void value is silently ignored, so discard it explicitly.
    defer _ = gpa.deinit();
    const alloc = gpa.allocator();

    // The columns of the DataFrame (one struct field per CSV column)
    const DFColumns = struct {
        id: u32,
        marks1: f64,
        marks2: f64,
    };

    // Create an empty DataFrame
    // var df = zf.DataFrame(DFColumns).init(alloc);
    // defer df.deinit();

    // read test.csv and create a new DataFrame
    var df = try zf.DataFrame(DFColumns).fromCSV(alloc, "./test.csv", .{});
    defer df.deinit();

    debug.print("CSV file contents\n", .{});
    debug.print("{}\n", .{df});

    // adding a new row
    try df.append(.{ .id = 4, .marks1 = 10, .marks2 = 20 });

    debug.print("Add new row\n", .{});
    debug.print("{}\n", .{df});

    // Create a new DF with id and total marks from 'df'
    const TotalDFCols = struct {
        id: u32,
        total: f64,
    };

    // Row-conversion function: maps each row of 'df' to a row of the new DF
    const total = struct {
        fn in(row: DFColumns) ?TotalDFCols {
            return .{
                .id = row.id,
                .total = row.marks1 + row.marks2,
            };
        }
    }.in;

    var total_df = try zf.DataFrame(TotalDFCols).fromDF(alloc, DFColumns, df, total);
    defer total_df.deinit();

    debug.print("New DataFrame with the total columns\n", .{});
    debug.print("{}\n", .{total_df});

    // filtering
    // filter DataFrame, display only even ids
    // (returning null from the callback leaves that row out of the result)
    const filterEven = struct {
        fn in(row: TotalDFCols) ?TotalDFCols {
            if (row.id % 2 != 0) return null;
            return row;
        }
    }.in;

    var even_df = try zf.DataFrame(TotalDFCols).fromDF(alloc, TotalDFCols, total_df, filterEven);
    defer even_df.deinit();

    debug.print("DataFrame with only even ids\n", .{});
    debug.print("{}\n", .{even_df});

    // printing the shape and size
    debug.print("shape of df: {} size of df: {}\n", .{ df.shape(), df.shape().size() });
    debug.print("shape of total_df: {} size of df: {}\n", .{ total_df.shape(), total_df.shape().size() });
    debug.print("shape of even_df: {} size of df: {}\n", .{ even_df.shape(), even_df.shape().size() });

    // Map Function
    // set total = 50 where id = 2
    const update = struct {
        // The callback receives a pointer to the row, so it can modify the
        // row in place.
        pub fn in(row: *TotalDFCols) !void {
            if (row.*.id == 2) {
                row.*.total = 50;
            }
        }
    }.in;

    try even_df.map(update);

    debug.print("\nset total = 50 where id = 2\n", .{});
    debug.print("{}\n", .{even_df});
}

```

- output

Output

```commandline

CSV file contents

index id marks1 marks2

0  1 10.2300000  20.4500000

1  2 12.9000000  33.4500000

2  3 46.0000000  50.0000000

Add new row

index id marks1 marks2

0  1 10.2300000  20.4500000

1  2 12.9000000  33.4500000

2  3 46.0000000  50.0000000

3  4 10.0000000  20.0000000

New DataFrame with the total columns

index id total

0  1 30.6800000

1  2 46.3500000

2  3 96.0000000

3  4 30.0000000

DataFrame with only even ids

index id total

0  2 46.3500000

1  4 30.0000000

shape of df: 4x3 size of df: 12

shape of total_df: 4x2 size of df: 8

shape of even_df: 2x2 size of df: 4

set total = 50 where id = 2

index id total

0  2 50.0000000

1  4 30.0000000

```

---

### More docs in the future - maybe :)

### Bye..