{"id":15048009,"url":"https://github.com/wst24365888/libstreamvbyte","last_synced_at":"2025-04-10T01:08:30.922Z","repository":{"id":63468353,"uuid":"560689176","full_name":"wst24365888/libstreamvbyte","owner":"wst24365888","description":"A C++ implementation of StreamVByte, with Python bindings.","archived":false,"fork":false,"pushed_at":"2024-07-27T13:37:59.000Z","size":152,"stargazers_count":10,"open_issues_count":0,"forks_count":1,"subscribers_count":1,"default_branch":"main","last_synced_at":"2025-03-24T02:51:31.379Z","etag":null,"topics":["aarch64","amd64","apple-silicon","cibuildwheel","compression","cpp","cpp11","integer-compression","parallel","pybind11","pypi","python","python3","simd","ssse3","streamvbyte","x86-64"],"latest_commit_sha":null,"homepage":"https://pypi.org/project/libstreamvbyte/","language":"C++","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"mit","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/wst24365888.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":"LICENSE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null}},"created_at":"2022-11-02T03:21:00.000Z","updated_at":"2024-07-27T13:37:32.000Z","dependencies_parsed_at":"2025-02-16T06:32:51.327Z","dependency_job_id":"e522fd0b-a31f-4e81-849a-2bb03d246781","html_url":"https://github.com/wst24365888/libstreamvbyte","commit_stats":null,"previous_names":[],"tags_count":16,"template":false,"template_full_name":null,"repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/wst24365888%2Flibstreamvbyte","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/wst24365888%2Flibstreamvbyte/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/wst24365888%2Flibstreamvbyte/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/wst24365888%2Flibstreamvbyte/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/wst24365888","download_url":"https://codeload.github.com/wst24365888/libstreamvbyte/tar.gz/refs/heads/main","host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":247829517,"owners_count":21002997,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["aarch64","amd64","apple-silicon","cibuildwheel","compression","cpp","cpp11","integer-compression","parallel","pybind11","pypi","python","python3","simd","ssse3","streamvbyte","x86-64"],"created_at":"2024-09-24T21:06:51.460Z","updated_at":"2025-04-10T01:08:30.902Z","avatar_url":"https://github.com/wst24365888.png","language":"C++","funding_links":[],"categories":[],"sub_categories":[],"readme":"\u003cdiv id=\"top\"\u003e\u003c/div\u003e\r\n\r\n\u003c!-- PROJECT SHIELDS --\u003e\r\n\r\n[\u003cdiv align=\"center\"\u003e ![Contributors][contributors-shield]][contributors-url]\r\n[![Forks][forks-shield]][forks-url]\r\n[![Stargazers][stars-shield]][stars-url]\r\n[![MIT License][license-shield]][license-url]\r\n[![Issues][issues-shield]][issues-url]\r\n[![Issues Closed][issues-closed-shield]][issues-closed-url]\r\n[![Python Version][python-version-shield]][python-version-url]\r\n[![Week Download][download-shield]\u003c/div\u003e][download-url]\r\n\r\n\u003cbr /\u003e\r\n\r\n\u003c!-- PROJECT BANNER --\u003e\r\n\r\n![libstreamvbyte](https://socialify.git.ci/wst24365888/libstreamvbyte/image?description=1\u0026font=KoHo\u0026name=1\u0026owner=1\u0026pattern=Circuit%20Board\u0026theme=Light)\r\n\r\n\u003cbr /\u003e\r\n\u003cdiv align=\"center\"\u003e\r\n\u003cp align=\"center\"\u003e\r\n    \u003ca href=\"https://github.com/wst24365888/libstreamvbyte#usage\"\u003e\u003cstrong\u003eExplore Usage »\u003c/strong\u003e\u003c/a\u003e\r\n    \u003cbr /\u003e\r\n    \u003cbr /\u003e\r\n    \u003ca href=\"https://github.com/wst24365888/libstreamvbyte/issues\"\u003eReport Bug\u003c/a\u003e\r\n    ·\r\n    \u003ca href=\"https://github.com/wst24365888/libstreamvbyte/issues\"\u003eRequest Feature\u003c/a\u003e\r\n  \u003c/p\u003e\r\n\u003c/div\u003e\r\n\r\n\u003c!-- TABLE OF CONTENTS --\u003e\r\n\r\n\u003cdetails\u003e\r\n  \u003csummary\u003eTable of Contents\u003c/summary\u003e\r\n  \u003col\u003e\r\n    \u003cli\u003e\r\n      \u003ca href=\"#about-the-project\"\u003eAbout The Project\u003c/a\u003e\r\n    \u003c/li\u003e\r\n    \u003cli\u003e\r\n      \u003ca href=\"#getting-started\"\u003eGetting Started\u003c/a\u003e\r\n      \u003cul\u003e\r\n        \u003cli\u003e\u003ca href=\"#installation\"\u003eInstallation\u003c/a\u003e\u003c/li\u003e\r\n        \u003cli\u003e\u003ca href=\"#usage\"\u003eUsage\u003c/a\u003e\u003c/li\u003e\r\n        \u003cli\u003e\u003ca href=\"#example\"\u003eExample\u003c/a\u003e\u003c/li\u003e\r\n      \u003c/ul\u003e\r\n    \u003c/li\u003e\r\n    \u003cli\u003e\u003ca href=\"#benchmark\"\u003eBenchmark\u003c/a\u003e\u003c/li\u003e\r\n    \u003cli\u003e\u003ca href=\"#roadmap\"\u003eRoadmap\u003c/a\u003e\u003c/li\u003e\r\n    \u003cli\u003e\u003ca href=\"#contributing\"\u003eContributing\u003c/a\u003e\u003c/li\u003e\r\n    \u003cli\u003e\u003ca href=\"#license\"\u003eLicense\u003c/a\u003e\u003c/li\u003e\r\n    \u003cli\u003e\u003ca href=\"#reference\"\u003eReference\u003c/a\u003e\u003c/li\u003e\r\n    \u003cli\u003e\u003ca href=\"#contact\"\u003eContact\u003c/a\u003e\u003c/li\u003e\r\n  \u003c/ol\u003e\r\n\u003c/details\u003e\r\n\r\n\u003c!-- ABOUT THE PROJECT --\u003e\r\n\r\n## About The Project\r\n\r\n`libstreamvbyte` is a `C++` implementation of [StreamVByte](#reference), with `Python` bindings using `pybind11`. \r\n\r\n[StreamVByte](#reference) is an integer compression technique that use SIMD instructions (vectorization) to improve performance. The library is optimized with `SSSE3` intrinsics, which are supported by most `x86_64` processors. It uses `sse2neon` to translate `SSSE3` intrinsics to `NEON` intrinsics for `ARM` processors. The library can also be used with other 32-bit architectures, although it will fall back to scalar implementations in those cases.\r\n\r\nWith `libstreamvbyte`, you can quickly and efficiently compress integer sequences, reducing the amount of storage space and network bandwidth required. The library is easy to use and integrates seamlessly with `Python` via `pybind11` bindings. Whether you're working with large datasets or building a distributed computing system, `libstreamvbyte` can help you improve performance and reduce the resources needed to handle your data.\r\n\r\nCurrently supports `Python 3.8+` on Windows, Linux (`manylinux_2_17`, `musllinux_1_1`) and macOS (`universal2`).\r\n\r\n\u003cp align=\"right\"\u003e(\u003ca href=\"#top\"\u003eback to top\u003c/a\u003e)\u003c/p\u003e\r\n\r\n\u003c!-- GETTING STARTED --\u003e\r\n\r\n## Getting Started\r\n\r\n### Installation\r\n\r\n#### For `Python`\r\n\r\nInstall from `PyPI` using `pip`.\r\n\r\n```bash\r\npip install libstreamvbyte\r\n```\r\n\r\nOr install from `.whl` file.\r\n\r\n```bash\r\npip install \"path/to/your/downloaded/whl\"\r\n```\r\n\r\nTo find appropriate `.whl` file, please visit [releases](https://github.com/wst24365888/libstreamvbyte/releases).\r\n\r\n#### For `C++`\r\n\r\nYou must have `CMake` installed on your system.\r\n\r\n```bash\r\n# clone the repo\r\ngit clone https://github.com/wst24365888/libstreamvbyte\r\ncd libstreamvbyte\r\n\r\n# build and install\r\ncmake .\r\nmake\r\nsudo make install\r\n```\r\n\r\n### Usage\r\n\r\n#### For `Python`\r\n\r\nImport `libstreamvbyte` first.\r\n\r\n```python\r\nimport libstreamvbyte as svb\r\n```\r\n\r\nAnd here are the APIs.\r\n\r\n```python\r\n# Encode an array of unsigned integers into a byte array.\r\nencode(in_uint32: numpy.ndarray[numpy.uint32]) -\u003e numpy.ndarray[numpy.uint8]\r\n\r\n# Decode a byte array into an array of unsigned integers.\r\ndecode(in_uint8: numpy.ndarray[numpy.uint8], size: int) -\u003e numpy.ndarray[numpy.uint32]\r\n\r\n# Encode an array of signed integers into an array of unsigned integers.\r\nzigzag_encode(in_int32: numpy.ndarray[numpy.int32]) -\u003e numpy.ndarray[numpy.uint32]\r\n\r\n# Decode an array of unsigned integers into an array of signed integers.\r\nzigzag_decode(in_uint32: numpy.ndarray[numpy.uint32]) -\u003e numpy.ndarray[numpy.int32]\r\n\r\n# Check if the current wheel is a vectorized version.\r\nis_vectorized_version() -\u003e bool\r\n```\r\n\r\n#### For `C++`\r\n\r\nInclude `streamvbyte.h` first.\r\n\r\n```cpp\r\n#include \"streamvbyte.h\"\r\n```\r\n\r\nFor the APIs, please refer to [include/streamvbyte.h](https://github.com/wst24365888/libstreamvbyte/blob/main/include/streamvbyte.h).\r\n\r\n### Example\r\n\r\n#### For `Python`\r\n\r\n```python\r\nimport libstreamvbyte as svb\r\n\r\nN = 2**20 + 2\r\n\r\n# type(original_data) == np.ndarray\r\n# original_data.dtype == np.int32\r\noriginal_data = np.random.randint(-2**31, 2**31, N, dtype=np.int32)\r\n\r\n# type(compressed_bytes) == np.ndarray\r\n# compressed_bytes.dtype == np.uint8\r\ncompressed_bytes = svb.encode(svb.zigzag_encode(original_data))\r\n\r\n# type(recovered_data) == np.ndarray\r\n# recovered_data.dtype == np.int32\r\nrecovered_data = svb.zigzag_decode(svb.decode(compressed_bytes, N))\r\n```\r\n\r\n#### For `C++`\r\n\r\n```cpp\r\n#include \"streamvbyte.h\"\r\n\r\nint main() {\r\n    std::size_t N = (1 \u003c\u003c 20) + 2;\r\n\r\n    std::vector\u003cint32_t\u003e original_data(N);\r\n    for (std::size_t i = 0; i \u003c N; ++i) {\r\n        original_data[i] = rand() - rand();\r\n    }\r\n\r\n    std::vector\u003cuint8_t\u003e compressed_bytes = streamvbyte::encode(streamvbyte::zigzag_encode(original_data));\r\n    std::vector\u003cint32_t\u003e recovered_data = streamvbyte::zigzag_decode(streamvbyte::decode(compressed_bytes, N));\r\n\r\n    return 0;\r\n}\r\n```\r\n\r\nCompile it with linking to `libstreamvbyte`.\r\n\r\n```bash\r\ng++ -o example example.cpp -lstreamvbyte\r\n```\r\n\r\n\u003cp align=\"right\"\u003e(\u003ca href=\"#top\"\u003eback to top\u003c/a\u003e)\u003c/p\u003e\r\n\r\n\u003c!-- BENCHMARK --\u003e\r\n\r\n## Benchmark\r\n\r\n```bash\r\nOS: Linux 5.15.79.1-microsoft-standard-WSL2 x86_64\r\nCPU: AMD Ryzen 5 3600 6-Core Processor (12) @ 3.600GHz\r\n\r\nRun on (12 X 3593.26 MHz CPU s)\r\nCPU Caches:\r\n  L1 Data 32 KiB (x6)\r\n  L1 Instruction 32 KiB (x6)\r\n  L2 Unified 512 KiB (x6)\r\n  L3 Unified 16384 KiB (x1)\r\nLoad Average: 0.81, 0.85, 0.69\r\n-----------------------------------------------------------------------------------\r\nBenchmark                              Time             CPU   Iterations Throughput\r\n-----------------------------------------------------------------------------------\r\nBM_memcpy/4096                       149 ns          149 ns      4688531 13.7122G/s\r\nBM_memcpy/8192                       548 ns          548 ns      1275803 7.46783G/s\r\nBM_memcpy/16384                     1139 ns         1138 ns       640835 7.19553G/s\r\nBM_memcpy/32768                     2185 ns         2185 ns       320840 7.49932G/s\r\nBM_memcpy/65536                     4921 ns         4921 ns       142703 6.65895G/s\r\nBM_memcpy/131072                   10968 ns        10968 ns        63502 5.97511G/s\r\nBM_memcpy/262144                   22465 ns        22465 ns        31134 5.83457G/s\r\nBM_memcpy/524288                   45101 ns        45100 ns        15541 5.81245G/s\r\nBM_memcpy/1048576                  91131 ns        91131 ns         7639 5.75314G/s\r\nBM_streamvbyte_encode/4096          1222 ns         1222 ns       580855 1.67556G/s\r\nBM_streamvbyte_encode/8192          2470 ns         2467 ns       282349 1.66064G/s\r\nBM_streamvbyte_encode/16384         4945 ns         4945 ns       139671 1.65662G/s\r\nBM_streamvbyte_encode/32768         9990 ns         9989 ns        70497 1.64017G/s\r\nBM_streamvbyte_encode/65536        19853 ns        19853 ns        30963 1.65051G/s\r\nBM_streamvbyte_encode/131072       39933 ns        39932 ns        17401 1.64118G/s\r\nBM_streamvbyte_encode/262144       80563 ns        80562 ns         8193 1.62697G/s\r\nBM_streamvbyte_encode/524288      160716 ns       160716 ns         4284  1.6311G/s\r\nBM_streamvbyte_encode/1048576     319253 ns       319253 ns         1942 1.64223G/s\r\nBM_streamvbyte_decode/4096           691 ns          691 ns      1040462 2.96191G/s\r\nBM_streamvbyte_decode/8192          1341 ns         1341 ns       516979 3.05539G/s\r\nBM_streamvbyte_decode/16384         2683 ns         2683 ns       261208 3.05359G/s\r\nBM_streamvbyte_decode/32768         5348 ns         5348 ns       130319 3.06353G/s\r\nBM_streamvbyte_decode/65536        10817 ns        10817 ns        64427 3.02936G/s\r\nBM_streamvbyte_decode/131072       23207 ns        23207 ns        31546   2.824G/s\r\nBM_streamvbyte_decode/262144       45746 ns        45746 ns        11291 2.86519G/s\r\nBM_streamvbyte_decode/524288       88660 ns        88660 ns         7947 2.95673G/s\r\nBM_streamvbyte_decode/1048576     178497 ns       178497 ns         3907 2.93724G/s\r\nBM_zigzag_encode/4096                810 ns          810 ns       854076 2.52829G/s\r\nBM_zigzag_encode/8192               1611 ns         1608 ns       433154   2.548G/s\r\nBM_zigzag_encode/16384              3174 ns         3174 ns       219165 2.58084G/s\r\nBM_zigzag_encode/32768              6457 ns         6457 ns       108415 2.53754G/s\r\nBM_zigzag_encode/65536             12582 ns        12582 ns        54747 2.60432G/s\r\nBM_zigzag_encode/131072            25243 ns        25243 ns        27802 2.59617G/s\r\nBM_zigzag_encode/262144            50278 ns        50278 ns        13952 2.60693G/s\r\nBM_zigzag_encode/524288           100563 ns       100562 ns         6932 2.60678G/s\r\nBM_zigzag_encode/1048576          211846 ns       211845 ns         3222 2.47487G/s\r\nBM_zigzag_decode/4096                675 ns          675 ns      1041044 3.03263G/s\r\nBM_zigzag_decode/8192               1342 ns         1342 ns       523553 3.05196G/s\r\nBM_zigzag_decode/16384              2643 ns         2643 ns       265497 3.09905G/s\r\nBM_zigzag_decode/32768              5383 ns         5383 ns       130976 3.04377G/s\r\nBM_zigzag_decode/65536             11474 ns        11474 ns        60817 2.85588G/s\r\nBM_zigzag_decode/131072            21777 ns        21777 ns        32345 3.00944G/s\r\nBM_zigzag_decode/262144            43477 ns        43478 ns        14387  3.0147G/s\r\nBM_zigzag_decode/524288            86120 ns        86120 ns         8145 3.04393G/s\r\nBM_zigzag_decode/1048576          173095 ns       173093 ns         4028 3.02894G/s\r\n```\r\n\r\n\u003e The unit of `Throughput` is `GB/s` (Giga Bytes per second).\r\n\r\n### Build Benchmarks from Source\r\n\r\n```bash\r\ncmake . \\\r\n    -DCMAKE_BUILD_TYPE=Release \\\r\n    -DBUILD_SHARED_LIBS=OFF \\\r\n    -DBUILD_PYBIND11=OFF \\\r\n    -DPRINT_BENCHMARK=OFF \\\r\n    -DBUILD_TESTS=ON \\\r\n    -DBUILD_BENCHMARKS=ON\r\nmake libstreamvbyte_benchmarks\r\n./libstreamvbyte_benchmarks --benchmark_counters_tabular=true\r\n```\r\n\r\n\u003cp align=\"right\"\u003e(\u003ca href=\"#top\"\u003eback to top\u003c/a\u003e)\u003c/p\u003e\r\n\r\n\u003c!-- ROADMAP --\u003e\r\n\r\n## Roadmap\r\n\r\n- [x] Zigzag encoding/decoding.\r\n- [x] Support ARM processors with `NEON` intrinsics.\r\n- [ ] Differential coding (delta encoding/decoding).\r\n\r\nSee the [open issues](https://github.com/wst24365888/libstreamvbyte/issues)\r\nfor a full list of proposed features (and known issues).\r\n\r\n\u003cp align=\"right\"\u003e(\u003ca href=\"#top\"\u003eback to top\u003c/a\u003e)\u003c/p\u003e\r\n\r\n\u003c!-- CONTRIBUTING --\u003e\r\n\r\n## Contributing\r\n\r\nContributions are what make the open source community such an amazing place to\r\nlearn, inspire, and create. Any contributions you make are **greatly\r\nappreciated**.\r\n\r\nIf you have a suggestion that would make this better, please fork the repo and\r\ncreate a pull request. You can also simply open an issue with the tag\r\n\"enhancement\". Don't forget to give the project a star! Thanks again!\r\n\r\n1. Fork the Project\r\n2. Create your Feature Branch (`git checkout -b feat/amazing-feature`)\r\n3. Commit your Changes with\r\n   [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/)\r\n4. Push to the Branch (`git push origin feat/amazing-feature`)\r\n5. Open a Pull Request\r\n\r\n\u003cp align=\"right\"\u003e(\u003ca href=\"#top\"\u003eback to top\u003c/a\u003e)\u003c/p\u003e\r\n\r\n\u003c!-- LICENSE --\u003e\r\n\r\n## License\r\n\r\nDistributed under the MIT License. See\r\n[LICENSE](https://github.com/wst24365888/libstreamvbyte/blob/main/LICENSE)\r\nfor more information.\r\n\r\n\u003cp align=\"right\"\u003e(\u003ca href=\"#top\"\u003eback to top\u003c/a\u003e)\u003c/p\u003e\r\n\r\n\u003c!-- REFERENCE --\u003e\r\n\r\n## Reference\r\n\r\n- Daniel Lemire, Nathan Kurz, Christoph Rupp, [Stream VByte: Faster Byte-Oriented Integer Compression](https://arxiv.org/abs/1709.08990), Information Processing Letters 130, 2018.\r\n\r\n\u003cp align=\"right\"\u003e(\u003ca href=\"#top\"\u003eback to top\u003c/a\u003e)\u003c/p\u003e\r\n\r\n\u003c!-- CONTACT --\u003e\r\n\r\n## Contact\r\n\r\n### Author\r\n\r\n- HSING-HAN, WU (Xyphuz)\r\n  - Mail me: xyphuzwu@gmail.com\r\n  - About me: \u003chttps://www.xyphuz.com\u003e\r\n  - GitHub: \u003chttps://github.com/wst24365888\u003e\r\n\r\n### Project Link\r\n\r\n- \u003chttps://github.com/wst24365888/libstreamvbyte\u003e\r\n\r\n\u003cp align=\"right\"\u003e(\u003ca href=\"#top\"\u003eback to top\u003c/a\u003e)\u003c/p\u003e\r\n\r\n[contributors-shield]: https://img.shields.io/github/contributors/wst24365888/libstreamvbyte.svg?style=for-the-badge\r\n[contributors-url]: https://github.com/wst24365888/libstreamvbyte/graphs/contributors\r\n[forks-shield]: https://img.shields.io/github/forks/wst24365888/libstreamvbyte.svg?style=for-the-badge\r\n[forks-url]: https://github.com/wst24365888/libstreamvbyte/network/members\r\n[stars-shield]: https://img.shields.io/github/stars/wst24365888/libstreamvbyte.svg?style=for-the-badge\r\n[stars-url]: https://github.com/wst24365888/libstreamvbyte/stargazers\r\n[issues-shield]: https://img.shields.io/github/issues/wst24365888/libstreamvbyte.svg?style=for-the-badge\r\n[issues-url]: https://github.com/wst24365888/libstreamvbyte/issues\r\n[issues-closed-shield]: https://img.shields.io/github/issues-closed/wst24365888/libstreamvbyte.svg?style=for-the-badge\r\n[issues-closed-url]: https://github.com/wst24365888/libstreamvbyte/issues?q=is%3Aissue+is%3Aclosed\r\n[license-shield]: https://img.shields.io/github/license/wst24365888/libstreamvbyte.svg?style=for-the-badge\r\n[license-url]: https://github.com/wst24365888/libstreamvbyte/blob/main/LICENSE\r\n[python-version-shield]: https://img.shields.io/pypi/pyversions/libstreamvbyte?color=A000A0\u0026style=for-the-badge\r\n[python-version-url]: https://pypi.org/project/libstreamvbyte/\r\n[download-shield]: https://img.shields.io/pypi/dw/libstreamvbyte?color=A000A0\u0026style=for-the-badge\r\n[download-url]: https://pypistats.org/packages/libstreamvbyte\r\n","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fwst24365888%2Flibstreamvbyte","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fwst24365888%2Flibstreamvbyte","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fwst24365888%2Flibstreamvbyte/lists"}