{"id":30740382,"url":"https://github.com/hosseinmoein/dataframe","last_synced_at":"2025-09-04T00:03:38.835Z","repository":{"id":40137419,"uuid":"108669343","full_name":"hosseinmoein/DataFrame","owner":"hosseinmoein","description":"C++ DataFrame for statistical, financial, and ML analysis in modern C++","archived":false,"fork":false,"pushed_at":"2025-09-02T14:37:07.000Z","size":50083,"stargazers_count":2793,"open_issues_count":0,"forks_count":341,"subscribers_count":74,"default_branch":"master","last_synced_at":"2025-09-04T00:01:56.676Z","etag":null,"topics":["ai","cpp","data-analysis","data-science","dataframe","financial-data-analysis","financial-engineering","heterogeneous-data","large-data","machine-learning","multidimensional-data","numerical-analysis","pandas","polars","statistical","statistical-analysis","tensor","tensorboard","trading-algorithms","trading-strategies"],"latest_commit_sha":null,"homepage":"https://hosseinmoein.github.io/DataFrame/","language":"C++","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"bsd-3-clause","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/hosseinmoein.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":"docs/CONTRIBUTING.md","funding":".github/FUNDING.yml","license":"License","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null,"zenodo":null,"notice":null,"maintainers":null,"copyright":null,"agents":null,"dco":null,"cla":null},"funding":{"github":["hosseinmoein"]}},"created_at":"2017-10-28T17:25:45.000Z","updated_at":"2025-09-03T11:38:19.000Z","dependencies_parsed_at":"2023-02-12T05:16:59.119Z","dependency_job_id":"9b1f133b-c3ae-46e3-a931-8c3a3a6c554b","html_url":"https://github.com/hosseinmoein/DataFrame","commit_stats":{"total_commits":1532,"total_committers":23,"mean_commits":66.6086956521739,"dds":0.5097911227154047,"last_synced_commit":"ba406f52fb90ca41fd69e345101b824ef14659f5"},"previous_names":[],"tags_count":34,"template":false,"template_full_name":null,"purl":"pkg:github/hosseinmoein/DataFrame","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/hosseinmoein%2FDataFrame","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/hosseinmoein%2FDataFrame/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/hosseinmoein%2FDataFrame/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/hosseinmoein%2FDataFrame/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/hosseinmoein","download_url":"https://codeload.github.com/hosseinmoein/DataFrame/tar.gz/refs/heads/master","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/hosseinmoein%2FDataFrame/sbom","scorecard":{"id":469347,"data":{"date":"2025-08-11","repo":{"name":"github.com/hosseinmoein/DataFrame","commit":"a702f9c149fa4f0ee6de3e69abb1d7e5b59888b2"},"scorecard":{"version":"v5.2.1-40-gf6ed084d","commit":"f6ed084d17c9236477efd66e5b258b9d4cc7b389"},"score":4.8,"checks":[{"name":"Maintained","score":10,"reason":"30 commit(s) and 6 issue activity found in the last 90 days -- score normalized to 10","details":null,"documentation":{"short":"Determines if the project is \"actively maintained\".","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#maintained"}},{"name":"Token-Permissions","score":-1,"reason":"internal error: internal error: invalid GitHub workflow:\n:24:0: could not parse as YAML: yaml: line 24: did not find expected key [syntax-check]","details":null,"documentation":{"short":"Determines if the project's workflows follow the principle of least privilege.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#token-permissions"}},{"name":"Dangerous-Workflow","score":-1,"reason":"internal error: internal error: invalid GitHub workflow:\n:24:0: could not parse as YAML: yaml: line 24: did not find expected key [syntax-check]","details":null,"documentation":{"short":"Determines if the project's GitHub Action workflows avoid dangerous patterns.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#dangerous-workflow"}},{"name":"Pinned-Dependencies","score":-1,"reason":"internal error: internal error: invalid GitHub workflow:\n:24:0: could not parse as YAML: yaml: line 24: did not find expected key [syntax-check]","details":null,"documentation":{"short":"Determines if the project has declared and pinned the dependencies of its build process.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#pinned-dependencies"}},{"name":"Code-Review","score":0,"reason":"Found 0/26 approved changesets -- score normalized to 0","details":null,"documentation":{"short":"Determines if the project requires human code review before pull requests (aka merge requests) are merged.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#code-review"}},{"name":"Packaging","score":-1,"reason":"internal error: internal error: invalid GitHub workflow:\n:24:0: could not parse as YAML: yaml: line 24: did not find expected key [syntax-check]","details":null,"documentation":{"short":"Determines if the project is published as a package that others can easily download, install, easily update, and uninstall.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#packaging"}},{"name":"Binary-Artifacts","score":10,"reason":"no binaries found in the repo","details":null,"documentation":{"short":"Determines if the project has generated executable (binary) artifacts in the source repository.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#binary-artifacts"}},{"name":"CII-Best-Practices","score":0,"reason":"no effort to earn an OpenSSF best practices badge detected","details":null,"documentation":{"short":"Determines if the project has an OpenSSF (formerly CII) Best Practices Badge.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#cii-best-practices"}},{"name":"Security-Policy","score":0,"reason":"security policy file not detected","details":["Warn: no security policy file detected","Warn: no security file to analyze","Warn: no security file to analyze","Warn: no security file to analyze"],"documentation":{"short":"Determines if the project has published a security policy.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#security-policy"}},{"name":"License","score":10,"reason":"license file detected","details":["Info: project has a license file: License:0","Info: FSF or OSI recognized license: BSD 3-Clause \"New\" or \"Revised\" License: License:0"],"documentation":{"short":"Determines if the project has defined a license.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#license"}},{"name":"Vulnerabilities","score":10,"reason":"0 existing vulnerabilities detected","details":null,"documentation":{"short":"Determines if the project has open, known unfixed vulnerabilities.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#vulnerabilities"}},{"name":"Fuzzing","score":0,"reason":"project is not fuzzed","details":["Warn: no fuzzer integrations found"],"documentation":{"short":"Determines if the project uses fuzzing.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#fuzzing"}},{"name":"Signed-Releases","score":-1,"reason":"no releases found","details":null,"documentation":{"short":"Determines if the project cryptographically signs release artifacts.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#signed-releases"}},{"name":"Branch-Protection","score":0,"reason":"branch protection not enabled on development/release branches","details":["Warn: branch protection not enabled for branch 'master'"],"documentation":{"short":"Determines if the default and release branches are protected with GitHub's branch protection settings.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#branch-protection"}},{"name":"SAST","score":-1,"reason":"internal error: internal error: invalid GitHub workflow:\n:24:0: could not parse as YAML: yaml: line 24: did not find expected key [syntax-check]","details":null,"documentation":{"short":"Determines if the project uses static code analysis.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#sast"}}]},"last_synced_at":"2025-08-19T13:24:55.749Z","repository_id":40137419,"created_at":"2025-08-19T13:24:55.749Z","updated_at":"2025-08-19T13:24:55.749Z"},"host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":273529550,"owners_count":25121828,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","status":"online","status_checked_at":"2025-09-03T02:00:09.631Z","response_time":76,"last_error":null,"robots_txt_status":"success","robots_txt_updated_at":"2025-07-24T06:49:26.215Z","robots_txt_url":"https://github.com/robots.txt","online":true,"can_crawl_api":true,"host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["ai","cpp","data-analysis","data-science","dataframe","financial-data-analysis","financial-engineering","heterogeneous-data","large-data","machine-learning","multidimensional-data","numerical-analysis","pandas","polars","statistical","statistical-analysis","tensor","tensorboard","trading-algorithms","trading-strategies"],"created_at":"2025-09-04T00:01:32.688Z","updated_at":"2025-09-04T00:03:38.812Z","avatar_url":"https://github.com/hosseinmoein.png","language":"C++","readme":"\u003c!--\nCopyright (c) 2019-2026, Hossein Moein\nAll rights reserved.\n\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions are met:\n* Redistributions of source code must retain the above copyright\nnotice, this list of conditions and the following disclaimer.\n* Redistributions in binary form must reproduce the above copyright\nnotice, this list of conditions and the following disclaimer in the\ndocumentation and/or other materials provided with the distribution.\n* Neither the name of Hossein Moein and/or the DataFrame nor the\nnames of its contributors may be used to endorse or promote products\nderived from this software without specific prior written permission.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND\nANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\nWARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\nDISCLAIMED. IN NO EVENT SHALL Hossein Moein BE LIABLE FOR ANY\nDIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\nLOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND\nON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\nSOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n--\u003e\n\n[![C++23](https://img.shields.io/badge/C%2B%2B-23-blue.svg)](https://isocpp.org/std/the-standard )\n[![Build status](https://ci.appveyor.com/api/projects/status/hjw01qui3bvxs8yi?svg=true)](https://ci.appveyor.com/project/hosseinmoein/dataframe)\n[![Codacy Badge](https://api.codacy.com/project/badge/Grade/db646376a4014c3788c7224e670fe451)](https://app.codacy.com/organizations/gh/hosseinmoein/repositories)\n\u003cBR\u003e\n[![GitHub tag (latest by date)](https://img.shields.io/github/tag-date/hosseinmoein/DataFrame.svg?color=blue\u0026label=Official%20Release\u0026style=popout)](https://github.com/hosseinmoein/DataFrame/releases)\n[![Conan Center](https://img.shields.io/conan/v/dataframe)](https://conan.io/center/recipes/dataframe)\n[![VCPKG package](https://repology.org/badge/version-for-repo/vcpkg/dataframe.svg)](https://vcpkg.link/ports/dataframe)\n\n\u003cimg src=\"docs/LionLookingUp.jpg\" alt=\"DataFrame Lion\" width=\"400\"/\u003e\n\n## \u003ca href=\"https://htmlpreview.github.io/?https://github.com/hosseinmoein/DataFrame/blob/master/docs/HTML/DataFrame.html\" target=\"_blank\"\u003e\u003cB\u003eDataFrame documentation with code samples\u003c/B\u003e\u003c/a\u003e\n\nThis is a C++ analytical library designed for data analysis similar to libraries in Python and R. For example, you would compare this to [Pandas](https://pandas.pydata.org) or [R data.frame](https://www.w3schools.com/r/r_data_frames.asp). The depth and breadth of functionalities offered by C++ DataFrame alone are greater than functionalities offered by packages such as Pandas, data.frame, and Polars combined.\u003cBR\u003e\nYou can slice the data in many different ways. You can join, merge, group-by the data. You can run various statistical, summarization, financial, and ML algorithms on the data. You can add your custom algorithms easily. You can multi-column sort, custom pick and delete the data. And more …\u003cBR\u003e\nDataFrame also includes a large collection of analytical algorithms in form of visitors. These are from basic stats such as \u003cI\u003eMean\u003c/I\u003e, \u003cI\u003eStd Deviation\u003c/I\u003e, \u003cI\u003emoving averages\u003c/I\u003e, ... to more involved analysis such as \u003cI\u003ePCA\u003c/I\u003e, \u003cI\u003ePolynomial Fit\u003c/I\u003e, \u003cI\u003eFast Fourier transform\u003c/I\u003e ... including a good collection of trading indicators. You can also easily add your own algorithms.\u003cBR\u003e\nDataFrame also employs extensive multithreading in almost all its API’s, for large datasets. That makes DataFrame especially suitable for analyzing large datasets.\u003cBR\u003e\nFor basic operations to start you off, see [Hello World](examples/hello_world.cc) and/or [Cheat Sheet](https://docs.google.com/viewer?url=https://raw.githubusercontent.com/hosseinmoein/DataFrame/master/docs/CheatSheet.pdf). For a complete list of features with code samples, see \u003ca href=\"https://htmlpreview.github.io/?https://github.com/hosseinmoein/DataFrame/blob/master/docs/HTML/DataFrame.html\" target=\"_blank\"\u003edocumentation\u003c/a\u003e.\n\nI have followed a few \u003cB\u003eprinciples in this library\u003c/B\u003e:\u003cBR\u003e\n\n1. [Support any type either built-in or user defined without needing new code](https://htmlpreview.github.io/?https://github.com/hosseinmoein/DataFrame/blob/master/docs/HTML/any_type.html)\n2. [Never chase pointers ala _linked lists_, _std::any_, _pointer to base_, ...](https://htmlpreview.github.io/?https://github.com/hosseinmoein/DataFrame/blob/master/docs/HTML/pointers.html)\n3. [Have all column data in contiguous memory space](https://htmlpreview.github.io/?https://github.com/hosseinmoein/DataFrame/blob/master/docs/HTML/contiguous_memory.html)\n4. [Never use more space than you need ala _unions_, _std::variant_, ...](https://htmlpreview.github.io/?https://github.com/hosseinmoein/DataFrame/blob/master/docs/HTML/std_variant.html)\n5. [Avoid copying data as much as possible](https://htmlpreview.github.io/?https://github.com/hosseinmoein/DataFrame/blob/master/docs/HTML/copying_data.html)\n6. [Use multi-threading but only when it makes sense](https://htmlpreview.github.io/?https://github.com/hosseinmoein/DataFrame/blob/master/docs/HTML/multithreading.html)\n7. [Do not attempt to protect the user against _garbage in_, _garbage out_](https://htmlpreview.github.io/?https://github.com/hosseinmoein/DataFrame/blob/master/docs/HTML/garbage_in_garbage_out.html)\n8. [Keep DataFrame library self-contained, meaning DataFrame must only depend on _C++ language_ and its _standard library_](https://htmlpreview.github.io/?https://github.com/hosseinmoein/DataFrame/blob/master/docs/HTML/self_contained.html)\n\n---\n\n### Performance\n\nYou have probably heard of Polars DataFrame. It is implemented in Rust and ported with zero-overhead to Python (as long as you don’t have a loop). I have been asked by many people to write a comparison for \u003cB\u003eDataFrame vs. Polars\u003c/B\u003e. So, I finally found some time to learn a bit about Polars and write a very simple benchmark.\u003cBR\u003e\nI wrote the following identical programs for both Polars and C++ DataFrame (and Pandas). I used Polars version: 0.19.14 (Pandas version: 1.5.3, Numpy version: 1.24.2). And I used C++23 GCC-14 compiler with -O3 option. I ran both on my, somewhat outdated, MacBook Pro (Intel chip, 96GB RAM).\u003cBR\u003e\nIn both cases, I created a dataframe with 3 random columns. The C++ DataFrame also required an additional index column of the same size. Polars doesn’t believe in index columns (that has its own pros and cons. I am not going through it here).\nEach program has three identical parts. First it generates and populates 3 columns with 300m random numbers each (in case of C++ DataFrame, it must also generate a sequential index column of the same size). That is the part I am _not_ interested in. In the second part, it calculates the mean of the first column, the variance of the second column, and the Pearson correlation of the second and third columns. In the third part, it does a select (or filter as Polars calls it) on one of the columns.\n\n**Results**:\u003cBR\u003e\nThe maximum dataset I could load into Polars was 300m rows per column. Any bigger dataset blew up the memory and caused OS to kill it. I ran C++ DataFrame with 10b rows per column and I am sure it would have run with bigger datasets too. So, I was forced to run both with 300m rows to compare.\nI ran each test 4 times and took the best time. Polars numbers varied a lot from one run to another, especially calculation and selection times. C++ DataFrame numbers were significantly more consistent.\n\n\n|                      | [\u003cB\u003eC++ DataFrame\u003c/B\u003e](https://github.com/hosseinmoein/DataFrame/blob/master/benchmarks/dataframe_performance.cc) | [\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u003cB\u003ePolars\u003c/B\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;](https://github.com/hosseinmoein/DataFrame/blob/master/benchmarks/polars_performance.py) | [\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u003cB\u003ePandas\u003c/B\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;](https://github.com/hosseinmoein/DataFrame/blob/master/benchmarks/pandas_performance.py) |\n| :--------------------- | ------------------------------------------------------------------------------------------------------------------: | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |\n| Data Generation Time |                                                                                                      26.9459 secs |                                                                                                                                                                    28.4686 secs |                                                                                                                                                                    36.6799 secs |\n| Calculation Time     |                                                                                                       1.2602 secs |                                                                                                                                                                     4.8766 secs |                                                                                                                                                                    40.3264 secs |\n| Selection Time       |                                                                                                       0.4215 secs |                                                                                                                                                                     3.8766 secs |                                                                                                                                                                     8.3264 secs |\n| Overall Time         |                                                                                                      28.9486 secs |                                                                                                                                                                    36.8763 secs |                                                                                                                                                                    85.8451 secs |\n\n---\n\n[**Please consider sponsoring DataFrame, especially if you are using it in production capacity. It is the strongest form of appreciation**](https://github.com/sponsors/hosseinmoein)\n\n---\n","funding_links":["https://github.com/sponsors/hosseinmoein"],"categories":[],"sub_categories":[],"project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fhosseinmoein%2Fdataframe","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fhosseinmoein%2Fdataframe","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fhosseinmoein%2Fdataframe/lists"}