{"id":20840245,"url":"https://github.com/stefandeveloper/heidgaf","last_synced_at":"2026-05-08T02:02:59.357Z","repository":{"id":229174665,"uuid":"661719296","full_name":"stefanDeveloper/heiDGAF","owner":"stefanDeveloper","description":"heiDGAF - a machine learning based DNS analyzer to detect DGAs","archived":false,"fork":false,"pushed_at":"2024-04-11T12:06:31.000Z","size":1836,"stargazers_count":1,"open_issues_count":0,"forks_count":0,"subscribers_count":2,"default_branch":"main","last_synced_at":"2024-04-11T13:31:14.015Z","etag":null,"topics":["cybersecurity","dga","dns","machine-learning","network-analysis"],"latest_commit_sha":null,"homepage":"https://heidgaf.readthedocs.io","language":"Jupyter Notebook","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"eupl-1.2","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/stefanDeveloper.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":"LICENSE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null}},"created_at":"2023-07-03T13:46:26.000Z","updated_at":"2024-04-15T13:33:21.077Z","dependencies_parsed_at":"2024-04-11T13:29:09.310Z","dependency_job_id":null,"html_url":"https://github.com/stefanDeveloper/heiDGAF","commit_stats":null,"previous_names":["stefandeveloper/heidgaf"],"tags_count":0,"template":false,"template_full_name":null,"repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/stefanDeveloper%2FheiDGAF","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/stefanDeveloper%2FheiDGAF/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/stefanDeveloper%2FheiDGAF/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/stefanDeveloper%2FheiDGAF/manifests","o
wner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/stefanDeveloper","download_url":"https://codeload.github.com/stefanDeveloper/heiDGAF/tar.gz/refs/heads/main","host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":243196658,"owners_count":20251861,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["cybersecurity","dga","dns","machine-learning","network-analysis"],"created_at":"2024-11-18T01:15:43.952Z","updated_at":"2025-12-26T02:42:01.706Z","avatar_url":"https://github.com/stefanDeveloper.png","language":"Jupyter Notebook","funding_links":[],"categories":[],"sub_categories":[],"readme":"\u003ca id=\"readme-top\"\u003e\u003c/a\u003e\n\n\u003c!-- PROJECT SHIELDS --\u003e\n\u003cdiv align=\"center\"\u003e\n\n[![Codecov Coverage][coverage-shield]][coverage-url]\n[![Contributors][contributors-shield]][contributors-url]\n[![Forks][forks-shield]][forks-url]\n[![Stargazers][stars-shield]][stars-url]\n[![Issues][issues-shield]][issues-url]\n[![EUPL License][license-shield]][license-url]\n\n\n\u003c/div\u003e\n\n\u003c!-- PROJECT LOGO --\u003e\n\u003cbr /\u003e\n\u003cdiv align=\"center\"\u003e\n  \u003ca href=\"https://github.com/stefanDeveloper/heiDGAF\"\u003e\n    \u003cimg src=\"https://raw.githubusercontent.com/stefanDeveloper/heiDGAF/main/assets/heidgaf_logo_normal.png?raw=true\" alt=\"Logo\"\u003e\n  \u003c/a\u003e\n\n\u003ch3 align=\"center\"\u003eheiDGAF - Domain Generation Algorithms Finder\u003c/h3\u003e\n\n  \u003cp align=\"center\"\u003e\n    Machine learning-based DNS classifier for detecting Domain Generation Algorithms 
(DGAs), tunneling, and data exfiltration by malicious actors.\n    \u003cbr /\u003e\n    \u003ca href=\"https://heidgaf.readthedocs.io/en/latest/\"\u003e\u003cstrong\u003eExplore the docs »\u003c/strong\u003e\u003c/a\u003e\n    \u003cbr /\u003e\n    \u003cbr /\u003e\n    \u003ca href=\"https://github.com/stefanDeveloper/heiDGAF/issues/new?labels=bug\u0026template=bug-report---.md\"\u003eReport Bug\u003c/a\u003e\n    ·\n    \u003ca href=\"https://github.com/stefanDeveloper/heiDGAF/issues/new?labels=enhancement\u0026template=feature-request---.md\"\u003eRequest Feature\u003c/a\u003e\n  \u003c/p\u003e\n\u003c/div\u003e\n\n\u003e [!CAUTION]\n\u003e This project has been moved to https://github.com/Hamstring-NDR/hamstring. Future development, issues, and releases will be maintained there.\n\n\u003ctable\u003e\n\u003ctr\u003e\n  \u003ctd\u003e\u003cb\u003eContinuous Integration\u003c/b\u003e\u003c/td\u003e\n  \u003ctd\u003e\n    \u003ca href=\"https://github.com/stefanDeveloper/heiDGAF/actions/workflows/build_test_linux.yml\"\u003e\n    \u003cimg src=\"https://img.shields.io/github/actions/workflow/status/stefanDeveloper/heiDGAF/build_test_linux.yml?branch=main\u0026logo=linux\u0026style=for-the-badge\u0026label=linux\" alt=\"Linux WorkFlows\" /\u003e\n    \u003c/a\u003e\n    \u003ca href=\"https://github.com/stefanDeveloper/heiDGAF/actions/workflows/build_test_macos.yml\"\u003e\n    \u003cimg src=\"https://img.shields.io/github/actions/workflow/status/stefanDeveloper/heiDGAF/build_test_macos.yml?branch=main\u0026logo=apple\u0026style=for-the-badge\u0026label=macos\" alt=\"MacOS WorkFlows\" /\u003e\n    \u003c/a\u003e\n    \u003ca href=\"https://github.com/stefanDeveloper/heiDGAF/actions/workflows/build_test_windows.yml\"\u003e\n    \u003cimg src=\"https://img.shields.io/github/actions/workflow/status/stefanDeveloper/heiDGAF/build_test_windows.yml?branch=main\u0026logo=windows\u0026style=for-the-badge\u0026label=windows\" alt=\"Windows WorkFlows\" /\u003e\n    
\u003c/a\u003e\n  \u003c/td\u003e\n\u003c/tr\u003e\n\u003c/table\u003e\n\n## About the Project\n\n![Pipeline overview](https://raw.githubusercontent.com/stefanDeveloper/heiDGAF/main/docs/media/heidgaf_overview_detailed.drawio.png?raw=true)\n\n## Getting Started\n\n#### Run **heiDGAF** using Docker Compose:\n\n```sh\nHOST_IP=127.0.0.1 docker compose -f docker/docker-compose.yml up\n```\n\u003cp align=\"center\"\u003e\n  \u003cimg src=\"https://raw.githubusercontent.com/stefanDeveloper/heiDGAF/main/assets/terminal_example.gif?raw=true\" alt=\"Terminal example\"/\u003e\n\u003c/p\u003e\n\n#### Or run the modules locally on your machine:\n```sh\npython -m venv .venv\nsource .venv/bin/activate\n\nsh install_requirements.sh\n```\nAlternatively, you can use `pip install` and enter all needed requirements individually with `-r requirements.*.txt`.\n\nNow, you can start each stage, e.g. the inspector:\n\n```sh\npython src/inspector/inspector.py\n```\n\n\u003cp align=\"right\"\u003e(\u003ca href=\"#readme-top\"\u003eback to top\u003c/a\u003e)\u003c/p\u003e\n\n\n## Usage\n\n### Configuration\n\nTo configure **heiDGAF** according to your needs, use the provided `config.yaml`.\n\nThe most relevant settings are related to your specific log line format, the model you want to use, and\npossibly infrastructure.\n\nThe section `pipeline.log_collection.collector.logline_format` has to be adjusted to reflect your specific input log\nline format. Using our adjustable and flexible log line configuration, you can rename, reorder and fully configure each\nfield of a valid log line. Freely define timestamps, RegEx patterns, lists, and IP addresses. 
For example, your\nconfiguration might look as follows:\n\n```yml\n- [ \"timestamp\", Timestamp, \"%Y-%m-%dT%H:%M:%S.%fZ\" ]\n- [ \"status_code\", ListItem, [ \"NOERROR\", \"NXDOMAIN\" ], [ \"NXDOMAIN\" ] ]\n- [ \"client_ip\", IpAddress ]\n- [ \"dns_server_ip\", IpAddress ]\n- [ \"domain_name\", RegEx, '^(?=.{1,253}$)((?!-)[A-Za-z0-9-]{1,63}(?\u003c!-)\\.)+[A-Za-z]{2,63}$' ]\n- [ \"record_type\", ListItem, [ \"A\", \"AAAA\" ] ]\n- [ \"response_ip\", IpAddress ]\n- [ \"size\", RegEx, '^\\d+b$' ]\n```\n\nThe options `pipeline.data_inspection` and `pipeline.data_analysis` are relevant for configuring the model. The section\n`environment` can be fine-tuned to prevent naming collisions for Kafka topics and adjust addressing in your environment.\n\nFor more in-depth information on your options, have a look at our\n[official documentation](https://heidgaf.readthedocs.io/en/latest/usage.html), where we provide tables explaining all\nvalues in detail.\n\n### Monitoring\nTo monitor the system and observe its real-time behavior, multiple Grafana dashboards have been set up.\n\nHave a look at the following pictures showing examples of how these dashboards might look at runtime.\n\n\u003cdetails\u003e\n  \u003csummary\u003e\u003cstrong\u003eOverview\u003c/strong\u003e dashboard\u003c/summary\u003e\n\n  Contains the most relevant information on the system's runtime behavior, its efficiency and its effectiveness.\n\n  \u003cp align=\"center\"\u003e\n    \u003ca href=\"./assets/readme_assets/overview.png\"\u003e\n      \u003cimg src=\"./assets/readme_assets/overview.png\" alt=\"Overview Dashboard\" width=\"90%\"/\u003e\n    \u003c/a\u003e\n  \u003c/p\u003e\n\n\u003c/details\u003e\n\n\u003cdetails\u003e\n  \u003csummary\u003e\u003cstrong\u003eLatencies\u003c/strong\u003e dashboard\u003c/summary\u003e\n\n  Presents any information on latencies, including comparisons between the modules and more detailed,\n  stand-alone metrics.\n\n  \u003cp align=\"center\"\u003e\n    \u003ca 
href=\"./assets/readme_assets/latencies.jpeg\"\u003e\n      \u003cimg src=\"./assets/readme_assets/latencies.jpeg\" alt=\"Latencies Dashboard\" width=\"90%\"/\u003e\n    \u003c/a\u003e\n  \u003c/p\u003e\n\n\u003c/details\u003e\n\n\u003cdetails\u003e\n  \u003csummary\u003e\u003cstrong\u003eLog Volumes\u003c/strong\u003e dashboard\u003c/summary\u003e\n\n  Presents any information on the fill levels of each module, i.e. the number of entries that are currently in the\n  module for processing. Includes comparisons between the modules, more detailed, stand-alone metrics, as well as\n  total numbers of logs entering the pipeline or being marked as fully processed.\n\n  \u003cp align=\"center\"\u003e\n    \u003ca href=\"./assets/readme_assets/log_volumes.jpeg\"\u003e\n      \u003cimg src=\"./assets/readme_assets/log_volumes.jpeg\" alt=\"Log Volumes Dashboard\" width=\"90%\"/\u003e\n    \u003c/a\u003e\n  \u003c/p\u003e\n\n\u003c/details\u003e\n\n\u003cdetails\u003e\n  \u003csummary\u003e\u003cstrong\u003eAlerts\u003c/strong\u003e dashboard\u003c/summary\u003e\n\n  Presents details on the number of logs detected as malicious including IP addresses responsible for those alerts.\n\n  \u003cp align=\"center\"\u003e\n    \u003ca href=\"./assets/readme_assets/alerts.png\"\u003e\n      \u003cimg src=\"./assets/readme_assets/alerts.png\" alt=\"Alerts Dashboard\" width=\"90%\"/\u003e\n    \u003c/a\u003e\n  \u003c/p\u003e\n\n\u003c/details\u003e\n\n\u003cdetails\u003e\n  \u003csummary\u003e\u003cstrong\u003eDataset\u003c/strong\u003e dashboard\u003c/summary\u003e\n\n  This dashboard is only active for the **_datatest_** mode. 
Users who want to test their own models can use this mode\n  for inspecting confusion matrices on testing data.\n\n  \u003e This feature is in a very early development stage.\n\n  \u003cp align=\"center\"\u003e\n    \u003ca href=\"./assets/readme_assets/datatests.png\"\u003e\n      \u003cimg src=\"./assets/readme_assets/datatests.png\" alt=\"Dataset Dashboard\" width=\"80%\"/\u003e\n    \u003c/a\u003e\n  \u003c/p\u003e\n\n\u003c/details\u003e\n\n\u003cp align=\"right\"\u003e(\u003ca href=\"#readme-top\"\u003eback to top\u003c/a\u003e)\u003c/p\u003e\n\n\n## Models and Training\n\nTo train and test our and possibly your own models, we currently rely on the following datasets:\n\n- [CICBellDNS2021](https://www.unb.ca/cic/datasets/dns-2021.html)\n- [DGTA Benchmark](https://data.mendeley.com/datasets/2wzf9bz7xr/1)\n- [DNS Tunneling Queries for Binary Classification](https://data.mendeley.com/datasets/mzn9hvdcxg/1)\n- [UMUDGA - University of Murcia Domain Generation Algorithm Dataset](https://data.mendeley.com/datasets/y8ph45msv8/1)\n- [DGArchive](https://dgarchive.caad.fkie.fraunhofer.de/)\n\nWe compute all features separately and only rely on the `domain` and `class` for binary classification.\n\n### Inserting Data for Testing\n\nFor testing purposes, we provide multiple scripts in the `scripts` directory. Use `real_logs.dev.py` to send data from\nthe datasets into the pipeline. After downloading the dataset and storing it under `\u003cproject-root\u003e/data`, run\n```sh\npython scripts/real_logs.dev.py\n```\nto start continuously inserting dataset traffic.\n\n### Training Your Own Models\n\n\u003e [!IMPORTANT]\n\u003e This is only a brief wrap-up of a custom training process.\n\u003e We highly encourage you to have a look at the [documentation](https://heidgaf.readthedocs.io/en/latest/training.html)\n\u003e for a full description and explanation of the configuration parameters.\n\nWe feature two trained models:\n1. XGBoost (`src/train/model.py#XGBoostModel`) and\n2. 
RandomForest (`src/train/model.py#RandomForestModel`).\n\nAfter installing the requirements, use `src/train/train.py`:\n\n```sh\n\u003e python -m venv .venv\n\u003e source .venv/bin/activate\n\n\u003e pip install -r requirements/requirements.train.txt\n\n\u003e python src/train/train.py\nUsage: train.py [OPTIONS] COMMAND [ARGS]...\n\nOptions:\n  -h, --help  Show this message and exit.\n\nCommands:\n  explain\n  test\n  train\n```\n\nSetting up the [dataset directories](#inserting-data-for-testing) (and adding the code for your model class if applicable) lets you start\nthe training process by running the following commands:\n\n#### Model Training\n\n```sh\n\u003e python src/train/train.py train  --dataset \u003cdataset_type\u003e --dataset_path \u003cpath/to/your/datasets\u003e --model \u003cmodel_name\u003e\n```\nBy default, the results are saved to `./results` unless configured otherwise.\n\n#### Model Tests\n\n```sh\n\u003e python src/train/train.py test  --dataset \u003cdataset_type\u003e --dataset_path \u003cpath/to/your/datasets\u003e --model \u003cmodel_name\u003e --model_path \u003cpath_to_model_version\u003e\n```\n\n#### Model Explain\n\n```sh\n\u003e python src/train/train.py explain  --dataset \u003cdataset_type\u003e --dataset_path \u003cpath/to/your/datasets\u003e --model \u003cmodel_name\u003e --model_path \u003cpath_to_model_version\u003e\n```\nThis will create a `rules.txt` file containing the innards of the model, explaining the rules it created.\n\n\u003cp align=\"right\"\u003e(\u003ca href=\"#readme-top\"\u003eback to top\u003c/a\u003e)\u003c/p\u003e\n\n\n\u003c!-- CONTRIBUTING --\u003e\n## Contributing\n\nContributions are what make the open source community such an amazing place to learn, inspire, and create. Any\ncontributions you make are **greatly appreciated**.\n\nIf you have a suggestion that would make this better, please fork the repo and create a pull request. 
You can also\nsimply open an issue with the tag \"enhancement\".\nDon't forget to give the project a star! Thanks again!\n\n### Top contributors:\n\n\u003ca href=\"https://github.com/stefanDeveloper/heiDGAF/graphs/contributors\"\u003e\n  \u003cimg src=\"https://contrib.rocks/image?repo=stefanDeveloper/heiDGAF\" alt=\"contrib.rocks image\" /\u003e\n\u003c/a\u003e\n\n\n\u003cp align=\"right\"\u003e(\u003ca href=\"#readme-top\"\u003eback to top\u003c/a\u003e)\u003c/p\u003e\n\n\u003c!-- LICENSE --\u003e\n\n## License\n\nDistributed under the EUPL License. See `LICENSE` for more information.\n\n\u003cp align=\"right\"\u003e(\u003ca href=\"#readme-top\"\u003eback to top\u003c/a\u003e)\u003c/p\u003e\n\n\n\u003c!-- MARKDOWN LINKS \u0026 IMAGES --\u003e\n\u003c!-- https://www.markdownguide.org/basic-syntax/#reference-style-links --\u003e\n\n[contributors-shield]: https://img.shields.io/github/contributors/stefanDeveloper/heiDGAF.svg?style=for-the-badge\n\n[contributors-url]: https://github.com/stefanDeveloper/heiDGAF/graphs/contributors\n\n[forks-shield]: https://img.shields.io/github/forks/stefanDeveloper/heiDGAF.svg?style=for-the-badge\n\n[forks-url]: https://github.com/stefanDeveloper/heiDGAF/network/members\n\n[stars-shield]: https://img.shields.io/github/stars/stefanDeveloper/heiDGAF.svg?style=for-the-badge\n\n[stars-url]: https://github.com/stefanDeveloper/heiDGAF/stargazers\n\n[issues-shield]: https://img.shields.io/github/issues/stefanDeveloper/heiDGAF.svg?style=for-the-badge\n\n[issues-url]: https://github.com/stefanDeveloper/heiDGAF/issues\n\n[license-shield]: https://img.shields.io/github/license/stefanDeveloper/heiDGAF.svg?style=for-the-badge\n\n[license-url]: https://github.com/stefanDeveloper/heiDGAF/blob/main/LICENSE\n\n[coverage-shield]: https://img.shields.io/codecov/c/github/stefanDeveloper/heiDGAF?style=for-the-badge\n\n[coverage-url]: 
https://app.codecov.io/github/stefanDeveloper/heiDGAF\n","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fstefandeveloper%2Fheidgaf","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fstefandeveloper%2Fheidgaf","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fstefandeveloper%2Fheidgaf/lists"}