{"id":25740139,"url":"https://github.com/datadotworld/cwd-benchmark-data","last_synced_at":"2025-10-09T00:50:31.185Z","repository":{"id":207240028,"uuid":"704233222","full_name":"datadotworld/cwd-benchmark-data","owner":"datadotworld","description":"Data for the Chat With Your Data benchmark. ","archived":false,"fork":false,"pushed_at":"2023-12-01T10:24:37.000Z","size":38,"stargazers_count":136,"open_issues_count":7,"forks_count":25,"subscribers_count":9,"default_branch":"main","last_synced_at":"2025-05-08T21:14:23.258Z","etag":null,"topics":["dwstruct-t50-public-projects"],"latest_commit_sha":null,"homepage":"","language":"Shell","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"apache-2.0","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/datadotworld.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":"LICENSE.txt","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null}},"created_at":"2023-10-12T20:28:44.000Z","updated_at":"2025-04-26T18:50:04.000Z","dependencies_parsed_at":"2023-11-14T20:24:51.782Z","dependency_job_id":"34e506bb-c5f5-4397-9a85-35c1860a661d","html_url":"https://github.com/datadotworld/cwd-benchmark-data","commit_stats":null,"previous_names":["datadotworld/cwd-benchmark-data"],"tags_count":0,"template":false,"template_full_name":null,"repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/datadotworld%2Fcwd-benchmark-data","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/datadotworld%2Fcwd-benchmark-data/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/datadotworld%2Fcwd-benchmark-data/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/datadotworld%2Fcwd-benchmark-data/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/datadotworld","download_url":"https://codeload.github.com/datadotworld/cwd-benchmark-data/tar.gz/refs/heads/main","host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":253149618,"owners_count":21861740,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["dwstruct-t50-public-projects"],"created_at":"2025-02-26T08:36:39.582Z","updated_at":"2025-10-09T00:50:26.154Z","avatar_url":"https://github.com/datadotworld.png","language":"Shell","funding_links":[],"categories":[],"sub_categories":[],"readme":"# Chat with your Data (cwd) Benchmark Data\n\n## Introduction \n\nThis repository contains the data and metadata for the \"Chat with your Data\" benchmark. The aim of this project is to provide a comprehensive set of test scenarios for Language-to-query (specifically SQL and SPARQL) systems. \n\nIt focuses on testing whether these systems are capable of accurately converting natural language questions into valid, effective queries against various data sources. \n\n## Repository Structure\n\nThis repository is divided into multiple directories, each containing a specific type of data or metadata:\n\n- `ontology/`: This directory contains OWL file(s) representing the ontology data. \n\n- `DDL/`: This directory contains the DDL definitions for the database schema. \n\n- `investigation/`: Each Turtle (.ttl) file in this directory represents a complete benchmark investigation, which includes pointers to the dataset, metadata, and a set of inquiries. \n\n- `data/`: This directory contains the dataset(s) used for the benchmark. The data is represented in multiple formats to support a wide range of query languages.  In addition to the CSV files, there is an R2RML file that describes the mapping between the ontology and the data tables. \n\n## File Formats\n\n- OWL: Web Ontology Language, used for representing the ontology data.\n\n- DDL: Data Definition Language, used for defining and managing databases.\n\n- TTL: Turtle form of RDF, used to represent the complete benchmark investigation.\n\n - R2RML: a TTL file that describes mappings according to the [RDB to RDF Mapping Language](https://www.w3.org/TR/r2rml/)\n \n- CSV/TSV/etc.: Various data formats used for the benchmark dataset.\n\n","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fdatadotworld%2Fcwd-benchmark-data","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fdatadotworld%2Fcwd-benchmark-data","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fdatadotworld%2Fcwd-benchmark-data/lists"}