{"id":32388703,"url":"https://github.com/data-integrations/wrangler","last_synced_at":"2025-10-25T03:55:15.743Z","repository":{"id":39633602,"uuid":"74907921","full_name":"data-integrations/wrangler","owner":"data-integrations","description":"Wrangler Transform: A DMD system for transforming Big Data","archived":false,"fork":false,"pushed_at":"2025-08-20T11:08:34.000Z","size":6518,"stargazers_count":107,"open_issues_count":242,"forks_count":1201,"subscribers_count":9,"default_branch":"develop","last_synced_at":"2025-09-08T21:18:27.462Z","etag":null,"topics":["avro","big-data","cdap","cdap-plugin","data-cleansing","data-prep","data-science","data-transform","data-transformation","manipulate-data","parsing","preparation","project","transform","transform-data","wrangle"],"latest_commit_sha":null,"homepage":"http://docs.cdap.io/cdap/current/en/developers-manual/pipelines/index.html","language":"Java","has_issues":false,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"apache-2.0","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/data-integrations.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":"LICENSE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":"SECURITY.md","support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null,"zenodo":null}},"created_at":"2016-11-27T19:01:32.000Z","updated_at":"2025-08-13T11:16:08.000Z","dependencies_parsed_at":"2024-02-13T06:29:42.061Z","dependency_job_id":"901363a9-a10e-467f-8342-26130b07b42a","html_url":"https://github.com/data-integrations/wrangler","commit_stats":null,"previous_names":["hydrator/wrangler-transform","hydrator/wrangler"],"tags_count":33,"template":false,"template_full_name":null,"purl":"pkg:github/data-integrations/wrangler","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/reposit
ories/data-integrations%2Fwrangler","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/data-integrations%2Fwrangler/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/data-integrations%2Fwrangler/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/data-integrations%2Fwrangler/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/data-integrations","download_url":"https://codeload.github.com/data-integrations/wrangler/tar.gz/refs/heads/develop","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/data-integrations%2Fwrangler/sbom","scorecard":{"id":324133,"data":{"date":"2025-08-11","repo":{"name":"github.com/data-integrations/wrangler","commit":"8c4495a3d8a5545837fd7e26f5ac12567a8b8d1d"},"scorecard":{"version":"v5.2.1-40-gf6ed084d","commit":"f6ed084d17c9236477efd66e5b258b9d4cc7b389"},"score":6.9,"checks":[{"name":"Security-Policy","score":10,"reason":"security policy file detected","details":["Info: security policy file detected: SECURITY.md:1","Info: Found linked content: SECURITY.md:1","Info: Found disclosure, vulnerability, and/or timelines in security policy: SECURITY.md:1","Info: Found text in security policy: SECURITY.md:1"],"documentation":{"short":"Determines if the project has published a security policy.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#security-policy"}},{"name":"Code-Review","score":10,"reason":"all changesets reviewed","details":null,"documentation":{"short":"Determines if the project requires human code review before pull requests (aka merge requests) are merged.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#code-review"}},{"name":"Packaging","score":-1,"reason":"packaging workflow not detected","details":["Warn: no GitHub/GitLab publishing workflow detected."],"documentation":{"short":"Determines if the 
project is published as a package that others can easily download, install, easily update, and uninstall.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#packaging"}},{"name":"Maintained","score":10,"reason":"14 commit(s) and 0 issue activity found in the last 90 days -- score normalized to 10","details":null,"documentation":{"short":"Determines if the project is \"actively maintained\".","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#maintained"}},{"name":"Dangerous-Workflow","score":10,"reason":"no dangerous workflow patterns detected","details":null,"documentation":{"short":"Determines if the project's GitHub Action workflows avoid dangerous patterns.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#dangerous-workflow"}},{"name":"Token-Permissions","score":0,"reason":"detected GitHub workflow tokens with excessive permissions","details":["Info: topLevel 'actions' permission set to 'read': .github/workflows/build-report.yml:25","Warn: topLevel 'statuses' permission set to 'write': .github/workflows/build-report.yml:26","Warn: topLevel 'checks' permission set to 'write': .github/workflows/build-report.yml:27","Warn: no topLevel permission defined: .github/workflows/build.yml:1","Warn: no topLevel permission defined: .github/workflows/e2e.yml:1","Info: no jobLevel write permissions found"],"documentation":{"short":"Determines if the project's workflows follow the principle of least privilege.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#token-permissions"}},{"name":"CII-Best-Practices","score":0,"reason":"no effort to earn an OpenSSF best practices badge detected","details":null,"documentation":{"short":"Determines if the project has an OpenSSF (formerly CII) Best Practices 
Badge.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#cii-best-practices"}},{"name":"Binary-Artifacts","score":10,"reason":"no binaries found in the repo","details":null,"documentation":{"short":"Determines if the project has generated executable (binary) artifacts in the source repository.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#binary-artifacts"}},{"name":"License","score":10,"reason":"license file detected","details":["Info: project has a license file: LICENSE:0","Info: FSF or OSI recognized license: Apache License 2.0: LICENSE:0"],"documentation":{"short":"Determines if the project has defined a license.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#license"}},{"name":"Signed-Releases","score":-1,"reason":"no releases found","details":null,"documentation":{"short":"Determines if the project cryptographically signs release artifacts.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#signed-releases"}},{"name":"Fuzzing","score":0,"reason":"project is not fuzzed","details":["Warn: no fuzzer integrations found"],"documentation":{"short":"Determines if the project uses fuzzing.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#fuzzing"}},{"name":"Branch-Protection","score":-1,"reason":"internal error: error during branchesHandler.setup: internal error: githubv4.Query: Resource not accessible by integration","details":null,"documentation":{"short":"Determines if the default and release branches are protected with GitHub's branch protection settings.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#branch-protection"}},{"name":"Pinned-Dependencies","score":5,"reason":"dependency not pinned by hash detected -- score normalized to 
5","details":["Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/build-report.yml:40: update your workflow using https://app.stepsecurity.io/secureworkflow/data-integrations/wrangler/build-report.yml/develop?enable=pin","Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/build.yml:40: update your workflow using https://app.stepsecurity.io/secureworkflow/data-integrations/wrangler/build.yml/develop?enable=pin","Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/build.yml:44: update your workflow using https://app.stepsecurity.io/secureworkflow/data-integrations/wrangler/build.yml/develop?enable=pin","Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/build.yml:53: update your workflow using https://app.stepsecurity.io/secureworkflow/data-integrations/wrangler/build.yml/develop?enable=pin","Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/e2e.yml:48: update your workflow using https://app.stepsecurity.io/secureworkflow/data-integrations/wrangler/e2e.yml/develop?enable=pin","Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/e2e.yml:64: update your workflow using https://app.stepsecurity.io/secureworkflow/data-integrations/wrangler/e2e.yml/develop?enable=pin","Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/e2e.yml:70: update your workflow using https://app.stepsecurity.io/secureworkflow/data-integrations/wrangler/e2e.yml/develop?enable=pin","Warn: GitHub-owned GitHubAction not pinned by hash: .github/workflows/e2e.yml:86: update your workflow using https://app.stepsecurity.io/secureworkflow/data-integrations/wrangler/e2e.yml/develop?enable=pin","Warn: third-party GitHubAction not pinned by hash: .github/workflows/e2e.yml:93: update your workflow using https://app.stepsecurity.io/secureworkflow/data-integrations/wrangler/e2e.yml/develop?enable=pin","Info:   0 out of   8 GitHub-owned GitHubAction dependencies pinned","Info:   3 out 
of   4 third-party GitHubAction dependencies pinned"],"documentation":{"short":"Determines if the project has declared and pinned the dependencies of its build process.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#pinned-dependencies"}},{"name":"SAST","score":0,"reason":"SAST tool is not run on all commits -- score normalized to 0","details":["Warn: 0 commits out of 30 are checked with a SAST tool"],"documentation":{"short":"Determines if the project uses static code analysis.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#sast"}},{"name":"Vulnerabilities","score":10,"reason":"0 existing vulnerabilities detected","details":null,"documentation":{"short":"Determines if the project has open, known unfixed vulnerabilities.","url":"https://github.com/ossf/scorecard/blob/f6ed084d17c9236477efd66e5b258b9d4cc7b389/docs/checks.md#vulnerabilities"}}]},"last_synced_at":"2025-08-18T02:04:48.104Z","repository_id":39633602,"created_at":"2025-08-18T02:04:48.104Z","updated_at":"2025-08-18T02:04:48.104Z"},"host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":280901444,"owners_count":26410586,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","status":"online","status_checked_at":"2025-10-25T02:00:06.499Z","response_time":81,"last_error":null,"robots_txt_status":"success","robots_txt_updated_at":"2025-07-24T06:49:26.215Z","robots_txt_url":"https://github.com/robots.txt","online":true,"can_crawl_api":true,"host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["avro","big-data","cdap","cdap-plugin","data-cl
eansing","data-prep","data-science","data-transform","data-transformation","manipulate-data","parsing","preparation","project","transform","transform-data","wrangle"],"created_at":"2025-10-25T03:55:10.859Z","updated_at":"2025-10-25T03:55:15.730Z","avatar_url":"https://github.com/data-integrations.png","language":"Java","readme":"# Data Prep\n\n![cm-available](https://cdap-users.herokuapp.com/assets/cm-available.svg)\n![cdap-transform](https://cdap-users.herokuapp.com/assets/cdap-transform.svg)\n[![Build Status](https://travis-ci.org/cdapio/hydrator-plugins.svg?branch=develop)](https://travis-ci.org/cdapio/hydrator-plugins)\n[![Coverity Scan Build Status](https://scan.coverity.com/projects/11434/badge.svg)](https://scan.coverity.com/projects/hydrator-wrangler-transform)\n[![Maven Central](https://maven-badges.herokuapp.com/maven-central/io.cdap.wrangler/wrangler-core/badge.svg)](https://maven-badges.herokuapp.com/maven-central/io.cdap.wrangler/wrangler-core)\n[![Javadoc](https://javadoc-emblem.rhcloud.com/doc/io.cdap.wrangler/wrangler-core/badge.svg)](http://www.javadoc.io/doc/io.cdap.wrangler/wrangler-core)\n[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)\n[![Join CDAP community](https://cdap-users.herokuapp.com/badge.svg?t=wrangler)](https://cdap-users.herokuapp.com?t=1)\n\nA collection of libraries, a pipeline plugin, and a CDAP service for performing data\ncleansing, transformation, and filtering using a set of data manipulation instructions\n(directives). These instructions are either generated using an interactive visual tool or\nare manually created.\n\n  * Data Prep defines a few concepts that might be useful if you are just getting started with it. 
Learn about them [here](wrangler-docs/concepts.md)\n  * The Data Prep Transform is [separately documented](wrangler-transform/wrangler-docs/data-prep-transform.md).\n  * [Data Prep Cheatsheet](wrangler-docs/cheatsheet.md)\n\n## New Features\n\nMore [here](wrangler-docs/upcoming-features.md) on upcoming features.\n\n  * **User Defined Directives, also known as UDD**, allow you to create custom functions to transform records within CDAP DataPrep, a.k.a. Wrangler. CDAP comes with a comprehensive library of functions. There are however some omissions, and some specific cases for which UDDs are the solution. Additional information on how you can build your custom directives [here](wrangler-docs/custom-directive.md).\n    * Migrating directives from version 1.0 to version 2.0 [here](wrangler-docs/directive-migration.md)\n    * Information about Grammar [here](wrangler-docs/grammar/grammar-info.md)\n    * Various `TokenType` supported by system [here](../api/src/main/java/io/cdap/wrangler/api/parser/TokenType.java)\n    * Custom Directive Implementation Internals [here](wrangler-docs/udd-internal.md)\n\n  * A new capability that allows CDAP Administrators to **restrict the directives** that are accessible to their users.\nMore information on configuring can be found [here](wrangler-docs/exclusion-and-aliasing.md)\n\n## Demo Videos and Recipes\n\nVideos and Screencasts are the best way to learn, so we have compiled simple, short screencasts that show some of the features of Data Prep. 
Additional videos can be found [here](https://www.youtube.com/playlist?list=PLhmsf-NvXKJn-neqefOrcl4n7zU4TWmIr)\n\n### Videos\n\n  * [SCREENCAST] [Creating Lookup Dataset and Joining](https://www.youtube.com/watch?v=Nc1b0rsELHQ)\n  * [SCREENCAST] [Restricted Directives](https://www.youtube.com/watch?v=71EcMQU714U)\n  * [SCREENCAST] [Parse Excel files in CDAP](https://www.youtube.com/watch?v=su5L1noGlEk)\n  * [SCREENCAST] [Parse File As AVRO File](https://www.youtube.com/watch?v=tmwAw4dKUNc)\n  * [SCREENCAST] [Parsing Binary Coded AVRO Messages](https://www.youtube.com/watch?v=Ix_lPo-PDJY)\n  * [SCREENCAST] [Parsing Binary Coded AVRO Messages \u0026 Protobuf messages using schema registry](https://www.youtube.com/watch?v=LVLIdWnUX1k)\n  * [SCREENCAST] [Quantize a column - Digitize](https://www.youtube.com/watch?v=VczkYX5SRtY)\n  * [SCREENCAST] [Data Cleansing capability with send-to-error directive](https://www.youtube.com/watch?v=aZd5H8hIjDc)\n  * [SCREENCAST] [Building Data Prep from the GitHub source](https://youtu.be/pGGjKU04Y38)\n  * [VOICE-OVER] [End-to-End Demo Video](https://youtu.be/AnhF0qRmn24)\n  * [SCREENCAST] [Ingesting into Kudu](https://www.youtube.com/watch?v=KBW7a38vlUM)\n  * [SCREENCAST] [Realtime HL7 CCDA XML from Kafka into Time Partitioned Parquet](https://youtu.be/0fqNmnOnD-0)\n  * [SCREENCAST] [Parsing JSON file](https://youtu.be/vwnctcGDflE)\n  * [SCREENCAST] [Flattening arrays](https://youtu.be/SemHxgBYIsY)\n  * [SCREENCAST] [Data cleansing with send-to-error directive](https://www.youtube.com/watch?v=aZd5H8hIjDc)\n  * [SCREENCAST] [Publishing to Kafka](https://www.youtube.com/watch?v=xdc8pvvlI48)\n  * [SCREENCAST] [Fixed length to JSON](https://www.youtube.com/watch?v=3AXu4m1swuM)\n\n### Recipes\n\n  * [Parsing Apache Log Files](wrangler-demos/parsing-apache-log-files.md)\n  * [Parsing CSV Files and Extracting Column Values](wrangler-demos/parsing-csv-extracting-column-values.md)\n  * [Parsing HL7 CCDA XML 
Files](wrangler-demos/parsing-hl7-ccda-xml-files.md)\n\n## Available Directives\n\nThese directives are currently available:\n\n| Directive                                                              | Description                                                      |\n| ---------------------------------------------------------------------- | ---------------------------------------------------------------- |\n| **Parsers**                                                            |                                                                  |\n| [JSON Path](wrangler-docs/directives/json-path.md)                              | Uses a DSL (a JSON path expression) for parsing JSON records     |\n| [Parse as AVRO](wrangler-docs/directives/parse-as-avro.md)                      | Parsing an AVRO encoded message - either as binary or json       |\n| [Parse as AVRO File](wrangler-docs/directives/parse-as-avro-file.md)            | Parsing an AVRO data file                                        |\n| [Parse as CSV](wrangler-docs/directives/parse-as-csv.md)                        | Parsing an input record as comma-separated values                |\n| [Parse as Date](wrangler-docs/directives/parse-as-date.md)                      | Parsing dates using natural language processing                  |\n| [Parse as Excel](wrangler-docs/directives/parse-as-excel.md)                    | Parsing excel file.                                              
|\n| [Parse as Fixed Length](wrangler-docs/directives/parse-as-fixed-length.md)      | Parses as a fixed length record with specified widths            |\n| [Parse as HL7](wrangler-docs/directives/parse-as-hl7.md)                        | Parsing Health Level 7 Version 2 (HL7 V2) messages               |\n| [Parse as JSON](wrangler-docs/directives/parse-as-json.md)                      | Parsing a JSON object                                            |\n| [Parse as Log](wrangler-docs/directives/parse-as-log.md)                        | Parses access log files as from Apache HTTPD and nginx servers   |\n| [Parse as Protobuf](wrangler-docs/directives/parse-as-log.md)                   | Parses a Protobuf encoded in-memory message using descriptor     |\n| [Parse as Simple Date](wrangler-docs/directives/parse-as-simple-date.md)        | Parses date strings                                              |\n| [Parse XML To JSON](wrangler-docs/directives/parse-xml-to-json.md)              | Parses an XML document into a JSON structure                     |\n| [Parse as Currency](wrangler-docs/directives/parse-as-currency.md)              | Parses a string representation of currency into a number.        |\n| [Parse as Datetime](wrangler-docs/directives/parse-as-datetime.md)              | Parses strings with datetime values to CDAP datetime type        |\n| **Output Formatters**                                                  |                                                                  |\n| [Write as CSV](wrangler-docs/directives/write-as-csv.md)                        | Converts a record into CSV format                                |\n| [Write as JSON](wrangler-docs/directives/write-as-json-map.md)                  | Converts the record into a JSON map                              |\n| [Write JSON Object](wrangler-docs/directives/write-as-json-object.md)           | Composes a JSON object based on the fields specified.            
|\n| [Format as Currency](wrangler-docs/directives/format-as-currency.md)            | Formats a number as currency as specified by locale.             |\n| **Transformations**                                                    |                                                                  |\n| [Changing Case](wrangler-docs/directives/changing-case.md)                      | Changes the case of column values                                |\n| [Cut Character](wrangler-docs/directives/cut-character.md)                      | Selects parts of a string value                                  |\n| [Set Column](wrangler-docs/directives/set-column.md)                            | Sets the column value to the result of an expression execution   |\n| [Find and Replace](wrangler-docs/directives/find-and-replace.md)                | Transforms string column values using a \"sed\"-like expression    |\n| [Index Split](wrangler-docs/directives/index-split.md)                          | (_Deprecated_)                                                   |\n| [Invoke HTTP](wrangler-docs/directives/invoke-http.md)                          | Invokes an HTTP Service (_Experimental_, potentially slow)       |\n| [Quantization](wrangler-docs/directives/quantize.md)                            | Quantizes a column based on specified ranges                     |\n| [Regex Group Extractor](wrangler-docs/directives/extract-regex-groups.md)       | Extracts the data from a regex group into its own column         |\n| [Setting Character Set](wrangler-docs/directives/set-charset.md)                | Sets the encoding and then converts the data to a UTF-8 String   |\n| [Setting Record Delimiter](wrangler-docs/directives/set-record-delim.md)        | Sets the record delimiter                                        |\n| [Split by Separator](wrangler-docs/directives/split-by-separator.md)            | Splits a column based on a separator into two columns            |\n| [Split Email 
Address](wrangler-docs/directives/split-email.md)                  | Splits an email ID into an account and its domain                |\n| [Split URL](wrangler-docs/directives/split-url.md)                              | Splits a URL into its constituents                               |\n| [Text Distance (Fuzzy String Match)](wrangler-docs/directives/text-distance.md) | Measures the difference between two sequences of characters      |\n| [Text Metric (Fuzzy String Match)](wrangler-docs/directives/text-metric.md)     | Measures the difference between two sequences of characters      |\n| [URL Decode](wrangler-docs/directives/url-decode.md)                            | Decodes from the `application/x-www-form-urlencoded` MIME format |\n| [URL Encode](wrangler-docs/directives/url-encode.md)                            | Encodes to the `application/x-www-form-urlencoded` MIME format   |\n| [Trim](wrangler-docs/directives/trim.md)                                        | Functions for trimming white spaces around string data           |\n| **Encoders and Decoders**                                              |                                                                  |\n| [Decode](wrangler-docs/directives/decode.md)                                    | Decodes a column value as one of `base32`, `base64`, or `hex`    |\n| [Encode](wrangler-docs/directives/encode.md)                                    | Encodes a column value as one of `base32`, `base64`, or `hex`    |\n| **Unique ID**                                                          |                                                                  |\n| [UUID Generation](wrangler-docs/directives/generate-uuid.md)                    | Generates a universally unique identifier (UUID) .Recommended to use with Wrangler version 4.4.0 and above due to an important bug fix [CDAP-17732](https://cdap.atlassian.net/browse/CDAP-17732)             |\n| **Date Transformations**                                          
     |                                                                  |\n| [Diff Date](wrangler-docs/directives/diff-date.md)                              | Calculates the difference between two dates                      |\n| [Format Date](wrangler-docs/directives/format-date.md)                          | Custom patterns for date-time formatting                         |\n| [Format Unix Timestamp](wrangler-docs/directives/format-unix-timestamp.md)      | Formats a UNIX timestamp as a date                               |\n| **DateTime Transformations**                                                    |                                                                  |\n| [Current DateTime](wrangler-docs/directives/current-datetime.md)                | Generates the current datetime using the given zone or UTC by default|\n| [Datetime To Timestamp](wrangler-docs/directives/datetime-to-timestamp.md)      | Converts a datetime value to timestamp with the given zone       |\n| [Format Datetime](wrangler-docs/directives/format-datetime.md)                  | Formats a datetime value to custom date time pattern strings     |\n| [Timestamp To Datetime](wrangler-docs/directives/timestamp-to-datetime.md)      | Converts a timestamp value to datetime                           |\n| **Lookups**                                                            |                                                                  |\n| [Catalog Lookup](wrangler-docs/directives/catalog-lookup.md)                    | Static catalog lookup of ICD-9, ICD-10-2016, ICD-10-2017 codes   |\n| [Table Lookup](wrangler-docs/directives/table-lookup.md)                        | Performs lookups into Table datasets                             |\n| **Hashing \u0026 Masking**                                                  |                                                                  |\n| [Message Digest or Hash](wrangler-docs/directives/hash.md)                      | Generates a message 
digest                                       |\n| [Mask Number](wrangler-docs/directives/mask-number.md)                          | Applies substitution masking on the column values                |\n| [Mask Shuffle](wrangler-docs/directives/mask-shuffle.md)                        | Applies shuffle masking on the column values                     |\n| **Row Operations**                                                     |                                                                  |\n| [Filter Row if Matched](wrangler-docs/directives/filter-row-if-matched.md)      | Filters rows that match a pattern for a column                                         |\n| [Filter Row if True](wrangler-docs/directives/filter-row-if-true.md)            | Filters rows if the condition is true.                                                  |\n| [Filter Row Empty or Null](wrangler-docs/directives/filter-empty-or-null.md)    | Filters rows that are empty or null.                    |\n| [Flatten](wrangler-docs/directives/flatten.md)                                  | Separates the elements in a repeated field                       |\n| [Fail on condition](wrangler-docs/directives/fail.md)                           | Fails processing when the condition is evaluated to true.        
|\n| [Send to Error](wrangler-docs/directives/send-to-error.md)                      | Filtering of records to an error collector                       |\n| [Send to Error And Continue](wrangler-docs/directives/send-to-error-and-continue.md) | Filtering of records to an error collector and continues processing                      |\n| [Split to Rows](wrangler-docs/directives/split-to-rows.md)                      | Splits based on a separator into multiple records                |\n| **Column Operations**                                                  |                                                                  |\n| [Change Column Case](wrangler-docs/directives/change-column-case.md)            | Changes column names to either lowercase or uppercase            |\n| [Changing Case](wrangler-docs/directives/changing-case.md)                      | Change the case of column values                                 |\n| [Cleanse Column Names](wrangler-docs/directives/cleanse-column-names.md)        | Sanitizes column names, following specific rules                 |\n| [Columns Replace](wrangler-docs/directives/columns-replace.md)                  | Alters column names in bulk                                      |\n| [Copy](wrangler-docs/directives/copy.md)                                        | Copies values from a source column into a destination column     |\n| [Drop Column](wrangler-docs/directives/drop.md)                                 | Drops a column in a record                                       |\n| [Fill Null or Empty Columns](wrangler-docs/directives/fill-null-or-empty.md)    | Fills column value with a fixed value if null or empty           |\n| [Keep Columns](wrangler-docs/directives/keep.md)                                | Keeps specified columns from the record                          |\n| [Merge Columns](wrangler-docs/directives/merge.md)                              | Merges two columns by inserting a third column                   
|\n| [Rename Column](wrangler-docs/directives/rename.md)                             | Renames an existing column in the record                         |\n| [Set Column Header](wrangler-docs/directives/set-headers.md)                     | Sets the names of columns, in the order they are specified       |\n| [Split to Columns](wrangler-docs/directives/split-to-columns.md)                | Splits a column based on a separator into multiple columns       |\n| [Swap Columns](wrangler-docs/directives/swap.md)                                | Swaps column names of two columns                                |\n| [Set Column Data Type](wrangler-docs/directives/set-type.md)                    | Convert data type of a column                                    |\n| **NLP**                                                                |                                                                  |\n| [Stemming Tokenized Words](wrangler-docs/directives/stemming.md)                | Applies the Porter stemmer algorithm for English words           |\n| **Transient Aggregators \u0026 Setters**                                    |                                                                  |\n| [Increment Variable](wrangler-docs/directives/increment-variable.md)            | Increments a transient variable with a record of processing.     |\n| [Set Variable](wrangler-docs/directives/set-variable.md)                        | Sets a transient variable with a record of processing.     |\n| **Functions**                                                          |                                                                  |\n| [Data Quality](wrangler-docs/functions/dq-functions.md)                         | Data quality check functions. Checks for date, time, etc.        
|\n| [Date Manipulations](wrangler-docs/functions/date-functions.md)                 | Functions that can manipulate date                               |\n| [DDL](wrangler-docs/functions/ddl-functions.md)                                 | Functions that can manipulate definition of data                 |\n| [JSON](wrangler-docs/functions/json-functions.md)                               | Functions that can be useful in transforming your data           |\n| [Types](wrangler-docs/functions/type-functions.md)                              | Functions for detecting the type of data                         |\n\n## Performance\n\nInitial performance tests show that with a set of directives of high complexity for\ntransforming data, *DataPrep* is able to process at about ~106K records per second. The\nrates below are specified as *records/second*. \n\n| Directive Complexity | Column Count |    Records |           Size | Mean Rate |\n| -------------------- | :----------: | ---------: | -------------: | --------: |\n| High (167 Directives) |      426      | 127,946,398 |  82,677,845,324 | 106,367.27 |\n| High (167 Directives) |      426      | 511,785,592 | 330,711,381,296 | 105,768.93 |\n\n\n## Contact\n\n### Mailing Lists\n\nCDAP User Group and Development Discussions:\n\n* [cdap-user@googlegroups.com](https://groups.google.com/d/forum/cdap-user)\n\nThe *cdap-user* mailing list is primarily for users using the product to develop\napplications or building plugins for applications. 
You can expect questions from\nusers, release announcements, and any other discussions that we think will be helpful\nto the users.\n\n### IRC Channel\n\nCDAP IRC Channel: [#cdap on irc.freenode.net](http://webchat.freenode.net?channels=%23cdap)\n\n### Slack Team\n\nCDAP Users on Slack: [cdap-users team](https://cdap-users.herokuapp.com)\n\n\n## License and Trademarks\n\nCopyright © 2016-2019 Cask Data, Inc.\n\nLicensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except\nin compliance with the License. You may obtain a copy of the License at\n\nhttp://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software distributed under the\nLicense is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,\neither express or implied. See the License for the specific language governing permissions\nand limitations under the License.\n\nCask is a trademark of Cask Data, Inc. All rights reserved.\n\nApache, Apache HBase, and HBase are trademarks of The Apache Software Foundation. Used with\npermission. No endorsement by The Apache Software Foundation is implied by the use of these marks.\n","funding_links":[],"categories":["大数据"],"sub_categories":[],"project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fdata-integrations%2Fwrangler","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fdata-integrations%2Fwrangler","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fdata-integrations%2Fwrangler/lists"}