Ecosyste.ms: Awesome
An open API service indexing awesome lists of open source software.
https://github.com/princemaple/abnf_parsec
ABNF in, parser out
https://github.com/princemaple/abnf_parsec
abnf elixir parser-generator parsing
Last synced: 3 days ago
JSON representation
ABNF in, parser out
- Host: GitHub
- URL: https://github.com/princemaple/abnf_parsec
- Owner: princemaple
- License: mit
- Created: 2020-01-20T12:35:50.000Z (about 5 years ago)
- Default Branch: main
- Last Pushed: 2025-01-30T01:11:43.000Z (13 days ago)
- Last Synced: 2025-02-02T01:06:47.064Z (10 days ago)
- Topics: abnf, elixir, parser-generator, parsing
- Language: Elixir
- Homepage: https://hexdocs.pm/abnf_parsec/AbnfParsec.html
- Size: 166 KB
- Stars: 51
- Watchers: 4
- Forks: 3
- Open Issues: 4
-
Metadata Files:
- Readme: README.md
- Changelog: CHANGELOG.md
- License: LICENSE
Awesome Lists containing this project
- freaking_awesome_elixir - Elixir - ABNF in and parser out. (Lexical analysis)
- fucking-awesome-elixir - abnf_parsec - ABNF in and parser out. (Lexical analysis)
- awesome-elixir - abnf_parsec - ABNF in and parser out. (Lexical analysis)
README
# AbnfParsec
[![hex.pm](https://img.shields.io/hexpm/v/abnf_parsec.svg)](https://hex.pm/packages/abnf_parsec)
[![hex.pm](https://img.shields.io/hexpm/dt/abnf_parsec.svg)](https://hex.pm/packages/abnf_parsec)
[![hex.pm](https://img.shields.io/hexpm/l/abnf_parsec.svg)](https://hex.pm/packages/abnf_parsec)
[![github.com](https://img.shields.io/github/last-commit/princemaple/abnf_parsec.svg)](https://github.com/princemaple/abnf_parsec)### ABNF in and parser out.
Parses ABNF with a parser written with `nimble_parsec`, emits parser consists of `nimble_parsec` combinators.
## Features
- Brevity - flattens unnecessary nesting in parsed ABNF
- Easy to config and customize
- Full test coverage## Installation
The [package](https://hex.pm/packages/abnf_parsec) is available on [Hex](https://hex.pm)
and can be installed by adding `abnf_parsec` to your list of dependencies in `mix.exs`:```elixir
def deps do
[
{:abnf_parsec, "~> 1.0", runtime: false}
]
end
```Documentation can be generated with [ExDoc](https://github.com/elixir-lang/ex_doc)
and published on [HexDocs](https://hexdocs.pm). Once published, the docs can
be found at [https://hexdocs.pm/abnf_parsec](https://hexdocs.pm/abnf_parsec).## Text / Byte mode
In some RFCs, literals in ABNF could be used to describe the byte representation instead of
the text codepoints. There is no clear distinction between them for us to detect automatically.
Hence a text / byte mode is added for the user to set.```elixir
defmodule TextParser do
use AbnfParsec,
mode: :text, # the default, can be omitted
abnf: """
ucschar = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF
/ %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD
/ %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD
/ %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD
/ %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD
/ %xD0000-DFFFD / %xE1000-EFFFD
"""
enddefmodule ByteParser do
use AbnfParsec,
mode: :byte,
abnf: """
; from RFC 5322 + UTF8-non-ascii
atext = ALPHA / DIGIT / "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" /
"-" / "/" / "=" / "?" / "^" / "_" / "`" / "{" / "|" / "}" / "~" / UTF8-non-ascii; from RFC 3629
UTF8-non-ascii = UTF8-2 / UTF8-3 / UTF8-4UTF8-2 = %xC2-DF UTF8-tail
UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
%xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
%xF4 %x80-8F 2( UTF8-tail )
UTF8-tail = %x80-BF
"""
end
```## Usage
```elixir
defmodule IPv4Parser do
use AbnfParsec,
abnf: """
ip = first dot second dot third dot fourth
dot = "."
dec-octet =
"25" %x30-35 / ; 250-255
"2" %x30-34 DIGIT / ; 200-249
"1" 2DIGIT / ; 100-199
%x31-39 DIGIT / ; 10-99
DIGIT ; 0-9
first = dec-octet
second = dec-octet
third = dec-octet
fourth = dec-octet
""",
unbox: ["dec-octet"],
ignore: ["dot"],
parse: :ip
end# IPv4Parser.ip("192.168.0.1")
# IPv4Parser.parse("127.0.0.1")
# IPv4Parser.parse!("10.0.0.1")iex> IPv4Parser.parse! "10.0.0.1"
[ip: [first: '10', second: '0', third: '0', fourth: '1']]
``````elixir
defmodule JsonParser do
use AbnfParsec,
abnf_file: "test/fixture/json.abnf",
parse: :json_text,
transform: %{
"string" => {:reduce, {List, :to_string, []}},
"int" => [{:reduce, {List, :to_string, []}}, {:map, {String, :to_integer, []}}],
"frac" => {:reduce, {List, :to_string, []}},
"null" => {:replace, nil},
"true" => {:replace, true},
"false" => {:replace, false}
},
untag: ["member"],
unwrap: ["int", "frac"],
unbox: [
"JSON-text",
"null",
"true",
"false",
"digit1-9",
"decimal-point",
"escape",
"unescaped",
"char"
],
ignore: [
"name-separator",
"value-separator",
"quotation-mark",
"begin-object",
"end-object",
"begin-array",
"end-array"
]
endjson = """
{"a": {"b": 1, "c": [true]}, "d": null, "e": "e\\te"}
"""# JsonParser.json_text(json)
# JsonParser.parse(json)
# JsonParser.parse!(json)iex> JsonParser.parse! """
...> {"a": {"b": 1, "c": [true]}, "d": null, "e": "e\\te"}
...> """
[
object: [
[
string: ["a"],
value: [
object: [
[string: ["b"], value: [number: [int: 1]]],
[string: ["c"], value: [array: [value: [true]]]]
]
]
],
[string: ["d"], value: [nil]],
[string: ["e"], value: [string: ["e\\te"]]]
]
]
```For more details of options for customization, see [abnf_parsec.ex](https://github.com/princemaple/abnf_parsec/blob/main/lib/abnf_parsec.ex)
## What does it do, really?
For example, ABNF of ABNF 😉 is parsed
```
rulelist = 1*( rule / (*c-wsp c-nl) )rule = rulename defined-as elements c-nl
; continues if next line starts
; with white spacerulename = ALPHA *(ALPHA / DIGIT / "-")
defined-as = *c-wsp ("=" / "=/") *c-wsp
; basic rules definition and
; incremental alternativeselements = alternation *c-wsp
c-wsp = WSP / (c-nl WSP)
c-nl = comment / CRLF
; comment or newlinecomment = ";" *(WSP / VCHAR) CRLF
alternation = concatenation
*(*c-wsp "/" *c-wsp concatenation)concatenation = repetition *(1*c-wsp repetition)
repetition = [repeat] element
repeat = 1*DIGIT / (*DIGIT "*" *DIGIT)
element = rulename / group / option /
char-val / num-val / prose-valgroup = "(" *c-wsp alternation *c-wsp ")"
option = "[" *c-wsp alternation *c-wsp "]"
char-val = DQUOTE *(%x20-21 / %x23-7E) DQUOTE
; quoted string of SP and VCHAR
; without DQUOTEnum-val = "%" (bin-val / dec-val / hex-val)
bin-val = "b" 1*BIT
[ 1*("." 1*BIT) / ("-" 1*BIT) ]
; series of concatenated bit values
; or single ONEOF rangedec-val = "d" 1*DIGIT
[ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ]hex-val = "x" 1*HEXDIG
[ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ]prose-val = "<" *(%x20-3D / %x3F-7E) ">"
; bracketed string of SP and VCHAR
; without angles
; prose description, to be used as
; last resort
```into
```elixir
[
rule: [
rulename: "rulelist",
repetition: [
repeat: [min: 1],
alternation: [
rulename: "rule",
concatenation: [
repetition: [repeat: [], rulename: "c-wsp"],
rulename: "c-nl"
]
]
]
],
rule: [
rulename: "rule",
concatenation: [
rulename: "rulename",
rulename: "defined-as",
rulename: "elements",
rulename: "c-nl"
],
comment: "continues if next line starts",
comment: "with white space"
],
rule: [
rulename: "rulename",
concatenation: [
rulename: "ALPHA",
repetition: [
repeat: [],
alternation: [{:rulename, "ALPHA"}, {:rulename, "DIGIT"}, "-"]
]
]
],
rule: [
rulename: "defined-as",
concatenation: [
repetition: [repeat: [], rulename: "c-wsp"],
alternation: ["=", "=/"],
repetition: [repeat: [], rulename: "c-wsp"]
],
comment: "basic rules definition and",
comment: "incremental alternatives"
],
rule: [
rulename: "elements",
concatenation: [
rulename: "alternation",
repetition: [repeat: [], rulename: "c-wsp"]
]
],
rule: [
rulename: "c-wsp",
alternation: [
rulename: "WSP",
concatenation: [rulename: "c-nl", rulename: "WSP"]
]
],
rule: [
rulename: "c-nl",
alternation: [rulename: "comment", rulename: "CRLF"],
comment: "comment or newline"
],
rule: [
rulename: "comment",
concatenation: [
";",
{:repetition,
[repeat: [], alternation: [rulename: "WSP", rulename: "VCHAR"]]},
{:rulename, "CRLF"}
]
],
rule: [
rulename: "alternation",
concatenation: [
rulename: "concatenation",
repetition: [
repeat: [],
concatenation: [
{:repetition, [repeat: [], rulename: "c-wsp"]},
"/",
{:repetition, [repeat: [], rulename: "c-wsp"]},
{:rulename, "concatenation"}
]
]
]
],
rule: [
rulename: "concatenation",
concatenation: [
rulename: "repetition",
repetition: [
repeat: [],
concatenation: [
repetition: [repeat: [min: 1], rulename: "c-wsp"],
rulename: "repetition"
]
]
]
],
rule: [
rulename: "repetition",
concatenation: [option: [rulename: "repeat"], rulename: "element"]
],
rule: [
rulename: "repeat",
alternation: [
repetition: [repeat: [min: 1], rulename: "DIGIT"],
concatenation: [
{:repetition, [repeat: [], rulename: "DIGIT"]},
"*",
{:repetition, [repeat: [], rulename: "DIGIT"]}
]
]
],
rule: [
rulename: "element",
alternation: [
rulename: "rulename",
rulename: "group",
rulename: "option",
rulename: "char-val",
rulename: "num-val",
rulename: "prose-val"
]
],
rule: [
rulename: "group",
concatenation: [
"(",
{:repetition, [repeat: [], rulename: "c-wsp"]},
{:rulename, "alternation"},
{:repetition, [repeat: [], rulename: "c-wsp"]},
")"
]
],
rule: [
rulename: "option",
concatenation: [
"[",
{:repetition, [repeat: [], rulename: "c-wsp"]},
{:rulename, "alternation"},
{:repetition, [repeat: [], rulename: "c-wsp"]},
"]"
]
],
rule: [
rulename: "char-val",
concatenation: [
rulename: "DQUOTE",
repetition: [
repeat: [],
alternation: [
num_range: [{:base, "x"}, "20", "21"],
num_range: [{:base, "x"}, "23", "7E"]
]
],
rulename: "DQUOTE"
],
comment: "quoted string of SP and VCHAR",
comment: "without DQUOTE"
],
rule: [
rulename: "num-val",
concatenation: [
"%",
{:alternation,
[rulename: "bin-val", rulename: "dec-val", rulename: "hex-val"]}
]
],
rule: [
rulename: "bin-val",
concatenation: [
"b",
{:repetition, [repeat: [min: 1], rulename: "BIT"]},
{:option,
[
alternation: [
repetition: [
repeat: [min: 1],
concatenation: [
".",
{:repetition, [repeat: [min: 1], rulename: "BIT"]}
]
],
concatenation: [
"-",
{:repetition, [repeat: [min: 1], rulename: "BIT"]}
]
]
]}
],
comment: "series of concatenated bit values",
comment: "or single ONEOF range"
],
rule: [
rulename: "dec-val",
concatenation: [
"d",
{:repetition, [repeat: [min: 1], rulename: "DIGIT"]},
{:option,
[
alternation: [
repetition: [
repeat: [min: 1],
concatenation: [
".",
{:repetition, [repeat: [min: 1], rulename: "DIGIT"]}
]
],
concatenation: [
"-",
{:repetition, [repeat: [min: 1], rulename: "DIGIT"]}
]
]
]}
]
],
rule: [
rulename: "hex-val",
concatenation: [
"x",
{:repetition, [repeat: [min: 1], rulename: "HEXDIG"]},
{:option,
[
alternation: [
repetition: [
repeat: [min: 1],
concatenation: [
".",
{:repetition, [repeat: [min: 1], rulename: "HEXDIG"]}
]
],
concatenation: [
"-",
{:repetition, [repeat: [min: 1], rulename: "HEXDIG"]}
]
]
]}
]
],
rule: [
rulename: "prose-val",
concatenation: [
"<",
{:repetition,
[
repeat: [],
alternation: [
num_range: [{:base, "x"}, "20", "3D"],
num_range: [{:base, "x"}, "3F", "7E"]
]
]},
">"
],
comment: "bracketed string of SP and VCHAR",
comment: "without angles",
comment: "prose description, to be used as",
comment: "last resort"
]
]
```And generated parser looks like:
```elixir
[
defparsec(
:rulelist,
tag(
times(choice([parsec(:rule), repeat(parsec(:c_wsp)) |> parsec(:c_nl)]), min: 1),
:rulelist
)
),
defparsec(
:rule,
tag(parsec(:rulename) |> parsec(:defined_as) |> parsec(:elements) |> parsec(:c_nl), :rule)
),
defparsec(
:rulename,
tag(
parsec(:core_alpha)
|> repeat(choice([parsec(:core_alpha), parsec(:core_digit), string("-")])),
:rulename
)
),
defparsec(
:defined_as,
tag(
repeat(parsec(:c_wsp)) |> choice([string("="), string("=/")]) |> repeat(parsec(:c_wsp)),
:defined_as
)
),
defparsec(:elements, tag(parsec(:alternation) |> repeat(parsec(:c_wsp)), :elements)),
defparsec(:c_wsp, tag(choice([parsec(:core_wsp), parsec(:c_nl) |> parsec(:core_wsp)]), :c_wsp)),
defparsec(:c_nl, tag(choice([parsec(:comment), parsec(:core_crlf)]), :c_nl)),
defparsec(
:comment,
tag(
string(";")
|> repeat(choice([parsec(:core_wsp), parsec(:core_vchar)]))
|> parsec(:core_crlf),
:comment
)
),
defparsec(
:alternation,
tag(
parsec(:concatenation)
|> repeat(
repeat(parsec(:c_wsp))
|> string("/")
|> repeat(parsec(:c_wsp))
|> parsec(:concatenation)
),
:alternation
)
),
defparsec(
:concatenation,
tag(
parsec(:repetition) |> repeat(times(parsec(:c_wsp), min: 1) |> parsec(:repetition)),
:concatenation
)
),
defparsec(:repetition, tag(optional(parsec(:repeat)) |> parsec(:element), :repetition)),
defparsec(
:repeat,
tag(
choice([
times(parsec(:core_digit), min: 1),
repeat(parsec(:core_digit)) |> string("*") |> repeat(parsec(:core_digit))
]),
:repeat
)
),
defparsec(
:element,
tag(
choice([
parsec(:rulename),
parsec(:group),
parsec(:option),
parsec(:char_val),
parsec(:num_val),
parsec(:prose_val)
]),
:element
)
),
defparsec(
:group,
tag(
string("(")
|> repeat(parsec(:c_wsp))
|> parsec(:alternation)
|> repeat(parsec(:c_wsp))
|> string(")"),
:group
)
),
defparsec(
:option,
tag(
string("[")
|> repeat(parsec(:c_wsp))
|> parsec(:alternation)
|> repeat(parsec(:c_wsp))
|> string("]"),
:option
)
),
defparsec(
:char_val,
tag(
parsec(:core_dquote)
|> repeat(choice([ascii_char([32..33]), ascii_char([35..126])]))
|> parsec(:core_dquote),
:char_val
)
),
defparsec(
:num_val,
tag(string("%") |> choice([parsec(:bin_val), parsec(:dec_val), parsec(:hex_val)]), :num_val)
),
defparsec(
:bin_val,
tag(
string("b")
|> times(parsec(:core_bit), min: 1)
|> optional(
choice([
times(string(".") |> times(parsec(:core_bit), min: 1), min: 1),
string("-") |> times(parsec(:core_bit), min: 1)
])
),
:bin_val
)
),
defparsec(
:dec_val,
tag(
string("d")
|> times(parsec(:core_digit), min: 1)
|> optional(
choice([
times(string(".") |> times(parsec(:core_digit), min: 1), min: 1),
string("-") |> times(parsec(:core_digit), min: 1)
])
),
:dec_val
)
),
defparsec(
:hex_val,
tag(
string("x")
|> times(parsec(:core_hexdig), min: 1)
|> optional(
choice([
times(string(".") |> times(parsec(:core_hexdig), min: 1), min: 1),
string("-") |> times(parsec(:core_hexdig), min: 1)
])
),
:hex_val
)
),
defparsec(
:prose_val,
tag(
string("<") |> repeat(choice([ascii_char([32..61]), ascii_char([63..126])])) |> string(">"),
:prose_val
)
)
]
```