Ecosyste.ms: Awesome

An open API service indexing awesome lists of open source software.

Awesome Lists | Featured Topics | Projects

https://github.com/princemaple/abnf_parsec

ABNF in, parser out
https://github.com/princemaple/abnf_parsec

abnf elixir parser-generator parsing

Last synced: 3 days ago
JSON representation

ABNF in, parser out

Awesome Lists containing this project

README

        

# AbnfParsec

[![hex.pm](https://img.shields.io/hexpm/v/abnf_parsec.svg)](https://hex.pm/packages/abnf_parsec)
[![hex.pm](https://img.shields.io/hexpm/dt/abnf_parsec.svg)](https://hex.pm/packages/abnf_parsec)
[![hex.pm](https://img.shields.io/hexpm/l/abnf_parsec.svg)](https://hex.pm/packages/abnf_parsec)
[![github.com](https://img.shields.io/github/last-commit/princemaple/abnf_parsec.svg)](https://github.com/princemaple/abnf_parsec)

### ABNF in and parser out.

Parses ABNF with a parser written with `nimble_parsec`, emits parser consists of `nimble_parsec` combinators.

## Features

- Brevity - flattens unnecessary nesting in parsed ABNF
- Easy to config and customize
- Full test coverage

## Installation

The [package](https://hex.pm/packages/abnf_parsec) is available on [Hex](https://hex.pm)
and can be installed by adding `abnf_parsec` to your list of dependencies in `mix.exs`:

```elixir
def deps do
[
{:abnf_parsec, "~> 1.0", runtime: false}
]
end
```

Documentation can be generated with [ExDoc](https://github.com/elixir-lang/ex_doc)
and published on [HexDocs](https://hexdocs.pm). Once published, the docs can
be found at [https://hexdocs.pm/abnf_parsec](https://hexdocs.pm/abnf_parsec).

## Text / Byte mode

In some RFCs, literals in ABNF could be used to describe the byte representation instead of
the text codepoints. There is no clear distinction between them for us to detect automatically.
Hence a text / byte mode is added for the user to set.

```elixir
defmodule TextParser do
use AbnfParsec,
mode: :text, # the default, can be omitted
abnf: """
ucschar = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF
/ %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD
/ %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD
/ %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD
/ %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD
/ %xD0000-DFFFD / %xE1000-EFFFD
"""
end

defmodule ByteParser do
use AbnfParsec,
mode: :byte,
abnf: """
; from RFC 5322 + UTF8-non-ascii
atext = ALPHA / DIGIT / "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" /
"-" / "/" / "=" / "?" / "^" / "_" / "`" / "{" / "|" / "}" / "~" / UTF8-non-ascii

; from RFC 3629
UTF8-non-ascii = UTF8-2 / UTF8-3 / UTF8-4

UTF8-2 = %xC2-DF UTF8-tail
UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
%xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
%xF4 %x80-8F 2( UTF8-tail )
UTF8-tail = %x80-BF
"""
end
```

## Usage

```elixir
defmodule IPv4Parser do
use AbnfParsec,
abnf: """
ip = first dot second dot third dot fourth
dot = "."
dec-octet =
"25" %x30-35 / ; 250-255
"2" %x30-34 DIGIT / ; 200-249
"1" 2DIGIT / ; 100-199
%x31-39 DIGIT / ; 10-99
DIGIT ; 0-9
first = dec-octet
second = dec-octet
third = dec-octet
fourth = dec-octet
""",
unbox: ["dec-octet"],
ignore: ["dot"],
parse: :ip
end

# IPv4Parser.ip("192.168.0.1")
# IPv4Parser.parse("127.0.0.1")
# IPv4Parser.parse!("10.0.0.1")

iex> IPv4Parser.parse! "10.0.0.1"
[ip: [first: '10', second: '0', third: '0', fourth: '1']]
```

```elixir
defmodule JsonParser do
use AbnfParsec,
abnf_file: "test/fixture/json.abnf",
parse: :json_text,
transform: %{
"string" => {:reduce, {List, :to_string, []}},
"int" => [{:reduce, {List, :to_string, []}}, {:map, {String, :to_integer, []}}],
"frac" => {:reduce, {List, :to_string, []}},
"null" => {:replace, nil},
"true" => {:replace, true},
"false" => {:replace, false}
},
untag: ["member"],
unwrap: ["int", "frac"],
unbox: [
"JSON-text",
"null",
"true",
"false",
"digit1-9",
"decimal-point",
"escape",
"unescaped",
"char"
],
ignore: [
"name-separator",
"value-separator",
"quotation-mark",
"begin-object",
"end-object",
"begin-array",
"end-array"
]
end

json = """
{"a": {"b": 1, "c": [true]}, "d": null, "e": "e\\te"}
"""

# JsonParser.json_text(json)
# JsonParser.parse(json)
# JsonParser.parse!(json)

iex> JsonParser.parse! """
...> {"a": {"b": 1, "c": [true]}, "d": null, "e": "e\\te"}
...> """
[
object: [
[
string: ["a"],
value: [
object: [
[string: ["b"], value: [number: [int: 1]]],
[string: ["c"], value: [array: [value: [true]]]]
]
]
],
[string: ["d"], value: [nil]],
[string: ["e"], value: [string: ["e\\te"]]]
]
]
```

For more details of options for customization, see [abnf_parsec.ex](https://github.com/princemaple/abnf_parsec/blob/main/lib/abnf_parsec.ex)

## What does it do, really?

For example, ABNF of ABNF 😉 is parsed

```
rulelist = 1*( rule / (*c-wsp c-nl) )

rule = rulename defined-as elements c-nl
; continues if next line starts
; with white space

rulename = ALPHA *(ALPHA / DIGIT / "-")

defined-as = *c-wsp ("=" / "=/") *c-wsp
; basic rules definition and
; incremental alternatives

elements = alternation *c-wsp

c-wsp = WSP / (c-nl WSP)

c-nl = comment / CRLF
; comment or newline

comment = ";" *(WSP / VCHAR) CRLF

alternation = concatenation
*(*c-wsp "/" *c-wsp concatenation)

concatenation = repetition *(1*c-wsp repetition)

repetition = [repeat] element

repeat = 1*DIGIT / (*DIGIT "*" *DIGIT)

element = rulename / group / option /
char-val / num-val / prose-val

group = "(" *c-wsp alternation *c-wsp ")"

option = "[" *c-wsp alternation *c-wsp "]"

char-val = DQUOTE *(%x20-21 / %x23-7E) DQUOTE
; quoted string of SP and VCHAR
; without DQUOTE

num-val = "%" (bin-val / dec-val / hex-val)

bin-val = "b" 1*BIT
[ 1*("." 1*BIT) / ("-" 1*BIT) ]
; series of concatenated bit values
; or single ONEOF range

dec-val = "d" 1*DIGIT
[ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ]

hex-val = "x" 1*HEXDIG
[ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ]

prose-val = "<" *(%x20-3D / %x3F-7E) ">"
; bracketed string of SP and VCHAR
; without angles
; prose description, to be used as
; last resort
```

into

```elixir
[
rule: [
rulename: "rulelist",
repetition: [
repeat: [min: 1],
alternation: [
rulename: "rule",
concatenation: [
repetition: [repeat: [], rulename: "c-wsp"],
rulename: "c-nl"
]
]
]
],
rule: [
rulename: "rule",
concatenation: [
rulename: "rulename",
rulename: "defined-as",
rulename: "elements",
rulename: "c-nl"
],
comment: "continues if next line starts",
comment: "with white space"
],
rule: [
rulename: "rulename",
concatenation: [
rulename: "ALPHA",
repetition: [
repeat: [],
alternation: [{:rulename, "ALPHA"}, {:rulename, "DIGIT"}, "-"]
]
]
],
rule: [
rulename: "defined-as",
concatenation: [
repetition: [repeat: [], rulename: "c-wsp"],
alternation: ["=", "=/"],
repetition: [repeat: [], rulename: "c-wsp"]
],
comment: "basic rules definition and",
comment: "incremental alternatives"
],
rule: [
rulename: "elements",
concatenation: [
rulename: "alternation",
repetition: [repeat: [], rulename: "c-wsp"]
]
],
rule: [
rulename: "c-wsp",
alternation: [
rulename: "WSP",
concatenation: [rulename: "c-nl", rulename: "WSP"]
]
],
rule: [
rulename: "c-nl",
alternation: [rulename: "comment", rulename: "CRLF"],
comment: "comment or newline"
],
rule: [
rulename: "comment",
concatenation: [
";",
{:repetition,
[repeat: [], alternation: [rulename: "WSP", rulename: "VCHAR"]]},
{:rulename, "CRLF"}
]
],
rule: [
rulename: "alternation",
concatenation: [
rulename: "concatenation",
repetition: [
repeat: [],
concatenation: [
{:repetition, [repeat: [], rulename: "c-wsp"]},
"/",
{:repetition, [repeat: [], rulename: "c-wsp"]},
{:rulename, "concatenation"}
]
]
]
],
rule: [
rulename: "concatenation",
concatenation: [
rulename: "repetition",
repetition: [
repeat: [],
concatenation: [
repetition: [repeat: [min: 1], rulename: "c-wsp"],
rulename: "repetition"
]
]
]
],
rule: [
rulename: "repetition",
concatenation: [option: [rulename: "repeat"], rulename: "element"]
],
rule: [
rulename: "repeat",
alternation: [
repetition: [repeat: [min: 1], rulename: "DIGIT"],
concatenation: [
{:repetition, [repeat: [], rulename: "DIGIT"]},
"*",
{:repetition, [repeat: [], rulename: "DIGIT"]}
]
]
],
rule: [
rulename: "element",
alternation: [
rulename: "rulename",
rulename: "group",
rulename: "option",
rulename: "char-val",
rulename: "num-val",
rulename: "prose-val"
]
],
rule: [
rulename: "group",
concatenation: [
"(",
{:repetition, [repeat: [], rulename: "c-wsp"]},
{:rulename, "alternation"},
{:repetition, [repeat: [], rulename: "c-wsp"]},
")"
]
],
rule: [
rulename: "option",
concatenation: [
"[",
{:repetition, [repeat: [], rulename: "c-wsp"]},
{:rulename, "alternation"},
{:repetition, [repeat: [], rulename: "c-wsp"]},
"]"
]
],
rule: [
rulename: "char-val",
concatenation: [
rulename: "DQUOTE",
repetition: [
repeat: [],
alternation: [
num_range: [{:base, "x"}, "20", "21"],
num_range: [{:base, "x"}, "23", "7E"]
]
],
rulename: "DQUOTE"
],
comment: "quoted string of SP and VCHAR",
comment: "without DQUOTE"
],
rule: [
rulename: "num-val",
concatenation: [
"%",
{:alternation,
[rulename: "bin-val", rulename: "dec-val", rulename: "hex-val"]}
]
],
rule: [
rulename: "bin-val",
concatenation: [
"b",
{:repetition, [repeat: [min: 1], rulename: "BIT"]},
{:option,
[
alternation: [
repetition: [
repeat: [min: 1],
concatenation: [
".",
{:repetition, [repeat: [min: 1], rulename: "BIT"]}
]
],
concatenation: [
"-",
{:repetition, [repeat: [min: 1], rulename: "BIT"]}
]
]
]}
],
comment: "series of concatenated bit values",
comment: "or single ONEOF range"
],
rule: [
rulename: "dec-val",
concatenation: [
"d",
{:repetition, [repeat: [min: 1], rulename: "DIGIT"]},
{:option,
[
alternation: [
repetition: [
repeat: [min: 1],
concatenation: [
".",
{:repetition, [repeat: [min: 1], rulename: "DIGIT"]}
]
],
concatenation: [
"-",
{:repetition, [repeat: [min: 1], rulename: "DIGIT"]}
]
]
]}
]
],
rule: [
rulename: "hex-val",
concatenation: [
"x",
{:repetition, [repeat: [min: 1], rulename: "HEXDIG"]},
{:option,
[
alternation: [
repetition: [
repeat: [min: 1],
concatenation: [
".",
{:repetition, [repeat: [min: 1], rulename: "HEXDIG"]}
]
],
concatenation: [
"-",
{:repetition, [repeat: [min: 1], rulename: "HEXDIG"]}
]
]
]}
]
],
rule: [
rulename: "prose-val",
concatenation: [
"<",
{:repetition,
[
repeat: [],
alternation: [
num_range: [{:base, "x"}, "20", "3D"],
num_range: [{:base, "x"}, "3F", "7E"]
]
]},
">"
],
comment: "bracketed string of SP and VCHAR",
comment: "without angles",
comment: "prose description, to be used as",
comment: "last resort"
]
]
```

And generated parser looks like:

```elixir
[
defparsec(
:rulelist,
tag(
times(choice([parsec(:rule), repeat(parsec(:c_wsp)) |> parsec(:c_nl)]), min: 1),
:rulelist
)
),
defparsec(
:rule,
tag(parsec(:rulename) |> parsec(:defined_as) |> parsec(:elements) |> parsec(:c_nl), :rule)
),
defparsec(
:rulename,
tag(
parsec(:core_alpha)
|> repeat(choice([parsec(:core_alpha), parsec(:core_digit), string("-")])),
:rulename
)
),
defparsec(
:defined_as,
tag(
repeat(parsec(:c_wsp)) |> choice([string("="), string("=/")]) |> repeat(parsec(:c_wsp)),
:defined_as
)
),
defparsec(:elements, tag(parsec(:alternation) |> repeat(parsec(:c_wsp)), :elements)),
defparsec(:c_wsp, tag(choice([parsec(:core_wsp), parsec(:c_nl) |> parsec(:core_wsp)]), :c_wsp)),
defparsec(:c_nl, tag(choice([parsec(:comment), parsec(:core_crlf)]), :c_nl)),
defparsec(
:comment,
tag(
string(";")
|> repeat(choice([parsec(:core_wsp), parsec(:core_vchar)]))
|> parsec(:core_crlf),
:comment
)
),
defparsec(
:alternation,
tag(
parsec(:concatenation)
|> repeat(
repeat(parsec(:c_wsp))
|> string("/")
|> repeat(parsec(:c_wsp))
|> parsec(:concatenation)
),
:alternation
)
),
defparsec(
:concatenation,
tag(
parsec(:repetition) |> repeat(times(parsec(:c_wsp), min: 1) |> parsec(:repetition)),
:concatenation
)
),
defparsec(:repetition, tag(optional(parsec(:repeat)) |> parsec(:element), :repetition)),
defparsec(
:repeat,
tag(
choice([
times(parsec(:core_digit), min: 1),
repeat(parsec(:core_digit)) |> string("*") |> repeat(parsec(:core_digit))
]),
:repeat
)
),
defparsec(
:element,
tag(
choice([
parsec(:rulename),
parsec(:group),
parsec(:option),
parsec(:char_val),
parsec(:num_val),
parsec(:prose_val)
]),
:element
)
),
defparsec(
:group,
tag(
string("(")
|> repeat(parsec(:c_wsp))
|> parsec(:alternation)
|> repeat(parsec(:c_wsp))
|> string(")"),
:group
)
),
defparsec(
:option,
tag(
string("[")
|> repeat(parsec(:c_wsp))
|> parsec(:alternation)
|> repeat(parsec(:c_wsp))
|> string("]"),
:option
)
),
defparsec(
:char_val,
tag(
parsec(:core_dquote)
|> repeat(choice([ascii_char([32..33]), ascii_char([35..126])]))
|> parsec(:core_dquote),
:char_val
)
),
defparsec(
:num_val,
tag(string("%") |> choice([parsec(:bin_val), parsec(:dec_val), parsec(:hex_val)]), :num_val)
),
defparsec(
:bin_val,
tag(
string("b")
|> times(parsec(:core_bit), min: 1)
|> optional(
choice([
times(string(".") |> times(parsec(:core_bit), min: 1), min: 1),
string("-") |> times(parsec(:core_bit), min: 1)
])
),
:bin_val
)
),
defparsec(
:dec_val,
tag(
string("d")
|> times(parsec(:core_digit), min: 1)
|> optional(
choice([
times(string(".") |> times(parsec(:core_digit), min: 1), min: 1),
string("-") |> times(parsec(:core_digit), min: 1)
])
),
:dec_val
)
),
defparsec(
:hex_val,
tag(
string("x")
|> times(parsec(:core_hexdig), min: 1)
|> optional(
choice([
times(string(".") |> times(parsec(:core_hexdig), min: 1), min: 1),
string("-") |> times(parsec(:core_hexdig), min: 1)
])
),
:hex_val
)
),
defparsec(
:prose_val,
tag(
string("<") |> repeat(choice([ascii_char([32..61]), ascii_char([63..126])])) |> string(">"),
:prose_val
)
)
]
```