https://github.com/romshark/icumsg
Fast ICU Message format parser/tokenizer for Go
https://github.com/romshark/icumsg
Last synced: 3 months ago
JSON representation
Fast ICU Message format parser/tokenizer for Go
- Host: GitHub
- URL: https://github.com/romshark/icumsg
- Owner: romshark
- License: mit
- Created: 2025-04-12T12:54:13.000Z (6 months ago)
- Default Branch: main
- Last Pushed: 2025-06-15T18:32:06.000Z (4 months ago)
- Last Synced: 2025-06-23T01:11:24.206Z (4 months ago)
- Language: Go
- Homepage:
- Size: 209 KB
- Stars: 1
- Watchers: 1
- Forks: 0
- Open Issues: 0
-
Metadata Files:
- Readme: README.md
- License: LICENSE
Awesome Lists containing this project
README
# icumsg
This Go module provides an efficient
[ICU Message Format](https://unicode-org.github.io/icu/userguide/format_parse/messages/)
tokenizer.

https://go.dev/play/p/y7OA1YK2Wn4
```go
package mainimport (
"fmt"
"os""github.com/romshark/icumsg"
"golang.org/x/text/language"
)func main() {
msg := `Hello {arg} ({rank, ordinal})!`var tokenizer icumsg.Tokenizer
tokens, err := tokenizer.Tokenize(language.English, nil, msg)
if err != nil {
fmt.Printf("ERR: at index %d: %v\n", tokenizer.Pos(), err)
os.Exit(1)
}fmt.Printf("token (%d):\n", len(tokens))
for i, token := range tokens {
fmt.Printf(" %d (%s): %q\n", i,
token.Type.String(), token.String(msg, tokens))
}// output:
// token (8):
// 0 (literal): "Hello "
// 1 (simple argument): "{arg}"
// 2 (argument name): "arg"
// 3 (literal): " ("
// 4 (simple argument): "{rank, ordinal}"
// 5 (argument name): "rank"
// 6 (argument type ordinal): "ordinal"
// 7 (literal): ")!"
}
```

## Error handling
https://go.dev/play/p/NI6gXkcJJcH
```go
package mainimport (
"fmt""github.com/romshark/icumsg"
"golang.org/x/text/language"
)func main() {
// The English language only supports the 'one' and 'other' CLDR plural rules.
msg := `{numMsgs,plural, one{# message} other{# messages} few{this is wrong}}`var tokenizer icumsg.Tokenizer
_, err := tokenizer.Tokenize(language.English, nil, msg)
if err != nil {
fmt.Printf("Error at index %d: %v\n", tokenizer.Pos(), err)
}// output:
// Error at index 50: plural rule unsupported for locale
}
```

## Semantic Analysis
ICU messages can be syntactically valid yet incomplete when they are missing `select`,
`plural` or `selectordinal` options required by the locale, and they can be semantically
invalid when they feature unsupported `select` options.
`icumsg.Analyze` allows you to inspect a message in detail and discover semantic issues.

https://go.dev/play/p/U9t0a0XH9U_h
```go
package mainimport (
"fmt""github.com/romshark/icumsg"
"golang.org/x/text/language"
)var optionsForVarGender = func(argName string) (
options []string,
policyPresence icumsg.OptionsPresencePolicy,
policyUnknown icumsg.OptionUnknownPolicy,
) {
if argName == "varGender" {
// Apply these policies and options only for argument "varGender"
policyPresence = icumsg.OptionsPresencePolicyRequired
policyUnknown = icumsg.OptionUnknownPolicyReject
// Option "other" doesn't need to be included because it's always required.
return []string{"male", "female"}, policyPresence, policyUnknown
}
return nil, 0, 0
}func main() {
locale := language.English// varGender lists unsupported option "unknown"
msg := `This message is valid but has incomplete plural and unknown select options:
missing one: {varNum, plural,
other{
missing male: {varGender, select,
unknown{
varNum[other],varGender[unknown]
}
female{
varNum[other],varGender[female]
}
other{
varNum[other],varGender[other]
}
}
}
}
complete: {varNum, plural,
one{-}
other{-}
}`var tokenizer icumsg.Tokenizer
tokens, err := tokenizer.Tokenize(locale, nil, msg)
if err != nil {
fmt.Printf("ERR: at index %d: %v\n", tokenizer.Pos(), err)
os.Exit(1)
}// Option "other" doesn't need to be included because it's always required.
optionsForVarGender := []string{"male", "female"}var incomplete, rejected []string
totalChoices, err := icumsg.Analyze(locale, msg, tokens,
func(argName string) (
options []string,
policyPresence icumsg.OptionsPresencePolicy,
policyUnknown icumsg.OptionUnknownPolicy,
) {
if argName == "varGender" {
// Apply these policies and options only for argument "varGender"
policyPresence = icumsg.OptionsPresencePolicyRequired
policyUnknown = icumsg.OptionUnknownPolicyReject
return optionsForVarGender, policyPresence, policyUnknown
}
return nil, 0, 0
}, func(index int) error {
// This is called when an incomplete choice is encountered.
tArg, tName := tokens[index], tokens[index+1]
incomplete = append(incomplete,
tArg.Type.String()+": "+tName.String(msg, tokens))
return nil
}, func(indexArgument, indexOption int) error {
// This is called when a rejected option is encountered.
tArg, tName := tokens[indexArgument+1], tokens[indexOption+1]
rejected = append(rejected, fmt.Sprintf("%q: option %q",
tArg.String(msg, tokens), tName.String(msg, tokens)))
return nil
})
if err != nil {
panic(err)
}fmt.Printf("totalChoices: %d\n", totalChoices)
fmt.Printf("incomplete (%d):\n", len(incomplete))
for _, s := range incomplete {
fmt.Printf(" %s\n", s)
}
fmt.Printf("rejected (%d):\n", len(rejected))
for _, s := range rejected {
fmt.Printf(" %s\n", s)
}{
total := float64(totalChoices)
incomplete := float64(len(incomplete))
complete := total - incomplete
percent := complete / total
fmt.Printf("completeness: %.2f%%\n", percent*100)
}// output:
// totalChoices: 3
// incomplete (2):
// select argument: varGender
// plural argument: varNum
// rejected (1):
// "varGender": option "unknown"
// completeness: 33.33%
}
```