https://github.com/xmikos/reparser
Simple regex-based lexer/parser for inline markup
https://github.com/xmikos/reparser
Last synced: 4 months ago
JSON representation
Simple regex-based lexer/parser for inline markup
- Host: GitHub
- URL: https://github.com/xmikos/reparser
- Owner: xmikos
- License: mit
- Created: 2015-04-16T13:20:55.000Z (about 10 years ago)
- Default Branch: master
- Last Pushed: 2020-02-06T21:04:05.000Z (over 5 years ago)
- Last Synced: 2024-10-04T00:45:53.160Z (8 months ago)
- Language: Python
- Size: 201 KB
- Stars: 9
- Watchers: 2
- Forks: 1
- Open Issues: 1
-
Metadata Files:
- Readme: README.rst
- License: LICENSE
Awesome Lists containing this project
README
ReParser
========

Simple regex-based lexer/parser for inline markup

Requirements
------------

- Python 3

Usage
-----

Example::

import re
from pprint import pprint
from reparser import Parser, Token, MatchGroup

boundary_chars = r'\s`!()\[\]{{}};:\'".,<>?«»“”‘’*_~='
b_left = r'(?:(?<=[' + boundary_chars + r'])|(?<=^))' # Lookbehind
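b_right = r'(?:(?=[' + boundary_chars + r'])|(?=$))' # Lookahead

markdown_start = b_left + r'(?<!\\){tag}(?!\s)(?!{tag})'
markdown_end = r'(?<![\s\\]){tag}' + b_right
markdown_link = r'(?<!\\)\[(?P<link>.+?)\]\((?P<url>.+?)\)'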
newline = r'\n|\r\n'

url_proto_regex = re.compile(r'(?i)^[a-z][\w-]+:/{1,3}')
def markdown(tag):
    """Return sequence of start and end regex patterns for simple Markdown tag"""
    return (markdown_start.format(tag=tag), markdown_end.format(tag=tag))


def url_complete(url):
    """If URL doesn't start with protocol, prepend it with http://"""
    return url if url_proto_regex.search(url) else 'http://' + url


tokens = [
Token('bi1', *markdown(r'\*\*\*'), is_bold=True, is_italic=True),
Token('bi2', *markdown(r'___'), is_bold=True, is_italic=True),
Token('b1', *markdown(r'\*\*'), is_bold=True),
Token('b2', *markdown(r'__'), is_bold=True),
Token('i1', *markdown(r'\*'), is_italic=True),
Token('i2', *markdown(r'_'), is_italic=True),
Token('pre3', *markdown(r'```'), skip=True),
Token('pre2', *markdown(r'``'), skip=True),
Token('pre1', *markdown(r'`'), skip=True),
Token('s', *markdown(r'~~'), is_strikethrough=True),
Token('u', *markdown(r'=='), is_underline=True),
Token('link', markdown_link, text=MatchGroup('link'),
link_target=MatchGroup('url', func=url_complete)),
Token('br', newline, text='\n', segment_type="LINE_BREAK")
]

parser = Parser(tokens)
text = ('Hello **bold** world!\n'
        'You can **try *this* awesome** [link](www.eff.org).')

segments = parser.parse(text)
pprint([(segment.text, segment.params) for segment in segments])

Output::

[('Hello ', {}),
('bold', {'is_bold': True}),
(' world!', {}),
('\n', {'segment_type': 'LINE_BREAK'}),
('You can ', {}),
('try ', {'is_bold': True}),
('this', {'is_bold': True, 'is_italic': True}),
(' awesome', {'is_bold': True}),
(' ', {}),
('link', {'link_target': 'http://www.eff.org'}),
('.', {})]