From 5e8ec3015adb375d49ff39dfdd730e4d2d227b32 Mon Sep 17 00:00:00 2001 From: Oliver Marks Date: Sat, 3 Nov 2018 14:10:37 +0000 Subject: [PATCH] Refactored to use ints as token ids in place of strings --- eorg/const.py | 21 ++++++----- eorg/generate.py | 44 ++++++++++++---------- eorg/parser.py | 67 ++++++++++++++------------------- eorg/version.py | 2 +- examples/html-enaml/example.py | 2 - examples/html-enaml/output.html | 28 +++++++------- examples/html-plain/example.py | 24 ++---------- examples/html-plain/test.html | 37 ++++++------------ examples/raw/filtering.py | 9 +++++ readme.org | 26 +++++++++---- tests/test_documents.py | 17 +++++---- tests/test_html.py | 51 +++++++++++++------------ 12 files changed, 159 insertions(+), 169 deletions(-) create mode 100644 examples/raw/filtering.py diff --git a/eorg/const.py b/eorg/const.py index f21d3c3..458b3a5 100755 --- a/eorg/const.py +++ b/eorg/const.py @@ -1,6 +1,8 @@ +from eorg import tokens ESCAPE = ['\n'] -METADATA = ['TITLE', 'AUTHOR', 'EMAIL', 'DESCRIPTION', 'KEYWORDS'] + +METADATA = ['TITLE', 'AUTHOR', 'EMAIL', 'DESCRIPTION', 'KEYWORDS', 'FILETAGS', 'DATE'] t_META = r"^[#]\+(" + '|'.join(METADATA) +")\:" t_BLANK_LINE = '^\s*$' t_COMMENT_BEGIN = r"^\#\+BEGIN_COMMENT" @@ -18,17 +20,18 @@ t_HEADER = r"^\*+" # Start regex, End regex, skip start, skip end, count matches TOKENS = { - "META": (t_META, False, 2, -1, False), - "COMMENT": (t_COMMENT_BEGIN, t_COMMENT_END, 2, None, False), - "EXAMPLE": (t_EXAMPLE_BEGIN, t_EXAMPLE_END, 2, None, False), - "IMG": (t_IMG, False, 2, None, False), - "CAPTION": (t_CAPTIONS, False, 2, None, False), - "SRC_BEGIN": (t_SRC_BEGIN, t_SRC_END, 2, None, False), - "RESULTS": (t_SRC_BEGIN, t_SRC_END, 2, None, False), - "HEADER": (t_HEADER, False, 1, None, True), + tokens.META: (t_META, False, 2, -1, False), + tokens.COMMENT: (t_COMMENT_BEGIN, t_COMMENT_END, 2, None, False), + tokens.EXAMPLE: (t_EXAMPLE_BEGIN, t_EXAMPLE_END, 2, None, False), + tokens.IMAGE: (t_IMG, False, 2, None, False), + tokens.CAPTION: (t_CAPTIONS, False, 2, None, False), + tokens.SOURCE: (t_SRC_BEGIN, t_SRC_END, 2, None, False), + tokens.RESULTS: (t_SRC_BEGIN, t_SRC_END, 2, None, False), + tokens.HEADER: (t_HEADER, False, 1, None, True), } + class Token: __slots__ = ["token", "value"] diff --git a/eorg/generate.py b/eorg/generate.py index 52c860e..6a14376 100644 --- a/eorg/generate.py +++ b/eorg/generate.py @@ -1,5 +1,7 @@ from io import StringIO from eorg.const import Token, ESCAPE +from eorg import tokens +from eorg.tokens import Token from pygments import highlight from pygments.lexers import PythonLexer from pygments.lexers import get_lexer_by_name @@ -16,7 +18,7 @@ def src(doc, code, cls="", root=True): def img(doc, item, cls="", root=True): - caption = doc.previous("CAPTION") + caption = doc.previous(tokens.CAPTION) text = "" if caption: text = f'

{caption.value}

' @@ -39,39 +41,43 @@ def parse_list_html(doc, token, cls="", root=True): def parse_text_html(doc, token, cls="", root=True): - # if its the start of a text body wrap html tags - # else more complicated so return the tags - # if root is True: - # return f"{token.value}

" return f"{token.value}" def blockquote(doc, token, cls="", root=True): return "%s\n" % ( - cls, token.value.replace("\n", "
") + cls, + token.value.replace("\n", "
"), ) +def header(doc, item, cls="", root=True): + depth = "1" + if item.attrs: + depth = item.attrs.get("depth", "1") + return "%s\n" % (depth, cls, item.value, depth) + + builddoc = { - "HEADER1": ("h2", None), - "HEADER2": ("h3", None), - "HEADER3": ("h4", None), - "IMG": (img, "materialboxed center-align responsive-img"), - "LINK": (link, None), - "B": ("b", None), - "U": ("u", None), - "I": ("i", None), - "V": ("code", None), - "LIST": (parse_list_html, "flow-text"), - "TEXT": (parse_text_html, "flow-text"), - "SRC_BEGIN": (src, None), - "EXAMPLE": (blockquote, None), + tokens.HEADER: (header, None), + tokens.IMAGE: (img, "materialboxed center-align responsive-img"), + tokens.LINK: (link, None), + tokens.BOLD: ("b", None), + tokens.UNDERLINED: ("u", None), + tokens.ITALIC: ("i", None), + tokens.VERBATIM: ("code", None), + tokens.LIST: (parse_list_html, "flow-text"), + tokens.TEXT: (parse_text_html, "flow-text"), + tokens.SOURCE: (src, None), + tokens.EXAMPLE: (blockquote, None), + tokens.RESULTS: (blockquote, None), } def handle_token(doc, item, root=False): response = StringIO() match = builddoc.get(item.token) + if not match: return "" diff --git a/eorg/parser.py b/eorg/parser.py index 08e8cd5..3f4911f 100644 --- a/eorg/parser.py +++ b/eorg/parser.py @@ -1,19 +1,9 @@ import re +from eorg import tokens +from eorg.tokens import Token from eorg.const import TOKENS, METADATA, ESCAPE, image_extensions -class Token: - __slots__ = ["token", "value", "attrs"] - - def __init__(self, token, value="", attrs=""): - self.token = token - self.value = value - self.attrs = attrs - - def __repr__(self): - return f'Token(token="{self.token}", value="{self.value}", attrs="{self.attrs}")' - - class Document: pos = 0 doc = [] @@ -68,12 +58,12 @@ class Document: def images(self): for item in self.__iter__(): - if item.token == 'IMG': + if item.token == tokens.IMAGE: yield item.value[0] - if item.token == 'TEXT': + if item.token == tokens.TEXT: if isinstance(item.value, list): for token in item.value: - if token.token == 'IMG': + if token.token == tokens.IMAGE: yield token def __len__(self): @@ -99,31 +89,32 @@ def parsebody(text, rx): return rx, text + "\n" def parseline(text): + attrs=None for key, (rx, block, s, e, count) in TOKENS.items(): match = re.search(rx, text) if not match: continue level = len(match.group(0)) if count is True: - key += str(level) - if key == "META": + attrs={'depth': level} + if key == tokens.META: return ( block, Token(token=match.group(0)[s:e], value=text[match.end() :]), ) - if key == "SRC_BEGIN": + if key == tokens.SOURCE: return block, Token(token=key, attrs=parse_attrs(text[match.end():])) - return block, Token(token=key, value=text[match.end():]) + return block, Token(token=key, value=text[match.end():], attrs=attrs) text = text.strip() if text == "": - return False, Token(token="BREAK", value=text) - return False, Token(token="LIST", value=text + " ") + return False, Token(token=tokens.BLANK, value=text) + return False, Token(token=tokens.LIST, value=text + " ") def parse_text(txt): char = True - tokens = [] + tokenlist = [] def img(char, step): if char != '[': @@ -149,13 +140,13 @@ def parse_text(txt): char = next(step, None) if path.endswith(image_extensions): - tokens.append(Token('IMG', [path, alt])) + tokenlist.append(Token(tokens.IMAGE, [path, alt])) return '' - tokens.append(Token('LINK', [path, alt])) + tokenlist.append(Token(tokens.LINK, [path, alt])) return '' - def emphasis(char, step, end='*', tag='B'): + def emphasis(char, step, end, tag): if not char or char!=end: return char char = next(step, None) @@ -163,28 +154,28 @@ def parse_text(txt): while char and char not in [end] + ESCAPE: r += char char = next(step, None) - tokens.append(Token(tag, r)) + tokenlist.append(Token(tag, r)) return '' step = iter(txt) while char is not None: char = next(step, None) - char = emphasis(char, step, '*', 'B') - char = emphasis(char, step, '/', 'I') - char = emphasis(char, step, '_', 'U') - char = emphasis(char, step, '=', 'V') + char = emphasis(char, step, '*', tokens.BOLD) + char = emphasis(char, step, '/', tokens.ITALIC) + char = emphasis(char, step, '_', tokens.UNDERLINED) + char = emphasis(char, step, '=', tokens.VERBATIM) char = emphasis(char, step, '~', 'PRE') char = img(char, step) if not char: continue - if len(tokens) == 0: - tokens.append(Token('TEXT', char)) + if len(tokenlist) == 0: + tokenlist.append(Token(tokens.TEXT, char)) continue - if tokens[-1].token != 'TEXT': - tokens.append(Token('TEXT', char)) + if tokenlist[-1].token != tokens.TEXT: + tokenlist.append(Token(tokens.TEXT, char)) continue - tokens[-1].value += char - return tokens + tokenlist[-1].value += char + return tokenlist def parse(stream): @@ -199,11 +190,11 @@ def parse(stream): continue block, token = parseline(line) if token: - if doc.token() == "LIST" and token.token == "LIST": + if doc.token() == tokens.LIST and token.token == tokens.LIST: doc.update(token.value) continue doc.append(token) - for item in doc.filter('LIST'): + for item in doc.filter(tokens.LIST): item.value = parse_text(item.value) return doc diff --git a/eorg/version.py b/eorg/version.py index 0cbda9b..1e842aa 100755 --- a/eorg/version.py +++ b/eorg/version.py @@ -1 +1 @@ -__version__=0.52 +__version__=0.60 diff --git a/examples/html-enaml/example.py b/examples/html-enaml/example.py index 36796b2..27745d3 100755 --- a/examples/html-enaml/example.py +++ b/examples/html-enaml/example.py @@ -1,10 +1,8 @@ import os import enaml - from eorg.parser import parse from web.core.app import WebApplication - doc = [] with open(os.path.abspath("../../tests/fixtures/test.org"), "r") as fp: doc = parse(fp) diff --git a/examples/html-enaml/output.html b/examples/html-enaml/output.html index 778fcb5..61ee331 100644 --- a/examples/html-enaml/output.html +++ b/examples/html-enaml/output.html @@ -1,19 +1,19 @@ - - -test - - - + + +test + + + -
-

Emacs org-mode examples

-

Test DESCRIPTION

-
-
- Emacs org-mode examplesmore_vert

This is a link

+
+

Emacs org-mode examples

+

Test DESCRIPTION

+
+
+ Emacs org-mode examplesmore_vert

This is a link

-
-Card titlemore_vert

Test DESCRIPTION

+
+Card titlemore_vert

Test DESCRIPTION

diff --git a/examples/html-plain/example.py b/examples/html-plain/example.py index 0a4e743..ede27cd 100755 --- a/examples/html-plain/example.py +++ b/examples/html-plain/example.py @@ -1,24 +1,8 @@ -import os +import os from eorg.parser import parse +from eorg.generate import html -doc=[] with open(os.path.abspath('../../tests/fixtures/test.org'), 'r') as fp: doc = parse(fp) - -builddoc ={ - "TITLE": "h1", - "EMAIL": "h1", - "AUTHOR": "h1", - "HEADER1": "h1", - "HEADER2": "h2", - "HEADER3": "h3", - "SRC_BEGIN": "pre", - "COMMENT": "pre", -} - - -with open('test.html', 'w') as fp: - for item in doc: - print(item) - tag = builddoc[item.token] - fp.write('<%s>%s<%s/>\n' % (tag, item.value, tag)) + with open('test.html', 'w') as fp: + fp.write(html(doc).read()) diff --git a/examples/html-plain/test.html b/examples/html-plain/test.html index 50afeb1..c5d631e 100644 --- a/examples/html-plain/test.html +++ b/examples/html-plain/test.html @@ -1,26 +1,11 @@ -

Emacs org-mode examples

-

Emacs org-mode examples2

-

Eric H. Neilsen, Jr.

-

neilsen@fnal.gov

-

Header 1

-

Sub Header 1

-

Sub Header 2

-

Header 2

-
emacs-lisp :results silent(some lispy code)
-#+END_SRC
-
-#+BEGIN_COMMENT
-.. title: Building a sumo robot ring
-.. slug: building-a-sumo-robot-ring
-.. date: 2017-08-21 12:00:00 UTC
-.. tags: diy, robots, hackspace
-.. category: hardware
-.. description: Attempt at building a largish sumo ring
-.. type: text
-#+END_COMMENT
-
-
-#+BEGIN_SRC emacs-lisp :results silent
-(test code)
-#+END_SRC
-
+

Header 1

+

Sub Header 1

+

body text + over multiple lines +

Sub Header 2

+

Header 2

+
1
(some lispy code)
+
+
1
(test code)
+
+
\ No newline at end of file diff --git a/examples/raw/filtering.py b/examples/raw/filtering.py new file mode 100644 index 0000000..bee57c3 --- /dev/null +++ b/examples/raw/filtering.py @@ -0,0 +1,9 @@ +import os +from eorg.parser import parse +from eorg.generate import html + +with open(os.path.abspath('../../tests/fixtures/test.org'), 'r') as fp: + doc = parse(fp) + for row in doc.filter('SRC_BEGIN'): + print(row.token) + print(row.value) diff --git a/readme.org b/readme.org index 0d10274..9bcd6e0 100755 --- a/readme.org +++ b/readme.org @@ -21,14 +21,26 @@ python setup.py develop * Examples ** Generating plain html Simple raw html generation -#+BEGIN_SRC sh :results output drawer -head -n 5 examples/html-plain/example.py -#+END_SRC +#+BEGIN_SRC sh :results code + import os + from eorg.parser import parse + from eorg.generate import html -** Enaml web templating language -Written mainly to try out enaml-web -#+BEGIN_SRC sh :results output drawer -head -n 5 examples/html-enaml/example.py + with open(os.path.abspath('../../tests/fixtures/test.org'), 'r') as fp: + doc = parse(fp) + print(html(doc).read()) #+END_SRC +Simple raw html generation +#+BEGIN_SRC sh :results code + import os + from eorg.parser import parse + from eorg.generate import html + + with open(os.path.abspath('../../tests/fixtures/test.org'), 'r') as fp: + doc = parse(fp) + for row in doc.filter('src'): + print(row.token) +#+END_SRC + diff --git a/tests/test_documents.py b/tests/test_documents.py index b8d3871..0c00159 100755 --- a/tests/test_documents.py +++ b/tests/test_documents.py @@ -7,8 +7,8 @@ from eorg.generate import html def test_basic(): with open(os.path.abspath("./tests/fixtures/test.org"), "r") as fp: doc = parse(fp) - assert doc.title != '' - assert doc.author != '' + assert doc.title != "" + assert doc.author != "" assert len(doc) == 20 @@ -18,21 +18,22 @@ def test_body(): assert len([i for i in doc.body()]) > 0 - - def test_html_output(): with open(os.path.abspath("./tests/fixtures/test.org"), "r") as fp: doc = parse(fp) htmlbody = html(doc).read() print(htmlbody) - assert htmlbody == """

#+DATE: jkkj

Header 1

-

Sub Header 1

+ assert ( + htmlbody + == """

Header 1

+

Sub Header 1

body text over multiple lines -

Sub Header 2

-

Header 2

+

Sub Header 2

+

Header 2

1
(some lispy code)
 
1
(test code)
 
""" + ) diff --git a/tests/test_html.py b/tests/test_html.py index 5ccd6a7..606723a 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -1,7 +1,8 @@ import os import pytest from io import StringIO -from eorg.parser import Token +from eorg import tokens +from eorg.tokens import Token from eorg.parser import parse from eorg.parser import parse_text @@ -9,37 +10,37 @@ from eorg.parser import parse_text def test_emphasis(): text = "parse emphasis *bold text* _underlined text_ /italic text/ normal text" expected = [ - Token(token="TEXT", value="parse emphasis "), - Token(token="B", value="bold text"), - Token(token="TEXT", value=" "), - Token(token="U", value="underlined text"), - Token(token="TEXT", value=" "), - Token(token="I", value="italic text"), - Token("TEXT", " normal text"), + Token(token=tokens.TEXT, value="parse emphasis "), + Token(token=tokens.BOLD, value="bold text"), + Token(token=tokens.TEXT, value=" "), + Token(token=tokens.UNDERLINED, value="underlined text"), + Token(token=tokens.TEXT, value=" "), + Token(token=tokens.ITALIC, value="italic text"), + Token(tokens.TEXT, " normal text"), ] result = parse_text(text) - assert result[0].token == "TEXT" + assert result[0].token == tokens.TEXT assert expected[0].value == result[0].value - assert result[1].token == "B" + assert result[1].token == tokens.BOLD assert expected[1].value == result[1].value - assert result[2].token == "TEXT" + assert result[2].token == tokens.TEXT assert expected[2].value == result[2].value - assert result[3].token == "U" + assert result[3].token == tokens.UNDERLINED assert expected[3].value == result[3].value - assert result[4].token == "TEXT" + assert result[4].token == tokens.TEXT assert expected[4].value == result[4].value - assert result[5].token == "I" + assert result[5].token == tokens.ITALIC assert expected[5].value == result[5].value - assert result[6].token == "TEXT" + assert result[6].token == tokens.TEXT assert expected[6].value == result[6].value def test_image(): text = "parse image [[../../test.jpg][test]] after image" expected = [ - Token("TEXT", "parse image "), - Token("IMG", ["../../test.jpg", "test"]), - Token("TEXT", " after image"), + Token(tokens.TEXT, "parse image "), + Token(tokens.IMAGE, ["../../test.jpg", "test"]), + Token(tokens.TEXT, " after image"), ] result = parse_text(text) assert result[0].value == expected[0].value @@ -50,9 +51,9 @@ def test_image(): def test_link(): text = "parse link [[../../test.html][test]] after link" expected = [ - Token("TEXT", "parse link "), - Token("IMG", ["../../test.html", "test"]), - Token("TEXT", " after link"), + Token(tokens.TEXT, "parse link "), + Token(tokens.LINK, ["../../test.html", "test"]), + Token(tokens.TEXT, " after link"), ] result = parse_text(text) assert result[0].value == expected[0].value @@ -71,9 +72,9 @@ _I'm underlined text_ ) expected = [ - Token("BREAK", ""), + Token(tokens.BLANK, ""), Token( - "EXAMPLE", + tokens.EXAMPLE, """*I'm bold text* /I'm italic text/ _I'm underlined text_ @@ -94,8 +95,8 @@ head -n 5 examples/html-plain/example.py ) expected = [ - Token("BREAK", ""), - Token("SRC", """head -n 5 examples/html-plain/example.py\n"""), + Token(tokens.BLANK, ""), + Token(tokens.SOURCE, """head -n 5 examples/html-plain/example.py\n"""), ] result = parse(text).doc print(result)