Refactored to use ints as token ids in place of strings

Oliver Marks 2018-11-03 14:10:37 +00:00
parent 84eca442dc
commit 5e8ec3015a
12 changed files with 159 additions and 169 deletions
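Note: the new eorg/tokens.py module that the files below now import is not itself shown in this diff view. A minimal sketch of what it plausibly contains, based only on the names used in this commit (the integer values are placeholder assumptions):

# Sketch of eorg/tokens.py (assumed): only the constant names are confirmed
# by this diff; the integer values below are placeholders.
META = 1
COMMENT = 2
EXAMPLE = 3
IMAGE = 4
CAPTION = 5
SOURCE = 6
RESULTS = 7
HEADER = 8
LINK = 9
BOLD = 10
UNDERLINED = 11
ITALIC = 12
VERBATIM = 13
LIST = 14
TEXT = 15
BLANK = 16


class Token:
    # Moved here from eorg.parser (see the parser diff below).
    __slots__ = ["token", "value", "attrs"]

    def __init__(self, token, value="", attrs=""):
        self.token = token
        self.value = value
        self.attrs = attrs

    def __repr__(self):
        return f'Token(token="{self.token}", value="{self.value}", attrs="{self.attrs}")'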

View File

@@ -1,6 +1,8 @@
from eorg import tokens
ESCAPE = ['\n']
METADATA = ['TITLE', 'AUTHOR', 'EMAIL', 'DESCRIPTION', 'KEYWORDS']
METADATA = ['TITLE', 'AUTHOR', 'EMAIL', 'DESCRIPTION', 'KEYWORDS', 'FILETAGS', 'DATE']
t_META = r"^[#]\+(" + '|'.join(METADATA) +")\:"
t_BLANK_LINE = '^\s*$'
t_COMMENT_BEGIN = r"^\#\+BEGIN_COMMENT"
@@ -18,17 +20,18 @@ t_HEADER = r"^\*+"
# Start regex, End regex, skip start, skip end, count matches
TOKENS = {
"META": (t_META, False, 2, -1, False),
"COMMENT": (t_COMMENT_BEGIN, t_COMMENT_END, 2, None, False),
"EXAMPLE": (t_EXAMPLE_BEGIN, t_EXAMPLE_END, 2, None, False),
"IMG": (t_IMG, False, 2, None, False),
"CAPTION": (t_CAPTIONS, False, 2, None, False),
"SRC_BEGIN": (t_SRC_BEGIN, t_SRC_END, 2, None, False),
"RESULTS": (t_SRC_BEGIN, t_SRC_END, 2, None, False),
"HEADER": (t_HEADER, False, 1, None, True),
tokens.META: (t_META, False, 2, -1, False),
tokens.COMMENT: (t_COMMENT_BEGIN, t_COMMENT_END, 2, None, False),
tokens.EXAMPLE: (t_EXAMPLE_BEGIN, t_EXAMPLE_END, 2, None, False),
tokens.IMAGE: (t_IMG, False, 2, None, False),
tokens.CAPTION: (t_CAPTIONS, False, 2, None, False),
tokens.SOURCE: (t_SRC_BEGIN, t_SRC_END, 2, None, False),
tokens.RESULTS: (t_SRC_BEGIN, t_SRC_END, 2, None, False),
tokens.HEADER: (t_HEADER, False, 1, None, True),
}
class Token:
__slots__ = ["token", "value"]
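For reference, a minimal sketch (not part of the commit) of how parseline, shown later in this diff, reads one of these tuples; the sample org line is illustrative:

# Illustrative only: unpack a TOKENS entry and apply it to a metadata line.
import re

from eorg import tokens
from eorg.const import TOKENS

rx, end_rx, skip_start, skip_end, count = TOKENS[tokens.META]
match = re.search(rx, "#+TITLE: eorg examples")
if match:
    # skip start/end slice the matched marker: "#+TITLE:" -> "TITLE"
    print(match.group(0)[skip_start:skip_end])
    # the remainder of the line becomes the token value
    print("#+TITLE: eorg examples"[match.end():])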

View File

@@ -1,5 +1,7 @@
from io import StringIO
from eorg.const import Token, ESCAPE
from eorg import tokens
from eorg.tokens import Token
from pygments import highlight
from pygments.lexers import PythonLexer
from pygments.lexers import get_lexer_by_name
@@ -16,7 +18,7 @@ def src(doc, code, cls="", root=True):
def img(doc, item, cls="", root=True):
caption = doc.previous("CAPTION")
caption = doc.previous(tokens.CAPTION)
text = ""
if caption:
text = f'<p class="center-align">{caption.value}</p>'
@@ -39,39 +41,43 @@ def parse_list_html(doc, token, cls="", root=True):
def parse_text_html(doc, token, cls="", root=True):
# if its the start of a text body wrap html tags
# else more complicated so return the tags
# if root is True:
# return f"<p{cls}>{token.value}</p>"
return f"{token.value}"
def blockquote(doc, token, cls="", root=True):
return "<blockquote%s>%s</blockquote>\n" % (
cls, token.value.replace("\n", "<br />")
cls,
token.value.replace("\n", "<br />"),
)
def header(doc, item, cls="", root=True):
depth = "1"
if item.attrs:
depth = item.attrs.get("depth", "1")
return "<h%s%s>%s</h%s>\n" % (depth, cls, item.value, depth)
builddoc = {
"HEADER1": ("h2", None),
"HEADER2": ("h3", None),
"HEADER3": ("h4", None),
"IMG": (img, "materialboxed center-align responsive-img"),
"LINK": (link, None),
"B": ("b", None),
"U": ("u", None),
"I": ("i", None),
"V": ("code", None),
"LIST": (parse_list_html, "flow-text"),
"TEXT": (parse_text_html, "flow-text"),
"SRC_BEGIN": (src, None),
"EXAMPLE": (blockquote, None),
tokens.HEADER: (header, None),
tokens.IMAGE: (img, "materialboxed center-align responsive-img"),
tokens.LINK: (link, None),
tokens.BOLD: ("b", None),
tokens.UNDERLINED: ("u", None),
tokens.ITALIC: ("i", None),
tokens.VERBATIM: ("code", None),
tokens.LIST: (parse_list_html, "flow-text"),
tokens.TEXT: (parse_text_html, "flow-text"),
tokens.SOURCE: (src, None),
tokens.EXAMPLE: (blockquote, None),
tokens.RESULTS: (blockquote, None),
}
def handle_token(doc, item, root=False):
response = StringIO()
match = builddoc.get(item.token)
if not match:
return ""

View File

@@ -1,19 +1,9 @@
import re
from eorg import tokens
from eorg.tokens import Token
from eorg.const import TOKENS, METADATA, ESCAPE, image_extensions
class Token:
__slots__ = ["token", "value", "attrs"]
def __init__(self, token, value="", attrs=""):
self.token = token
self.value = value
self.attrs = attrs
def __repr__(self):
return f'Token(token="{self.token}", value="{self.value}", attrs="{self.attrs}")'
class Document:
pos = 0
doc = []
@@ -68,12 +58,12 @@ class Document:
def images(self):
for item in self.__iter__():
if item.token == 'IMG':
if item.token == tokens.IMAGE:
yield item.value[0]
if item.token == 'TEXT':
if item.token == tokens.TEXT:
if isinstance(item.value, list):
for token in item.value:
if token.token == 'IMG':
if token.token == tokens.IMAGE:
yield token
def __len__(self):
@@ -99,31 +89,32 @@ def parsebody(text, rx):
return rx, text + "\n"
def parseline(text):
attrs=None
for key, (rx, block, s, e, count) in TOKENS.items():
match = re.search(rx, text)
if not match:
continue
level = len(match.group(0))
if count is True:
key += str(level)
if key == "META":
attrs={'depth': level}
if key == tokens.META:
return (
block,
Token(token=match.group(0)[s:e], value=text[match.end() :]),
)
if key == "SRC_BEGIN":
if key == tokens.SOURCE:
return block, Token(token=key, attrs=parse_attrs(text[match.end():]))
return block, Token(token=key, value=text[match.end():])
return block, Token(token=key, value=text[match.end():], attrs=attrs)
text = text.strip()
if text == "":
return False, Token(token="BREAK", value=text)
return False, Token(token="LIST", value=text + " ")
return False, Token(token=tokens.BLANK, value=text)
return False, Token(token=tokens.LIST, value=text + " ")
def parse_text(txt):
char = True
tokens = []
tokenlist = []
def img(char, step):
if char != '[':
@@ -149,13 +140,13 @@ def parse_text(txt):
char = next(step, None)
if path.endswith(image_extensions):
tokens.append(Token('IMG', [path, alt]))
tokenlist.append(Token(tokens.IMAGE, [path, alt]))
return ''
tokens.append(Token('LINK', [path, alt]))
tokenlist.append(Token(tokens.LINK, [path, alt]))
return ''
def emphasis(char, step, end='*', tag='B'):
def emphasis(char, step, end, tag):
if not char or char!=end:
return char
char = next(step, None)
@@ -163,28 +154,28 @@ def parse_text(txt):
while char and char not in [end] + ESCAPE:
r += char
char = next(step, None)
tokens.append(Token(tag, r))
tokenlist.append(Token(tag, r))
return ''
step = iter(txt)
while char is not None:
char = next(step, None)
char = emphasis(char, step, '*', 'B')
char = emphasis(char, step, '/', 'I')
char = emphasis(char, step, '_', 'U')
char = emphasis(char, step, '=', 'V')
char = emphasis(char, step, '*', tokens.BOLD)
char = emphasis(char, step, '/', tokens.ITALIC)
char = emphasis(char, step, '_', tokens.UNDERLINED)
char = emphasis(char, step, '=', tokens.VERBATIM)
char = emphasis(char, step, '~', 'PRE')
char = img(char, step)
if not char:
continue
if len(tokens) == 0:
tokens.append(Token('TEXT', char))
if len(tokenlist) == 0:
tokenlist.append(Token(tokens.TEXT, char))
continue
if tokens[-1].token != 'TEXT':
tokens.append(Token('TEXT', char))
if tokenlist[-1].token != tokens.TEXT:
tokenlist.append(Token(tokens.TEXT, char))
continue
tokens[-1].value += char
return tokens
tokenlist[-1].value += char
return tokenlist
def parse(stream):
@@ -199,11 +190,11 @@ def parse(stream):
continue
block, token = parseline(line)
if token:
if doc.token() == "LIST" and token.token == "LIST":
if doc.token() == tokens.LIST and token.token == tokens.LIST:
doc.update(token.value)
continue
doc.append(token)
for item in doc.filter('LIST'):
for item in doc.filter(tokens.LIST):
item.value = parse_text(item.value)
return doc
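With integer ids in place, downstream code filters on the token constants rather than literal strings; a small usage sketch (the fixture path is assumed, as in the tests):

# Illustrative usage of the refactored parser with integer token ids.
from eorg import tokens
from eorg.parser import parse

with open("tests/fixtures/test.org") as fp:  # path assumed
    doc = parse(fp)

for item in doc.filter(tokens.SOURCE):
    print(item.token, item.attrs)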

View File

@@ -1 +1 @@
__version__=0.52
__version__=0.60

View File

@@ -1,10 +1,8 @@
import os
import enaml
from eorg.parser import parse
from web.core.app import WebApplication
doc = []
with open(os.path.abspath("../../tests/fixtures/test.org"), "r") as fp:
doc = parse(fp)

View File

@@ -1,19 +1,19 @@
<html ref="140522719362184">
<head ref="140522719362504">
<title ref="140522719362568">test</title>
<link ref="140522719362120" rel="stylesheet" href="http://fonts.googleapis.com/icon?family=Material+Icons">
<link ref="140522719362056" type="text/css" rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/materialize/1.0.0/css/materialize.min.css" media="screen,projection">
<script ref="140522719362440" src="https://cdnjs.cloudflare.com/ajax/libs/materialize/1.0.0/js/materialize.min.js" type="text/javascript"></script><meta ref="140522716394632" name="viewport">
<html ref="140303513279624">
<head ref="140303513279880">
<title ref="140303513279816">test</title>
<link ref="140303513280200" rel="stylesheet" href="http://fonts.googleapis.com/icon?family=Material+Icons">
<link ref="140303513279496" type="text/css" rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/materialize/1.0.0/css/materialize.min.css" media="screen,projection">
<script ref="140303513280264" src="https://cdnjs.cloudflare.com/ajax/libs/materialize/1.0.0/js/materialize.min.js" type="text/javascript"></script><meta ref="140303510299400" name="viewport">
</head>
<body ref="140522716394504"><div ref="140522716394376" class="container"><div ref="140522716394248" class="row">
<h1 ref="140522716394184"> Emacs org-mode examples</h1>
<p ref="140522716394120"> Test DESCRIPTION</p>
<card ref="140522716393992" class="card"><div ref="140522716393864" class="card-image waves-effect waves-block waves-light"><img ref="140522716393736" class="activator" src="images/office.jpg"></div>
<div ref="140522716393608" class="card-content">
<span ref="140522716395528" class='card-title activator grey-text text-darken-4"&gt;Card Title&lt;i class="material-icons right'> Emacs org-mode examples<i ref="140522716395656" class="material-icons right">more_vert</i></span><p ref="140522716394824"><a ref="140522716397320" href="#">This is a link</a></p>
<body ref="140303510299144"><div ref="140303510299016" class="container"><div ref="140303510298888" class="row">
<h1 ref="140303510298760"> Emacs org-mode examples</h1>
<p ref="140303510301640"> Test DESCRIPTION</p>
<card ref="140303510301384" class="card"><div ref="140303510300104" class="card-image waves-effect waves-block waves-light"><img ref="140303510301768" class="activator" src="images/office.jpg"></div>
<div ref="140303510300360" class="card-content">
<span ref="140303510301512" class='card-title activator grey-text text-darken-4"&gt;Card Title&lt;i class="material-icons right'> Emacs org-mode examples<i ref="140303510301128" class="material-icons right">more_vert</i></span><p ref="140303510300488"><a ref="140303510298696" href="#">This is a link</a></p>
</div>
<div ref="140522716397192" class="card-reveal">
<span ref="140522716396808" class="card-title grey-text text-darken-4">Card title<i ref="140522716396296" class="material-icons right">more_vert</i></span><p ref="140522716397448"> Test DESCRIPTION</p>
<div ref="140303510300872" class="card-reveal">
<span ref="140303513074888" class="card-title grey-text text-darken-4">Card title<i ref="140303512786760" class="material-icons right">more_vert</i></span><p ref="140303512788360"> Test DESCRIPTION</p>
</div></card>
</div></div></body>
</html>

View File

@@ -1,24 +1,8 @@
import os
import os
from eorg.parser import parse
from eorg.generate import html
doc=[]
with open(os.path.abspath('../../tests/fixtures/test.org'), 'r') as fp:
doc = parse(fp)
builddoc ={
"TITLE": "h1",
"EMAIL": "h1",
"AUTHOR": "h1",
"HEADER1": "h1",
"HEADER2": "h2",
"HEADER3": "h3",
"SRC_BEGIN": "pre",
"COMMENT": "pre",
}
with open('test.html', 'w') as fp:
for item in doc:
print(item)
tag = builddoc[item.token]
fp.write('<%s>%s<%s/>\n' % (tag, item.value, tag))
with open('test.html', 'w') as fp:
fp.write(html(doc).read())

View File

@@ -1,26 +1,11 @@
<h1> Emacs org-mode examples<h1/>
<h1> Emacs org-mode examples2<h1/>
<h1> Eric H. Neilsen, Jr.<h1/>
<h1> neilsen@fnal.gov<h1/>
<h1> Header 1<h1/>
<h2> Sub Header 1<h2/>
<h2> Sub Header 2<h2/>
<h1> Header 2<h1/>
<pre>emacs-lisp :results silent(some lispy code)
#+END_SRC
#+BEGIN_COMMENT
.. title: Building a sumo robot ring
.. slug: building-a-sumo-robot-ring
.. date: 2017-08-21 12:00:00 UTC
.. tags: diy, robots, hackspace
.. category: hardware
.. description: Attempt at building a largish sumo ring
.. type: text
#+END_COMMENT
#+BEGIN_SRC emacs-lisp :results silent
(test code)
#+END_SRC
<pre/>
<h1> Header 1</h1>
<h2> Sub Header 1</h2>
<p class="flow-text">body <code>text</code>
over multiple <b>lines</b>
</p><h2> Sub Header 2</h2>
<h1> Header 2</h1>
<table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre>1</pre></div></td><td class="code"><div class="highlight"><pre><span></span><span class="p">(</span><span class="nv">some</span> <span class="nv">lispy</span> <span class="nv">code</span><span class="p">)</span>
</pre></div>
</td></tr></table><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre>1</pre></div></td><td class="code"><div class="highlight"><pre><span></span><span class="p">(</span><span class="nv">test</span> <span class="nv">code</span><span class="p">)</span>
</pre></div>
</td></tr></table>

View File

@@ -0,0 +1,9 @@
import os
from eorg.parser import parse
from eorg.generate import html
with open(os.path.abspath('../../tests/fixtures/test.org'), 'r') as fp:
doc = parse(fp)
for row in doc.filter('SRC_BEGIN'):
print(row.token)
print(row.value)

View File

@@ -21,14 +21,26 @@ python setup.py develop
* Examples
** Generating plain html
Simple raw html generation
#+BEGIN_SRC sh :results output drawer
head -n 5 examples/html-plain/example.py
#+END_SRC
#+BEGIN_SRC sh :results code
import os
from eorg.parser import parse
from eorg.generate import html
** Enaml web templating language
Written mainly to try out enaml-web
#+BEGIN_SRC sh :results output drawer
head -n 5 examples/html-enaml/example.py
with open(os.path.abspath('../../tests/fixtures/test.org'), 'r') as fp:
doc = parse(fp)
print(html(doc).read())
#+END_SRC
Simple raw html generation
#+BEGIN_SRC sh :results code
import os
from eorg.parser import parse
from eorg.generate import html
with open(os.path.abspath('../../tests/fixtures/test.org'), 'r') as fp:
doc = parse(fp)
for row in doc.filter('src'):
print(row.token)
#+END_SRC

View File

@@ -7,8 +7,8 @@ from eorg.generate import html
def test_basic():
with open(os.path.abspath("./tests/fixtures/test.org"), "r") as fp:
doc = parse(fp)
assert doc.title != ''
assert doc.author != ''
assert doc.title != ""
assert doc.author != ""
assert len(doc) == 20
@@ -18,21 +18,22 @@ def test_body():
assert len([i for i in doc.body()]) > 0
def test_html_output():
with open(os.path.abspath("./tests/fixtures/test.org"), "r") as fp:
doc = parse(fp)
htmlbody = html(doc).read()
print(htmlbody)
assert htmlbody == """<p class="flow-text">#+DATE: jkkj </p><h2> Header 1</h2>
<h3> Sub Header 1</h3>
assert (
htmlbody
== """<h1> Header 1</h1>
<h2> Sub Header 1</h2>
<p class="flow-text">body <code>text</code>
over multiple <b>lines</b>
</p><h3> Sub Header 2</h3>
<h2> Header 2</h2>
</p><h2> Sub Header 2</h2>
<h1> Header 2</h1>
<table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre>1</pre></div></td><td class="code"><div class="highlight"><pre><span></span><span class="p">(</span><span class="nv">some</span> <span class="nv">lispy</span> <span class="nv">code</span><span class="p">)</span>
</pre></div>
</td></tr></table><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre>1</pre></div></td><td class="code"><div class="highlight"><pre><span></span><span class="p">(</span><span class="nv">test</span> <span class="nv">code</span><span class="p">)</span>
</pre></div>
</td></tr></table>"""
)

View File

@@ -1,7 +1,8 @@
import os
import pytest
from io import StringIO
from eorg.parser import Token
from eorg import tokens
from eorg.tokens import Token
from eorg.parser import parse
from eorg.parser import parse_text
@@ -9,37 +10,37 @@ from eorg.parser import parse_text
def test_emphasis():
text = "parse emphasis *bold text* _underlined text_ /italic text/ normal text"
expected = [
Token(token="TEXT", value="parse emphasis "),
Token(token="B", value="bold text"),
Token(token="TEXT", value=" "),
Token(token="U", value="underlined text"),
Token(token="TEXT", value=" "),
Token(token="I", value="italic text"),
Token("TEXT", " normal text"),
Token(token=tokens.TEXT, value="parse emphasis "),
Token(token=tokens.BOLD, value="bold text"),
Token(token=tokens.TEXT, value=" "),
Token(token=tokens.UNDERLINED, value="underlined text"),
Token(token=tokens.TEXT, value=" "),
Token(token=tokens.ITALIC, value="italic text"),
Token(tokens.TEXT, " normal text"),
]
result = parse_text(text)
assert result[0].token == "TEXT"
assert result[0].token == tokens.TEXT
assert expected[0].value == result[0].value
assert result[1].token == "B"
assert result[1].token == tokens.BOLD
assert expected[1].value == result[1].value
assert result[2].token == "TEXT"
assert result[2].token == tokens.TEXT
assert expected[2].value == result[2].value
assert result[3].token == "U"
assert result[3].token == tokens.UNDERLINED
assert expected[3].value == result[3].value
assert result[4].token == "TEXT"
assert result[4].token == tokens.TEXT
assert expected[4].value == result[4].value
assert result[5].token == "I"
assert result[5].token == tokens.ITALIC
assert expected[5].value == result[5].value
assert result[6].token == "TEXT"
assert result[6].token == tokens.TEXT
assert expected[6].value == result[6].value
def test_image():
text = "parse image [[../../test.jpg][test]] after image"
expected = [
Token("TEXT", "parse image "),
Token("IMG", ["../../test.jpg", "test"]),
Token("TEXT", " after image"),
Token(tokens.TEXT, "parse image "),
Token(tokens.IMAGE, ["../../test.jpg", "test"]),
Token(tokens.TEXT, " after image"),
]
result = parse_text(text)
assert result[0].value == expected[0].value
@@ -50,9 +51,9 @@ def test_image():
def test_link():
text = "parse link [[../../test.html][test]] after link"
expected = [
Token("TEXT", "parse link "),
Token("IMG", ["../../test.html", "test"]),
Token("TEXT", " after link"),
Token(tokens.TEXT, "parse link "),
Token(tokens.LINK, ["../../test.html", "test"]),
Token(tokens.TEXT, " after link"),
]
result = parse_text(text)
assert result[0].value == expected[0].value
@@ -71,9 +72,9 @@ _I'm underlined text_
)
expected = [
Token("BREAK", ""),
Token(tokens.BLANK, ""),
Token(
"EXAMPLE",
tokens.EXAMPLE,
"""*I'm bold text*
/I'm italic text/
_I'm underlined text_
@@ -94,8 +95,8 @@ head -n 5 examples/html-plain/example.py
)
expected = [
Token("BREAK", ""),
Token("SRC", """head -n 5 examples/html-plain/example.py\n"""),
Token(tokens.BLANK, ""),
Token(tokens.SOURCE, """head -n 5 examples/html-plain/example.py\n"""),
]
result = parse(text).doc
print(result)