Refactored to use ints as token ids in place of strings

Oliver Marks 2018-11-03 14:10:37 +00:00
parent 84eca442dc
commit 5e8ec3015a
12 changed files with 159 additions and 169 deletions
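Note: the new eorg/tokens.py module that the files below now import is not itself shown in this diff view. A minimal sketch of what it plausibly contains, based only on the names used in this commit (the integer values are placeholder assumptions):

# Sketch of eorg/tokens.py (assumed): only the constant names are confirmed
# by this diff; the integer values below are placeholders.
META = 1
COMMENT = 2
EXAMPLE = 3
IMAGE = 4
CAPTION = 5
SOURCE = 6
RESULTS = 7
HEADER = 8
LINK = 9
BOLD = 10
UNDERLINED = 11
ITALIC = 12
VERBATIM = 13
LIST = 14
TEXT = 15
BLANK = 16


class Token:
    # Moved here from eorg.parser (see the parser diff below).
    __slots__ = ["token", "value", "attrs"]

    def __init__(self, token, value="", attrs=""):
        self.token = token
        self.value = value
        self.attrs = attrs

    def __repr__(self):
        return f'Token(token="{self.token}", value="{self.value}", attrs="{self.attrs}")'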

View File

@@ -1,6 +1,8 @@
from eorg import tokens
ESCAPE = ['\n']
METADATA = ['TITLE', 'AUTHOR', 'EMAIL', 'DESCRIPTION', 'KEYWORDS']
METADATA = ['TITLE', 'AUTHOR', 'EMAIL', 'DESCRIPTION', 'KEYWORDS', 'FILETAGS', 'DATE']
t_META = r"^[#]\+(" + '|'.join(METADATA) +")\:"
t_BLANK_LINE = '^\s*$'
t_COMMENT_BEGIN = r"^\#\+BEGIN_COMMENT"
@@ -18,17 +20,18 @@ t_HEADER = r"^\*+"
# Start regex, End regex, skip start, skip end, count matches
TOKENS = {
"META": (t_META, False, 2, -1, False),
"COMMENT": (t_COMMENT_BEGIN, t_COMMENT_END, 2, None, False),
"EXAMPLE": (t_EXAMPLE_BEGIN, t_EXAMPLE_END, 2, None, False),
"IMG": (t_IMG, False, 2, None, False),
"CAPTION": (t_CAPTIONS, False, 2, None, False),
"SRC_BEGIN": (t_SRC_BEGIN, t_SRC_END, 2, None, False),
"RESULTS": (t_SRC_BEGIN, t_SRC_END, 2, None, False),
"HEADER": (t_HEADER, False, 1, None, True),
tokens.META: (t_META, False, 2, -1, False),
tokens.COMMENT: (t_COMMENT_BEGIN, t_COMMENT_END, 2, None, False),
tokens.EXAMPLE: (t_EXAMPLE_BEGIN, t_EXAMPLE_END, 2, None, False),
tokens.IMAGE: (t_IMG, False, 2, None, False),
tokens.CAPTION: (t_CAPTIONS, False, 2, None, False),
tokens.SOURCE: (t_SRC_BEGIN, t_SRC_END, 2, None, False),
tokens.RESULTS: (t_SRC_BEGIN, t_SRC_END, 2, None, False),
tokens.HEADER: (t_HEADER, False, 1, None, True),
}
class Token:
__slots__ = ["token", "value"]
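For reference, a minimal sketch (not part of the commit) of how parseline, shown later in this diff, reads one of these tuples; the sample org line is illustrative:

# Illustrative only: unpack a TOKENS entry and apply it to a metadata line.
import re

from eorg import tokens
from eorg.const import TOKENS

rx, end_rx, skip_start, skip_end, count = TOKENS[tokens.META]
match = re.search(rx, "#+TITLE: eorg examples")
if match:
    # skip start/end slice the matched marker: "#+TITLE:" -> "TITLE"
    print(match.group(0)[skip_start:skip_end])
    # the remainder of the line becomes the token value
    print("#+TITLE: eorg examples"[match.end():])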

View File

@@ -1,5 +1,7 @@
from io import StringIO
from eorg.const import Token, ESCAPE
from eorg import tokens
from eorg.tokens import Token
from pygments import highlight
from pygments.lexers import PythonLexer
from pygments.lexers import get_lexer_by_name
@@ -16,7 +18,7 @@ def src(doc, code, cls="", root=True):
def img(doc, item, cls="", root=True):
caption = doc.previous("CAPTION")
caption = doc.previous(tokens.CAPTION)
text = ""
if caption:
text = f'<p class="center-align">{caption.value}</p>'
@@ -39,39 +41,43 @@ def parse_list_html(doc, token, cls="", root=True):
def parse_text_html(doc, token, cls="", root=True):
# if its the start of a text body wrap html tags
# else more complicated so return the tags
# if root is True:
# return f"<p{cls}>{token.value}</p>"
return f"{token.value}"
def blockquote(doc, token, cls="", root=True):
return "<blockquote%s>%s</blockquote>\n" % (
cls, token.value.replace("\n", "<br />")
cls,
token.value.replace("\n", "<br />"),
)
def header(doc, item, cls="", root=True):
depth = "1"
if item.attrs:
depth = item.attrs.get("depth", "1")
return "<h%s%s>%s</h%s>\n" % (depth, cls, item.value, depth)
builddoc = {
"HEADER1": ("h2", None),
"HEADER2": ("h3", None),
"HEADER3": ("h4", None),
"IMG": (img, "materialboxed center-align responsive-img"),
"LINK": (link, None),
"B": ("b", None),
"U": ("u", None),
"I": ("i", None),
"V": ("code", None),
"LIST": (parse_list_html, "flow-text"),
"TEXT": (parse_text_html, "flow-text"),
"SRC_BEGIN": (src, None),
"EXAMPLE": (blockquote, None),
tokens.HEADER: (header, None),
tokens.IMAGE: (img, "materialboxed center-align responsive-img"),
tokens.LINK: (link, None),
tokens.BOLD: ("b", None),
tokens.UNDERLINED: ("u", None),
tokens.ITALIC: ("i", None),
tokens.VERBATIM: ("code", None),
tokens.LIST: (parse_list_html, "flow-text"),
tokens.TEXT: (parse_text_html, "flow-text"),
tokens.SOURCE: (src, None),
tokens.EXAMPLE: (blockquote, None),
tokens.RESULTS: (blockquote, None),
}
def handle_token(doc, item, root=False):
response = StringIO()
match = builddoc.get(item.token)
if not match:
return ""

View File

@@ -1,19 +1,9 @@
import re
from eorg import tokens
from eorg.tokens import Token
from eorg.const import TOKENS, METADATA, ESCAPE, image_extensions
class Token:
__slots__ = ["token", "value", "attrs"]
def __init__(self, token, value="", attrs=""):
self.token = token
self.value = value
self.attrs = attrs
def __repr__(self):
return f'Token(token="{self.token}", value="{self.value}", attrs="{self.attrs}")'
class Document:
pos = 0
doc = []
@@ -68,12 +58,12 @@ class Document:
def images(self):
for item in self.__iter__():
if item.token == 'IMG':
if item.token == tokens.IMAGE:
yield item.value[0]
if item.token == 'TEXT':
if item.token == tokens.TEXT:
if isinstance(item.value, list):
for token in item.value:
if token.token == 'IMG':
if token.token == tokens.IMAGE:
yield token
def __len__(self):
@@ -99,31 +89,32 @@ def parsebody(text, rx):
return rx, text + "\n"
def parseline(text):
attrs=None
for key, (rx, block, s, e, count) in TOKENS.items():
match = re.search(rx, text)
if not match:
continue
level = len(match.group(0))
if count is True:
key += str(level)
if key == "META":
attrs={'depth': level}
if key == tokens.META:
return (
block,
Token(token=match.group(0)[s:e], value=text[match.end() :]),
)
if key == "SRC_BEGIN":
if key == tokens.SOURCE:
return block, Token(token=key, attrs=parse_attrs(text[match.end():]))
return block, Token(token=key, value=text[match.end():])
return block, Token(token=key, value=text[match.end():], attrs=attrs)
text = text.strip()
if text == "":
return False, Token(token="BREAK", value=text)
return False, Token(token="LIST", value=text + " ")
return False, Token(token=tokens.BLANK, value=text)
return False, Token(token=tokens.LIST, value=text + " ")
def parse_text(txt):
char = True
tokens = []
tokenlist = []
def img(char, step):
if char != '[':
@@ -149,13 +140,13 @@ def parse_text(txt):
char = next(step, None)
if path.endswith(image_extensions):
tokens.append(Token('IMG', [path, alt]))
tokenlist.append(Token(tokens.IMAGE, [path, alt]))
return ''
tokens.append(Token('LINK', [path, alt]))
tokenlist.append(Token(tokens.LINK, [path, alt]))
return ''
def emphasis(char, step, end='*', tag='B'):
def emphasis(char, step, end, tag):
if not char or char!=end:
return char
char = next(step, None)
@@ -163,28 +154,28 @@ def parse_text(txt):
while char and char not in [end] + ESCAPE:
r += char
char = next(step, None)
tokens.append(Token(tag, r))
tokenlist.append(Token(tag, r))
return ''
step = iter(txt)
while char is not None:
char = next(step, None)
char = emphasis(char, step, '*', 'B')
char = emphasis(char, step, '/', 'I')
char = emphasis(char, step, '_', 'U')
char = emphasis(char, step, '=', 'V')
char = emphasis(char, step, '*', tokens.BOLD)
char = emphasis(char, step, '/', tokens.ITALIC)
char = emphasis(char, step, '_', tokens.UNDERLINED)
char = emphasis(char, step, '=', tokens.VERBATIM)
char = emphasis(char, step, '~', 'PRE')
char = img(char, step)
if not char:
continue
if len(tokens) == 0:
tokens.append(Token('TEXT', char))
if len(tokenlist) == 0:
tokenlist.append(Token(tokens.TEXT, char))
continue
if tokens[-1].token != 'TEXT':
tokens.append(Token('TEXT', char))
if tokenlist[-1].token != tokens.TEXT:
tokenlist.append(Token(tokens.TEXT, char))
continue
tokens[-1].value += char
return tokens
tokenlist[-1].value += char
return tokenlist
def parse(stream):
@@ -199,11 +190,11 @@ def parse(stream):
continue
block, token = parseline(line)
if token:
if doc.token() == "LIST" and token.token == "LIST":
if doc.token() == tokens.LIST and token.token == tokens.LIST:
doc.update(token.value)
continue
doc.append(token)
for item in doc.filter('LIST'):
for item in doc.filter(tokens.LIST):
item.value = parse_text(item.value)
return doc
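With integer ids in place, downstream code filters on the token constants rather than literal strings; a small usage sketch (the fixture path is assumed, as in the tests):

# Illustrative usage of the refactored parser with integer token ids.
from eorg import tokens
from eorg.parser import parse

with open("tests/fixtures/test.org") as fp:  # path assumed
    doc = parse(fp)

for item in doc.filter(tokens.SOURCE):
    print(item.token, item.attrs)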

View File

@@ -1 +1 @@
__version__=0.52
__version__=0.60

View File

@@ -1,10 +1,8 @@
import os
import enaml
from eorg.parser import parse
from web.core.app import WebApplication
doc = []
with open(os.path.abspath("../../tests/fixtures/test.org"), "r") as fp:
doc = parse(fp)

View File

@@ -1,19 +1,19 @@
<html ref="140522719362184">
<head ref="140522719362504">
<title ref="140522719362568">test</title>
<link ref="140522719362120" rel="stylesheet" href="http://fonts.googleapis.com/icon?family=Material+Icons">
<link ref="140522719362056" type="text/css" rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/materialize/1.0.0/css/materialize.min.css" media="screen,projection">
<script ref="140522719362440" src="https://cdnjs.cloudflare.com/ajax/libs/materialize/1.0.0/js/materialize.min.js" type="text/javascript"></script><meta ref="140522716394632" name="viewport">
<html ref="140303513279624">
<head ref="140303513279880">
<title ref="140303513279816">test</title>
<link ref="140303513280200" rel="stylesheet" href="http://fonts.googleapis.com/icon?family=Material+Icons">
<link ref="140303513279496" type="text/css" rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/materialize/1.0.0/css/materialize.min.css" media="screen,projection">
<script ref="140303513280264" src="https://cdnjs.cloudflare.com/ajax/libs/materialize/1.0.0/js/materialize.min.js" type="text/javascript"></script><meta ref="140303510299400" name="viewport">
</head>
<body ref="140522716394504"><div ref="140522716394376" class="container"><div ref="140522716394248" class="row">
<h1 ref="140522716394184"> Emacs org-mode examples</h1>
<p ref="140522716394120"> Test DESCRIPTION</p>
<card ref="140522716393992" class="card"><div ref="140522716393864" class="card-image waves-effect waves-block waves-light"><img ref="140522716393736" class="activator" src="images/office.jpg"></div>
<div ref="140522716393608" class="card-content">
<span ref="140522716395528" class='card-title activator grey-text text-darken-4"&gt;Card Title&lt;i class="material-icons right'> Emacs org-mode examples<i ref="140522716395656" class="material-icons right">more_vert</i></span><p ref="140522716394824"><a ref="140522716397320" href="#">This is a link</a></p>
<body ref="140303510299144"><div ref="140303510299016" class="container"><div ref="140303510298888" class="row">
<h1 ref="140303510298760"> Emacs org-mode examples</h1>
<p ref="140303510301640"> Test DESCRIPTION</p>
<card ref="140303510301384" class="card"><div ref="140303510300104" class="card-image waves-effect waves-block waves-light"><img ref="140303510301768" class="activator" src="images/office.jpg"></div>
<div ref="140303510300360" class="card-content">
<span ref="140303510301512" class='card-title activator grey-text text-darken-4"&gt;Card Title&lt;i class="material-icons right'> Emacs org-mode examples<i ref="140303510301128" class="material-icons right">more_vert</i></span><p ref="140303510300488"><a ref="140303510298696" href="#">This is a link</a></p>
</div>
<div ref="140522716397192" class="card-reveal">
<span ref="140522716396808" class="card-title grey-text text-darken-4">Card title<i ref="140522716396296" class="material-icons right">more_vert</i></span><p ref="140522716397448"> Test DESCRIPTION</p>
<div ref="140303510300872" class="card-reveal">
<span ref="140303513074888" class="card-title grey-text text-darken-4">Card title<i ref="140303512786760" class="material-icons right">more_vert</i></span><p ref="140303512788360"> Test DESCRIPTION</p>
</div></card>
</div></div></body>
</html>

View File

@@ -1,24 +1,8 @@
import os
import os
from eorg.parser import parse
from eorg.generate import html
doc=[]
with open(os.path.abspath('../../tests/fixtures/test.org'), 'r') as fp:
doc = parse(fp)
builddoc ={
"TITLE": "h1",
"EMAIL": "h1",
"AUTHOR": "h1",
"HEADER1": "h1",
"HEADER2": "h2",
"HEADER3": "h3",
"SRC_BEGIN": "pre",
"COMMENT": "pre",
}
with open('test.html', 'w') as fp:
for item in doc:
print(item)
tag = builddoc[item.token]
fp.write('<%s>%s<%s/>\n' % (tag, item.value, tag))
with open('test.html', 'w') as fp:
fp.write(html(doc).read())

View File

@@ -1,26 +1,11 @@
<h1> Emacs org-mode examples<h1/>
<h1> Emacs org-mode examples2<h1/>
<h1> Eric H. Neilsen, Jr.<h1/>
<h1> neilsen@fnal.gov<h1/>
<h1> Header 1<h1/>
<h2> Sub Header 1<h2/>
<h2> Sub Header 2<h2/>
<h1> Header 2<h1/>
<pre>emacs-lisp :results silent(some lispy code)
#+END_SRC
#+BEGIN_COMMENT
.. title: Building a sumo robot ring
.. slug: building-a-sumo-robot-ring
.. date: 2017-08-21 12:00:00 UTC
.. tags: diy, robots, hackspace
.. category: hardware
.. description: Attempt at building a largish sumo ring
.. type: text
#+END_COMMENT
#+BEGIN_SRC emacs-lisp :results silent
(test code)
#+END_SRC
<pre/>
<h1> Header 1</h1>
<h2> Sub Header 1</h2>
<p class="flow-text">body <code>text</code>
over multiple <b>lines</b>
</p><h2> Sub Header 2</h2>
<h1> Header 2</h1>
<table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre>1</pre></div></td><td class="code"><div class="highlight"><pre><span></span><span class="p">(</span><span class="nv">some</span> <span class="nv">lispy</span> <span class="nv">code</span><span class="p">)</span>
</pre></div>
</td></tr></table><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre>1</pre></div></td><td class="code"><div class="highlight"><pre><span></span><span class="p">(</span><span class="nv">test</span> <span class="nv">code</span><span class="p">)</span>
</pre></div>
</td></tr></table>

View File

@@ -0,0 +1,9 @@
import os
from eorg.parser import parse
from eorg.generate import html
with open(os.path.abspath('../../tests/fixtures/test.org'), 'r') as fp:
doc = parse(fp)
for row in doc.filter('SRC_BEGIN'):
print(row.token)
print(row.value)

View File

@@ -21,14 +21,26 @@ python setup.py develop
* Examples
** Generating plain html
Simple raw html generation
#+BEGIN_SRC sh :results output drawer
head -n 5 examples/html-plain/example.py
#+END_SRC
#+BEGIN_SRC sh :results code
import os
from eorg.parser import parse
from eorg.generate import html
** Enaml web templating language
Written mainly to try out enaml-web
#+BEGIN_SRC sh :results output drawer
head -n 5 examples/html-enaml/example.py
with open(os.path.abspath('../../tests/fixtures/test.org'), 'r') as fp:
doc = parse(fp)
print(html(doc).read())
#+END_SRC
Simple raw html generation
#+BEGIN_SRC sh :results code
import os
from eorg.parser import parse
from eorg.generate import html
with open(os.path.abspath('../../tests/fixtures/test.org'), 'r') as fp:
doc = parse(fp)
for row in doc.filter('src'):
print(row.token)
#+END_SRC

View File

@@ -7,8 +7,8 @@ from eorg.generate import html
def test_basic():
with open(os.path.abspath("./tests/fixtures/test.org"), "r") as fp:
doc = parse(fp)
assert doc.title != ''
assert doc.author != ''
assert doc.title != ""
assert doc.author != ""
assert len(doc) == 20
@@ -18,21 +18,22 @@ def test_body():
assert len([i for i in doc.body()]) > 0
def test_html_output():
with open(os.path.abspath("./tests/fixtures/test.org"), "r") as fp:
doc = parse(fp)
htmlbody = html(doc).read()
print(htmlbody)
assert htmlbody == """<p class="flow-text">#+DATE: jkkj </p><h2> Header 1</h2>
<h3> Sub Header 1</h3>
assert (
htmlbody
== """<h1> Header 1</h1>
<h2> Sub Header 1</h2>
<p class="flow-text">body <code>text</code>
over multiple <b>lines</b>
</p><h3> Sub Header 2</h3>
<h2> Header 2</h2>
</p><h2> Sub Header 2</h2>
<h1> Header 2</h1>
<table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre>1</pre></div></td><td class="code"><div class="highlight"><pre><span></span><span class="p">(</span><span class="nv">some</span> <span class="nv">lispy</span> <span class="nv">code</span><span class="p">)</span>
</pre></div>
</td></tr></table><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre>1</pre></div></td><td class="code"><div class="highlight"><pre><span></span><span class="p">(</span><span class="nv">test</span> <span class="nv">code</span><span class="p">)</span>
</pre></div>
</td></tr></table>"""
)

View File

@@ -1,7 +1,8 @@
import os
import pytest
from io import StringIO
from eorg.parser import Token
from eorg import tokens
from eorg.tokens import Token
from eorg.parser import parse
from eorg.parser import parse_text
@@ -9,37 +10,37 @@ from eorg.parser import parse_text
def test_emphasis():
text = "parse emphasis *bold text* _underlined text_ /italic text/ normal text"
expected = [
Token(token="TEXT", value="parse emphasis "),
Token(token="B", value="bold text"),
Token(token="TEXT", value=" "),
Token(token="U", value="underlined text"),
Token(token="TEXT", value=" "),
Token(token="I", value="italic text"),
Token("TEXT", " normal text"),
Token(token=tokens.TEXT, value="parse emphasis "),
Token(token=tokens.BOLD, value="bold text"),
Token(token=tokens.TEXT, value=" "),
Token(token=tokens.UNDERLINED, value="underlined text"),
Token(token=tokens.TEXT, value=" "),
Token(token=tokens.ITALIC, value="italic text"),
Token(tokens.TEXT, " normal text"),
]
result = parse_text(text)
assert result[0].token == "TEXT"
assert result[0].token == tokens.TEXT
assert expected[0].value == result[0].value
assert result[1].token == "B"
assert result[1].token == tokens.BOLD
assert expected[1].value == result[1].value
assert result[2].token == "TEXT"
assert result[2].token == tokens.TEXT
assert expected[2].value == result[2].value
assert result[3].token == "U"
assert result[3].token == tokens.UNDERLINED
assert expected[3].value == result[3].value
assert result[4].token == "TEXT"
assert result[4].token == tokens.TEXT
assert expected[4].value == result[4].value
assert result[5].token == "I"
assert result[5].token == tokens.ITALIC
assert expected[5].value == result[5].value
assert result[6].token == "TEXT"
assert result[6].token == tokens.TEXT
assert expected[6].value == result[6].value
def test_image():
text = "parse image [[../../test.jpg][test]] after image"
expected = [
Token("TEXT", "parse image "),
Token("IMG", ["../../test.jpg", "test"]),
Token("TEXT", " after image"),
Token(tokens.TEXT, "parse image "),
Token(tokens.IMAGE, ["../../test.jpg", "test"]),
Token(tokens.TEXT, " after image"),
]
result = parse_text(text)
assert result[0].value == expected[0].value
@@ -50,9 +51,9 @@ def test_image():
def test_link():
text = "parse link [[../../test.html][test]] after link"
expected = [
Token("TEXT", "parse link "),
Token("IMG", ["../../test.html", "test"]),
Token("TEXT", " after link"),
Token(tokens.TEXT, "parse link "),
Token(tokens.LINK, ["../../test.html", "test"]),
Token(tokens.TEXT, " after link"),
]
result = parse_text(text)
assert result[0].value == expected[0].value
@@ -71,9 +72,9 @@ _I'm underlined text_
)
expected = [
Token("BREAK", ""),
Token(tokens.BLANK, ""),
Token(
"EXAMPLE",
tokens.EXAMPLE,
"""*I'm bold text*
/I'm italic text/
_I'm underlined text_
@@ -94,8 +95,8 @@ head -n 5 examples/html-plain/example.py
)
expected = [
Token("BREAK", ""),
Token("SRC", """head -n 5 examples/html-plain/example.py\n"""),
Token(tokens.BLANK, ""),
Token(tokens.SOURCE, """head -n 5 examples/html-plain/example.py\n"""),
]
result = parse(text).doc
print(result)