Improved image handling and more tests.

This commit is contained in:
Oliver Marks 2018-11-27 22:28:35 +00:00
parent 17994c2eb7
commit 1e0377836d
8 changed files with 141 additions and 39 deletions

View File

@ -1,4 +1,13 @@
from eorg import tokens
from collections import namedtuple
# Rule kinds stored in TokenStruct.type.
# TYPE_SINGLE is the default kind (see the defaults assigned below).
TYPE_SINGLE = 0
# TYPE_BLOCK is set on the COMMENT and EXAMPLE rules, which also supply an end
# regex; presumably it marks multi-line blocks -- TODO confirm, nothing in the
# visible parser code tests for it.
TYPE_BLOCK = 1
# TYPE_ATTRIBUTE: the parser's parseline() parses the *following* line and
# stores this line's value in that token's attrs under TokenStruct.key.
TYPE_ATTRIBUTE = 2
# One line-matching rule used by the parser:
#   start     - regex searched against a line to select this rule
#   end       - value parseline() returns as the first element of its result;
#               False when the rule has no end pattern
#   type      - one of the TYPE_* constants above
#   start_pos - slice start applied to the matched text when building a META
#               token name
#   end_pos   - slice end applied to the matched text (None keeps the tail)
#   count     - when True, the matched text's length is stored as attrs["depth"]
#   key       - attrs key used by TYPE_ATTRIBUTE rules
TokenStruct = namedtuple(
"TokenStruct", ["start", "end", "type", "start_pos", "end_pos", "count", "key"]
)
# Defaults in field order, so a rule only needs to name the fields it overrides:
# start="", end=False, type=TYPE_SINGLE, start_pos=2, end_pos=None,
# count=False, key="".
TokenStruct.__new__.__defaults__ = ("", False, TYPE_SINGLE, 2, None, False, "")
ESCAPE = ["\n"]
@ -26,9 +35,12 @@ t_TABLE_START = r"^\s*\|"
t_TABLE_END = r"^(?!\s*\|).*$"
t_RESULTS_START = r"^\#\+RESULTS:"
t_CAPTIONS = r"^\#\+CAPTION:"
t_IMG = r"^\[\[\s]]$"
t_NAME = r"^\#\+NAME:"
#t_IMG = r"^\[\[(\w|\.|-|_|/)+\]\]$"
t_IMG = r"^\[\["
t_IMG_END = r"\]\]"
t_RESULTS_END = r"^\:..*"
t_END_LABELS = r"^(?!\[|\#).*"
t_BULLET_START = r"^\s*[\+|\-|0-9\.]"
t_BULLET_END = r"^\s*(?![\+|\-|0-9]).*$"
@ -37,17 +49,38 @@ t_META_OTHER = r"^[#]\+[A-Z\_]+\:"
# TokenStruct fields: start regex, end regex, type, start slice offset, end slice offset, count matches, attribute key
TOKENS = {
tokens.META: (t_META, False, 2, -1, False),
tokens.COMMENT: (t_COMMENT_BEGIN, t_COMMENT_END, 2, None, False),
tokens.EXAMPLE: (t_EXAMPLE_BEGIN, t_EXAMPLE_END, 2, None, False),
tokens.IMAGE: (t_IMG, False, 2, None, False),
tokens.CAPTION: (t_CAPTIONS, False, 2, None, False),
tokens.SOURCE: (t_SRC_BEGIN, t_SRC_END, 2, None, False),
tokens.TABLE: (t_TABLE_START, t_TABLE_END, 0, None, False),
tokens.BULLET: (t_BULLET_START, t_BULLET_END, 0, None, False),
tokens.RESULTS: (t_SRC_BEGIN, t_SRC_END, 2, None, False),
tokens.HEADER: (t_HEADER, False, 1, None, True),
tokens.META_OTHER: (t_META_OTHER, False, 2, -1, False),
tokens.META: TokenStruct(start=t_META, end_pos=-1),
tokens.COMMENT: TokenStruct(
start=t_COMMENT_BEGIN, end=t_COMMENT_END, type=TYPE_BLOCK, end_pos=-1
),
tokens.EXAMPLE: TokenStruct(
start=t_EXAMPLE_BEGIN, end=t_EXAMPLE_END, type=TYPE_BLOCK, end_pos=-1
),
tokens.IMAGE: TokenStruct(start=t_IMG,end_pos=-2),
tokens.CAPTION: TokenStruct(start=t_CAPTIONS, type=TYPE_ATTRIBUTE, key="CAPTION"),
tokens.SOURCE: TokenStruct(start=t_SRC_BEGIN, end=t_SRC_END),
tokens.TABLE: TokenStruct(
start=t_TABLE_START, end=t_TABLE_END, start_pos=0
),
tokens.BULLET: TokenStruct(
start=t_BULLET_START, end=t_BULLET_END, start_pos=0
),
tokens.RESULTS: TokenStruct(start=t_SRC_BEGIN, end=t_SRC_END),
tokens.HEADER: TokenStruct(start=t_HEADER, start_pos=1, count=True),
tokens.META_OTHER: TokenStruct(
start=t_META_OTHER, start_pos=2, end_pos=-1
),
# tokens.META: (t_META, False, 2, -1, False),
# tokens.COMMENT: (t_COMMENT_BEGIN, t_COMMENT_END, 2, None, False),
# tokens.EXAMPLE: (t_EXAMPLE_BEGIN, t_EXAMPLE_END, 2, None, False),
# tokens.IMAGE: (t_IMG, False, 2, None, False),
# tokens.CAPTION: (t_CAPTIONS, False, 2, None, False),
# tokens.SOURCE: (t_SRC_BEGIN, t_SRC_END, 2, None, False),
# tokens.TABLE: (t_TABLE_START, t_TABLE_END, 0, None, False),
# tokens.BULLET: (t_BULLET_START, t_BULLET_END, 0, None, False),
# tokens.RESULTS: (t_SRC_BEGIN, t_SRC_END, 2, None, False),
# tokens.HEADER: (t_HEADER, False, 1, None, True),
# tokens.META_OTHER: (t_META_OTHER, False, 2, -1, False),
}

View File

@ -19,7 +19,6 @@ def parse_img_or_link(char, step):
path += char
char = next(step, None)
char = next(step, None)
alt = ""
if char == "[":
char = next(step, None)
@ -29,6 +28,6 @@ def parse_img_or_link(char, step):
char = next(step, None)
if path.endswith(image_extensions):
return "", Token(tokens.IMAGE, [path, alt])
return False, Token(tokens.IMAGE, [path, alt])
return "", Token(tokens.LINK, [path, alt])
return False, Token(tokens.LINK, [path, alt])

View File

@ -1,7 +1,13 @@
import re
from eorg import tokens
from eorg.tokens import Token
from eorg.const import TOKENS, METADATA, ESCAPE, image_extensions
from eorg.const import (
TYPE_ATTRIBUTE,
TOKENS,
METADATA,
ESCAPE,
image_extensions,
)
from eorg.helper import parse_img_or_link
@ -71,6 +77,7 @@ class Document:
for item in self.__iter__():
if item.token == tokens.IMAGE:
yield item
continue
if isinstance(item.value, list):
for token in item.value:
@ -103,30 +110,43 @@ def parsebody(text, rx):
return rx, text + "\n"
def parseline(text):
def parseline(text, stream):
attrs = None
for key, (rx, block, s, e, count) in TOKENS.items():
match = re.search(rx, text)
for key, token in TOKENS.items():
match = re.search(token.start, text)
if not match:
continue
value = text[match.end():]
level = len(match.group(0))
if count is True:
attrs = {"depth": level}
value = text[match.end() :]
if token.type == TYPE_ATTRIBUTE:
b, t = parseline(next(stream), stream)
t.attrs = {token.key: value}
return (token.end, t)
if token.count is True:
attrs = {"depth": len(match.group(0))}
if key == tokens.META:
return (block, Token(token=match.group(0)[s:e], value=value))
return (
token.end,
Token(
token=match.group(0)[token.start_pos:token.end_pos],
value=value,
),
)
if key == tokens.IMAGE:
return parse_img_or_link(text[0], iter(text[1:]))
if key == tokens.SOURCE:
return block, Token(token=key, attrs=parse_attrs(value))
return token.end, Token(token=key, attrs=parse_attrs(value))
if key == tokens.TABLE:
return block, Token(token=key, value=text + "\n")
return token.end, Token(token=key, value=text + "\n")
if key == tokens.BULLET:
return block, Token(token=key, value=text + "\n")
return token.end, Token(token=key, value=text + "\n")
return block, Token(token=key, value=value, attrs=attrs)
return token.end, Token(token=key, value=value, attrs=attrs)
text = text.strip()
if text == "":
@ -191,7 +211,7 @@ def parse_text(txt):
char = emphasis(char, step, "_", tokens.UNDERLINED)
char = emphasis(char, step, "=", tokens.VERBATIM)
char = emphasis(char, step, "~", "PRE")
#char = img(char, step)
# char = img(char, step)
char, token = parse_img_or_link(char, step)
if token:
tokenlist.append(token)
@ -210,18 +230,26 @@ def parse_text(txt):
return tokenlist
def nextline(stream):
    """Yield the next line of *stream* with surrounding newlines stripped.

    This generator produces at most one item: the next line pulled from the
    iterator *stream*, with leading and trailing "\n" characters removed.
    If *stream* is already exhausted it yields nothing.

    Args:
        stream: an iterator of text lines (it must support ``next()``).

    Yields:
        The next line without its newline characters.
    """
    try:
        line = next(stream)
    except StopIteration:
        # Letting StopIteration escape a generator body is converted into
        # RuntimeError (PEP 479), so end the generator explicitly instead.
        return
    yield line.strip("\n")
def parse(stream):
doc = Document()
block = False
for line in stream:
# for line in nextline(stream):
line = line.strip("\n")
if block is not False:
block, token = parsebody(line, block)
if block:
doc.update(token)
continue
block, token = parseline(line)
block, token = parseline(line, stream)
if token:
if doc.token() == tokens.LIST and token.token == tokens.LIST:
doc.update(token.value)

View File

@ -29,4 +29,3 @@ class Token:
# Debug representation: renders this object's token, value and attrs
# attributes, each wrapped in double quotes, in constructor-like form.
def __repr__(self):
return f'Token(token="{self.token}", value="{self.value}", attrs="{self.attrs}")'

View File

@ -25,6 +25,5 @@ def test_fetch_image_list():
Token(tokens.IMAGE, ["./images.jpg", ""]),
Token(tokens.IMAGE, ["./images.jpg", "test"]),
]
images = [i for i in doc.images()]
assert len(images) == 2

View File

@ -47,7 +47,7 @@ def test_image():
assert result[1].value == expected[1].value
assert result[2].value == expected[2].value
text = "[[../../../images/opengl/point-sprite-shader.png]]"
text = StringIO("[[../../../images/opengl/point-sprite-shader.png]]")
expected = [
Token(
tokens.IMAGE,
@ -59,11 +59,35 @@ def test_image():
def test_image_with_caption():
text = StringIO("""#+CAPTION: Test Image
text [[../../test.jpg][test]]""")
text = StringIO(
"""#+CAPTION: Test Image
[[../../test.jpg]]"""
)
expected = [Token(tokens.IMAGE, ["../../test.jpg", ""])]
result = parse(text).doc
assert len(result) == 1
assert result[0].token == expected[0].token
assert result[0].value == expected[0].value
text = StringIO(
"""#+CAPTION: Test Image
[[../../test.jpg][test]]"""
)
expected = [Token(tokens.IMAGE, ["../../test.jpg", "test"])]
result = parse(text).doc
assert len(result) == 1
assert result[0].token == expected[0].token
assert result[0].value == expected[0].value
def test_multiple_images():
text = StringIO(
"""[[./images.jpg]]
[[./images.jpg][test]]"""
)
expected = [
Token(tokens.CAPTION, " Test Image"),
Token(tokens.LIST, [Token(tokens.IMAGE, ["../../test.jpg", "test"])]),
Token(tokens.IMAGE, ["./images.jpg", ""]),
Token(tokens.IMAGE, ["./images.jpg", "test"]),
]
result = parse(text).doc
assert len(result) == 2

View File

@ -1,5 +1,4 @@
import os
import pytest
from eorg.parser import parse
from eorg.generate import html

View File

@ -1,9 +1,11 @@
import os
import re
import pytest
from io import StringIO
from eorg import const
from eorg.parser import parse
from eorg.generate import html
from eorg.helper import parse_img_or_link
def test_meta_headers():
@ -76,3 +78,22 @@ def test_captions_regex():
rx = const.t_CAPTIONS
match = re.search(rx, text)
assert match is not None
def test_image_regex():
    """Check the IMAGE start pattern and the link helper on bracketed links.

    Each case is a line of text plus the [path, description] pair that
    parse_img_or_link is expected to place in the returned token's value.
    """
    cases = (
        ("[[../../image.jpg]]", ("../../image.jpg", "")),
        ("[[../../image.jpg][test]]", ("../../image.jpg", "test")),
    )
    for text, (expected_path, expected_alt) in cases:
        image_rule = const.TOKENS[const.tokens.IMAGE]
        # The rule's start regex must recognise the line as an image.
        assert re.search(image_rule.start, text) is not None
        # The helper takes the first character and an iterator over the rest.
        first_char, remainder = text[0], iter(text[1:])
        _, parsed = parse_img_or_link(first_char, remainder)
        assert parsed.value[0] == expected_path
        assert parsed.value[1] == expected_alt