Improved image handling and more tests.

Oliver Marks 2018-11-27 22:28:35 +00:00
parent 17994c2eb7
commit 1e0377836d
8 changed files with 141 additions and 39 deletions


@@ -1,4 +1,13 @@
 from eorg import tokens
+from collections import namedtuple
+
+TYPE_SINGLE = 0
+TYPE_BLOCK = 1
+TYPE_ATTRIBUTE = 2
+
+TokenStruct = namedtuple(
+    "TokenStruct", ["start", "end", "type", "start_pos", "end_pos", "count", "key"]
+)
+TokenStruct.__new__.__defaults__ = ("", False, TYPE_SINGLE, 2, None, False, "")
 ESCAPE = ["\n"]
@@ -26,9 +35,12 @@ t_TABLE_START = r"^\s*\|"
 t_TABLE_END = r"^(?!\s*\|).*$"
 t_RESULTS_START = r"^\#\+RESULTS:"
 t_CAPTIONS = r"^\#\+CAPTION:"
-t_IMG = r"^\[\[\s]]$"
+t_NAME = r"^\#\+NAME:"
+#t_IMG = r"^\[\[(\w|\.|-|_|/)+\]\]$"
+t_IMG = r"^\[\["
+t_IMG_END = r"\]\]"
 t_RESULTS_END = r"^\:..*"
+t_END_LABELS = r"^(?!\[|\#).*"
 t_BULLET_START = r"^\s*[\+|\-|0-9\.]"
 t_BULLET_END = r"^\s*(?![\+|\-|0-9]).*$"
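
For orientation (not part of the diff): the old t_IMG pattern only allowed a literal whitespace character between the brackets, so real paths never appeared to match it. The new pair just anchors the opening and closing brackets and leaves the path and optional description to parse_img_or_link. A quick sketch of that reading:

    import re
    from eorg import const

    # the bracket anchors alone are enough to recognise an image/link line
    assert re.search(const.t_IMG, "[[./images.jpg]]") is not None
    assert re.search(const.t_IMG_END, "[[./images.jpg][alt text]]") is not None
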
@@ -37,17 +49,38 @@ t_META_OTHER = r"^[#]\+[A-Z\_]+\:"
 # Start regex, End regex, skip start, skip end, count matches
 TOKENS = {
-    tokens.META: (t_META, False, 2, -1, False),
-    tokens.COMMENT: (t_COMMENT_BEGIN, t_COMMENT_END, 2, None, False),
-    tokens.EXAMPLE: (t_EXAMPLE_BEGIN, t_EXAMPLE_END, 2, None, False),
-    tokens.IMAGE: (t_IMG, False, 2, None, False),
-    tokens.CAPTION: (t_CAPTIONS, False, 2, None, False),
-    tokens.SOURCE: (t_SRC_BEGIN, t_SRC_END, 2, None, False),
-    tokens.TABLE: (t_TABLE_START, t_TABLE_END, 0, None, False),
-    tokens.BULLET: (t_BULLET_START, t_BULLET_END, 0, None, False),
-    tokens.RESULTS: (t_SRC_BEGIN, t_SRC_END, 2, None, False),
-    tokens.HEADER: (t_HEADER, False, 1, None, True),
-    tokens.META_OTHER: (t_META_OTHER, False, 2, -1, False),
+    tokens.META: TokenStruct(start=t_META, end_pos=-1),
+    tokens.COMMENT: TokenStruct(
+        start=t_COMMENT_BEGIN, end=t_COMMENT_END, type=TYPE_BLOCK, end_pos=-1
+    ),
+    tokens.EXAMPLE: TokenStruct(
+        start=t_EXAMPLE_BEGIN, end=t_EXAMPLE_END, type=TYPE_BLOCK, end_pos=-1
+    ),
+    tokens.IMAGE: TokenStruct(start=t_IMG, end_pos=-2),
+    tokens.CAPTION: TokenStruct(start=t_CAPTIONS, type=TYPE_ATTRIBUTE, key="CAPTION"),
+    tokens.SOURCE: TokenStruct(start=t_SRC_BEGIN, end=t_SRC_END),
+    tokens.TABLE: TokenStruct(
+        start=t_TABLE_START, end=t_TABLE_END, start_pos=0
+    ),
+    tokens.BULLET: TokenStruct(
+        start=t_BULLET_START, end=t_BULLET_END, start_pos=0
+    ),
+    tokens.RESULTS: TokenStruct(start=t_SRC_BEGIN, end=t_SRC_END),
+    tokens.HEADER: TokenStruct(start=t_HEADER, start_pos=1, count=True),
+    tokens.META_OTHER: TokenStruct(
+        start=t_META_OTHER, start_pos=2, end_pos=-1
+    ),
+    # tokens.META: (t_META, False, 2, -1, False),
+    # tokens.COMMENT: (t_COMMENT_BEGIN, t_COMMENT_END, 2, None, False),
+    # tokens.EXAMPLE: (t_EXAMPLE_BEGIN, t_EXAMPLE_END, 2, None, False),
+    # tokens.IMAGE: (t_IMG, False, 2, None, False),
+    # tokens.CAPTION: (t_CAPTIONS, False, 2, None, False),
+    # tokens.SOURCE: (t_SRC_BEGIN, t_SRC_END, 2, None, False),
+    # tokens.TABLE: (t_TABLE_START, t_TABLE_END, 0, None, False),
+    # tokens.BULLET: (t_BULLET_START, t_BULLET_END, 0, None, False),
+    # tokens.RESULTS: (t_SRC_BEGIN, t_SRC_END, 2, None, False),
+    # tokens.HEADER: (t_HEADER, False, 1, None, True),
+    # tokens.META_OTHER: (t_META_OTHER, False, 2, -1, False),
 }
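
For reference, a minimal sketch (not part of the commit) of how the namedtuple defaults fill in whatever an entry leaves out, using the CAPTION entry above:

    from eorg import const

    entry = const.TOKENS[const.tokens.CAPTION]
    assert entry.start == const.t_CAPTIONS
    assert entry.type == const.TYPE_ATTRIBUTE
    assert entry.key == "CAPTION"
    # everything not passed explicitly falls back to __defaults__
    assert entry.end is False
    assert entry.start_pos == 2 and entry.end_pos is None and entry.count is False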


@@ -19,7 +19,6 @@ def parse_img_or_link(char, step):
         path += char
         char = next(step, None)
     char = next(step, None)

     alt = ""
     if char == "[":
         char = next(step, None)
@@ -29,6 +28,6 @@ def parse_img_or_link(char, step):
            char = next(step, None)

     if path.endswith(image_extensions):
-        return "", Token(tokens.IMAGE, [path, alt])
+        return False, Token(tokens.IMAGE, [path, alt])
-    return "", Token(tokens.LINK, [path, alt])
+    return False, Token(tokens.LINK, [path, alt])


@@ -1,7 +1,13 @@
 import re
 from eorg import tokens
 from eorg.tokens import Token
-from eorg.const import TOKENS, METADATA, ESCAPE, image_extensions
+from eorg.const import (
+    TYPE_ATTRIBUTE,
+    TOKENS,
+    METADATA,
+    ESCAPE,
+    image_extensions,
+)
 from eorg.helper import parse_img_or_link
@@ -71,6 +77,7 @@ class Document:
         for item in self.__iter__():
             if item.token == tokens.IMAGE:
                 yield item
+                continue
             if isinstance(item.value, list):
                 for token in item.value:
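
The added continue keeps an image that was just yielded from also being walked as a list value. A rough sketch (not part of the diff) of the intended behaviour, mirroring test_fetch_image_list:

    from io import StringIO
    from eorg.parser import parse

    doc = parse(StringIO("[[./images.jpg]]\n[[./images.jpg][test]]"))
    images = list(doc.images())
    assert len(images) == 2   # one per link, no duplicates
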
@@ -103,30 +110,43 @@ def parsebody(text, rx):
     return rx, text + "\n"


-def parseline(text):
+def parseline(text, stream):
     attrs = None
-    for key, (rx, block, s, e, count) in TOKENS.items():
-        match = re.search(rx, text)
+    for key, token in TOKENS.items():
+        match = re.search(token.start, text)
         if not match:
             continue
-        value = text[match.end():]
-        level = len(match.group(0))
-        if count is True:
-            attrs = {"depth": level}
+        value = text[match.end() :]
+        if token.type == TYPE_ATTRIBUTE:
+            b, t = parseline(next(stream), stream)
+            t.attrs = {token.key: value}
+            return (token.end, t)
+        if token.count is True:
+            attrs = {"depth": len(match.group(0))}
         if key == tokens.META:
-            return (block, Token(token=match.group(0)[s:e], value=value))
+            return (
+                token.end,
+                Token(
+                    token=match.group(0)[token.start_pos:token.end_pos],
+                    value=value,
+                ),
+            )
+        if key == tokens.IMAGE:
+            return parse_img_or_link(text[0], iter(text[1:]))
         if key == tokens.SOURCE:
-            return block, Token(token=key, attrs=parse_attrs(value))
+            return token.end, Token(token=key, attrs=parse_attrs(value))
         if key == tokens.TABLE:
-            return block, Token(token=key, value=text + "\n")
+            return token.end, Token(token=key, value=text + "\n")
         if key == tokens.BULLET:
-            return block, Token(token=key, value=text + "\n")
+            return token.end, Token(token=key, value=text + "\n")
-        return block, Token(token=key, value=value, attrs=attrs)
+        return token.end, Token(token=key, value=value, attrs=attrs)

     text = text.strip()
     if text == "":
@@ -191,7 +211,7 @@ def parse_text(txt):
         char = emphasis(char, step, "_", tokens.UNDERLINED)
         char = emphasis(char, step, "=", tokens.VERBATIM)
         char = emphasis(char, step, "~", "PRE")
-        #char = img(char, step)
+        # char = img(char, step)
         char, token = parse_img_or_link(char, step)
         if token:
             tokenlist.append(token)
@@ -210,18 +230,26 @@ def parse_text(txt):
     return tokenlist


+def nextline(stream):
+    line = next(stream)
+    line = line.strip("\n")
+    yield line
+
+
 def parse(stream):
     doc = Document()
     block = False
     for line in stream:
+    # for line in nextline(stream):
         line = line.strip("\n")
         if block is not False:
             block, token = parsebody(line, block)
             if block:
                 doc.update(token)
             continue
-        block, token = parseline(line)
+        block, token = parseline(line, stream)
         if token:
             if doc.token() == tokens.LIST and token.token == tokens.LIST:
                 doc.update(token.value)


@@ -29,4 +29,3 @@ class Token:
     def __repr__(self):
         return f'Token(token="{self.token}", value="{self.value}", attrs="{self.attrs}")'


@@ -25,6 +25,5 @@ def test_fetch_image_list():
         Token(tokens.IMAGE, ["./images.jpg", ""]),
         Token(tokens.IMAGE, ["./images.jpg", "test"]),
     ]
     images = [i for i in doc.images()]
     assert len(images) == 2


@@ -47,7 +47,7 @@ def test_image():
     assert result[1].value == expected[1].value
     assert result[2].value == expected[2].value

-    text = "[[../../../images/opengl/point-sprite-shader.png]]"
+    text = StringIO("[[../../../images/opengl/point-sprite-shader.png]]")
     expected = [
         Token(
             tokens.IMAGE,
@@ -59,11 +59,35 @@ def test_image():
 def test_image_with_caption():
-    text = StringIO("""#+CAPTION: Test Image
-text [[../../test.jpg][test]]""")
+    text = StringIO(
+        """#+CAPTION: Test Image
+[[../../test.jpg]]"""
+    )
+    expected = [Token(tokens.IMAGE, ["../../test.jpg", ""])]
+    result = parse(text).doc
+    assert len(result) == 1
+    assert result[0].token == expected[0].token
+    assert result[0].value == expected[0].value
+
+    text = StringIO(
+        """#+CAPTION: Test Image
+[[../../test.jpg][test]]"""
+    )
+    expected = [Token(tokens.IMAGE, ["../../test.jpg", "test"])]
+    result = parse(text).doc
+    assert len(result) == 1
+    assert result[0].token == expected[0].token
+    assert result[0].value == expected[0].value
+
+
+def test_multiple_images():
+    text = StringIO(
+        """[[./images.jpg]]
+[[./images.jpg][test]]"""
+    )
     expected = [
-        Token(tokens.CAPTION, " Test Image"),
-        Token(tokens.LIST, [Token(tokens.IMAGE, ["../../test.jpg", "test"])]),
+        Token(tokens.IMAGE, ["./images.jpg", ""]),
+        Token(tokens.IMAGE, ["./images.jpg", "test"]),
     ]
     result = parse(text).doc
     assert len(result) == 2


@@ -1,5 +1,4 @@
 import os
-import pytest
 from eorg.parser import parse
 from eorg.generate import html


@@ -1,9 +1,11 @@
 import os
 import re
 import pytest
+from io import StringIO
 from eorg import const
 from eorg.parser import parse
 from eorg.generate import html
+from eorg.helper import parse_img_or_link


 def test_meta_headers():
@@ -76,3 +78,22 @@ def test_captions_regex():
     rx = const.t_CAPTIONS
     match = re.search(rx, text)
     assert match is not None
+
+
+def test_image_regex():
+    token = const.TOKENS[const.tokens.IMAGE]
+    text = "[[../../image.jpg]]"
+    match = re.search(token.start, text)
+    assert match is not None
+    block, token = parse_img_or_link(text[0], iter(text[1:]))
+    assert token.value[0] == "../../image.jpg"
+    assert token.value[1] == ""
+
+    token = const.TOKENS[const.tokens.IMAGE]
+    text = "[[../../image.jpg][test]]"
+    match = re.search(token.start, text)
+    assert match is not None
+    block, token = parse_img_or_link(text[0], iter(text[1:]))
+    assert token.value[0] == "../../image.jpg"
+    assert token.value[1] == "test"