From 1e0377836da6f7c7786ebbcad309c1727901782b Mon Sep 17 00:00:00 2001 From: Oliver Marks Date: Tue, 27 Nov 2018 22:28:35 +0000 Subject: [PATCH] Improved image handling and more tests. --- eorg/const.py | 59 +++++++++++++++++++++++++------- eorg/helper.py | 5 ++- eorg/parser.py | 58 +++++++++++++++++++++++-------- eorg/tokens.py | 1 - tests/test_document_functions.py | 1 - tests/test_document_parsing.py | 34 +++++++++++++++--- tests/test_documents.py | 1 - tests/test_regex.py | 21 ++++++++++++ 8 files changed, 141 insertions(+), 39 deletions(-) diff --git a/eorg/const.py b/eorg/const.py index b9a507b..c171cd5 100755 --- a/eorg/const.py +++ b/eorg/const.py @@ -1,4 +1,13 @@ from eorg import tokens +from collections import namedtuple + +TYPE_SINGLE = 0 +TYPE_BLOCK = 1 +TYPE_ATTRIBUTE = 2 +TokenStruct = namedtuple( + "TokenStruct", ["start", "end", "type", "start_pos", "end_pos", "count", "key"] +) +TokenStruct.__new__.__defaults__ = ("", False, TYPE_SINGLE, 2, None, False, "") ESCAPE = ["\n"] @@ -26,9 +35,12 @@ t_TABLE_START = r"^\s*\|" t_TABLE_END = r"^(?!\s*\|).*$" t_RESULTS_START = r"^\#\+RESULTS:" t_CAPTIONS = r"^\#\+CAPTION:" -t_IMG = r"^\[\[\s]]$" +t_NAME = r"^\#\+NAME:" +#t_IMG = r"^\[\[(\w|\.|-|_|/)+\]\]$" +t_IMG = r"^\[\[" +t_IMG_END = r"\]\]" t_RESULTS_END = r"^\:..*" - +t_END_LABELS = r"^(?!\[|\#).*" t_BULLET_START = r"^\s*[\+|\-|0-9\.]" t_BULLET_END = r"^\s*(?![\+|\-|0-9]).*$" @@ -37,17 +49,38 @@ t_META_OTHER = r"^[#]\+[A-Z\_]+\:" # Start regex, End regex, skip start, skip end, count matches TOKENS = { - tokens.META: (t_META, False, 2, -1, False), - tokens.COMMENT: (t_COMMENT_BEGIN, t_COMMENT_END, 2, None, False), - tokens.EXAMPLE: (t_EXAMPLE_BEGIN, t_EXAMPLE_END, 2, None, False), - tokens.IMAGE: (t_IMG, False, 2, None, False), - tokens.CAPTION: (t_CAPTIONS, False, 2, None, False), - tokens.SOURCE: (t_SRC_BEGIN, t_SRC_END, 2, None, False), - tokens.TABLE: (t_TABLE_START, t_TABLE_END, 0, None, False), - tokens.BULLET: (t_BULLET_START, t_BULLET_END, 0, None, False), - tokens.RESULTS: (t_SRC_BEGIN, t_SRC_END, 2, None, False), - tokens.HEADER: (t_HEADER, False, 1, None, True), - tokens.META_OTHER: (t_META_OTHER, False, 2, -1, False), + tokens.META: TokenStruct(start=t_META, end_pos=-1), + tokens.COMMENT: TokenStruct( + start=t_COMMENT_BEGIN, end=t_COMMENT_END, type=TYPE_BLOCK, end_pos=-1 + ), + tokens.EXAMPLE: TokenStruct( + start=t_EXAMPLE_BEGIN, end=t_EXAMPLE_END, type=TYPE_BLOCK, end_pos=-1 + ), + tokens.IMAGE: TokenStruct(start=t_IMG,end_pos=-2), + tokens.CAPTION: TokenStruct(start=t_CAPTIONS, type=TYPE_ATTRIBUTE, key="CAPTION"), + tokens.SOURCE: TokenStruct(start=t_SRC_BEGIN, end=t_SRC_END), + tokens.TABLE: TokenStruct( + start=t_TABLE_START, end=t_TABLE_END, start_pos=0 + ), + tokens.BULLET: TokenStruct( + start=t_BULLET_START, end=t_BULLET_END, start_pos=0 + ), + tokens.RESULTS: TokenStruct(start=t_SRC_BEGIN, end=t_SRC_END), + tokens.HEADER: TokenStruct(start=t_HEADER, start_pos=1, count=True), + tokens.META_OTHER: TokenStruct( + start=t_META_OTHER, start_pos=2, end_pos=-1 + ), + # tokens.META: (t_META, False, 2, -1, False), + # tokens.COMMENT: (t_COMMENT_BEGIN, t_COMMENT_END, 2, None, False), + # tokens.EXAMPLE: (t_EXAMPLE_BEGIN, t_EXAMPLE_END, 2, None, False), + # tokens.IMAGE: (t_IMG, False, 2, None, False), + # tokens.CAPTION: (t_CAPTIONS, False, 2, None, False), + # tokens.SOURCE: (t_SRC_BEGIN, t_SRC_END, 2, None, False), + # tokens.TABLE: (t_TABLE_START, t_TABLE_END, 0, None, False), + # tokens.BULLET: (t_BULLET_START, t_BULLET_END, 0, None, False), + # tokens.RESULTS: (t_SRC_BEGIN, t_SRC_END, 2, None, False), + # tokens.HEADER: (t_HEADER, False, 1, None, True), + # tokens.META_OTHER: (t_META_OTHER, False, 2, -1, False), } diff --git a/eorg/helper.py b/eorg/helper.py index 2a70710..cea0bc0 100644 --- a/eorg/helper.py +++ b/eorg/helper.py @@ -19,7 +19,6 @@ def parse_img_or_link(char, step): path += char char = next(step, None) char = next(step, None) - alt = "" if char == "[": char = next(step, None) @@ -29,6 +28,6 @@ def parse_img_or_link(char, step): char = next(step, None) if path.endswith(image_extensions): - return "", Token(tokens.IMAGE, [path, alt]) + return False, Token(tokens.IMAGE, [path, alt]) - return "", Token(tokens.LINK, [path, alt]) + return False, Token(tokens.LINK, [path, alt]) diff --git a/eorg/parser.py b/eorg/parser.py index 741a60d..df305b2 100644 --- a/eorg/parser.py +++ b/eorg/parser.py @@ -1,7 +1,13 @@ import re from eorg import tokens from eorg.tokens import Token -from eorg.const import TOKENS, METADATA, ESCAPE, image_extensions +from eorg.const import ( + TYPE_ATTRIBUTE, + TOKENS, + METADATA, + ESCAPE, + image_extensions, +) from eorg.helper import parse_img_or_link @@ -71,6 +77,7 @@ class Document: for item in self.__iter__(): if item.token == tokens.IMAGE: yield item + continue if isinstance(item.value, list): for token in item.value: @@ -103,30 +110,43 @@ def parsebody(text, rx): return rx, text + "\n" -def parseline(text): +def parseline(text, stream): attrs = None - for key, (rx, block, s, e, count) in TOKENS.items(): - match = re.search(rx, text) + for key, token in TOKENS.items(): + match = re.search(token.start, text) if not match: continue - value = text[match.end():] - level = len(match.group(0)) - if count is True: - attrs = {"depth": level} + value = text[match.end() :] + if token.type == TYPE_ATTRIBUTE: + b, t = parseline(next(stream), stream) + t.attrs = {token.key: value} + return (token.end, t) + + if token.count is True: + attrs = {"depth": len(match.group(0))} if key == tokens.META: - return (block, Token(token=match.group(0)[s:e], value=value)) + return ( + token.end, + Token( + token=match.group(0)[token.start_pos:token.end_pos], + value=value, + ), + ) + + if key == tokens.IMAGE: + return parse_img_or_link(text[0], iter(text[1:])) if key == tokens.SOURCE: - return block, Token(token=key, attrs=parse_attrs(value)) + return token.end, Token(token=key, attrs=parse_attrs(value)) if key == tokens.TABLE: - return block, Token(token=key, value=text + "\n") + return token.end, Token(token=key, value=text + "\n") if key == tokens.BULLET: - return block, Token(token=key, value=text + "\n") + return token.end, Token(token=key, value=text + "\n") - return block, Token(token=key, value=value, attrs=attrs) + return token.end, Token(token=key, value=value, attrs=attrs) text = text.strip() if text == "": @@ -191,7 +211,7 @@ def parse_text(txt): char = emphasis(char, step, "_", tokens.UNDERLINED) char = emphasis(char, step, "=", tokens.VERBATIM) char = emphasis(char, step, "~", "PRE") - #char = img(char, step) + # char = img(char, step) char, token = parse_img_or_link(char, step) if token: tokenlist.append(token) @@ -210,18 +230,26 @@ def parse_text(txt): return tokenlist +def nextline(stream): + line = next(stream) + line = line.strip("\n") + yield line + + def parse(stream): doc = Document() block = False for line in stream: + # for line in nextline(stream): line = line.strip("\n") + if block is not False: block, token = parsebody(line, block) if block: doc.update(token) continue - block, token = parseline(line) + block, token = parseline(line, stream) if token: if doc.token() == tokens.LIST and token.token == tokens.LIST: doc.update(token.value) diff --git a/eorg/tokens.py b/eorg/tokens.py index 62ae4a6..f2c154a 100644 --- a/eorg/tokens.py +++ b/eorg/tokens.py @@ -29,4 +29,3 @@ class Token: def __repr__(self): return f'Token(token="{self.token}", value="{self.value}", attrs="{self.attrs}")' - diff --git a/tests/test_document_functions.py b/tests/test_document_functions.py index 46c37b4..4eafc6b 100644 --- a/tests/test_document_functions.py +++ b/tests/test_document_functions.py @@ -25,6 +25,5 @@ def test_fetch_image_list(): Token(tokens.IMAGE, ["./images.jpg", ""]), Token(tokens.IMAGE, ["./images.jpg", "test"]), ] - images = [i for i in doc.images()] assert len(images) == 2 diff --git a/tests/test_document_parsing.py b/tests/test_document_parsing.py index 5f243b7..4824720 100644 --- a/tests/test_document_parsing.py +++ b/tests/test_document_parsing.py @@ -47,7 +47,7 @@ def test_image(): assert result[1].value == expected[1].value assert result[2].value == expected[2].value - text = "[[../../../images/opengl/point-sprite-shader.png]]" + text = StringIO("[[../../../images/opengl/point-sprite-shader.png]]") expected = [ Token( tokens.IMAGE, @@ -59,11 +59,35 @@ def test_image(): def test_image_with_caption(): - text = StringIO("""#+CAPTION: Test Image -text [[../../test.jpg][test]]""") + text = StringIO( + """#+CAPTION: Test Image +[[../../test.jpg]]""" + ) + expected = [Token(tokens.IMAGE, ["../../test.jpg", ""])] + result = parse(text).doc + assert len(result) == 1 + assert result[0].token == expected[0].token + assert result[0].value == expected[0].value + + text = StringIO( + """#+CAPTION: Test Image +[[../../test.jpg][test]]""" + ) + expected = [Token(tokens.IMAGE, ["../../test.jpg", "test"])] + result = parse(text).doc + assert len(result) == 1 + assert result[0].token == expected[0].token + assert result[0].value == expected[0].value + + +def test_multiple_images(): + text = StringIO( + """[[./images.jpg]] +[[./images.jpg][test]]""" + ) expected = [ - Token(tokens.CAPTION, " Test Image"), - Token(tokens.LIST, [Token(tokens.IMAGE, ["../../test.jpg", "test"])]), + Token(tokens.IMAGE, ["./images.jpg", ""]), + Token(tokens.IMAGE, ["./images.jpg", "test"]), ] result = parse(text).doc assert len(result) == 2 diff --git a/tests/test_documents.py b/tests/test_documents.py index 4135903..92226a3 100755 --- a/tests/test_documents.py +++ b/tests/test_documents.py @@ -1,5 +1,4 @@ import os -import pytest from eorg.parser import parse from eorg.generate import html diff --git a/tests/test_regex.py b/tests/test_regex.py index f80c4a1..e9718f3 100644 --- a/tests/test_regex.py +++ b/tests/test_regex.py @@ -1,9 +1,11 @@ import os import re import pytest +from io import StringIO from eorg import const from eorg.parser import parse from eorg.generate import html +from eorg.helper import parse_img_or_link def test_meta_headers(): @@ -76,3 +78,22 @@ def test_captions_regex(): rx = const.t_CAPTIONS match = re.search(rx, text) assert match is not None + + +def test_image_regex(): + token = const.TOKENS[const.tokens.IMAGE] + text = "[[../../image.jpg]]" + match = re.search(token.start, text) + assert match is not None + block, token = parse_img_or_link(text[0], iter(text[1:])) + assert token.value[0] == "../../image.jpg" + assert token.value[1] == "" + + token = const.TOKENS[const.tokens.IMAGE] + text = "[[../../image.jpg][test]]" + match = re.search(token.start, text) + assert match is not None + + block, token = parse_img_or_link(text[0], iter(text[1:])) + assert token.value[0] == "../../image.jpg" + assert token.value[1] == "test"