From 700d01923ca34cf65b2eb7e42b68126e36ee5f3f Mon Sep 17 00:00:00 2001 From: Oly Date: Wed, 28 Nov 2018 08:50:32 +0000 Subject: [PATCH] Improved image handling now captures CAPTIONS with test --- eorg/const.py | 11 +++++++---- eorg/parser.py | 5 ++++- eorg/version.py | 2 +- tests/test_document_parsing.py | 18 ++++++++++++++++-- 4 files changed, 28 insertions(+), 8 deletions(-) diff --git a/eorg/const.py b/eorg/const.py index c171cd5..bdb1c1e 100755 --- a/eorg/const.py +++ b/eorg/const.py @@ -5,7 +5,8 @@ TYPE_SINGLE = 0 TYPE_BLOCK = 1 TYPE_ATTRIBUTE = 2 TokenStruct = namedtuple( - "TokenStruct", ["start", "end", "type", "start_pos", "end_pos", "count", "key"] + "TokenStruct", + ["start", "end", "type", "start_pos", "end_pos", "count", "key"], ) TokenStruct.__new__.__defaults__ = ("", False, TYPE_SINGLE, 2, None, False, "") @@ -36,7 +37,7 @@ t_TABLE_END = r"^(?!\s*\|).*$" t_RESULTS_START = r"^\#\+RESULTS:" t_CAPTIONS = r"^\#\+CAPTION:" t_NAME = r"^\#\+NAME:" -#t_IMG = r"^\[\[(\w|\.|-|_|/)+\]\]$" +# t_IMG = r"^\[\[(\w|\.|-|_|/)+\]\]$" t_IMG = r"^\[\[" t_IMG_END = r"\]\]" t_RESULTS_END = r"^\:..*" @@ -56,8 +57,10 @@ TOKENS = { tokens.EXAMPLE: TokenStruct( start=t_EXAMPLE_BEGIN, end=t_EXAMPLE_END, type=TYPE_BLOCK, end_pos=-1 ), - tokens.IMAGE: TokenStruct(start=t_IMG,end_pos=-2), - tokens.CAPTION: TokenStruct(start=t_CAPTIONS, type=TYPE_ATTRIBUTE, key="CAPTION"), + tokens.IMAGE: TokenStruct(start=t_IMG, end_pos=-2), + tokens.CAPTION: TokenStruct( + start=t_CAPTIONS, type=TYPE_ATTRIBUTE, key="CAPTION" + ), tokens.SOURCE: TokenStruct(start=t_SRC_BEGIN, end=t_SRC_END), tokens.TABLE: TokenStruct( start=t_TABLE_START, end=t_TABLE_END, start_pos=0 diff --git a/eorg/parser.py b/eorg/parser.py index df305b2..f72b832 100644 --- a/eorg/parser.py +++ b/eorg/parser.py @@ -79,6 +79,9 @@ class Document: yield item continue + if item.token != tokens.LIST: + continue + if isinstance(item.value, list): for token in item.value: if token.token == tokens.IMAGE: @@ -106,13 +109,13 @@ def parsebody(text, rx): match = re.search(rx, text) if match: return False, None - return rx, text + "\n" def parseline(text, stream): attrs = None for key, token in TOKENS.items(): + print(token) match = re.search(token.start, text) if not match: continue diff --git a/eorg/version.py b/eorg/version.py index 74e60c7..9d64552 100755 --- a/eorg/version.py +++ b/eorg/version.py @@ -1 +1 @@ -__version__=0.70 +__version__=0.80 diff --git a/tests/test_document_parsing.py b/tests/test_document_parsing.py index 4824720..8f9d16b 100644 --- a/tests/test_document_parsing.py +++ b/tests/test_document_parsing.py @@ -63,21 +63,35 @@ def test_image_with_caption(): """#+CAPTION: Test Image [[../../test.jpg]]""" ) - expected = [Token(tokens.IMAGE, ["../../test.jpg", ""])] + expected = [ + Token( + tokens.IMAGE, + ["../../test.jpg", ""], + attrs={"CAPTION": " Test Image"}, + ) + ] result = parse(text).doc assert len(result) == 1 assert result[0].token == expected[0].token assert result[0].value == expected[0].value + assert result[0].attrs == expected[0].attrs text = StringIO( """#+CAPTION: Test Image [[../../test.jpg][test]]""" ) - expected = [Token(tokens.IMAGE, ["../../test.jpg", "test"])] + expected = [ + Token( + tokens.IMAGE, + ["../../test.jpg", "test"], + attrs={"CAPTION": " Test Image"}, + ) + ] result = parse(text).doc assert len(result) == 1 assert result[0].token == expected[0].token assert result[0].value == expected[0].value + assert result[0].attrs == expected[0].attrs def test_multiple_images():