Added secondary image scanning test.

This commit is contained in:
Oliver Marks 2018-11-26 07:17:59 +00:00
parent 483606e95b
commit ac899b8ada
3 changed files with 68 additions and 39 deletions

View File

@ -18,7 +18,9 @@ class Document:
if not idx: if not idx:
if default is not None: if default is not None:
return default return default
raise AttributeError(f"Attribute of {name} does not exist in document") raise AttributeError(
f"Attribute of {name} does not exist in document"
)
if len(idx) == 1: if len(idx) == 1:
return self.doc[idx[0]].value return self.doc[idx[0]].value
return [self.doc[v].value for v in idx] return [self.doc[v].value for v in idx]
@ -40,9 +42,9 @@ class Document:
def previous(self, match): def previous(self, match):
if self.pos is 0: if self.pos is 0:
return None return None
if self.doc[self.pos-1].token != match: if self.doc[self.pos - 1].token != match:
return None return None
return self.doc[self.pos-1] return self.doc[self.pos - 1]
def filter(self, value): def filter(self, value):
"""Only return types that are of intrest like source blocks""" """Only return types that are of intrest like source blocks"""
@ -59,12 +61,11 @@ class Document:
def images(self): def images(self):
for item in self.__iter__(): for item in self.__iter__():
if item.token == tokens.IMAGE: if item.token == tokens.IMAGE:
yield item.value[0] yield item
if item.token == tokens.TEXT: if isinstance(item.value, list):
if isinstance(item.value, list): for token in item.value:
for token in item.value: if token.token == tokens.IMAGE:
if token.token == tokens.IMAGE: yield token
yield token
def __len__(self): def __len__(self):
return len(self.doc) return len(self.doc)
@ -73,23 +74,26 @@ class Document:
self.index.setdefault(value.token, []).append(len(self.doc)) self.index.setdefault(value.token, []).append(len(self.doc))
self.doc.append(value) self.doc.append(value)
def parse_attrs(text): def parse_attrs(text):
attrs = {} attrs = {}
value_list = text.split(':') value_list = text.split(":")
attrs['language'] = value_list.pop(0).strip() attrs["language"] = value_list.pop(0).strip()
for row in value_list: for row in value_list:
values = row.strip().split(' ') values = row.strip().split(" ")
attrs[values[0]] = values[1:] attrs[values[0]] = values[1:]
return attrs return attrs
def parsebody(text, rx): def parsebody(text, rx):
match = re.search(rx, text) match = re.search(rx, text)
if match: if match:
return False, None return False, None
return rx, text + "\n" return rx, text + "\n"
def parseline(text): def parseline(text):
attrs=None attrs = None
for key, (rx, block, s, e, count) in TOKENS.items(): for key, (rx, block, s, e, count) in TOKENS.items():
match = re.search(rx, text) match = re.search(rx, text)
if not match: if not match:
@ -97,18 +101,15 @@ def parseline(text):
value = text[match.end() :] value = text[match.end() :]
level = len(match.group(0)) level = len(match.group(0))
if count is True: if count is True:
attrs={'depth': level} attrs = {"depth": level}
if key == tokens.META: if key == tokens.META:
return ( return (block, Token(token=match.group(0)[s:e], value=value))
block,
Token(token=match.group(0)[s:e], value=value),
)
if key == tokens.SOURCE: if key == tokens.SOURCE:
return block, Token(token=key, attrs=parse_attrs(value)) return block, Token(token=key, attrs=parse_attrs(value))
if key == tokens.TABLE: if key == tokens.TABLE:
return block, Token(token=key, value=text+"\n") return block, Token(token=key, value=text + "\n")
if key == tokens.BULLET: if key == tokens.BULLET:
return block, Token(token=key, value=text+"\n") return block, Token(token=key, value=text + "\n")
return block, Token(token=key, value=value, attrs=attrs) return block, Token(token=key, value=value, attrs=attrs)
text = text.strip() text = text.strip()
@ -122,54 +123,54 @@ def parse_text(txt):
tokenlist = [] tokenlist = []
def img(char, step): def img(char, step):
if char != '[': if char != "[":
return char return char
char = next(step, None) char = next(step, None)
if char != '[': if char != "[":
return char return char
char = next(step, None) char = next(step, None)
path = '' path = ""
while char not in [']'] + ESCAPE: while char not in ["]"] + ESCAPE:
path += char path += char
char = next(step, None) char = next(step, None)
char = next(step, None) char = next(step, None)
alt = '' alt = ""
if char == '[': if char == "[":
char = next(step, None) char = next(step, None)
while char not in [']'] + ESCAPE: while char not in ["]"] + ESCAPE:
alt += char alt += char
char = next(step, None) char = next(step, None)
char = next(step, None) char = next(step, None)
if path.endswith(image_extensions): if path.endswith(image_extensions):
tokenlist.append(Token(tokens.IMAGE, [path, alt])) tokenlist.append(Token(tokens.IMAGE, [path, alt]))
return '' return ""
tokenlist.append(Token(tokens.LINK, [path, alt])) tokenlist.append(Token(tokens.LINK, [path, alt]))
return '' return ""
def emphasis(char, step, end, tag): def emphasis(char, step, end, tag):
if not char or char!=end: if not char or char != end:
return char return char
char = next(step, None) char = next(step, None)
r = '' r = ""
while char and char not in [end] + ESCAPE: while char and char not in [end] + ESCAPE:
r += char r += char
char = next(step, None) char = next(step, None)
tokenlist.append(Token(tag, r)) tokenlist.append(Token(tag, r))
return '' return ""
step = iter(txt) step = iter(txt)
while char is not None: while char is not None:
char = next(step, None) char = next(step, None)
char = emphasis(char, step, '*', tokens.BOLD) char = emphasis(char, step, "*", tokens.BOLD)
char = emphasis(char, step, '/', tokens.ITALIC) char = emphasis(char, step, "/", tokens.ITALIC)
char = emphasis(char, step, '_', tokens.UNDERLINED) char = emphasis(char, step, "_", tokens.UNDERLINED)
char = emphasis(char, step, '=', tokens.VERBATIM) char = emphasis(char, step, "=", tokens.VERBATIM)
char = emphasis(char, step, '~', 'PRE') char = emphasis(char, step, "~", "PRE")
char = img(char, step) char = img(char, step)
if not char: if not char:
continue continue
@ -187,7 +188,7 @@ def parse(stream):
doc = Document() doc = Document()
block = False block = False
for line in stream: for line in stream:
line = line.strip('\n') line = line.strip("\n")
if block is not False: if block is not False:
block, token = parsebody(line, block) block, token = parsebody(line, block)
if block: if block:

13
tests/fixtures/test_images.org vendored Normal file
View File

@ -0,0 +1,13 @@
#+TITLE: Emacs org-mode tables
#+AUTHOR: Eric H. Neilsen, Jr.
#+EMAIL: neilsen@fnal.gov
#+DATE: jkkj
#+KEYWORDS: emacs, orgmode, tests
#+DESCRIPTION: Test DESCRIPTION
#+KEYWORDS: key1, key2
Images
[[./images.jpg]]
[[./images.jpg][test]]

View File

@ -1,15 +1,30 @@
import os import os
import pytest import pytest
from eorg import tokens
from eorg.tokens import Token
from eorg.parser import parse from eorg.parser import parse
def test_fetch_attribute(): def test_fetch_attribute():
with open(os.path.abspath("./tests/fixtures/test.org"), "r") as fp: with open(os.path.abspath("./tests/fixtures/test.org"), "r") as fp:
doc = parse(fp) doc = parse(fp)
assert doc.title == ' Emacs org-mode examples' assert doc.title == " Emacs org-mode examples"
def test_fetch_non_existant_attribute(): def test_fetch_non_existant_attribute():
with open(os.path.abspath("./tests/fixtures/test.org"), "r") as fp: with open(os.path.abspath("./tests/fixtures/test.org"), "r") as fp:
doc = parse(fp) doc = parse(fp)
with pytest.raises(AttributeError): with pytest.raises(AttributeError):
doc.fake doc.fake
def test_fetch_image_list():
with open(os.path.abspath("./tests/fixtures/test_images.org"), "r") as fp:
doc = parse(fp)
expected = [
Token(tokens.IMAGE, ["./images.jpg", ""]),
Token(tokens.IMAGE, ["./images.jpg", "test"]),
]
images = [i for i in doc.images()]
assert len(images) == 2