Working on improved image parsing for captions
This commit is contained in:
parent
ac899b8ada
commit
17994c2eb7
|
@ -0,0 +1,34 @@
|
||||||
|
from eorg import tokens
|
||||||
|
from eorg.tokens import Token
|
||||||
|
from eorg.const import ESCAPE, image_extensions
|
||||||
|
|
||||||
|
|
||||||
|
def parse_img_or_link(char, step):
    """Parse an org-mode ``[[path]]`` / ``[[path][alt]]`` construct.

    Args:
        char: The current character taken from the stream.
        step: An iterator yielding the remaining characters of the line.

    Returns:
        A ``(char, token)`` pair.  If the input does not open a link, the
        current/lookahead character and ``None`` are returned so the caller
        can continue normal text handling.  On success, an empty string and
        a ``Token`` — IMAGE when the path ends with a known image
        extension, LINK otherwise — with ``[path, alt]`` as its value.
    """
    if char != "[":
        return char, None
    char = next(step, None)
    if char != "[":
        # Lone "[" — not a link opener; hand the lookahead char back.
        return char, None
    char = next(step, None)

    # Collect the path up to the first closing "]".
    # Guard against an unterminated construct: next() yields None at end
    # of stream, and ``path += None`` would otherwise raise TypeError.
    path = ""
    while char is not None and char not in ["]"] + ESCAPE:
        path += char
        char = next(step, None)
    char = next(step, None)  # step past the "]" (no-op at end of stream)

    # Optional ``[alt]`` section directly after the path.
    alt = ""
    if char == "[":
        char = next(step, None)
        while char is not None and char not in ["]"] + ESCAPE:
            alt += char
            char = next(step, None)
        char = next(step, None)  # consume the closing "]"

    if path.endswith(image_extensions):
        return "", Token(tokens.IMAGE, [path, alt])
    return "", Token(tokens.LINK, [path, alt])
|
|
@ -2,6 +2,7 @@ import re
|
||||||
from eorg import tokens
|
from eorg import tokens
|
||||||
from eorg.tokens import Token
|
from eorg.tokens import Token
|
||||||
from eorg.const import TOKENS, METADATA, ESCAPE, image_extensions
|
from eorg.const import TOKENS, METADATA, ESCAPE, image_extensions
|
||||||
|
from eorg.helper import parse_img_or_link
|
||||||
|
|
||||||
|
|
||||||
class Document:
|
class Document:
|
||||||
|
@ -18,16 +19,20 @@ class Document:
|
||||||
if not idx:
|
if not idx:
|
||||||
if default is not None:
|
if default is not None:
|
||||||
return default
|
return default
|
||||||
|
|
||||||
raise AttributeError(
|
raise AttributeError(
|
||||||
f"Attribute of {name} does not exist in document"
|
f"Attribute of {name} does not exist in document"
|
||||||
)
|
)
|
||||||
|
|
||||||
if len(idx) == 1:
|
if len(idx) == 1:
|
||||||
return self.doc[idx[0]].value
|
return self.doc[idx[0]].value
|
||||||
|
|
||||||
return [self.doc[v].value for v in idx]
|
return [self.doc[v].value for v in idx]
|
||||||
|
|
||||||
def token(self):
    """Return the token type of the most recently added document entry.

    Yields an empty string when the document holds no entries yet.
    """
    return self.doc[-1].token if self.doc else ""
|
||||||
|
|
||||||
def update(self, value):
|
def update(self, value):
|
||||||
|
@ -37,13 +42,16 @@ class Document:
|
||||||
self.pos = 0
|
self.pos = 0
|
||||||
for item in self.doc:
|
for item in self.doc:
|
||||||
yield item
|
yield item
|
||||||
|
|
||||||
self.pos += 1
|
self.pos += 1
|
||||||
|
|
||||||
def previous(self, match):
    """Return the token preceding the current position when its type is
    ``match``; otherwise return ``None``.

    Also returns ``None`` at the start of the document, where there is no
    previous token to inspect.
    """
    # Fixed: the original used ``self.pos is 0`` — identity comparison
    # with an int literal relies on CPython small-int caching and raises
    # a SyntaxWarning on modern Python; equality is the correct test.
    if self.pos == 0:
        return None
    if self.doc[self.pos - 1].token != match:
        return None
    return self.doc[self.pos - 1]
|
||||||
|
|
||||||
def filter(self, value):
|
def filter(self, value):
|
||||||
|
@ -56,12 +64,14 @@ class Document:
|
||||||
for item in self.doc:
|
for item in self.doc:
|
||||||
if item.token in METADATA:
|
if item.token in METADATA:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
yield item
|
yield item
|
||||||
|
|
||||||
def images(self):
|
def images(self):
|
||||||
for item in self.__iter__():
|
for item in self.__iter__():
|
||||||
if item.token == tokens.IMAGE:
|
if item.token == tokens.IMAGE:
|
||||||
yield item
|
yield item
|
||||||
|
|
||||||
if isinstance(item.value, list):
|
if isinstance(item.value, list):
|
||||||
for token in item.value:
|
for token in item.value:
|
||||||
if token.token == tokens.IMAGE:
|
if token.token == tokens.IMAGE:
|
||||||
|
@ -89,6 +99,7 @@ def parsebody(text, rx):
|
||||||
match = re.search(rx, text)
|
match = re.search(rx, text)
|
||||||
if match:
|
if match:
|
||||||
return False, None
|
return False, None
|
||||||
|
|
||||||
return rx, text + "\n"
|
return rx, text + "\n"
|
||||||
|
|
||||||
|
|
||||||
|
@ -98,23 +109,29 @@ def parseline(text):
|
||||||
match = re.search(rx, text)
|
match = re.search(rx, text)
|
||||||
if not match:
|
if not match:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
value = text[match.end():]
|
value = text[match.end():]
|
||||||
level = len(match.group(0))
|
level = len(match.group(0))
|
||||||
if count is True:
|
if count is True:
|
||||||
attrs = {"depth": level}
|
attrs = {"depth": level}
|
||||||
if key == tokens.META:
|
if key == tokens.META:
|
||||||
return (block, Token(token=match.group(0)[s:e], value=value))
|
return (block, Token(token=match.group(0)[s:e], value=value))
|
||||||
|
|
||||||
if key == tokens.SOURCE:
|
if key == tokens.SOURCE:
|
||||||
return block, Token(token=key, attrs=parse_attrs(value))
|
return block, Token(token=key, attrs=parse_attrs(value))
|
||||||
|
|
||||||
if key == tokens.TABLE:
|
if key == tokens.TABLE:
|
||||||
return block, Token(token=key, value=text + "\n")
|
return block, Token(token=key, value=text + "\n")
|
||||||
|
|
||||||
if key == tokens.BULLET:
|
if key == tokens.BULLET:
|
||||||
return block, Token(token=key, value=text + "\n")
|
return block, Token(token=key, value=text + "\n")
|
||||||
|
|
||||||
return block, Token(token=key, value=value, attrs=attrs)
|
return block, Token(token=key, value=value, attrs=attrs)
|
||||||
|
|
||||||
text = text.strip()
|
text = text.strip()
|
||||||
if text == "":
|
if text == "":
|
||||||
return False, Token(token=tokens.BLANK, value=text)
|
return False, Token(token=tokens.BLANK, value=text)
|
||||||
|
|
||||||
return False, Token(token=tokens.LIST, value=text + " ")
|
return False, Token(token=tokens.LIST, value=text + " ")
|
||||||
|
|
||||||
|
|
||||||
|
@ -125,10 +142,12 @@ def parse_text(txt):
|
||||||
def img(char, step):
|
def img(char, step):
|
||||||
if char != "[":
|
if char != "[":
|
||||||
return char
|
return char
|
||||||
|
|
||||||
char = next(step, None)
|
char = next(step, None)
|
||||||
|
|
||||||
if char != "[":
|
if char != "[":
|
||||||
return char
|
return char
|
||||||
|
|
||||||
char = next(step, None)
|
char = next(step, None)
|
||||||
|
|
||||||
path = ""
|
path = ""
|
||||||
|
@ -155,6 +174,7 @@ def parse_text(txt):
|
||||||
def emphasis(char, step, end, tag):
|
def emphasis(char, step, end, tag):
|
||||||
if not char or char != end:
|
if not char or char != end:
|
||||||
return char
|
return char
|
||||||
|
|
||||||
char = next(step, None)
|
char = next(step, None)
|
||||||
r = ""
|
r = ""
|
||||||
while char and char not in [end] + ESCAPE:
|
while char and char not in [end] + ESCAPE:
|
||||||
|
@ -171,15 +191,21 @@ def parse_text(txt):
|
||||||
char = emphasis(char, step, "_", tokens.UNDERLINED)
|
char = emphasis(char, step, "_", tokens.UNDERLINED)
|
||||||
char = emphasis(char, step, "=", tokens.VERBATIM)
|
char = emphasis(char, step, "=", tokens.VERBATIM)
|
||||||
char = emphasis(char, step, "~", "PRE")
|
char = emphasis(char, step, "~", "PRE")
|
||||||
char = img(char, step)
|
#char = img(char, step)
|
||||||
|
char, token = parse_img_or_link(char, step)
|
||||||
|
if token:
|
||||||
|
tokenlist.append(token)
|
||||||
if not char:
|
if not char:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if len(tokenlist) == 0:
|
if len(tokenlist) == 0:
|
||||||
tokenlist.append(Token(tokens.TEXT, char))
|
tokenlist.append(Token(tokens.TEXT, char))
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if tokenlist[-1].token != tokens.TEXT:
|
if tokenlist[-1].token != tokens.TEXT:
|
||||||
tokenlist.append(Token(tokens.TEXT, char))
|
tokenlist.append(Token(tokens.TEXT, char))
|
||||||
continue
|
continue
|
||||||
|
|
||||||
tokenlist[-1].value += char
|
tokenlist[-1].value += char
|
||||||
return tokenlist
|
return tokenlist
|
||||||
|
|
||||||
|
@ -194,11 +220,13 @@ def parse(stream):
|
||||||
if block:
|
if block:
|
||||||
doc.update(token)
|
doc.update(token)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
block, token = parseline(line)
|
block, token = parseline(line)
|
||||||
if token:
|
if token:
|
||||||
if doc.token() == tokens.LIST and token.token == tokens.LIST:
|
if doc.token() == tokens.LIST and token.token == tokens.LIST:
|
||||||
doc.update(token.value)
|
doc.update(token.value)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
doc.append(token)
|
doc.append(token)
|
||||||
|
|
||||||
for item in doc.filter(tokens.LIST):
|
for item in doc.filter(tokens.LIST):
|
||||||
|
|
|
@ -49,12 +49,30 @@ def test_image():
|
||||||
|
|
||||||
text = "[[../../../images/opengl/point-sprite-shader.png]]"
|
text = "[[../../../images/opengl/point-sprite-shader.png]]"
|
||||||
expected = [
|
expected = [
|
||||||
Token(tokens.IMAGE, ["../../../images/opengl/point-sprite-shader.png", ""]),
|
Token(
|
||||||
|
tokens.IMAGE,
|
||||||
|
["../../../images/opengl/point-sprite-shader.png", ""],
|
||||||
|
)
|
||||||
]
|
]
|
||||||
result = parse_text(text)
|
result = parse(text).doc
|
||||||
assert result[0].value == expected[0].value
|
assert result[0].value == expected[0].value
|
||||||
|
|
||||||
|
|
||||||
|
def test_image_with_caption():
    """A #+CAPTION line followed by an inline image parses to two tokens."""
    source = StringIO(
        "#+CAPTION: Test Image\ntext [[../../test.jpg][test]]"
    )
    wanted = [
        Token(tokens.CAPTION, " Test Image"),
        Token(tokens.LIST, [Token(tokens.IMAGE, ["../../test.jpg", "test"])]),
    ]
    doc = parse(source).doc
    assert len(doc) == 2
    for got, want in zip(doc, wanted):
        assert got.token == want.token
        assert got.value == want.value
|
||||||
|
|
||||||
|
|
||||||
def test_link():
|
def test_link():
|
||||||
text = "parse link [[../../test.html][test]] after link"
|
text = "parse link [[../../test.html][test]] after link"
|
||||||
expected = [
|
expected = [
|
||||||
|
@ -108,7 +126,7 @@ head -n 5 examples/html-plain/example.py
|
||||||
result = parse(text).doc
|
result = parse(text).doc
|
||||||
assert result[0].token == tokens.BLANK
|
assert result[0].token == tokens.BLANK
|
||||||
assert result[0].value == expected[0].value
|
assert result[0].value == expected[0].value
|
||||||
assert result[1].attrs.get('language') == 'sh'
|
assert result[1].attrs.get("language") == "sh"
|
||||||
assert result[1].value == expected[1].value
|
assert result[1].value == expected[1].value
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -22,6 +22,7 @@ def test_meta_headers():
|
||||||
match = re.search(rx, text)
|
match = re.search(rx, text)
|
||||||
assert match is not None
|
assert match is not None
|
||||||
|
|
||||||
|
|
||||||
def test_example():
|
def test_example():
|
||||||
text = "#+BEGIN_EXAMPLE"
|
text = "#+BEGIN_EXAMPLE"
|
||||||
rx = const.t_EXAMPLE_BEGIN
|
rx = const.t_EXAMPLE_BEGIN
|
||||||
|
@ -34,7 +35,6 @@ def test_example():
|
||||||
assert match is not None
|
assert match is not None
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def test_source():
|
def test_source():
|
||||||
# invalid if no language specified
|
# invalid if no language specified
|
||||||
text = "#+BEGIN_SRC"
|
text = "#+BEGIN_SRC"
|
||||||
|
@ -64,3 +64,15 @@ def test_bullets():
|
||||||
rx = const.t_BULLET_START
|
rx = const.t_BULLET_START
|
||||||
match = re.search(rx, text)
|
match = re.search(rx, text)
|
||||||
assert match is not None
|
assert match is not None
|
||||||
|
|
||||||
|
|
||||||
|
def test_captions_regex():
    """t_CAPTIONS matches a caption line with or without a space after the colon."""
    pattern = const.t_CAPTIONS
    for sample in ("#+CAPTION: Test", "#+CAPTION:Test"):
        assert re.search(pattern, sample) is not None
|
||||||
|
|
Loading…
Reference in New Issue