diff --git a/mhackspace/contact/templatetags/recapture.py b/mhackspace/contact/templatetags/recapture.py
index d097d22..c3ce887 100644
--- a/mhackspace/contact/templatetags/recapture.py
+++ b/mhackspace/contact/templatetags/recapture.py
@@ -1,11 +1,10 @@
# -*- coding: utf-8 -*-
from django import template
-from mhackspace.feeds.models import Feed
-from scaffold.readers.rss_reader import feed_reader
from django.conf import settings
register = template.Library()
-@register.inclusion_tag('partials/recapture.html')
+
+@register.inclusion_tag("partials/recapture.html")
def google_capture():
- return settings.CAPTCHA
+ return settings.CAPTCHA
diff --git a/mhackspace/feeds/helper.py b/mhackspace/feeds/helper.py
index baf12b4..b226931 100644
--- a/mhackspace/feeds/helper.py
+++ b/mhackspace/feeds/helper.py
@@ -1,53 +1,34 @@
# -*- coding: utf-8 -*-
import os
import logging
-import feedparser
from time import mktime
from datetime import datetime
from urllib.request import urlretrieve
from django.core.files import File
-from django.utils.timezone import make_aware
-from django.utils import timezone
from stdimage.utils import render_variations
from mhackspace.feeds.reader import fetch_feeds
-# from scaffold.readers.rss_reader import feed_reader
-
from mhackspace.feeds.models import Feed, Article, image_variations
logger = logging.getLogger(__name__)
-def feed_reader(feeds):
- for feed in feeds:
- print(feed)
- yield feedparser.parse(feed["url"])
-
-
def import_feeds(feed=False):
remove_old_articles()
-
- print([f.get("url") for f in get_active_feeds(feed)])
- rss_articles = fetch_feeds(get_active_feeds(feed))
-
articles = []
- for article in rss_articles:
+ for article in fetch_feeds(get_active_feeds(feed)):
date = datetime.fromtimestamp(mktime(article["date"]))
- print(article["title"])
- print(article["image"])
- print('#############')
articles.append(
Article(
url=article["url"],
feed=Feed.objects.get(pk=article["feed"]),
- title=article["title"][0:100],
- original_image=article["image"][0:100],
+ title=article["title"],
+ original_image=article["image"],
description=article["description"],
date=date,
)
)
-
articles = Article.objects.bulk_create(articles)
download_remote_images()
return articles
diff --git a/mhackspace/feeds/reader.py b/mhackspace/feeds/reader.py
index c0df1b4..3802f8c 100644
--- a/mhackspace/feeds/reader.py
+++ b/mhackspace/feeds/reader.py
@@ -5,6 +5,9 @@ from lxml import etree
from lxml.html.clean import Cleaner
from io import StringIO, BytesIO
+from django.utils.html import escape
+
+
namespaces = {}
urls = [
"https://feeds.feedburner.com/projects-jl",
@@ -20,18 +23,6 @@ def parse_content(content):
headers = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0"
}
- html_cleaner = Cleaner()
- html_cleaner.javascript = True
- html_cleaner.style = True
- html_cleaner.remove_tags = [
- "script",
- "iframe",
- "link",
- "style",
- "img",
- "div",
- ]
- # ~ html_cleaner.allow_tags = ['a', 'p', 'strong']
html_img_cleaner = Cleaner(allow_tags=["img"], remove_unknown_tags=False)
html_img_cleaner.allow_tags = ["img"]
@@ -40,17 +31,13 @@ def parse_content(content):
remove_blank_text=True, ns_clean=True, encoding="utf-8"
)
- print("------------------")
- print(content)
-
- dom = lxml.etree.XML("
" + content + "
", xml_parser)
- return dom
+ return lxml.etree.XML("" + escape(content) + "
", xml_parser)
def fetch_image_from_node_text(text):
description = lxml.etree.parse(text, html_parser)
for image in description.xpath(".//img"):
- print('fetch image from node text')
+ print("fetch image from node text")
return image.get("src")
return None
@@ -71,16 +58,14 @@ def fetch_node_text(node, name, default=u""):
def fetch_image(post, node, namespaces):
"""Try and get an image from an item in the feed, use various fall back methods"""
if hasattr(post, "media_thumbnail"):
- print('media')
-
+ print("media")
image = post.media_thumbnail
print(image)
-
if image:
return image[0].get("url")
if hasattr(post, "content"):
- print('content')
+ print("content")
content = " ".join(c.value for c in post.content)
image = fetch_image_from_node_text(content)
if image:
@@ -89,7 +74,7 @@ def fetch_image(post, node, namespaces):
# final attempt at getting an image from the item using description
result = fetch_node_text(node, "description")
if result:
- print('description')
+ print("description")
image = fetch_image_from_node_text(result)
if image:
return image
@@ -99,33 +84,27 @@ def fetch_image(post, node, namespaces):
def fetch_feeds(feeds):
+ articles = []
+
for feed in feeds:
- url = feed.get('url')
- print(url)
+ url = feed.get("url")
parsed = feedparser.parse(url)
namespaces = {}
if hasattr(parsed, "namespaces"):
namespaces = parsed.namespaces
feed_image = ""
if hasattr(parsed.feed, "image"):
- feed_image = parsed.feed.image.get('href')
- articles = []
+ feed_image = parsed.feed.image.get("href")
for post in parsed.entries:
- print(post.published)
- print(feed_image)
root_node = parse_content(post.description)
- image = fetch_image(post, root_node, namespaces) #or feed_image
-
- articles.append(
- {
- "url": post.link,
- "feed": feed.get('id'),
- "title": post.title,
- "original_image": image,
- "description": post.description,
- "date": post.published_parsed,
- "image": feed_image,
- }
- )
- print(articles[-1])
+ image = fetch_image(post, root_node, namespaces) or feed_image
+ yield {
+ "url": post.link,
+ "feed": feed.get("id"),
+ "title": post.title,
+ "original_image": image,
+ "description": post.description,
+ "date": post.published_parsed,
+ "image": image,
+ }
return articles
diff --git a/requirements/base.txt b/requirements/base.txt
index b2bbb7d..c90294d 100644
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -8,10 +8,10 @@ django==2.1.1
django-dynamic-filenames==1.1.3
# Configuration
django-environ==0.4.5
-whitenoise==4.0
+whitenoise==4.1
# Static and Media Storage
# ------------------------------------------------
-boto3==1.9.1
+boto3==1.9.5
django-storages==1.7.1
# django-storages-redux==1.3.2
@@ -55,7 +55,7 @@ django-compressor==2.2
#fix for use with s3 buckets merged in master, so next release we can remove this
#django-sass-processor==0.5.7
git+https://github.com/jrief/django-sass-processor.git
-libsass==0.14.5
+libsass==0.15.0
lxml==4.2.5
# WSGI Handler
@@ -72,9 +72,6 @@ gocardless_pro==1.8.0
braintree==3.48.0
django-autofixture==0.12.1
-
-git+https://github.com/olymk2/scaffold.git
-#git+git://github.com/olymk2/django-wiki.git
git+git://github.com/django-wiki/django-wiki.git
djangorestframework==3.8.2
@@ -83,8 +80,7 @@ django-filter==2.0.0
coreapi==2.3.3
# api libraries end
-#martor==1.3.2
-git+git://github.com/olymk2/django-markdown-editor.git
+martor==1.3.3
django-spirit==0.6.1
django-djconfig==0.8.0
@@ -103,4 +99,4 @@ python-magic==0.4.15
ldap3==2.5.1
bcrypt==3.1.4
python-twitter==3.4.2
-feedparser
+feedparser==5.2.1