# talerfrontends/blog/content.py
# This file is part of GNU TALER.
# Copyright (C) 2014-2016 INRIA
#
# TALER is free software; you can redistribute it and/or modify it under the
# terms of the GNU Lesser General Public License as published by the Free Software
# Foundation; either version 2.1, or (at your option) any later version.
#
# TALER is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License along with
# GNU TALER; see the file COPYING. If not, see <http://www.gnu.org/licenses/>
#
# @author Florian Dold
"""
Define content and associated metadata that is served on the blog.
"""
from collections import OrderedDict, namedtuple
from bs4 import BeautifulSoup
from pkg_resources import resource_stream, resource_filename
import logging
import os
import re

logger = logging.getLogger(__name__)

# An article's metadata: URL slug, display title, teaser HTML, package
# resource path of its main HTML file, and names of extra files (images)
# served alongside it.
Article = namedtuple("Article", "slug title teaser main_file extra_files")

# Global registry of all articles, keyed by slug.  OrderedDict so the
# blog lists articles in registration order.
articles = OrderedDict()
def add_article(slug, title, teaser, main_file, extra_files=None):
    """Register an article in the global ``articles`` registry.

    Args:
        slug: URL-safe identifier; also the registry key.
        title: Human-readable article title.
        teaser: Teaser HTML shown in article listings.
        main_file: Package resource path of the article's main HTML file.
        extra_files: Optional list of extra file names (e.g. images)
            belonging to the article.  Defaults to an empty list.
    """
    # None sentinel instead of a mutable default argument, so a caller
    # mutating the stored list cannot leak into later calls.
    if extra_files is None:
        extra_files = []
    articles[slug] = Article(slug, title, teaser, main_file, extra_files)
def get_image_file(image):
    """Return the absolute filesystem path of a blog image resource."""
    relative = os.path.join("blog/data/", image)
    resolved = resource_filename("talerfrontends", relative)
    return os.path.abspath(resolved)
def get_article_file(article):
    """Return the base name of the article's main HTML file.

    NOTE(review): unlike get_image_file this returns only the base name,
    not an absolute path — presumably callers resolve it against the
    article/template directory; confirm at the call sites.
    """
    path = resource_filename("talerfrontends", article.main_file)
    return os.path.basename(path)
def add_from_html(resource_name, teaser_paragraph=0, title=None):
    """Extract title, teaser and regulated images from an article's HTML
    and register the article via add_article().

    Args:
        resource_name: Package-relative path of the article HTML resource.
        teaser_paragraph: Index of the <p> element used as teaser when no
            element with id="teaser" exists in the document.
        title: Explicit title; when None it is taken from the first
            <h1 class="chapter"> or <h1 class="unnumbered"> element,
            falling back to the resource name itself.
    """
    res = resource_stream("talerfrontends", resource_name)
    soup = BeautifulSoup(res, 'html.parser')
    if title is None:
        title_el = soup.find("h1", attrs={"class": ["chapter", "unnumbered"]})
        if title_el is None:
            # logger.warn() is a deprecated alias; use warning() with
            # lazy %-style arguments.
            logger.warning("Can't extract title from '%s'", resource_name)
            title = resource_name
        else:
            title = title_el.get_text().strip()
    slug = title.replace(" ", "_")
    paragraphs = soup.find_all("p")

    teaser = soup.find("p", attrs={"id": ["teaser"]})
    if teaser is None:
        teaser = str(paragraphs[teaser_paragraph])
    # Raw string: regex metacharacters must not pass through Python
    # string escaping first.
    p = re.compile(r"^/essay/[^/]+/data/[^/]+$")
    extra_files = []
    for img in soup.find_all("img"):
        # We require that any image whose access is regulated is src'd
        # as "/essay/<slug>/data/img.png".  We also need to check if the
        # <slug> component actually matches the article's slug.
        # .get() instead of [...]: an <img> without src must not raise.
        src = img.get('src', '')
        if p.match(src):
            # URL paths always use '/'; splitting on os.sep would break
            # on platforms where os.sep != '/'.
            if src.split('/')[2] == slug:
                logger.info("extra file for %s is %s",
                            slug, os.path.basename(src))
                extra_files.append(os.path.basename(src))
            else:
                logger.warning("Image src and slug don't match: '%s' != '%s'",
                               src.split('/')[2], slug)
    add_article(slug, title, teaser, resource_name, extra_files)
# Teaser-paragraph overrides: articles whose teaser is not paragraph 0.
_TEASER_OVERRIDES = {
    "scrap1_1": 1, "scrap1_13": 1, "scrap1_20": 1, "scrap1_25": 1,
    "scrap1_26": 1, "scrap1_28": 1, "scrap1_30": 1, "scrap1_31": 1,
    "scrap1_43": 2, "scrap1_46": 1,
}

# Registration order matters (the registry is an OrderedDict):
# the two unnumbered articles first, then 1..43 (44 and 45 do not
# exist), then 46 and 47.
_ARTICLE_NAMES = (
    ["scrap1_U.0", "scrap1_U.1"]
    + ["scrap1_%d" % i for i in range(1, 44)]
    + ["scrap1_46", "scrap1_47"]
)

for _name in _ARTICLE_NAMES:
    add_from_html("blog/articles/%s.html" % _name,
                  _TEASER_OVERRIDES.get(_name, 0))