pelican_bibtex.py | searchcode

/pelican_bibtex.py

https://bitbucket.org/joshua.adelman/pelican_bibtex
Python | 166 lines | 116 code | 5 blank | 45 comment | 3 complexity | bedb8ee58b2f38357b5983683670eba9 MD5 | raw file

# -*- coding: utf-8 -*-

'''
Pelican BibTeX plugin
=====================

A Pelican plugin that makes available the contents of a BibTeX file to the template 
of a Page context.


Author: Joshua L. Adelman 
'''

from pelican import signals

from collections import namedtuple
import itertools

try: 
    from pybtex.database.input import bibtex
    from pybtex.database import PybtexError
    from pybtex.backends import html
    from pybtex.style.formatting import plain
except ImportError:
    raise Exception("Unable to load pybtex module")

class BibtexParser():
    '''Parses a BibTeX file and populates a list of namedtuples with fields and data.
    Each namedtuple corresponds to a publication in the BibTeX file. The fields of the
    namedtuple can be set in the pelican config file, but by default all available tags
    in the BibTeX file are made available. Additionally the namedtuple will contain a
    formatted entry `pub.html` with the complete citation.

    Parameters set in the Pelican config file:

    Parameters
    ----------
    BIBTEX_SOURCE : str
        Path to .bib file to be parsed. Needed for any publication to be produced;
        when it is not set, `fetch` returns an empty list.
    BIBTEX_TAGS : list
        A list of the tags that will be used to create the namedtuple. If not specified
        all tags present in the BIBTEX_SOURCE are used. Tags are lower-cased and any
        `-` is replaced by `_` to form the namedtuple field names.
    BIBTEX_FOOTNOTES : dict of tuples
        A dict containing (symbol, footnote) tuples. If the symbol is found in the list of
        authors, the key field of the publication object is populated with the tuple,
        otherwise it is set to None.
        This is convenient for specifying equal authorship, corresponding authorship, etc.
    BIBTEX_STYLE : module
        A module containing a class called `Style` that contains a pybtex template. This is
        used to generate the formatted html entry. By default the plugin uses the `plain` style
        (see pybtex/style/formatting/plain.py).

    Examples
    --------
    In pelicanconf.py:

    ...
    import pybtex_custom_style

    BIBTEX_SOURCE = 'publications.bib'
    BIBTEX_TAGS = ['Author', 'DOI', 'Abstract']
    BIBTEX_FOOTNOTES = {'shared': (u'‡', 'These authors contributed equally to this work.')}
    BIBTEX_STYLE = pybtex_custom_style
    ...

    Would return a list of namedtuples, with each entry `pub` containing the populated fields,
    ['author', 'doi', 'abstract', 'authors', 'html', 'shared'], where `author` is the raw data
    from the .bib file and `authors` is a list of formatted name strings generated from the
    entry's author persons.
    '''

    def __init__(self, generator):
        """
            Read the plugin settings from `generator.settings` and create the
            pybtex parser.
        """
        print('Initializing BibtexParser')

        self.parser = bibtex.Parser()
        # `.get` rather than indexing: the context handler treats a missing
        # BIBTEX_SOURCE as "plugin unused", so construction must not raise.
        self.bibtex_file = generator.settings.get('BIBTEX_SOURCE')
        self.retrieve_tags = generator.settings.get('BIBTEX_TAGS')
        self.footnote_map = generator.settings.get('BIBTEX_FOOTNOTES', {})
        self.style_format = generator.settings.get('BIBTEX_STYLE', plain)

    @staticmethod
    def _field_name(tag):
        """
            Turn a BibTeX tag into the namedtuple field name used for it.
        """
        return tag.lower().replace('-', '_')

    @staticmethod
    def _format_author(author):
        """
            Build 'First M. Prelast Last Lineage' from a pybtex person.
        """
        middle = author.middle()
        # Only the first middle name is kept; strip an existing trailing
        # period before appending one so "L." does not become "L..".
        initial = [middle[0].rstrip('.') + '.'] if middle else []

        pieces = [author.first(), initial, author.prelast(), author.last(), author.lineage()]
        return ' '.join(itertools.chain.from_iterable(pieces))

    def get_all_tags(self, data):
        """
            returns the sorted, de-duplicated tags found in any entry of
            `data`, with `-` replaced by `_`
        """
        tags = set()
        for entry in data.entries.values():
            tags.update(tag.replace('-', '_') for tag in entry.fields)

        # Sorted so the resulting namedtuple field order is deterministic.
        return sorted(tags)

    def fetch(self):
        """
            returns a list of namedtuples containing publication data

            Returns an empty list when no BIBTEX_SOURCE was configured.
            Raises RuntimeError when pybtex cannot read or parse the file.
        """
        if not self.bibtex_file:
            return []

        try:
            data = self.parser.parse_file(self.bibtex_file)
        except PybtexError as err:
            raise RuntimeError(
                '`pelican_bibtex` failed to open file {}: {}'.format(self.bibtex_file, err))

        bibdata = []

        # Coerce data into a more reasonable data structure
        # Build named tuple from requested `retrieve_tags`, or by default
        # get all tags in the bibtex file
        generated_fields = ['authors', 'html'] + list(self.footnote_map)

        # namedtuple rejects duplicate names, so drop tags that collapse to
        # the same field name or that collide with a generated field.
        fields = []
        for tag in self.retrieve_tags or self.get_all_tags(data):
            name = self._field_name(tag)
            if name not in fields and name not in generated_fields:
                fields.append(name)

        pub = namedtuple('Publications', fields + generated_fields)

        style = self.style_format.Style()
        html_backend = html.Backend()

        for formatted_entry in style.format_entries(data.entries.values()):
            entry = data.entries[formatted_entry.key]

            # Index the raw values by field name so that hyphenated or
            # mixed-case tags are found under their namedtuple name.
            raw = {self._field_name(tag): value for tag, value in entry.fields.items()}
            pfields = {name: raw.get(name) for name in fields}

            # Reformat author list; entries without authors get an empty list
            auth_list = [self._format_author(author)
                         for author in entry.persons.get('author', [])]
            pfields['authors'] = auth_list

            # Parse footnotes
            for foot_id, (symbol, note) in self.footnote_map.items():
                marked = any(symbol in name for name in auth_list)
                pfields[foot_id] = (symbol, note) if marked else None

            # Render the html
            pfields['html'] = formatted_entry.text.render(html_backend)

            bibdata.append(pub(**pfields))

        return bibdata


def bibtex_parser_initialization(generator):
    """
        Initialization of parser

        Stores a `BibtexParser` built from `generator` on
        `generator.plugin_instance`. Nothing is created when the
        `BIBTEX_SOURCE` setting is absent, matching the check that
        `bibtex_fetch_publications` makes before using the instance.
    """
    if 'BIBTEX_SOURCE' not in generator.settings:
        return

    generator.plugin_instance = BibtexParser(generator)

def bibtex_fetch_publications(generator, metadata):
    """
        Publish the parsed publications to the template context

        When the `BIBTEX_SOURCE` setting is present, the result of
        `generator.plugin_instance.fetch()` is stored in
        `generator.context['bibtex_entries']`; otherwise the context is left
        untouched. `metadata` is accepted but not used.
    """
    if 'BIBTEX_SOURCE' not in generator.settings:
        return

    generator.context['bibtex_entries'] = generator.plugin_instance.fetch()

def register():
    """
        Plugin registration

        Connects the parser initialization and the context handler to the
        page generator signals. The current signal names are tried first and
        the older `pages_*` names are used as a fallback, so the plugin loads
        whichever of the two spellings the installed Pelican exposes.
    """
    handlers = (
        ('page_generator_init', 'pages_generator_init',
         bibtex_parser_initialization),
        ('page_generator_context', 'pages_generate_context',
         bibtex_fetch_publications),
    )

    for current_name, legacy_name, handler in handlers:
        signal = getattr(signals, current_name, None)
        if signal is None:
            # Neither name existing is a real error; let AttributeError surface.
            signal = getattr(signals, legacy_name)
        signal.connect(handler)