gsr_processor.py | searchcode

/embersUtils/gsr_processor.py

https://bitbucket.org/sathappanspm/embers · Python · 96 lines · 76 code · 14 blank · 6 comment · 15 complexity · 8a292653b4b62635c9e60a6257348c4d MD5 · raw file


#!/usr/bin/env python
#-*- coding:utf-8 -*-
# vim: ts=4 sts=4 sw=4 tw=79 sta et
"""
    gsr_processor.py: Read GSR events from the 'V1' sheet of an Excel workbook
    and publish each row as a warning message to a queue.
"""

__author__ = "Sathappan Muthiah"
__email__ = "sathap1@vt.edu"
__version__ = "0.0.1"

from etool import args, queue
from collections import namedtuple
import xlrd
from datetime import datetime
import re

# Static record type listing one fixed set of GSR column names.
# NOTE: main() does not read this module-level value; it binds a local
# GSR_TITLES built from the sheet's header row via create_named_tuple().
GSR_TITLES = namedtuple('GSR_Warning', 'eventId, eventSubId, EntryRevisionDate, recordStatus, country, state, city, eventCode, population, date, earliestReportedDate, source, headline, eventDescription, firstRepLink, otherLinks_gss, otherLinks1, otherLinks2, encodingComment')


def format_loc(loc_item):
    """Normalise one location component (country, state or city).

    Args:
        loc_item: text of the location cell.

    Returns:
        '-' when the cell is blank or holds a "not available" marker
        ('na' or 'n/a', case-insensitive); otherwise the text with
        surrounding whitespace removed.
    """
    # Strip before testing so whitespace-only cells and padded markers such
    # as ' NA ' are recognised as missing instead of leaking through.
    cleaned = loc_item.strip()
    if not cleaned or cleaned.lower() in ('na', 'n/a'):
        return '-'
    return cleaned


def format_str(s):
    """Return *s* as stripped text.

    Args:
        s: a byte string, a text string, or any other value (for example a
            number).

    Returns:
        Text with leading and trailing whitespace removed. Byte strings are
        decoded as UTF-8; non-string values are converted with the text
        type's constructor first.

    Raises:
        UnicodeDecodeError: if a byte string is not valid UTF-8.
    """
    # type(u'') is `unicode` on Python 2 and `str` on Python 3, and `bytes`
    # is an alias of `str` on Python 2, so the same branches work on both
    # without referring to the Python-2-only name `unicode`.
    text_type = type(u'')
    if isinstance(s, bytes):
        return s.strip().decode('utf-8')
    if isinstance(s, text_type):
        return s.strip()
    return text_type(s).strip()


def format_date(xlDate, datemode):
    """Convert a spreadsheet date cell to an ISO-8601 string.

    Args:
        xlDate: either a textual date in ``MM/DD/YYYY`` form (text or UTF-8
            bytes) or a value accepted by ``xlrd.xldate_as_tuple``.
        datemode: the workbook date mode, forwarded to
            ``xlrd.xldate_as_tuple``; unused for textual dates.

    Returns:
        The date formatted by ``datetime.isoformat('T')``.

    Raises:
        ValueError: if a textual date does not match ``MM/DD/YYYY``.
    """
    # Byte strings are decoded so they take the textual branch instead of
    # being handed to xlrd.
    if isinstance(xlDate, bytes):
        xlDate = xlDate.decode('utf-8')
    # type(u'') is the text type on both Python 2 and Python 3.
    if isinstance(xlDate, type(u'')):
        # Tolerate padding around the date; strptime would reject it.
        return datetime.strptime(xlDate.strip(), '%m/%d/%Y').isoformat('T')
    year, month, day, hour, minute, second = xlrd.xldate_as_tuple(xlDate, datemode)
    return datetime(year, month, day, hour, minute, second).isoformat('T')


def create_named_tuple(titles):
    """Build a ``GSR_Warning`` namedtuple type from spreadsheet column titles.

    Each title is lower-cased and stripped of every character outside
    ``[a-z0-9]``. A name that has already been used gets a numeric suffix:
    the second occurrence ends in ``2``, the third in ``3``, and so on.

    Args:
        titles: iterable of header strings, in column order.

    Returns:
        A namedtuple class with one field per title, in the same order.

    Raises:
        ValueError: raised by ``namedtuple`` if a sanitised title is not a
            valid field name (for example, empty or starting with a digit).
    """
    used = set()
    fields = []
    for title in titles:
        base = re.sub('[^a-z0-9]', '', title.lower())
        # De-duplicate on the sanitised name so titles that differ only in
        # case or punctuation cannot collide, and keep counting upward so a
        # third or later repeat still gets a unique field name.
        name = base
        occurrence = 1
        while name in used:
            occurrence += 1
            name = '%s%d' % (base, occurrence)
        used.add(name)
        fields.append(name)
    return namedtuple('GSR_Warning', fields)


def main(args):
    """Publish every data row of the GSR workbook as a warning message.

    Opens the workbook at ``args.gsr``, reads sheet ``'V1'``, and uses its
    first row as column titles. Each later row is turned into a dict and
    written to the queue opened on ``args.pub``. When ``args.type`` is set,
    rows whose event type does not match it (``re.match`` from the start of
    the string) are skipped.

    Args:
        args: parsed command-line namespace; reads ``gsr``, ``pub``,
            ``noCapture`` and ``type``. (``pub`` is presumably defined by
            the etool parser -- confirm.)
    """
    workbook = xlrd.open_workbook(args.gsr)
    sheet = workbook.sheet_by_name('V1')
    # Row 0 holds the titles; they become the attribute names used below.
    row_type = create_named_tuple(sheet.row_values(0))
    publisher = queue.open(args.pub, 'w', capture=args.noCapture)

    for row_index in range(1, sheet.nrows):
        row = row_type._make(sheet.row_values(row_index))

        # Fields are evaluated in the same order as they are listed.
        warning = {
            'embersId': str(int(row.eventid)),
            'eventDate': format_date(row.date, workbook.datemode),
            'location': [
                format_loc(row.country),
                format_loc(row.state),
                format_loc(row.city),
            ],
            'model': 'GSR',
            'confidence': 1.00,
            'confidenceIsProbability': False,
            'eventType': format_str(row.eventcode),
        }

        # Optional filter on the event type.
        if args.type and not re.match('%s.*' % args.type, warning['eventType']):
            continue

        warning['population'] = format_str(row.population)
        warning['date'] = format_date(row.earliestreporteddate, workbook.datemode)
        derived = {
            'derivedIds': [],
            'embersSubId': row.eventsubid,
            'status': row.newssource,
            'headline': row.headline,
            'description': row.eventdescription,
            'firstReportedLink': row.firstreportedlink,
            'gssLink': row.otherlinksgsslink,
            'otherLinks1': row.otherlinks,
            'otherLinks2': row.otherlinks2,
            'geoCorrected': False,
        }
        warning['derivedFrom'] = derived
        publisher.write(warning)

if __name__ == "__main__":
    # Start from the parser returned by etool's args.get_parser(), then add
    # the options specific to this script.
    ap = args.get_parser()
    ap.add_argument('-g', '--gsr', type=str, help='Location of gsr excel sheet')
    # store_false: passing -n sets noCapture to False. main() forwards the
    # value as queue.open(..., capture=...).
    ap.add_argument('-n', '--noCapture', action='store_false', default=True,
                    help='Disable capture on the publish queue (capture is on by default)')
    ap.add_argument('-t', '--type', help="filter by type, enter the first two digits of the code")
    # Use a distinct name so the imported etool `args` module is not
    # overwritten by the parsed namespace.
    cli_args = ap.parse_args()
    main(cli_args)

Tech Fingerprint

Alerts (9)

'def' Ensure functions have docstrings for documentation
21 28 36 44 55
Complexity hotspot; line 22 (total complexity: 3)
22
'isinstance(' Overuse may indicate design issues; consider polymorphism
29 31 37