extract.py | searchcode

/helpers/graphs/hashtag-entity/extract.py

https://github.com/champ1/twittomatic · Python · 36 lines · 23 code · 8 blank · 5 comment · 7 complexity · 1a197745dedbdb21c8151271b0d6956f MD5 · raw file


"""
Script that reads an annotation file and outputs a tab-separated stream.

Use it in conjunction with sort -k1 -u
"""

import sys
import json
import gzip

def read(inputfile):
    """Stream a gzipped JSON-lines annotation file to stdout as TSV.

    Each non-blank line of *inputfile* is parsed as a JSON object that
    provides an ``'hts'`` collection and an ``'annotations'`` collection
    whose items have at least three elements.  For every pair
    (ht, annotation), taken in sorted order, one line is written::

        ht <TAB> annotation[0] <TAB> annotation[1] <TAB> annotation[2]

    Output is always UTF-8 encoded bytes, independent of the locale of
    the terminal, and works under both Python 2.7 and Python 3.

    Args:
        inputfile: Path of the gzip-compressed annotation file.

    Raises:
        ValueError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks the ``'hts'`` or ``'annotations'`` key.
    """
    # Python 3 exposes the byte-oriented layer of stdout as ``.buffer``;
    # Python 2's stdout accepts byte strings directly.
    out = getattr(sys.stdout, 'buffer', sys.stdout)

    with gzip.open(inputfile, 'rb') as stream:
        for raw in stream:
            raw = raw.strip()
            if not raw:
                # Tolerate blank lines (e.g. a trailing newline) instead
                # of letting json.loads fail on an empty document.
                continue

            obj = json.loads(raw.decode('utf8'))

            # Both sorts are loop-invariant for a record: do them once.
            hts = sorted(obj['hts'])
            annotations = sorted(obj['annotations'])

            for ht in hts:
                for annotation in annotations:
                    row = u"%s\t%s\t%s\t%s\n" % (
                        ht, annotation[0], annotation[1], annotation[2])
                    out.write(row.encode('utf8'))

if __name__ == "__main__":
    # Command-line entry point: -i/--input names the json.gz annotation
    # file to convert; without it the usage text is shown instead.
    from optparse import OptionParser

    cli = OptionParser(description="Read the annotation file and outputs TSV")
    cli.add_option(
        "-i", "--input",
        dest="inputfile",
        help="Annotation file in json.gz format"
    )

    # Positional arguments are accepted by optparse but not used here.
    opts, _positional = cli.parse_args()

    if not opts.inputfile:
        cli.print_help()
    else:
        read(opts.inputfile)

Tech Fingerprint

Alerts (2)

'def' Ensure functions have docstrings for documentation
11
'gzip.open(' Potential decompression bomb vulnerability in Python code if input is untrusted; ensure to limit the number of bytes read.
12