PageRenderTime 25ms CodeModel.GetById 14ms app.highlight 8ms RepoModel.GetById 1ms app.codeStats 0ms

/tools/validation/fix_errors.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 60 lines | 38 code | 9 blank | 13 comment | 16 complexity | b88052cdbb70e297a368eeaa2b812dbc MD5 | raw file
 1#!/usr/bin/env python
 2
 3"""
 4Fix errors in a dataset.
 5For now, only removing erroneous lines is supported.
 6
 7usage: %prog input errorsfile output
 8    -x, --ext: dataset extension (type)
 9    -m, --methods=N: comma separated list of repair methods
10"""
11
12import pkg_resources; pkg_resources.require( "bx-python" )
13from bx.cookbook import doc_optparse
14
15from galaxy import util
16
def main():
    """Copy a dataset to a new file, applying the requested repair methods.

    Options and positional arguments are parsed from the module docstring
    by ``doc_optparse``:

    * ``args[0]`` -- path of the dataset to repair.
    * ``args[1]`` -- path of the errors file.  Its contents are decoded
      with ``util.string_to_object`` into an iterable of error objects,
      each exposing a ``linenum`` attribute.
    * ``args[2]`` -- path the repaired dataset is written to.
    * ``--methods`` -- comma separated list of repair methods.  Only
      ``lines`` has an effect: every input line whose 1-based line number
      has at least one error recorded against it is dropped.  Without it
      the input is copied through unchanged.

    Raises:
        IndexError: if fewer than three positional arguments are given.
        IOError / OSError: if one of the files cannot be opened.
    """
    options, args = doc_optparse.parse(__doc__)

    # A missing, empty or non-string --methods value means "no repairs".
    try:
        methods = options.methods.split(",") if options.methods else []
    except AttributeError:
        methods = []
    remove_error_lines = "lines" in methods

    # NOTE(review): string_to_object presumably rebuilds arbitrary Python
    # objects from the file contents -- confirm the errors file always
    # comes from a trusted validation step.
    with open(args[1], "r") as error_file:
        error_list = util.string_to_object(error_file.read())

    # Only the line numbers matter for the "lines" method.  Errors whose
    # linenum is falsy (None, 0) are not tied to a specific line and are
    # therefore ignored here.
    error_linenums = set(
        error.linenum for error in error_list if error.linenum
    )

    # Nested (rather than combined) with-blocks keep this valid on
    # interpreters that predate the multi-item with statement.
    with open(args[0], "r") as in_file:
        with open(args[2], "w") as out_file:
            for linenum, line in enumerate(in_file, 1):
                if remove_error_lines and linenum in error_linenums:
                    continue
                # other repair methods would transform `line` here
                out_file.write(line)
58    
# Script entry point: run the repair only when this file is executed
# directly, never as a side effect of importing it.
if __name__ == "__main__":
    main()