/tools/validation/fix_errors.py
https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 60 lines · 38 code · 9 blank · 13 comment · 13 complexity · b88052cdbb70e297a368eeaa2b812dbc MD5 · raw file
- #!/usr/bin/env python
- """
- Fix errors in a dataset.
- For now, only removing erroneous lines is supported.
- usage: %prog input errorsfile output
- -x, --ext: dataset extension (type)
- -m, --methods=N: comma separated list of repair methods
- """
- import pkg_resources; pkg_resources.require( "bx-python" )
- from bx.cookbook import doc_optparse
- from galaxy import util
def main():
    """Copy the input dataset to the output, dropping lines flagged as erroneous.

    Command-line options and arguments are parsed from the module docstring
    by ``doc_optparse``. Three positional arguments are required, in order:
    the input path, the errors file path and the output path.

    A line is dropped only when the ``lines`` repair method was requested
    through ``--methods`` and its 1-based line number has at least one
    recorded error; otherwise every line is copied unchanged. The ``--ext``
    option is accepted but not used by this function.

    Raises:
        IndexError: if fewer than three positional arguments are supplied.
    """
    options, args = doc_optparse.parse(__doc__)
    in_path, errors_path, out_path = args[0], args[1], args[2]

    # Best effort: a missing or non-string --methods value means "no repairs".
    methods = []
    try:
        if options.methods:
            methods = options.methods.split(",")
    except AttributeError:
        pass
    remove_lines = "lines" in methods

    # NOTE(review): util.string_to_object presumably deserializes arbitrary
    # objects -- confirm the errors file always comes from a trusted source.
    with open(errors_path, "r") as error_file:
        error_list = util.string_to_object(error_file.read())

    # Index errors by line number. Errors without a line number cannot be
    # repaired by removing a line, so they are skipped.
    error_lines = {}
    for error in error_list:
        if error.linenum:
            error_lines.setdefault(error.linenum, []).append(error)

    # The output is opened only after the errors were parsed successfully, so
    # a malformed errors file does not leave a truncated output behind.
    with open(in_path, "r") as in_file:
        with open(out_path, "w") as out_file:
            for linenum, line in enumerate(in_file, 1):
                if remove_lines and linenum in error_lines:
                    continue
                # other processing here?
                out_file.write(line)
-
# Run the repair only when this file is executed as a script, so importing
# the module has no side effects.
if __name__ == "__main__":
    main()