/tools/validation/fix_errors.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 60 lines · 38 code · 9 blank · 13 comment · 13 complexity · b88052cdbb70e297a368eeaa2b812dbc MD5 · raw file

  1. #!/usr/bin/env python
  2. """
  3. Fix errors in a dataset.
  4. For now, only removing erroneous lines is supported.
  5. usage: %prog input errorsfile output
  6. -x, --ext: dataset extension (type)
  7. -m, --methods=N: comma separated list of repair methods
  8. """
  9. import pkg_resources; pkg_resources.require( "bx-python" )
  10. from bx.cookbook import doc_optparse
  11. from galaxy import util
  12. def main():
  13. options, args = doc_optparse.parse( __doc__ )
  14. methods = []
  15. try:
  16. if options.methods: methods = options.methods.split(",")
  17. except:
  18. pass
  19. ext = options.ext
  20. in_file = open(args[0], "r")
  21. error_file = open(args[1], "r")
  22. out_file = open(args[2], "w")
  23. # string_to_object errors
  24. error_list = util.string_to_object(error_file.read())
  25. # index by error type and then by line number
  26. error_lines = {}
  27. error_types = {}
  28. for error in error_list:
  29. if error.linenum:
  30. if error.linenum in error_lines:
  31. error_lines[error.linenum].append(error)
  32. else:
  33. error_lines[error.linenum] = [error]
  34. error_type = error.__class__.__name__
  35. if error_type in error_types:
  36. error_types[error_type].append(error)
  37. else:
  38. error_types[error_type] = [error]
  39. linenum = 0
  40. for line in in_file:
  41. linenum += 1
  42. # write unless
  43. if "lines" in methods:
  44. if linenum in error_lines:
  45. line = None
  46. # other processing here?
  47. if line:
  48. out_file.write(line)
  # Script entry point: run main() only when this file is executed directly, not when imported.
  49. if __name__ == "__main__":
  50. main()