PageRenderTime 32ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/tools/filters/randomlines.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 33 lines | 23 code | 7 blank | 3 comment | 6 complexity | 8bba61336b3ad50093b6b2717ca87c07 MD5 | raw file
  1. #!/usr/bin/env python
  2. # Kanwei Li, 2010
  3. # Selects N random lines from a file and outputs to another file
  4. import random, sys
  5. def main():
  6. infile = open(sys.argv[1], 'r')
  7. total_lines = int(sys.argv[2])
  8. if total_lines < 1:
  9. sys.stderr.write( "Must select at least one line." )
  10. sys.exit()
  11. kept = []
  12. n = 0
  13. for line in infile:
  14. line = line.rstrip("\n")
  15. n += 1
  16. if (n <= total_lines):
  17. kept.append(line)
  18. elif random.randint(1, n) <= total_lines:
  19. kept.pop(random.randint(0, total_lines-1))
  20. kept.append(line)
  21. if n < total_lines:
  22. sys.stderr.write( "Error: asked to select more lines than there were in the file." )
  23. sys.exit()
  24. open(sys.argv[3], 'w').write( "\n".join(kept) )
  25. if __name__ == "__main__":
  26. main()