PageRenderTime 16ms CodeModel.GetById 7ms app.highlight 6ms RepoModel.GetById 2ms app.codeStats 0ms

/tools/filters/randomlines.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 33 lines | 23 code | 7 blank | 3 comment | 6 complexity | 8bba61336b3ad50093b6b2717ca87c07 MD5 | raw file
 1#!/usr/bin/env python
 2# Kanwei Li, 2010
 3# Selects N random lines from a file and outputs to another file
 4
 5import random, sys
 6
 7def main():
 8    infile = open(sys.argv[1], 'r')
 9    total_lines = int(sys.argv[2])
10    
11    if total_lines < 1:
12        sys.stderr.write( "Must select at least one line." )
13        sys.exit()
14    
15    kept = []
16    n = 0
17    for line in infile:
18        line = line.rstrip("\n")
19        n += 1
20        if (n <= total_lines):
21            kept.append(line)
22        elif random.randint(1, n) <= total_lines:
23            kept.pop(random.randint(0, total_lines-1))
24            kept.append(line)
25    
26    if n < total_lines:
27        sys.stderr.write( "Error: asked to select more lines than there were in the file." )
28        sys.exit()
29        
30    open(sys.argv[3], 'w').write( "\n".join(kept) )
31    
# Run the selection only when executed as a script, not when imported.
if __name__ == "__main__":
    main()