/tools/filters/randomlines.py
Python | 33 lines | 23 code | 7 blank | 3 comment | 6 complexity | 8bba61336b3ad50093b6b2717ca87c07 MD5 | raw file
#!/usr/bin/env python
# Kanwei Li, 2010
# Selects N random lines from a file and outputs to another file

import random
import sys


def _sample_lines(lines, count):
    """Pick `count` lines uniformly at random from `lines` in a single pass.

    Uses reservoir sampling, so only `count` lines are held in memory no
    matter how long the input is.  The trailing newline of each line is
    stripped.

    Returns a tuple ``(kept, seen)``: the list of selected lines and the
    total number of lines read.  When ``seen < count`` the list simply
    contains every line that was read.
    """
    kept = []
    seen = 0
    for line in lines:
        line = line.rstrip("\n")
        seen += 1
        if seen <= count:
            # Fill the reservoir with the first `count` lines.
            kept.append(line)
        elif random.randint(1, seen) <= count:
            # Keep the new line with probability count/seen, evicting a
            # uniformly chosen line that is currently in the reservoir.
            kept.pop(random.randint(0, count - 1))
            kept.append(line)
    return kept, seen


def main():
    """Command-line entry point.

    Arguments are read from ``sys.argv``:
        1. path of the input file
        2. number of lines to select (integer, at least 1)
        3. path of the output file

    The selected lines are written to the output file joined by newlines,
    without a trailing newline.  If fewer than one line is requested, or
    more lines are requested than the input contains, a message is written
    to stderr, the process exits with status 1 and the output file is not
    written.
    """
    # The context manager guarantees the input is closed on every path,
    # including the early exits below.
    with open(sys.argv[1], 'r') as infile:
        total_lines = int(sys.argv[2])

        if total_lines < 1:
            sys.stderr.write( "Must select at least one line." )
            sys.exit(1)

        kept, n = _sample_lines(infile, total_lines)

    if n < total_lines:
        sys.stderr.write( "Error: asked to select more lines than there were in the file." )
        sys.exit(1)

    # Close (and therefore flush) the output explicitly instead of relying
    # on garbage collection of an anonymous file object.
    with open(sys.argv[3], 'w') as outfile:
        outfile.write( "\n".join(kept) )


if __name__ == "__main__":
    main()