/tools/fastq/tabular_to_fastq.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 29 lines · 24 code · 4 blank · 1 comment · 6 complexity · 430e3005b333784fa0df5345883bac75 MD5 · raw file

  1. #Dan Blankenberg
  2. import sys
  3. def main():
  4. input_filename = sys.argv[1]
  5. output_filename = sys.argv[2]
  6. identifier_col = int( sys.argv[3] ) - 1
  7. sequence_col = int( sys.argv[4] ) - 1
  8. quality_col = int( sys.argv[5] ) - 1
  9. max_col = max( identifier_col, sequence_col, quality_col )
  10. num_reads = None
  11. fastq_read = None
  12. skipped_lines = 0
  13. out = open( output_filename, 'wb' )
  14. for num_reads, line in enumerate( open( input_filename ) ):
  15. fields = line.rstrip( '\n\r' ).split( '\t' )
  16. if len( fields ) > max_col:
  17. out.write( "@%s\n%s\n+\n%s\n" % ( fields[identifier_col], fields[sequence_col], fields[quality_col] ) )
  18. else:
  19. skipped_lines += 1
  20. out.close()
  21. if num_reads is None:
  22. print "Input was empty."
  23. else:
  24. print "%i tabular lines were written as FASTQ reads. Be sure to use the FASTQ Groomer tool on this output before further analysis." % ( num_reads + 1 - skipped_lines )
  25. if __name__ == "__main__": main()