/lib/galaxy/datatypes/indexers/interval.awk

https://bitbucket.org/cistrome/cistrome-harvard/ · AWK · 43 lines · 36 code · 0 blank · 7 comment · 5 complexity · be5638dba123982e04f254e6116d5481 MD5 · raw file

  BEGIN {
      # Substitution table consumed by escape_filename().
      # Each KEY is handed to gsub() as a dynamic regular expression, which
      # is why regex metacharacters are written with a doubled backslash
      # ("\\[" in the string literal is the regex \[ ).  Each VALUE is the
      # literal text that replaces every match of its key.

      # from galaxy.utils
      mapped_chars[">"] = "__gt__"
      mapped_chars["<"] = "__lt__"
      mapped_chars["'"] = "__sq__"
      mapped_chars["\""] = "__dq__"
      mapped_chars["\\["] = "__ob__"
      mapped_chars["\\]"] = "__cb__"
      mapped_chars["\\{"] = "__oc__"
      mapped_chars["\\}"] = "__cc__"
      mapped_chars["@"] = "__at__"

      # additional, not in galaxy.utils
      mapped_chars["/"] = "__fs__"
      # Keep a data file from colliding with the manifest written by the END
      # rule.  The dot must be escaped for the regex, and the backslash must
      # be doubled inside the string literal: a lone "\." is not a defined
      # awk string escape (gawk drops the backslash, leaving a "." that
      # matches any character).
      mapped_chars["^manifest\\.tab$"] = "__manifest.tab__"
  }
  # escape_filename( name )
  #   Returns `name` with every match of each regex key of the global
  #   mapped_chars table replaced by that key's value.
  #
  #   name     string to escape (awk passes scalars by value, so the
  #            caller's variable is not modified)
  #   pattern  NOT an argument: extra parameters are awk's only way to
  #            declare local variables.  Callers pass `name` only; declaring
  #            the loop variable here keeps it from leaking into (and
  #            overwriting) a global of the same name.
  function escape_filename( name,    pattern )
  {
      # Iteration order of "for (k in array)" is unspecified; the table's
      # replacement texts contain none of the characters the keys match, so
      # the result does not depend on the order.
      for( pattern in mapped_chars ) {
          gsub( pattern, mapped_chars[pattern], name )
      }
      return name
  }
  # Per-record rules.  `chrom`, `end` and `storepath` are not assigned in this
  # file -- presumably column numbers and an output directory supplied by the
  # caller (e.g. via -v); confirm against the invoking code.

  # Select the output file for the current record.  The file is switched
  # whenever the chromosome differs from the previous record's, so input that
  # is not grouped by chromosome (A, B, A, ...) still lands in the right file;
  # output uses ">>", so re-opening an earlier file appends to it.
  # Appending "" forces a string comparison, so names such as "1" and "01"
  # are not treated as the same chromosome.
  ($chrom "") != cur_chrom || fn == "" {
      # close files only when we switch to a new one.
      if (fn != "") close(fn)
      cur_chrom = $chrom ""
      # Name the file after the chromosome column (the same value the END
      # rule passes to escape_filename), not a hard-coded $1.
      fn = storepath "/" escape_filename(cur_chrom)
  }
  {
      print $0 >> fn
      # Track the largest end coordinate seen for each chromosome.
      # the || part is needed to catch 0 length chromosomes, which
      # should never happen but...
      if ($end > chroms[$chrom] || !chroms[$chrom])
          chroms[$chrom] = $end
  }
  # shell_quote( s )
  #   Returns `s` wrapped in single quotes for use as one word of a shell
  #   command; each embedded single quote is emitted as '\'' .
  #   parts, n, i, quoted are locals (extra parameters), not arguments.
  function shell_quote( s,    parts, n, i, quoted )
  {
      n = split(s, parts, "'")
      quoted = parts[1]
      for( i = 2; i <= n; i++ ) {
          quoted = quoted "'\\''" parts[i]
      }
      return "'" quoted "'"
  }

  # After all input is read: append one "<chromosome> TAB <largest end>" line
  # per chromosome to storepath/manifest.tab and sort each per-chromosome
  # file in place.  `chrom`, `start` and `end` are used as sort key positions
  # and are not assigned in this file -- presumably supplied by the caller.
  END {
      manifest = storepath "/manifest.tab"
      for( x in chroms ) {
          # add line to manifest
          print x "\t" chroms[x] >> manifest
          chromfile = storepath "/" escape_filename(x)
          # Close BEFORE sorting so buffered records are on disk and awk no
          # longer holds the file open while sort rewrites it.  Closing a
          # file that is not open is harmless.
          close(chromfile)
          # sort in-place; the path is quoted so that shell metacharacters
          # in storepath or in a chromosome name cannot alter the command.
          target = shell_quote(chromfile)
          system( "sort -f -n -k " chrom " -k " start " -k " end " -o " target " " target )
      }
      close(manifest)
  }