/tools/filters/condense_characters.pl

https://bitbucket.org/cistrome/cistrome-harvard/ · Perl · 105 lines · 89 code · 13 blank · 3 comment · 4 complexity · 83836a44fb54297b6fea423578ad74f9 MD5 · raw file

  #! /usr/bin/perl -w
  use strict;
  use warnings;

  # Condenses every run of one repeated character into a single occurrence
  # of that character, processing the input file line by line.
  #
  # Usage: condense_characters.pl [input] [character] [output]
  #
  #   input      path of the file to read
  #   character  code selecting the character to condense:
  #                T  tab          Sp space       Dt dot      C comma
  #                D  dash         U  underscore  P  pipe
  #   output     path of the file to write (created or truncated)
  #
  # Dies on a wrong argument count, an unknown character code, or any
  # failure to open, write, or close a file.
  die "Check arguments" unless @ARGV == 3;
  my ($inputfile, $character, $outputfile) = @ARGV;

  # One table keeps the regex fragment to match ("from") and the literal
  # replacement ("to") side by side, so the two can never drift apart.
  # "from" values are regex source, hence the escaped metacharacters.
  my %condense_rule_for = (
      # "s" has a source pattern but no replacement defined, so selecting
      # it is rejected below with the "Convert to" error.
      s  => { from => '\s', to => undef },
      T  => { from => '\t', to => "\t" },
      Sp => { from => ' ',  to => ' ' },
      Dt => { from => '\.', to => '.' },
      C  => { from => ',',  to => ',' },
      D  => { from => '-',  to => '-' },
      U  => { from => '_',  to => '_' },
      P  => { from => '\|', to => '|' },
  );

  my $rule = $condense_rule_for{$character}
      or die "Invalid value specified for convert from\n";
  my $convert_from = $rule->{from};
  my $convert_to   = $rule->{to};
  die "Invalid value specified for Convert to\n" unless defined $convert_to;

  # Compile the "one or more of the character" pattern once, not per line.
  my $run_re = qr/(?:$convert_from)+/;

  # Three-argument open keeps the mode separate from the file name, so a
  # name with leading/trailing whitespace or special characters is opened
  # literally instead of being reinterpreted as part of the mode.
  open(my $fh_in, '<', $inputfile)
      or die "Cannot open source file: $!";
  open(my $fh_out, '>', $outputfile)
      or die "Cannot open output file: $!";

  while (my $line = <$fh_in>) {
      chomp $line;
      $line =~ s/$run_re/$convert_to/g;

      # Every output line is newline-terminated, even if the last input
      # line was not.
      print {$fh_out} $line, "\n"
          or die "Cannot write to output file: $!";
  }

  close($fh_in)  or die "Cannot close source file: $!";
  # Buffered write errors surface at close, so this check matters.
  close($fh_out) or die "Cannot close output file: $!";