/bc-extract-attachments.pl

https://github.com/barrycarter/bcapps · Perl · 113 lines · 48 code · 31 blank · 34 comment · 9 complexity · 75cffc18919a7d4b3119633bcb40f75b MD5 · raw file

  1. #!/bin/perl
  2. # Hideous hack: finds pieces of messages that "look like" MIME
  3. # attachments and stores them in files, replacing the attachment with
  4. # a text string
  5. # Options:
  6. #
  7. # --overwrite: overwrite output file (only for testing!)
  8. # --zombie: use program knowing it doesnt work
  9. # TODO: MAYBE make zombie a global option in bclib.pl
  10. require "/usr/local/lib/bclib.pl";
  11. unless ($globopts{zombie}) {
  12. die "DO NOT USE; Perl 32767 char regexp error breaks this program";
  13. }
  14. (($file) = shift) || die("Usage: $0 filename");
  15. $outfile = "$file.extracted";
  16. # during testing only
  17. # $globopts{debug} = 1;
  18. # in test mode, delete the attachment I'm having trouble with, forcing
  19. # prg to re-create it
  20. if ($globopts{test}) {system("rm /usr/local/etc/sha/372765976e150ed47f3449f1e1c07087cd41e0de /usr/local/etc/sha/2abca5a6deb95baf32bdab1b4d5ffedf0476166c");}
  21. if (-f $outfile && !$globopts{overwrite}) {
  22. die ("$outfile exists and I'm too chicken to overwrite it");
  23. }
  24. system("rm $outfile");
  25. # handle bzipped files
  26. if ($file=~/\.bz2$/) {
  27. open(A,"bzcat $file|")||die("Can't open pipe $file, $!");
  28. } else {
  29. open(A,$file)||die("Can't open $file, $!");
  30. }
  31. while (<A>) {
  32. # could I use redo here?
  33. # handle message we just saw (handle_msg'll ignore empty call on first msg)
  34. if (/^From /) {
  35. $num++;
  36. handle_attachments(@msg);
  37. @msg=();
  38. debug("MSG: $num");
  39. }
  40. push(@msg,$_);
  41. }
  42. # last one
  43. handle_attachments(@msg);
  44. # during testing only
  45. # system("bc-check-extract-attachments.pl --debug $file");
  46. # sample MIME line:
  47. # MDAwOTg2IDY1NTM1IGYNCjAwMDAwMDA5ODcgNjU1MzUgZg0KMDAwMDAwMDk4OCA2NTUzNSBmDQow
  48. # this should probably be handle_message()
  49. sub handle_attachments {
  50. my($msg) = join("",@_);
  51. my($chars) = "[a-zA-Z0-9\+\/]";
  52. # note that $2 is just the last line repeated
  53. $msg=~s/(\n($chars{50,}\=*\n)+)($chars+\=*\n)/handle_attachment("$1$3")/seg;
  54. # and append to outfile
  55. append_file($msg,$outfile);
  56. }
  57. # handles a single attachment
  58. sub handle_attachment {
  59. my($attach, $hashref) = @_;
  60. # debug("ATTACHMENT",$attach);
  61. # ignore tiny attachments
  62. if (length($attach)<10000) {return $attach;}
  63. # because I'm going to return two newlines anyway, strip them here
  64. $attach=~s/^\n//s;
  65. $attach=~s/\n$//s;
  66. # it's tempting to mime-decode here, but no
  67. # using sha1 here (instead of just random) lets identical
  68. # attachments share space
  69. my($sha) = sha1_hex($attach);
  70. debug("SHA: /usr/local/etc/sha/$sha");
  71. # if it already exists, no point in writing it
  72. unless (-f "/usr/local/etc/sha/$sha") {
  73. write_file($attach,"/usr/local/etc/sha/$sha");
  74. # half-hearted attempt to decode
  75. system("base64 -d /usr/local/etc/sha/$sha > /usr/local/etc/sha/$sha.dec");
  76. }
  77. my($ret) = encode_base64("[SEE /usr/local/etc/sha/$sha]");
  78. # nuke internal newlines to base64, surround with newlines
  79. $ret=~s/\n//isg;
  80. $ret="\n$ret\n";
  81. return $ret;
  82. }