/inc/app/sitemailer2/lib/Bouncer.php
PHP | 724 lines | 437 code | 161 blank | 126 comment | 110 complexity | bd202fc691c6986ddb1210a173e92973 MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.1, Apache-2.0, GPL-3.0
- <?php
/**
 * Detect a bounce reported as a delivery-status notification (DSN).
 *
 * Every MIME part whose Content-Type mentions "delivery-status" is scanned.
 * The first Action field of the part decides what happens:
 *   - "delayed"            : the whole message is treated as "not a bounce";
 *   - "failed" / "failure" : the Original-Recipient and Final-Recipient
 *                            addresses (rfc822 type only) are collected;
 *   - anything else        : the part is ignored.
 *
 * @param object $m Decoded message. Reads ->parts, and on each part
 *                  ->headers['content-type'] and ->body.
 * @return array|bool List of distinct, lower-cased failed addresses; true
 *                    when a failure was reported but no address could be
 *                    extracted; false when no failure was reported.
 */
function bouncer_dsn ($m) {
	if (empty ($m->parts)) {
		return false;
	}

	$bounce = false;
	$recipients = array ();

	foreach ($m->parts as $part) {
		$type = isset ($part->headers['content-type'])
			? strtolower ($part->headers['content-type'])
			: '';

		// strpos() returns 0 for a match at the very start of the string,
		// so it has to be compared against false instead of being used as
		// a boolean.
		if (strpos ($type, 'delivery-status') === false) {
			continue;
		}

		$body = isset ($part->body) ? strtolower ($part->body) : '';

		if (! preg_match ('/action:\s*([a-z]+)/', $body, $hit)) {
			continue;
		}
		$action = $hit[1];

		if ($action == 'delayed') {
			// Temporary condition only, not a bounce.
			return false;
		}

		if ($action != 'failed' && $action != 'failure') {
			continue;
		}

		$bounce = true;

		// Original-Recipient is optional, Final-Recipient is mandatory but
		// may not match the subscribed address exactly, so both are kept.
		foreach (array ('original-recipient', 'final-recipient') as $field) {
			// The address type is normally written "rfc822; addr" with a
			// space after the semicolon, but "rfc822;addr" occurs as well.
			$pattern = '/' . $field . ':\s*rfc822;?\s*<?([^\s,)>]+)/';

			if (! preg_match ($pattern, $body, $hit)) {
				continue;
			}

			$address = str_replace (array ("'", ';', '<'), '', $hit[1]);

			// Both fields usually name the same mailbox; report it once.
			if ($address !== '' && ! in_array ($address, $recipients, true)) {
				$recipients[] = $address;
			}
		}
	}

	if (! $bounce) {
		return false;
	}

	return empty ($recipients) ? true : $recipients;
}
/**
 * Parse bounce messages generated by qmail.
 *
 * qmail has a documented layout called QSBMF (qmail-send bounce message
 * format), described in http://cr.yp.to/proto/qsbmf.txt
 *
 * @param object $m Decoded message; only ->body is read.
 * @return array|bool false when the body does not contain the qmail banner;
 *                    otherwise an array holding the first <bracketed>
 *                    address found after the banner, or an empty array when
 *                    there is none.
 */
function bouncer_qmail ($m) {
	if (empty ($m->body)) return false;

	// The banner normally sits at offset 0, so compare against false.
	$s = strpos ($m->body, 'Hi. This is the qmail');
	if ($s === false) return false;

	// Both brackets must exist; otherwise there is no address to report.
	$lt = strpos ($m->body, '<', $s);
	if ($lt === false) return array ();

	$gt = strpos ($m->body, '>', $lt + 1);
	if ($gt === false) return array ();

	$addr = substr ($m->body, $lt + 1, $gt - $lt - 1);

	if (! empty ($addr)) return array ($addr);
	return array ();
}
/**
 * Parse bounce messages generated by Postfix.
 *
 * Also matches `Keftamail' (which looks just like a Postfix bounce with the
 * product name replaced), `The BNS Postfix program' and `SMTP_Gateway'.
 *
 * Side effect: the body of every inspected part is lower-cased in place.
 *
 * @param object $m Decoded message; reads ->parts and each part's ->body.
 * @return array|bool Array holding the second <bracketed> address found
 *                    after the banner (lower-cased), or false when no part
 *                    carries the banner or no such address exists.
 */
function bouncer_postfix ($m) {
	if (empty ($m->parts)) return false;

	$banners = array (
		'the postfix program',
		'the smtp_gateway program',
		'the keftamail program',
		'the bns postfix program',
	);

	foreach ($m->parts as $part) {

		$part->body = isset ($part->body) ? strtolower ($part->body) : '';

		// A banner may start at offset 0, so test against false explicitly.
		$s = false;
		foreach ($banners as $banner) {
			$s = strpos ($part->body, $banner);
			if ($s !== false) break;
		}

		if ($s === false) {
			// some postfixes put extra OS information between the words
			if (! preg_match ("'the postfix.*program'", $part->body, $hit, PREG_OFFSET_CAPTURE)) {
				continue;
			}
			$s = $hit[0][1];
		}

		// now find the (second) bracketed address after the banner
		$first = strpos ($part->body, '<', $s);
		$lt = ($first === false) ? false : strpos ($part->body, '<', $first + 1);
		$gt = ($lt === false) ? false : strpos ($part->body, '>', $lt + 1);

		// we're done: only the first part carrying a banner is examined
		if ($gt === false) return false;

		$addr = substr ($part->body, $lt + 1, $gt - $lt - 1);

		if (! empty ($addr)) return array ($addr);
		return false;
	}

	return false;
}
/**
 * Parse bounce messages sent by Yahoo's mailer-daemon.
 *
 * Side effect: $m->body is lower-cased in place once the sender matched.
 *
 * @param object $m Decoded message; reads ->headers['from'] and ->body.
 * @return array|bool Array holding the first <bracketed> address after the
 *                    phrase "to the following address" (lower-cased), or
 *                    false when the sender is not mailer-daemon@yahoo* or
 *                    no bracketed address exists.
 */
function bouncer_yahoo ($m) {

	$from = isset ($m->headers['from']) ? strtolower ($m->headers['from']) : '';

	if (strpos ($from, 'mailer-daemon@yahoo') === false) {
		return false;
	}

	$m->body = isset ($m->body) ? strtolower ($m->body) : '';

	// When the phrase is missing the search starts at the top of the body.
	$s = strpos ($m->body, 'to the following address');
	if ($s === false) $s = 0;

	//now find the email addy; both brackets are required
	$lt = strpos ($m->body, '<', $s);
	if ($lt === false) return false;

	$gt = strpos ($m->body, '>', $lt + 1);
	if ($gt === false) return false;

	$addr = substr ($m->body, $lt + 1, $gt - $lt - 1);

	//we're done
	if (! empty ($addr)) return array ($addr);
	return false;
}
/**
 * Parse bounces containing the phrase "the following recipients did not
 * receive this message".
 *
 * Side effect: the body of every inspected part is lower-cased in place.
 *
 * @param object $m Decoded message; reads ->parts and each part's ->body.
 * @return array|bool Array holding the first <bracketed> address after the
 *                    phrase (lower-cased), or false when no part carries
 *                    the phrase or no bracketed address follows it.
 */
function bouncer_caiwireless ($m) {
	if (empty ($m->parts)) return false;

	foreach ($m->parts as $part) {

		$part->body = isset ($part->body) ? strtolower ($part->body) : '';
		$s = strpos ($part->body, 'the following recipients did not receive this message');

		if ($s === false) continue;

		//now find the email addy; both brackets are required
		$lt = strpos ($part->body, '<', $s);
		if ($lt === false) return false;

		$gt = strpos ($part->body, '>', $lt + 1);
		if ($gt === false) return false;

		$addr = substr ($part->body, $lt + 1, $gt - $lt - 1);

		//we're done: only the first part carrying the phrase is examined
		if (! empty ($addr)) return array ($addr);
		return false;
	}

	return false;
}
/**
 * Recognizes (some) Microsoft Exchange formats.
 *
 * Side effect: the body of every inspected part is lower-cased in place.
 *
 * @param object $m Decoded message; reads ->parts and each part's ->body.
 * @return array|bool Array holding the token that follows "smtp=" in the
 *                    first part that contains both the Exchange phrase and
 *                    such a token; true when the phrase was seen but no
 *                    token was found; false otherwise.
 */
function bouncer_exchange ($m) {

	if (empty ($m->parts)) return false;

	$recognised = false;

	foreach ($m->parts as $section) {

		$section->body = strtolower ($section->body);

		if (strpos ($section->body, 'did not reach the following recipient') === false) {
			continue;
		}

		$recognised = true;

		// the address is searched for in the whole part, not only after the phrase
		$address = findPostFix ('smtp=', $section->body);

		if (! empty ($address)) {
			return array ($address);
		}
	}

	return $recognised;
}
/**
 * Parse bounce messages generated by Exim.
 *
 * Exim adds an X-Failed-Recipients: header to bounce messages containing
 * an `addresslist' of failed addresses, i.e. possibly several addresses
 * separated by commas. Each address is returned as its own element.
 *
 * @param object $m Decoded message; reads ->headers['x-failed-recipients'].
 * @return array|bool Array of failed addresses (case preserved), or false
 *                    when the header is absent or holds no address.
 */
function bouncer_exim ($m) {

	if (empty ($m->headers['x-failed-recipients'])) return false;

	$raw = $m->headers['x-failed-recipients'];

	// NOTE(review): a repeated header may arrive as an array depending on
	// the MIME decoder in use; flatten it so both shapes are handled.
	if (is_array ($raw)) $raw = implode (',', $raw);

	$failed = preg_split ('/\s*,\s*/', trim ($raw), -1, PREG_SPLIT_NO_EMPTY);

	if (empty ($failed)) return false;

	return $failed;
}
/**
 * Netscape Messaging Server bounce formats.
 *
 * These bounces come in DSN MIME layout but do not include any -Recipient:
 * headers, so the text of each part is parsed instead (seen with NMS 3.6
 * and, in an even looser form, NMS 4.1).
 *
 * Side effect: the body of every inspected part is lower-cased in place.
 *
 * @param object $m Decoded message; reads ->parts and each part's ->body.
 * @return array|bool Array with one address per part that carries the
 *                    "undeliverable" phrase followed by a '<'; true when
 *                    the phrase was seen but no address was found; false
 *                    otherwise.
 */
function bouncer_netscape ($m) {

	if (empty ($m->parts)) return false;

	$recognised = false;
	$collected = array ();

	//several parts may each report a failure
	foreach ($m->parts as $section) {

		$section->body = strtolower ($section->body);

		$at = strpos ($section->body, 'this message was undeliverable due to the following reason:');

		if ($at === false) continue;

		$recognised = true;

		// take whatever follows the first '<' after the phrase
		$candidate = findPostFix ('<', substr ($section->body, $at));

		if (! empty ($candidate)) {
			$collected[] = $candidate;
		}
	}

	return empty ($collected) ? $recognised : $collected;
}
/**
 * Compuserve has its own weird format for bounces.
 *
 * Only single-part messages are considered. Side effect: $m->body is
 * lower-cased in place.
 *
 * @param object $m Decoded message; reads ->parts and ->body.
 * @return array|bool Array holding the token that follows "invalid receiver
 *                    address:", or false when the message is multipart,
 *                    lacks the Compuserve phrase, or names no address.
 */
function bouncer_compuserve ($m) {

	if (! empty ($m->parts)) return false;

	$m->body = strtolower ($m->body);

	if (strpos ($m->body, 'your message could not be delivered') === false) {
		return false;
	}

	//now find the email addy
	$address = findPostFix ('invalid receiver address:', $m->body);

	return empty ($address) ? false : array ($address);
}
/**
 * Parse two bounce layouts attributed to Microsoft mail servers.
 *
 * Layout 1: the part mentions "transcript of session follow"; the first
 *           line of that part containing an '@' is taken as the address.
 * Layout 2: the part contains "did not reach the following recipient(s):";
 *           the token following that phrase is taken as the address.
 *
 * Side effect: the body of every non-empty part is lower-cased in place.
 *
 * @param object $m Decoded message; reads ->parts and each part's ->body.
 * @return array|bool Single-element array with the lower-cased address, or
 *                    false when no part yields one.
 */
function bouncer_microsoft ($m) {
	if (empty ($m->parts)) return false;

	foreach ($m->parts as $part) {

		if (empty ($part->body)) continue;

		$part->body = strtolower ($part->body);

		// strpos() takes the haystack first and the needle second.
		if (strpos ($part->body, 'transcript of session follow') !== false) {
			//there should be a line with only the email address in it

			foreach (explode ("\n", $part->body) as $line) {
				if (strpos ($line, '@') !== false) {
					// return the line itself, not the offset of the '@'
					return array (trim ($line));
				}
			}

			continue;
		}

		//try one other format
		if (strpos ($part->body, 'did not reach the following recipient(s):') === false) {
			continue;
		}

		$res = findPostFix ('did not reach the following recipient(s):', $part->body);

		if (! empty ($res)) return array ($res);
	}

	return false;
}
/**
 * This appears to be the format for Novell GroupWise and NTMail, e.g.
 *   X-Mailer: Novell GroupWise Internet Agent 5.5.3.1
 *   X-Mailer: NTMail v4.30.0012
 *   X-Mailer: Internet Mail Service (5.5.2653.19)
 *
 * Any non-empty X-Mailer header enables the detector; its value is not
 * inspected. Side effect: each inspected part's body is rewritten in place
 * (lower-cased for text/plain parts, '<BR>' replaced by a space otherwise).
 *
 * @param object $m Decoded message; reads ->headers['x-mailer'], ->parts
 *                  and each part's ->headers['content-type'] and ->body.
 * @return array|bool Array holding the token that follows "did not reach
 *                    the following recipient(s):" in the first part that
 *                    has one, or false.
 */
function bouncer_groupwise ($m) {

	if (empty ($m->headers['x-mailer']) || empty ($m->parts)) return false;

	foreach ($m->parts as $section) {

		if (strpos ($section->headers['content-type'], 'text/plain') !== false) {
			//plain text: normalise case before searching
			$section->body = strtolower ($section->body);
		} else {
			//probably html: drop line-break tags, case is left untouched
			$section->body = str_replace ('<BR>', ' ', $section->body);
		}

		$address = findPostFix ("did not reach the following recipient(s):", $section->body);

		if (! empty ($address)) return array ($address);
	}

	return false;
}
/**
 * Parse bounces from something which claims
 *   X-Mailer: <SMTP32 vXXXXXX>
 *
 * A host answering with this mailer identified itself as
 *   220 X1 NT-ESMTP Server 208.24.118.205 (IMail 6.00 45595-15)
 *
 * The body is searched case-insensitively (via a lower-cased copy; $m is
 * not modified) for a series of labels; the token following the first
 * label that yields one is returned.
 *
 * @param object $m Decoded message; reads ->headers['x-mailer'] and ->body.
 * @return array|bool Single-element array with the lower-cased address, or
 *                    false when the mailer does not match or no label
 *                    yields an address.
 */
function bouncer_smtp32 ($m) {
	if (empty ($m->headers['x-mailer'])) return false;

	if (strpos ($m->headers['x-mailer'], '<SMTP32 v') === false) return false;

	// The labels below are lower-case, so the text they are matched
	// against has to be lower-cased as well.
	$body = isset ($m->body) ? strtolower ($m->body) : '';

	$labels = array (
		'user mailbox:',
		'undeliverable to',
		'delivery failed:',
		'attempts: ',
	);

	foreach ($labels as $label) {
		$mail = findPostFix ($label, $body);
		if ($mail) return array ($mail);
	}

	return false;
}
/**
 * Recognizes simple heuristically delimited bounces.
 *
 * Each entry of the table below is (start marker, end marker, extraction
 * rule). The text of the message body, or of the first part of a multipart
 * message, is lower-cased; the first entry whose start marker is followed
 * somewhere later by its end marker decides the outcome, and the text
 * between the two markers is searched according to the rule:
 *   '<>'   : the address is the text between the first '<' and the next '>';
 *   '>'    : the address is the rest of the line after the first '>';
 *   other  : the address is the token following that label.
 *
 * @param object $m Decoded message; reads ->parts, ->body or
 *                  ->parts[0]->body.
 * @return array|bool Single-element array with the lower-cased address, or
 *                    false when nothing could be extracted.
 */
function bouncer_simplematch ($m) {
	if (empty ($m->parts)) $body = $m->body;
	else $body = $m->parts[0]->body;

	$tuples = array (
		# sdm.de
		array ('here is your list of failed recipients',
			'here is your returned mail',
			'<>'),
		# sz-sb.de, corridor.com, nfg.nl
		array ('(expanded from: ',
			'transcript of session follows',
			'expanded from:'),
		# robanal.demon.co.uk
		array ('this message was created automatically by mail delivery software',
			'original message follows',
			'rcpt to:'),
		# s1.com (InterScan E-Mail VirusWall NT ???)
		array ('message from interscan e-mail viruswall nt',
			'end of message',
			'rcpt to:'),
		# Smail
		array ("failed addresses follow: ",
			'message text follows:',
			"failed addresses follow: ---------------------|"),
		# newmail.ru
		array ('This is the machine generated message from mail service.',
			'--- Below the next line is a copy of the message.',
			'<>'),
		# turbosport.com runs something called `MDaemon 3.5.2' ???
		array ('The following addresses did NOT receive a copy of your message:',
			'--- Session Transcript ---',
			'>'),
		# usa.net
		array ('Intended recipient:',
			'--------RETURNED MAIL FOLLOWS--------',
			'Intended recipient:'),
		# hotpop.com
		array ('Undeliverable Address:',
			'Original message attached',
			'Undeliverable Address:'),
		# Another demon.co.uk format
		array ('This message was created automatically by mail delivery',
			'---- START OF RETURNED MESSAGE ----',
			"addressed to "),
		array ('------ Failed Recipients ------',
			'-------- Returned Mail --------',
			'<>'),
	);

	$body = strtolower ($body);
	$email = '';

	foreach ($tuples as $t) {
		$start = strtolower ($t[0]);
		$end = strtolower ($t[1]);
		$rule = strtolower ($t[2]);

		$s = strpos ($body, $start);
		if ($s === false) continue;

		// The end marker only counts when it comes after the start marker;
		// otherwise the slice between them would have a negative length.
		$e = strpos ($body, $end, $s);
		if ($e === false) continue;

		$haystack = substr ($body, $s, $e - $s);

		if ($rule == '<>') {
			//email is surrounded by <>; both brackets are required
			$open = strpos ($haystack, '<');
			$close = ($open === false) ? false : strpos ($haystack, '>', $open + 1);

			if ($close !== false) {
				$email = trim (substr ($haystack, $open + 1, $close - $open - 1));
			}
		} elseif ($rule == '>') {
			//email follows a '>' and runs to the end of that line
			$open = strpos ($haystack, '>');

			if ($open !== false) {
				$close = strpos ($haystack, "\n", $open + 1);
				if ($close === false) $close = strlen ($haystack);

				$email = trim (substr ($haystack, $open + 1, $close - $open - 1));
			}
		} else {
			$email = findPostFix ($rule, $haystack);
		}

		// only the first matching entry is ever tried
		break;
	}

	if (empty ($email)) return false;

	return array ($email);
}
/**
 * Yale's mail server is pretty dumb.
 *
 * Its reports include the end user's name but not the full domain, so the
 * domain is guessed. This is based purely on examination of sample bounces
 * and will fail as soon as Yale changes its MTA even slightly.
 *
 * @param object $m Decoded message; reads ->headers['from'] and ->body.
 * @return array|bool Two candidate addresses built from the reported user
 *                    name, or false when the sender is not a yale.edu
 *                    mailer-daemon or no user name was found.
 */
function bouncer_yale ($m) {
	//sender must mention both mailer-daemon and yale.edu
	$sender = strtolower ($m->headers['from']);

	if (strpos ($sender, 'mailer-daemon') === false) return false;
	if (strpos ($sender, 'yale.edu') === false) return false;

	$user = findPostFix ('--------Message not delivered to the following:', $m->body);

	if (! $user) return false;

	// NOTE(review): 'yale.cs.edu' may have been meant as 'cs.yale.edu' — confirm.
	return array ($user . '@yale.edu', $user . '@yale.cs.edu');
}
/**
 * Parse bounces sent by postmaster@llnl.gov.
 *
 * @param object $m Decoded message; reads ->headers['from'] (matched
 *                  case-sensitively) and ->body.
 * @return array|bool Single-element array with the token that follows
 *                    "which your message was addressed,", or false.
 */
function bouncer_llnl ($m) {

	if (strpos ($m->headers['from'], 'postmaster@llnl.gov') === false) {
		return false;
	}

	$address = findPostFix ('which your message was addressed,', $m->body);

	return $address ? array ($address) : false;
}
/**
 * Tell whether a value is numeric and equal to its own integer conversion.
 *
 * @param mixed $x Value to inspect.
 * @return bool true for integers and integer-valued numeric strings or
 *              floats, false for everything else.
 */
function new_is_int ($x) {
	if (! is_numeric ($x)) {
		return false;
	}

	// loose comparison on purpose: '5' and 5.0 both count as integers
	return intval ($x) == $x;
}
/**
 * Extract the first name from an address header value.
 *
 * Supported formats:
 *   1. Name <email>
 *   2. email
 *   3. "Name@domain" <email>
 *   4. <email>
 *
 * The display name is the text in front of '<', with surrounding double
 * quotes removed; the first name is that text up to its first space.
 *
 * @param string $x Address as found in a mail header.
 * @return string First name, or an empty string when the address carries
 *                no display name (formats 2 and 4).
 */
function getFirstNameFromAddress ($x) {

	$x = trim ($x);

	$lt = strpos ($x, '<');
	$gt = strpos ($x, '>');

	// without a bracketed address there is no display name to split
	if ($lt === false || $gt === false) return '';

	// strip whitespace and the quotes that may wrap the display name
	$name = trim (substr ($x, 0, $lt), " \t\n\r\0\x0B\"");

	if ($name === '') return '';

	//break up name if theres a space in it
	$space = strpos ($name, ' ');

	if ($space === false) return $name;

	return substr ($name, 0, $space);
}
/**
 * Extract the last name from an address header value.
 *
 * Supported formats:
 *   1. Name <email>
 *   2. email
 *   3. "Name@domain" <email>
 *   4. <email>
 *
 * The display name is the text in front of '<', with surrounding double
 * quotes removed; the last name is everything after its first space.
 *
 * @param string $x Address as found in a mail header.
 * @return string Last name, or an empty string when the address carries no
 *                display name or the display name is a single word.
 */
function getLastNameFromAddress ($x) {

	$x = trim ($x);

	$lt = strpos ($x, '<');
	$gt = strpos ($x, '>');

	// without a bracketed address there is no display name to split
	if ($lt === false || $gt === false) return '';

	// strip whitespace and the quotes that may wrap the display name
	$name = trim (substr ($x, 0, $lt), " \t\n\r\0\x0B\"");

	//break up name if theres a space in it
	$space = strpos ($name, ' ');

	if ($space === false) return '';

	return trim (substr ($name, $space + 1));
}
/**
 * Extract the bare e-mail address from an address header value.
 *
 * Supported formats:
 *   1. Name <email>
 *   2. email
 *   3. "Name@domain" <email>
 *   4. <email>
 *
 * @param string $x Address as found in a mail header.
 * @return string Lower-cased address: the text between '<' and '>' when a
 *                closing bracket is present, otherwise the trimmed input.
 */
function getEmailFromAddress ($x) {

	$raw = trim ($x);

	$open = strpos ($raw, '<');
	$close = strpos ($raw, '>');

	// format 2: no closing bracket, the whole input is the address
	if (! $close) {
		return strtolower ($raw);
	}

	// formats 1, 3 and 4: take what sits between the brackets
	$inside = substr ($raw, $open + 1, $close - $open - 1);

	return strtolower (trim ($inside));
}
/**
 * Return the token that follows the first occurrence of $needle.
 *
 * Newlines in $haystack are treated as spaces. After the needle any spaces
 * are skipped, then everything up to (not including) the next comma,
 * space, ')' or '>' — or the end of the text — is taken. Finally the
 * characters ', ; and < are removed from the token.
 *
 * The match is case-sensitive; callers lower-case their input as needed.
 *
 * @param string $needle   Label to look for.
 * @param string $haystack Text to search.
 * @return string|bool The cleaned token (possibly an empty string), or
 *                     false when $needle does not occur in $haystack.
 */
function findPostfix ($needle, $haystack) {

	$haystack = str_replace ("\n", ' ', (string) $haystack);

	$start = strpos ($haystack, $needle);

	if ($start === false) return false;

	$start += strlen ($needle);

	//skip to the next non space char
	$start += strspn ($haystack, ' ', $start);

	//chars that shouldnt be in an email address and often end one; a
	//terminator in the very last position ends the token like any other
	$length = strcspn ($haystack, ", )>", $start);

	$final = trim (substr ($haystack, $start, $length));

	//hack: drop stray punctuation picked up along the way
	return str_replace (array ("'", ';', '<'), '', $final);
}
-
/**
 * Record a bounce for each address and disable recipients that bounced
 * too often.
 *
 * For every recipient id registered under an address, the bounces logged in
 * sitemailer2_bounces during the last three months are counted. When that
 * count has reached the 'disable_subscriber_after_bounces' setting, the
 * recipient is marked "disabled" and its newsletter subscriptions are
 * marked "unsubscribed"; otherwise a new bounce row is inserted.
 *
 * NOTE(review): if the setting is missing or zero, the comparison below is
 * true for every recipient, including those with no bounces yet — confirm
 * that appconf() always returns a positive number here.
 *
 * @param array $addrs E-mail addresses that bounced.
 * @return void
 */
function unsub ($addrs) {

	//only consider bounces younger than 3 months; mktime() normalises a
	//month number that drops to zero or below into the previous year
	$now = getdate ();
	$max_date = date ('Y-m-d H:i:s', mktime ($now['hours'], $now['minutes'], $now['seconds'], $now['mon'] - 3, $now['mday'], $now['year']));

	foreach ($addrs as $email) {

		//get the recipient id(s) for the email address
		$recipients = db_shift_array ('select id from sitemailer2_recipient where email=?', $email);

		//see if we need to unsub
		$max_bounces = appconf ('disable_subscriber_after_bounces');

		//we need to find out how many times each recipient id has been bounced
		foreach ($recipients as $r) {

			$count = db_shift ('select count(id) from sitemailer2_bounces where recipient=? and occurred > ?', $r, $max_date);

			if ($count >= $max_bounces) {

				//need to unsub
				db_execute ('update sitemailer2_recipient_in_newsletter set status="unsubscribed" where recipient=?', $r);
				db_execute ('update sitemailer2_recipient set status="disabled" where id=?', $r);
			} else {

				//count this bounce
				if (! db_execute ('insert into sitemailer2_bounces (id, recipient, message, occurred) values (null, ?, "", now())', $r)) {
					echo "Failed to updates bounces\n";
				}
			}
		}
	}
}
/**
 * Add bounces to the counter of the message they most likely belong to.
 *
 * The originating message is not known, so a best guess is made: the
 * bounce is assumed to have happened now and is attributed to the most
 * recently started message whose status is "running" or "done".
 *
 * @param int $num Number of bounces to add to that message's num_bounced.
 * @return void Prints a diagnostic line when no candidate message exists.
 */
function count_bounce ($num) {

	$message_id = db_shift ('select id from sitemailer2_message where start < now() and status in("running", "done") order by start desc limit 1');

	if (! $message_id) {
		echo "Failed to determine what message resulted in bounce \n";
		return;
	}

	db_execute ('update sitemailer2_message set num_bounced=num_bounced+? where id = ?', $num, $message_id);
}
- ?>