PageRenderTime 50ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/inc/app/sitemailer2/lib/Bouncer.php

https://github.com/lux/sitellite
PHP | 724 lines | 437 code | 161 blank | 126 comment | 110 complexity | bd202fc691c6986ddb1210a173e92973 MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.1, Apache-2.0, GPL-3.0
  1. <?php
  2. function bouncer_dsn ($m) {
  3. # Iterate over each message/delivery-status subpart
  4. $bounce = false;
  5. if (empty ($m->parts)) {
  6. return false;
  7. }
  8. foreach ($m->parts as $part) {
  9. if (strpos (strtolower($part->headers['content-type']), 'message') ||
  10. (strpos (strtolower($part->headers['content-type']), 'delivery-status'))) {
  11. # Each message/delivery-status contains a list of Message objects
  12. # which are the header blocks. Iterate over those too.
  13. # We try to dig out the Original-Recipient (which is optional) and
  14. # Final-Recipient (which is mandatory, but may not exactly match
  15. # an address on our list). Some MTA's also use X-Actual-Recipient
  16. # as a synonym for Original-Recipient, but some apparently use
  17. # that for other purposes :(
  18. #
  19. # Also grok out Action so we can do something with that too.
  20. $body = strtolower($part->body);
  21. $action = findPostFix ('action:', $body);
  22. if ($action == 'delayed') {
  23. return false;
  24. } elseif ($action == 'failed' || $action == 'failure') {
  25. //find recipient, this is a bounced message
  26. $bounce = true;
  27. // ; removed from 'rfc822;' because findPostFix removes ';'
  28. $recipient = findPostFix ('original-recipient:', $body);
  29. $recipient = findPostFix ('rfc822', $recipient);
  30. if (! empty ($recipient)) $recipients[] = $recipient;
  31. $recipient = findPostFix ('final-recipient:', $body);
  32. $recipient = findPostFix ('rfc822', $recipient);
  33. if (! empty ($recipient)) $recipients[] = $recipient;
  34. }
  35. }
  36. }
  37. if ($bounce) {
  38. if (! empty ($recipients)) {
  39. return $recipients;
  40. } else {
  41. return true;
  42. }
  43. } else {
  44. return false;
  45. }
  46. }
  47. /*Parse bounce messages generated by qmail.
  48. Qmail actually has a standard, called QSBMF (qmail-send bounce message
  49. format), as described in
  50. http://cr.yp.to/proto/qsbmf.txt
  51. */
  52. function bouncer_qmail ($m) {
  53. $s = strpos ($m->body, 'Hi. This is the qmail');
  54. //$s should equal 0, be careful
  55. if (! is_int ($s)) return false;
  56. //find addresses after pos $s
  57. $len = strlen($m->body);
  58. $lt = strpos ($m->body, '<', $s) + 1;
  59. $gt = strpos ($m->body, '>', $lt);
  60. $addr = substr ($m->body, $lt, $gt-$lt);
  61. if (! empty ($addr)) return array ($addr);
  62. else return array();
  63. }
  64. /*Parse bounce messages generated by Postfix.
  65. This also matches something called `Keftamail' which looks just like Postfix
  66. bounces with the word Postfix scratched out and the word `Keftamail' written
  67. in in crayon.
  68. It also matches something claiming to be `The BNS Postfix program', and
  69. `SMTP_Gateway'. Everybody's gotta be different, huh?
  70. */
  71. function bouncer_postfix ($m) {
  72. $bounce = false;
  73. if (empty ($m->parts)) return false;
  74. foreach ($m->parts as $part) {
  75. $part->body = strtolower ($part->body);
  76. $s = strpos ($part->body, 'the postfix program');
  77. if (! $s) $s = strpos ($part->body, 'the smtp_gateway program');
  78. if (! $s) $s = strpos ($part->body, 'the keftamail program');
  79. if (! $s) $s = strpos ($part->body, 'the bns postfix program');
  80. if (! $s) {
  81. //some postfixes have more os information too... regexp
  82. $s = preg_match ("'the postfix.*program'", $part->body);
  83. if ($s === 0) continue;
  84. $s = strpos ($part->body, 'program');
  85. }
  86. if (is_int ($s)) {
  87. $bounce = true;
  88. //now find the (second) email addy
  89. $lt = strpos ($part->body, '<', $s) + 1;
  90. $lt = strpos ($part->body, '<', $lt) + 1;
  91. $gt = strpos ($part->body, '>', $lt);
  92. $addr = substr ($part->body, $lt, $gt-$lt);
  93. //we're done
  94. if (! empty ($addr)) return array ($addr);
  95. break;
  96. }
  97. }
  98. return false;
  99. }
  100. function bouncer_yahoo ($m) {
  101. if (! is_int (strpos (strtolower($m->headers['from']), 'mailer-daemon@yahoo'))) {
  102. return false;
  103. }
  104. $m->body = strtolower ($m->body);
  105. $s = strpos ($m->body, 'to the following address');
  106. //now find the email addy
  107. $lt = strpos ($m->body, '<', $s) + 1;
  108. $gt = strpos ($m->body, '>', $lt);
  109. $addr = substr ($m->body, $lt, $gt-$lt);
  110. //we're done
  111. if (! empty ($addr)) return array ($addr);
  112. else return false;
  113. }
  114. function bouncer_caiwireless ($m) {
  115. if (empty ($m->parts)) return false;
  116. $bounce = false;
  117. foreach ($m->parts as $part) {
  118. $part->body = strtolower ($part->body);
  119. $s = strpos ($part->body, 'the following recipients did not receive this message');
  120. if (is_int ($s)) {
  121. $bounce = true;
  122. //now find the email addy
  123. $lt = strpos ($part->body, '<', $s) + 1;
  124. $gt = strpos ($part->body, '>', $lt);
  125. $addr = substr ($part->body, $lt, $gt-$lt);
  126. //we're done
  127. if (! empty ($addr)) return array ($addr);
  128. else return false;
  129. }
  130. }
  131. return $bounce;
  132. }
  133. //Recognizes (some) Microsoft Exchange formats.
  134. function bouncer_exchange ($m) {
  135. $bounce = false;
  136. if (empty ($m->parts)) return false;
  137. foreach ($m->parts as $part) {
  138. $part->body = strtolower ($part->body);
  139. $s = strpos ($part->body, 'did not reach the following recipient');
  140. if (is_int ($s)) {
  141. $bounce = true;
  142. //now find the email addy
  143. $email = findPostFix ('smtp=', $part->body);
  144. if (! empty ($email)) {
  145. return array ($email);
  146. }
  147. }
  148. }
  149. return $bounce;
  150. }
  151. /*Parse bounce messages generated by Exim.
  152. Exim adds an X-Failed-Recipients: header to bounce messages containing
  153. an `addresslist' of failed addresses.
  154. */
  155. function bouncer_exim ($m) {
  156. if (empty ($m->headers['x-failed-recipients'])) return false;
  157. return array ($m->headers['x-failed-recipients']);
  158. }
  159. /*Netscape Messaging Server bounce formats.
  160. I've seen at least one NMS server version 3.6 (envy.gmp.usyd.edu.au) bounce
  161. messages of this format. Bounces come in DSN MIME format, but don't include
  162. any -Recipient: headers. Gotta just parse the text :(
  163. NMS 4.1 (dfw-smtpin1.email.verio.net) seems even worse, but we'll try to
  164. decipher the format here too.
  165. */
  166. function bouncer_netscape ($m) {
  167. if (empty ($m->parts)) return false;
  168. $bounce = false;
  169. $addr = array ();
  170. foreach ($m->parts as $part) {
  171. //can be multiple failures per email
  172. $part->body = strtolower ($part->body);
  173. $s = strpos ($part->body, 'this message was undeliverable due to the following reason:');
  174. if (is_int ($s)) {
  175. $bounce = true;
  176. $email = findPostFix ('<', substr ($part->body, $s));
  177. if (!empty ($email)) {
  178. $addr[] = $email;
  179. }
  180. }
  181. }
  182. if (empty($addr)) return $bounce;
  183. else return $addr;
  184. }
  185. //Compuserve has its own weird format for bounces.
  186. function bouncer_compuserve ($m) {
  187. if (! empty ($m->parts)) return false;
  188. $m->body = strtolower ($m->body);
  189. $s = strpos ($m->body, 'your message could not be delivered');
  190. if (is_int ($s)) {
  191. //now find the email addy
  192. $email = findPostFix ('invalid receiver address:', $m->body);
  193. //we're done
  194. }
  195. if (! empty ($email)) return array ($email);
  196. else return false;
  197. }
  198. function bouncer_microsoft ($m) {
  199. if (empty ($m->parts)) return false;
  200. foreach ($m->parts as $part) {
  201. if (empty ($part->body)) continue;
  202. $part->body = strtolower ($part->body);
  203. $s = strpos ('transcript of session follow', $part->body);
  204. if (is_int ($s)) {
  205. //there should be a line with only the email address in it
  206. foreach (explode ("\n", $part->body) as $line) {
  207. $t = strpos ('@', $line);
  208. if (is_int ($t))
  209. return array (trim ($t));
  210. }
  211. } else {
  212. //try one other format
  213. $s = strpos ('did not reach the following recipient(s):', $part->body);
  214. if (! is_int($s)) continue;
  215. $res = findPostFix ('did not reach the following recipient(s):', $part->body);
  216. if (! empty ($res)) return array ($res);
  217. }
  218. }
  219. return false;
  220. }
  221. /*This appears to be the format for Novell GroupWise and NTMail
  222. X-Mailer: Novell GroupWise Internet Agent 5.5.3.1
  223. X-Mailer: NTMail v4.30.0012
  224. X-Mailer: Internet Mail Service (5.5.2653.19)
  225. */
  226. function bouncer_groupwise ($m) {
  227. if (empty ($m->headers['x-mailer'])) return false;
  228. if (empty ($m->parts)) return false;
  229. //find a plain/text
  230. foreach ($m->parts as $part) {
  231. $s = strpos ($part->headers['content-type'], 'text/plain');
  232. if (is_int ($s)) {
  233. //return addy should be here somewhere
  234. $part->body = strtolower ($part->body);
  235. $email = findPostFix ("did not reach the following recipient(s):", $part->body);
  236. if (! empty ($email)) return array ($email);
  237. } else {
  238. //probably mime/html type... look for the same after removing some html tags
  239. $part->body = str_replace ('<BR>', ' ', $part->body);
  240. $email = findPostFix ("did not reach the following recipient(s):", $part->body);
  241. if (! empty ($email)) return array ($email);
  242. }
  243. }
  244. return false;
  245. }
  246. /*Something which claims
  247. X-Mailer: <SMTP32 vXXXXXX>
  248. What the heck is this thing? Here's a recent host:
  249. % telnet 207.51.255.218 smtp
  250. Trying 207.51.255.218...
  251. Connected to 207.51.255.218.
  252. Escape character is '^]'.
  253. 220 X1 NT-ESMTP Server 208.24.118.205 (IMail 6.00 45595-15)
  254. */
  255. function bouncer_smtp32 ($m) {
  256. if (empty ($m->headers['x-mailer'])) return false;
  257. $s = strpos ($m->headers['x-mailer'], '<SMTP32 v');
  258. if (! is_int ($s)) return false;
  259. $mail = findPostFix ('user mailbox:', $m->body);
  260. if (! $mail) $mail = findPostFix ('undeliverable to', $m->body);
  261. if (! $mail) $mail = findPostFix ('delivery failed:', $m->body);
  262. if (! $mail) $mail = findPostFix ('attempts: ', $m->body);
  263. if ($mail) return array ($mail);
  264. else return false;
  265. }
  266. //Recognizes simple heuristically delimited bounces.
  267. function bouncer_simplematch ($m) {
  268. if (empty ($m->parts)) $body = $m->body;
  269. else $body = $m->parts[0]->body;
  270. $tuples = array (
  271. # sdm.de
  272. array ('here is your list of failed recipients',
  273. 'here is your returned mail',
  274. '<>'),
  275. # sz-sb.de, corridor.com, nfg.nl
  276. array ('(expanded from: ',
  277. 'transcript of session follows',
  278. 'expanded from:'),
  279. # robanal.demon.co.uk
  280. array ('this message was created automatically by mail delivery software',
  281. 'original message follows',
  282. 'rcpt to:'),
  283. # s1.com (InterScan E-Mail VirusWall NT ???)
  284. array ('message from interscan e-mail viruswall nt',
  285. 'end of message',
  286. 'rcpt to:'),
  287. # Smail
  288. array ("failed addresses follow: ",
  289. 'message text follows:',
  290. "failed addresses follow: ---------------------|"),
  291. # newmail.ru
  292. array ('This is the machine generated message from mail service.',
  293. '--- Below the next line is a copy of the message.',
  294. '<>'),
  295. # turbosport.com runs something called `MDaemon 3.5.2' ???
  296. array ('The following addresses did NOT receive a copy of your message:',
  297. '--- Session Transcript ---',
  298. '>'),
  299. # usa.net
  300. array ('Intended recipient:',
  301. '--------RETURNED MAIL FOLLOWS--------',
  302. 'Intended recipient:'),
  303. # hotpop.com
  304. array ('Undeliverable Address:',
  305. 'Original message attached',
  306. 'Undeliverable Address:'),
  307. # Another demon.co.uk format
  308. array ('This message was created automatically by mail delivery',
  309. '---- START OF RETURNED MESSAGE ----',
  310. "addressed to "),
  311. array ('------ Failed Recipients ------',
  312. '-------- Returned Mail --------',
  313. '<>'),
  314. );
  315. $body = strtolower ($body);
  316. foreach ($tuples as $t) {
  317. $t[0] = strtolower ($t[0]);
  318. $t[1] = strtolower ($t[1]);
  319. $t[2] = strtolower ($t[2]);
  320. $s = strpos ($body, $t[0]);
  321. if (! is_int ($s)) continue;
  322. $e = strpos ($body, $t[1]);
  323. if (! is_int ($e)) continue;
  324. if ($t[2] == '<>') {
  325. //email is surround by <>
  326. $haystack = substr ($body, $s, $e-$s);
  327. $open = strpos ($haystack, '<') + 1;
  328. $close = strpos ($haystack, '>', $open);
  329. $email = trim (substr ($haystack, $open, $close-$open));
  330. break;
  331. } elseif ($t[2] == '>') {
  332. $haystack = substr ($body, $s, $e-$s);
  333. $open = strpos ($haystack, '>') + 1;
  334. $close = strpos ($haystack, "\n", $open);
  335. $email = trim (substr ($haystack, $open, $close-$open));
  336. break;
  337. } else {
  338. $haystack = substr ($body, $s, $e-$s);
  339. $email = findPostFix ($t[2], $haystack);
  340. break;
  341. }
  342. }
  343. if (empty ($email)) return false;
  344. else return array ($email);
  345. }
  346. /*Yale's mail server is pretty dumb.
  347. Its reports include the end user's name, but not the full domain. I think we
  348. can usually guess it right anyway. This is completely based on examination of
  349. the corpse, and is subject to failure whenever Yale even slightly changes
  350. their MTA. :(
  351. */
  352. function bouncer_yale ($m) {
  353. //check if from has mailer-daemon and yale.edu
  354. $s = strpos (strtolower ($m->headers['from']), 'mailer-daemon');
  355. if (! is_int ($s)) return false;
  356. $s = strpos (strtolower ($m->headers['from']), 'yale.edu');
  357. if (! is_int ($s)) return false;
  358. $email = findPostFix ('--------Message not delivered to the following:', $m->body);
  359. if (! $email) return false;
  360. else return array ($email . '@yale.edu', $email . '@yale.cs.edu');
  361. }
  362. function bouncer_llnl ($m) {
  363. if (! is_int (strpos ($m->headers['from'], 'postmaster@llnl.gov'))) return false;
  364. $email = findPostFix ('which your message was addressed,', $m->body);
  365. if (! $email) return false;
  366. else return array( $email);
  367. }
  368. function new_is_int ($x) {
  369. return (is_numeric($x) ? intval($x) == $x : false);
  370. }
  371. function getFirstNameFromAddress ($x) {
  372. //possible formats
  373. //1. Name <email>
  374. //2. email
  375. //3. "Name@domain" <email>
  376. //4. <email>
  377. //return empty string if no name
  378. $x = trim ($x);
  379. $s = strpos ($x, '<');
  380. $e = strpos ($x, '>');
  381. $q = strpos($x, "\"");
  382. $name = '';
  383. $fname = ''; //covers 2 and 4
  384. if ($e) { //check for 1 or 3
  385. if ($q) { //3
  386. $q2 = strpos ($x, "\"", $q+1);
  387. if ($q2) {
  388. $name = substr ($x, $q, $q2-$q1);
  389. } else { //erroneous address???, look for '<'
  390. $name = trim (substr ($x, $q, $q-$s));
  391. }
  392. } else { //1
  393. $name = trim (substr ($x, 0, $s));
  394. }
  395. //break up name if theres a space in it;
  396. $s = strpos ($name, ' ');
  397. if($s) {
  398. $fname = substr ($name, 0, $s);
  399. } else {
  400. $fname = $name;
  401. }
  402. }
  403. return $fname;
  404. }
  405. function getLastNameFromAddress ($x) {
  406. //possible formats
  407. //1. Name <email>
  408. //2. email
  409. //3. "Name@domain" <email>
  410. //4. <email>
  411. //return empty string if no name
  412. $x = trim ($x);
  413. $s = strpos ($x, '<');
  414. $e = strpos ($x, '>');
  415. $q = strpos($x, "\"");
  416. $name = '';
  417. $lname = ''; //covers 2 and 4
  418. if ($e) { //check for 1 or 3
  419. if ($q) { //3
  420. $q2 = strpos ($x, "\"", $q+1);
  421. if ($q2) {
  422. $name = substr ($x, $q, $q2-$q1);
  423. } else { //erroneous address???, look for '<'
  424. $name = trim (substr ($x, $q, $q-$s));
  425. }
  426. } else { //1
  427. $name = trim (substr ($x, 0, $s));
  428. }
  429. //break up name if theres a space in it;
  430. $s = strpos ($name, ' ');
  431. if($s) {
  432. $lname = substr ($name, $s, $e-$s);
  433. } else {
  434. $lname = '';
  435. }
  436. }
  437. return trim ($lname);
  438. }
  439. function getEmailFromAddress ($x) {
  440. //possible formats
  441. //1. Name <email>
  442. //2. email
  443. //3. "Name@domain" <email>
  444. //4. <email>
  445. //return empty string if no name
  446. $x = trim ($x);
  447. $s = strpos ($x, '<');
  448. $e = strpos ($x, '>');
  449. $addy = '';
  450. if ($e) { //1, 3 and 4
  451. $addy = substr ($x, $s+1, $e-$s-1);
  452. } else { //2
  453. $addy = $x;
  454. }
  455. return strtolower (trim ($addy));
  456. }
  457. function findPostfix ($needle, $haystack) {
  458. $haystack = str_replace ("\n", ' ', $haystack);
  459. $s = strpos ($haystack, $needle);
  460. if (! is_int($s)) return false;
  461. $s += strlen ($needle);
  462. //find the next non space char
  463. for ($i = $s; $i < strlen($haystack); $i++) {
  464. if (substr ($haystack, $i, 1) == ' ') {
  465. //
  466. } else {
  467. $s = $i;
  468. break;
  469. }
  470. }
  471. for ($i = $s; $i < strlen($haystack); $i++) {
  472. if ($i == strlen($haystack) - 1) {
  473. $e = strlen ($haystack);
  474. } else if (substr ($haystack, $i, 1) == "," || //chars that shouldnt be in an email address
  475. substr ($haystack, $i, 1) == ' ' || //and often end an email address
  476. substr ($haystack, $i, 1) == ')' ||
  477. substr ($haystack, $i, 1) == '>') {
  478. $e = $i;
  479. break;
  480. }
  481. }
  482. //hack
  483. $final = trim (substr ($haystack, $s, $e-$s));
  484. $final = str_replace ("'", '', $final);
  485. $final = str_replace (";", '', $final);
  486. $final = str_replace ("<", '', $final);
  487. return $final;
  488. }
  489. //takes an array of email address to unsub
  490. function unsub ($addrs) {
  491. foreach ($addrs as $email) {
  492. //get the recipient id(s) for the email address
  493. $recipients = db_shift_array ('select id from sitemailer2_recipient where email=?', $email);
  494. //see if we need to unsub
  495. $max_bounces = appconf ('disable_subscriber_after_bounces');
  496. //only consider bounces younger than 3 months
  497. list ($date, $time) = explode (' ', date('Y-m-d-H-i-s'));
  498. list ($year, $month, $day, $hour, $minute, $second) = explode ('-', $date);
  499. $max_date = date ('Y-m-d H:i:s', mktime ($hour, $minute, $second, $month-3, $day, $year));
  500. //we need to find out how many times each recipient id has been bounced
  501. foreach ($recipients as $r) {
  502. $count = db_shift ('select count(id) from sitemailer2_bounces where recipient=? and occurred > ?', $r, $max_date);
  503. if ($count >= $max_bounces) {
  504. //need to unsub
  505. db_execute ('update sitemailer2_recipient_in_newsletter set status="unsubscribed" where recipient=?', $r);
  506. db_execute ('update sitemailer2_recipient set status="disabled" where id=?', $r);
  507. } else {
  508. //count this bounce
  509. if (! db_execute ('insert into sitemailer2_bounces (id, recipient, message,
  510. occurred) values (null, ?, "", now())', $r)) {
  511. echo "Failed to updates bounces\n";
  512. }
  513. }
  514. }
  515. }
  516. }
  517. function count_bounce ($num) {
  518. //make a best guess of what message this bounce comes from
  519. //base the guess on proximity to message start date
  520. //assume message was bounced now
  521. $best_guess = db_shift ('select id from sitemailer2_message where start < now() and status in("running", "done") order by start desc limit 1');
  522. if ($best_guess) {
  523. db_execute ('update sitemailer2_message set num_bounced=num_bounced+? where id = ?', $num, $best_guess);
  524. } else {
  525. echo "Failed to determine what message resulted in bounce \n";
  526. }
  527. }
  528. ?>