PageRenderTime 60ms CodeModel.GetById 28ms RepoModel.GetById 1ms app.codeStats 0ms

/program/include/rcube_mime.php

https://github.com/netconstructor/roundcubemail
PHP | 705 lines | 415 code | 98 blank | 192 comment | 144 complexity | 87301d89d7ef3020489061f9fa107aad MD5 | raw file
Possible License(s): GPL-3.0, LGPL-2.1
  1. <?php
  2. /*
  3. +-----------------------------------------------------------------------+
  4. | program/include/rcube_mime.php |
  5. | |
  6. | This file is part of the Roundcube Webmail client |
  7. | Copyright (C) 2005-2012, The Roundcube Dev Team |
  8. | Copyright (C) 2011-2012, Kolab Systems AG |
  9. | |
  10. | Licensed under the GNU General Public License version 3 or |
  11. | any later version with exceptions for skins & plugins. |
  12. | See the README file for a full license statement. |
  13. | |
  14. | PURPOSE: |
  15. | MIME message parsing utilities |
  16. | |
  17. +-----------------------------------------------------------------------+
  18. | Author: Thomas Bruederli <roundcube@gmail.com> |
  19. | Author: Aleksander Machniak <alec@alec.pl> |
  20. +-----------------------------------------------------------------------+
  21. */
  22. /**
  23. * Class for parsing MIME messages
  24. *
  25. * @package Framework
  26. * @subpackage Storage
  27. * @author Thomas Bruederli <roundcube@gmail.com>
  28. * @author Aleksander Machniak <alec@alec.pl>
  29. */
  30. class rcube_mime
  31. {
  32. private static $default_charset;
  33. /**
  34. * Object constructor.
  35. */
  36. function __construct($default_charset = null)
  37. {
  38. self::$default_charset = $default_charset;
  39. }
  40. /**
  41. * Returns message/object character set name
  42. *
  43. * @return string Characted set name
  44. */
  45. public static function get_charset()
  46. {
  47. if (self::$default_charset) {
  48. return self::$default_charset;
  49. }
  50. if ($charset = rcube::get_instance()->config->get('default_charset')) {
  51. return $charset;
  52. }
  53. return RCMAIL_CHARSET;
  54. }
  55. /**
  56. * Parse the given raw message source and return a structure
  57. * of rcube_message_part objects.
  58. *
  59. * It makes use of the PEAR:Mail_mimeDecode library
  60. *
  61. * @param string The message source
  62. * @return object rcube_message_part The message structure
  63. */
  64. public static function parse_message($raw_body)
  65. {
  66. $mime = new Mail_mimeDecode($raw_body);
  67. $struct = $mime->decode(array('include_bodies' => true, 'decode_bodies' => true));
  68. return self::structure_part($struct);
  69. }
  70. /**
  71. * Recursive method to convert a Mail_mimeDecode part into a rcube_message_part object
  72. *
  73. * @param object A message part struct
  74. * @param int Part count
  75. * @param string Parent MIME ID
  76. *
  77. * @return object rcube_message_part
  78. */
  79. private static function structure_part($part, $count=0, $parent='')
  80. {
  81. $struct = new rcube_message_part;
  82. $struct->mime_id = $part->mime_id ? $part->mime_id : (empty($parent) ? (string)$count : "$parent.$count");
  83. $struct->headers = $part->headers;
  84. $struct->ctype_primary = $part->ctype_primary;
  85. $struct->ctype_secondary = $part->ctype_secondary;
  86. $struct->mimetype = $part->ctype_primary . '/' . $part->ctype_secondary;
  87. $struct->ctype_parameters = $part->ctype_parameters;
  88. if ($part->headers['content-transfer-encoding'])
  89. $struct->encoding = $part->headers['content-transfer-encoding'];
  90. if ($part->ctype_parameters['charset'])
  91. $struct->charset = $part->ctype_parameters['charset'];
  92. $part_charset = $struct->charset ? $struct->charset : self::get_charset();
  93. // determine filename
  94. if (($filename = $part->d_parameters['filename']) || ($filename = $part->ctype_parameters['name'])) {
  95. $struct->filename = rcube_mime::decode_mime_string($filename, $part_charset);
  96. }
  97. // copy part body and convert it to UTF-8 if necessary
  98. $struct->body = $part->ctype_primary == 'text' || !$part->ctype_parameters['charset'] ? rcube_charset::convert($part->body, $part_charset) : $part->body;
  99. $struct->size = strlen($part->body);
  100. $struct->disposition = $part->disposition;
  101. foreach ((array)$part->parts as $child_part) {
  102. $struct->parts[] = self::structure_part($child_part, ++$count, $struct->mime_id);
  103. }
  104. return $struct;
  105. }
  106. /**
  107. * Split an address list into a structured array list
  108. *
  109. * @param string $input Input string
  110. * @param int $max List only this number of addresses
  111. * @param boolean $decode Decode address strings
  112. * @param string $fallback Fallback charset if none specified
  113. *
  114. * @return array Indexed list of addresses
  115. */
  116. static function decode_address_list($input, $max = null, $decode = true, $fallback = null)
  117. {
  118. $a = self::parse_address_list($input, $decode, $fallback);
  119. $out = array();
  120. $j = 0;
  121. // Special chars as defined by RFC 822 need to in quoted string (or escaped).
  122. $special_chars = '[\(\)\<\>\\\.\[\]@,;:"]';
  123. if (!is_array($a))
  124. return $out;
  125. foreach ($a as $val) {
  126. $j++;
  127. $address = trim($val['address']);
  128. $name = trim($val['name']);
  129. if ($name && $address && $name != $address)
  130. $string = sprintf('%s <%s>', preg_match("/$special_chars/", $name) ? '"'.addcslashes($name, '"').'"' : $name, $address);
  131. else if ($address)
  132. $string = $address;
  133. else if ($name)
  134. $string = $name;
  135. $out[$j] = array(
  136. 'name' => $name,
  137. 'mailto' => $address,
  138. 'string' => $string
  139. );
  140. if ($max && $j==$max)
  141. break;
  142. }
  143. return $out;
  144. }
  145. /**
  146. * Decode a message header value
  147. *
  148. * @param string $input Header value
  149. * @param string $fallback Fallback charset if none specified
  150. *
  151. * @return string Decoded string
  152. */
  153. public static function decode_header($input, $fallback = null)
  154. {
  155. $str = self::decode_mime_string((string)$input, $fallback);
  156. return $str;
  157. }
  158. /**
  159. * Decode a mime-encoded string to internal charset
  160. *
  161. * @param string $input Header value
  162. * @param string $fallback Fallback charset if none specified
  163. *
  164. * @return string Decoded string
  165. */
  166. public static function decode_mime_string($input, $fallback = null)
  167. {
  168. $default_charset = !empty($fallback) ? $fallback : self::get_charset();
  169. // rfc: all line breaks or other characters not found
  170. // in the Base64 Alphabet must be ignored by decoding software
  171. // delete all blanks between MIME-lines, differently we can
  172. // receive unnecessary blanks and broken utf-8 symbols
  173. $input = preg_replace("/\?=\s+=\?/", '?==?', $input);
  174. // encoded-word regexp
  175. $re = '/=\?([^?]+)\?([BbQq])\?([^\n]*?)\?=/';
  176. // Find all RFC2047's encoded words
  177. if (preg_match_all($re, $input, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER)) {
  178. // Initialize variables
  179. $tmp = array();
  180. $out = '';
  181. $start = 0;
  182. foreach ($matches as $idx => $m) {
  183. $pos = $m[0][1];
  184. $charset = $m[1][0];
  185. $encoding = $m[2][0];
  186. $text = $m[3][0];
  187. $length = strlen($m[0][0]);
  188. // Append everything that is before the text to be decoded
  189. if ($start != $pos) {
  190. $substr = substr($input, $start, $pos-$start);
  191. $out .= rcube_charset::convert($substr, $default_charset);
  192. $start = $pos;
  193. }
  194. $start += $length;
  195. // Per RFC2047, each string part "MUST represent an integral number
  196. // of characters . A multi-octet character may not be split across
  197. // adjacent encoded-words." However, some mailers break this, so we
  198. // try to handle characters spanned across parts anyway by iterating
  199. // through and aggregating sequential encoded parts with the same
  200. // character set and encoding, then perform the decoding on the
  201. // aggregation as a whole.
  202. $tmp[] = $text;
  203. if ($next_match = $matches[$idx+1]) {
  204. if ($next_match[0][1] == $start
  205. && $next_match[1][0] == $charset
  206. && $next_match[2][0] == $encoding
  207. ) {
  208. continue;
  209. }
  210. }
  211. $count = count($tmp);
  212. $text = '';
  213. // Decode and join encoded-word's chunks
  214. if ($encoding == 'B' || $encoding == 'b') {
  215. // base64 must be decoded a segment at a time
  216. for ($i=0; $i<$count; $i++)
  217. $text .= base64_decode($tmp[$i]);
  218. }
  219. else { //if ($encoding == 'Q' || $encoding == 'q') {
  220. // quoted printable can be combined and processed at once
  221. for ($i=0; $i<$count; $i++)
  222. $text .= $tmp[$i];
  223. $text = str_replace('_', ' ', $text);
  224. $text = quoted_printable_decode($text);
  225. }
  226. $out .= rcube_charset::convert($text, $charset);
  227. $tmp = array();
  228. }
  229. // add the last part of the input string
  230. if ($start != strlen($input)) {
  231. $out .= rcube_charset::convert(substr($input, $start), $default_charset);
  232. }
  233. // return the results
  234. return $out;
  235. }
  236. // no encoding information, use fallback
  237. return rcube_charset::convert($input, $default_charset);
  238. }
  239. /**
  240. * Decode a mime part
  241. *
  242. * @param string $input Input string
  243. * @param string $encoding Part encoding
  244. * @return string Decoded string
  245. */
  246. public static function decode($input, $encoding = '7bit')
  247. {
  248. switch (strtolower($encoding)) {
  249. case 'quoted-printable':
  250. return quoted_printable_decode($input);
  251. case 'base64':
  252. return base64_decode($input);
  253. case 'x-uuencode':
  254. case 'x-uue':
  255. case 'uue':
  256. case 'uuencode':
  257. return convert_uudecode($input);
  258. case '7bit':
  259. default:
  260. return $input;
  261. }
  262. }
  263. /**
  264. * Split RFC822 header string into an associative array
  265. * @access private
  266. */
  267. public static function parse_headers($headers)
  268. {
  269. $a_headers = array();
  270. $headers = preg_replace('/\r?\n(\t| )+/', ' ', $headers);
  271. $lines = explode("\n", $headers);
  272. $c = count($lines);
  273. for ($i=0; $i<$c; $i++) {
  274. if ($p = strpos($lines[$i], ': ')) {
  275. $field = strtolower(substr($lines[$i], 0, $p));
  276. $value = trim(substr($lines[$i], $p+1));
  277. if (!empty($value))
  278. $a_headers[$field] = $value;
  279. }
  280. }
  281. return $a_headers;
  282. }
  283. /**
  284. * @access private
  285. */
  286. private static function parse_address_list($str, $decode = true, $fallback = null)
  287. {
  288. // remove any newlines and carriage returns before
  289. $str = preg_replace('/\r?\n(\s|\t)?/', ' ', $str);
  290. // extract list items, remove comments
  291. $str = self::explode_header_string(',;', $str, true);
  292. $result = array();
  293. // simplified regexp, supporting quoted local part
  294. $email_rx = '(\S+|("\s*(?:[^"\f\n\r\t\v\b\s]+\s*)+"))@\S+';
  295. foreach ($str as $key => $val) {
  296. $name = '';
  297. $address = '';
  298. $val = trim($val);
  299. if (preg_match('/(.*)<('.$email_rx.')>$/', $val, $m)) {
  300. $address = $m[2];
  301. $name = trim($m[1]);
  302. }
  303. else if (preg_match('/^('.$email_rx.')$/', $val, $m)) {
  304. $address = $m[1];
  305. $name = '';
  306. }
  307. else {
  308. $name = $val;
  309. }
  310. // dequote and/or decode name
  311. if ($name) {
  312. if ($name[0] == '"' && $name[strlen($name)-1] == '"') {
  313. $name = substr($name, 1, -1);
  314. $name = stripslashes($name);
  315. }
  316. if ($decode) {
  317. $name = self::decode_header($name, $fallback);
  318. }
  319. }
  320. if (!$address && $name) {
  321. $address = $name;
  322. }
  323. if ($address) {
  324. $result[$key] = array('name' => $name, 'address' => $address);
  325. }
  326. }
  327. return $result;
  328. }
  329. /**
  330. * Explodes header (e.g. address-list) string into array of strings
  331. * using specified separator characters with proper handling
  332. * of quoted-strings and comments (RFC2822)
  333. *
  334. * @param string $separator String containing separator characters
  335. * @param string $str Header string
  336. * @param bool $remove_comments Enable to remove comments
  337. *
  338. * @return array Header items
  339. */
  340. public static function explode_header_string($separator, $str, $remove_comments = false)
  341. {
  342. $length = strlen($str);
  343. $result = array();
  344. $quoted = false;
  345. $comment = 0;
  346. $out = '';
  347. for ($i=0; $i<$length; $i++) {
  348. // we're inside a quoted string
  349. if ($quoted) {
  350. if ($str[$i] == '"') {
  351. $quoted = false;
  352. }
  353. else if ($str[$i] == "\\") {
  354. if ($comment <= 0) {
  355. $out .= "\\";
  356. }
  357. $i++;
  358. }
  359. }
  360. // we are inside a comment string
  361. else if ($comment > 0) {
  362. if ($str[$i] == ')') {
  363. $comment--;
  364. }
  365. else if ($str[$i] == '(') {
  366. $comment++;
  367. }
  368. else if ($str[$i] == "\\") {
  369. $i++;
  370. }
  371. continue;
  372. }
  373. // separator, add to result array
  374. else if (strpos($separator, $str[$i]) !== false) {
  375. if ($out) {
  376. $result[] = $out;
  377. }
  378. $out = '';
  379. continue;
  380. }
  381. // start of quoted string
  382. else if ($str[$i] == '"') {
  383. $quoted = true;
  384. }
  385. // start of comment
  386. else if ($remove_comments && $str[$i] == '(') {
  387. $comment++;
  388. }
  389. if ($comment <= 0) {
  390. $out .= $str[$i];
  391. }
  392. }
  393. if ($out && $comment <= 0) {
  394. $result[] = $out;
  395. }
  396. return $result;
  397. }
  398. /**
  399. * Interpret a format=flowed message body according to RFC 2646
  400. *
  401. * @param string $text Raw body formatted as flowed text
  402. *
  403. * @return string Interpreted text with unwrapped lines and stuffed space removed
  404. */
  405. public static function unfold_flowed($text)
  406. {
  407. $text = preg_split('/\r?\n/', $text);
  408. $last = -1;
  409. $q_level = 0;
  410. foreach ($text as $idx => $line) {
  411. if ($line[0] == '>' && preg_match('/^(>+\s*)/', $line, $regs)) {
  412. $q = strlen(str_replace(' ', '', $regs[0]));
  413. $line = substr($line, strlen($regs[0]));
  414. if ($q == $q_level && $line
  415. && isset($text[$last])
  416. && $text[$last][strlen($text[$last])-1] == ' '
  417. ) {
  418. $text[$last] .= $line;
  419. unset($text[$idx]);
  420. }
  421. else {
  422. $last = $idx;
  423. }
  424. }
  425. else {
  426. $q = 0;
  427. if ($line == '-- ') {
  428. $last = $idx;
  429. }
  430. else {
  431. // remove space-stuffing
  432. $line = preg_replace('/^\s/', '', $line);
  433. if (isset($text[$last]) && $line
  434. && $text[$last] != '-- '
  435. && $text[$last][strlen($text[$last])-1] == ' '
  436. ) {
  437. $text[$last] .= $line;
  438. unset($text[$idx]);
  439. }
  440. else {
  441. $text[$idx] = $line;
  442. $last = $idx;
  443. }
  444. }
  445. }
  446. $q_level = $q;
  447. }
  448. return implode("\r\n", $text);
  449. }
  450. /**
  451. * Wrap the given text to comply with RFC 2646
  452. *
  453. * @param string $text Text to wrap
  454. * @param int $length Length
  455. *
  456. * @return string Wrapped text
  457. */
  458. public static function format_flowed($text, $length = 72)
  459. {
  460. $text = preg_split('/\r?\n/', $text);
  461. foreach ($text as $idx => $line) {
  462. if ($line != '-- ') {
  463. if ($line[0] == '>' && preg_match('/^(>+)/', $line, $regs)) {
  464. $prefix = $regs[0];
  465. $level = strlen($prefix);
  466. $line = rtrim(substr($line, $level));
  467. $line = $prefix . self::wordwrap($line, $length - $level - 2, " \r\n$prefix ");
  468. }
  469. else if ($line) {
  470. $line = self::wordwrap(rtrim($line), $length - 2, " \r\n");
  471. // space-stuffing
  472. $line = preg_replace('/(^|\r\n)(From| |>)/', '\\1 \\2', $line);
  473. }
  474. $text[$idx] = $line;
  475. }
  476. }
  477. return implode("\r\n", $text);
  478. }
  479. /**
  480. * Improved wordwrap function.
  481. *
  482. * @param string $string Text to wrap
  483. * @param int $width Line width
  484. * @param string $break Line separator
  485. * @param bool $cut Enable to cut word
  486. *
  487. * @return string Text
  488. */
  489. public static function wordwrap($string, $width=75, $break="\n", $cut=false)
  490. {
  491. $para = explode($break, $string);
  492. $string = '';
  493. while (count($para)) {
  494. $line = array_shift($para);
  495. if ($line[0] == '>') {
  496. $string .= $line.$break;
  497. continue;
  498. }
  499. $list = explode(' ', $line);
  500. $len = 0;
  501. while (count($list)) {
  502. $line = array_shift($list);
  503. $l = mb_strlen($line);
  504. $newlen = $len + $l + ($len ? 1 : 0);
  505. if ($newlen <= $width) {
  506. $string .= ($len ? ' ' : '').$line;
  507. $len += (1 + $l);
  508. }
  509. else {
  510. if ($l > $width) {
  511. if ($cut) {
  512. $start = 0;
  513. while ($l) {
  514. $str = mb_substr($line, $start, $width);
  515. $strlen = mb_strlen($str);
  516. $string .= ($len ? $break : '').$str;
  517. $start += $strlen;
  518. $l -= $strlen;
  519. $len = $strlen;
  520. }
  521. }
  522. else {
  523. $string .= ($len ? $break : '').$line;
  524. if (count($list)) {
  525. $string .= $break;
  526. }
  527. $len = 0;
  528. }
  529. }
  530. else {
  531. $string .= $break.$line;
  532. $len = $l;
  533. }
  534. }
  535. }
  536. if (count($para)) {
  537. $string .= $break;
  538. }
  539. }
  540. return $string;
  541. }
  542. /**
  543. * A method to guess the mime_type of an attachment.
  544. *
  545. * @param string $path Path to the file.
  546. * @param string $name File name (with suffix)
  547. * @param string $failover Mime type supplied for failover.
  548. * @param string $is_stream Set to True if $path contains file body
  549. *
  550. * @return string
  551. * @author Till Klampaeckel <till@php.net>
  552. * @see http://de2.php.net/manual/en/ref.fileinfo.php
  553. * @see http://de2.php.net/mime_content_type
  554. */
  555. public static function file_content_type($path, $name, $failover = 'application/octet-stream', $is_stream = false)
  556. {
  557. $mime_type = null;
  558. $mime_magic = rcube::get_instance()->config->get('mime_magic');
  559. $mime_ext = @include RCMAIL_CONFIG_DIR . '/mimetypes.php';
  560. // use file name suffix with hard-coded mime-type map
  561. if (is_array($mime_ext) && $name) {
  562. if ($suffix = substr($name, strrpos($name, '.')+1)) {
  563. $mime_type = $mime_ext[strtolower($suffix)];
  564. }
  565. }
  566. // try fileinfo extension if available
  567. if (!$mime_type && function_exists('finfo_open')) {
  568. if ($finfo = finfo_open(FILEINFO_MIME, $mime_magic)) {
  569. if ($is_stream)
  570. $mime_type = finfo_buffer($finfo, $path);
  571. else
  572. $mime_type = finfo_file($finfo, $path);
  573. finfo_close($finfo);
  574. }
  575. }
  576. // try PHP's mime_content_type
  577. if (!$mime_type && !$is_stream && function_exists('mime_content_type')) {
  578. $mime_type = @mime_content_type($path);
  579. }
  580. // fall back to user-submitted string
  581. if (!$mime_type) {
  582. $mime_type = $failover;
  583. }
  584. else {
  585. // Sometimes (PHP-5.3?) content-type contains charset definition,
  586. // Remove it (#1487122) also "charset=binary" is useless
  587. $mime_type = array_shift(preg_split('/[; ]/', $mime_type));
  588. }
  589. return $mime_type;
  590. }
  591. /**
  592. * Detect image type of the given binary data by checking magic numbers.
  593. *
  594. * @param string $data Binary file content
  595. *
  596. * @return string Detected mime-type or jpeg as fallback
  597. */
  598. public static function image_content_type($data)
  599. {
  600. $type = 'jpeg';
  601. if (preg_match('/^\x89\x50\x4E\x47/', $data)) $type = 'png';
  602. else if (preg_match('/^\x47\x49\x46\x38/', $data)) $type = 'gif';
  603. else if (preg_match('/^\x00\x00\x01\x00/', $data)) $type = 'ico';
  604. // else if (preg_match('/^\xFF\xD8\xFF\xE0/', $data)) $type = 'jpeg';
  605. return 'image/' . $type;
  606. }
  607. }