PageRenderTime 40ms CodeModel.GetById 9ms RepoModel.GetById 1ms app.codeStats 0ms

/webui/system/helper/mime.php

https://bitbucket.org/jsuto/piler
PHP | 372 lines | 243 code | 110 blank | 19 comment | 61 complexity | 5edea99664173c3879108fc50f0671e9 MD5 | raw file
Possible License(s): LGPL-2.0, LGPL-3.0, GPL-3.0
  1. <?php
  2. class Piler_Mime_Decode {
  3. const HEADER_FIELDS = ['from', 'to', 'cc', 'subject', 'date'];
  4. public static function parseMessage($message, &$result) {
  5. self::splitMessage($message, $headers, $body);
  6. $boundary = self::getBoundary($headers);
  7. // No boundary defined
  8. if($boundary == '') {
  9. if($headers['content-type']['type'] == "message/rfc822") {
  10. self::parseMessage($body, $result);
  11. }
  12. else {
  13. $result[] = array(
  14. 'headers' => $headers,
  15. 'body' => $body
  16. );
  17. }
  18. return;
  19. }
  20. $parts = self::splitMime($body, $boundary);
  21. for($i=0; $i<count($parts); $i++) {
  22. self::splitMessage($parts[$i], $headers, $body);
  23. $boundary = self::getBoundary($headers);
  24. if($boundary) {
  25. self::parseMessage($parts[$i], $result);
  26. }
  27. else {
  28. if(in_array($headers['content-type']['type'], ["text/plain", "text/html"])) {
  29. $result[] = array('headers' => $headers, 'body' => $body);
  30. }
  31. else if($headers['content-type']['type'] == "message/rfc822") {
  32. self::parseMessage($body, $result);
  33. }
  34. }
  35. }
  36. }
  37. public static function splitMime($body, $boundary) {
  38. $start = 0;
  39. $res = array();
  40. $body = self::remove_LF($body);
  41. // Extract the mime parts excluding the boundary itself
  42. $p = strpos($body, '--' . $boundary . "\n", $start);
  43. if($p === false) {
  44. // no parts found!
  45. return array();
  46. }
  47. // Position after first boundary line
  48. $start = $p + 3 + strlen($boundary);
  49. while(($p = strpos($body, '--' . $boundary . "\n", $start)) !== false) {
  50. $res[] = substr($body, $start, $p-$start);
  51. $start = $p + 3 + strlen($boundary);
  52. }
  53. // No more parts, find end boundary
  54. $p = strpos($body, '--' . $boundary . '--', $start);
  55. if($p === false) {
  56. return array();
  57. }
  58. // The remaining part also needs to be parsed:
  59. $res[] = substr($body, $start, $p - $start);
  60. return $res;
  61. }
  62. public static function splitMessage($message, &$headers, &$body, $EOL = "\n") {
  63. self::splitMessageRaw($message, $headers, $journal, $body);
  64. $headers = self::splitHeaders($headers);
  65. }
  66. public static function splitMessageRaw($message, &$headers, &$journal, &$body, $EOL = "\n") {
  67. $headers = [];
  68. $body = '';
  69. $message = self::remove_LF($message);
  70. // Find an empty line between headers and body, otherwise we got a header-only message
  71. if(strpos($message, $EOL . $EOL)) {
  72. list($headers, $body) = explode($EOL . $EOL, $message, 2);
  73. // Check if the header is actually a journal header
  74. $headers_array = self::splitHeaders($headers);
  75. if(isset($headers_array['x-ms-journal-report']) && isset($headers_array['content-type']['boundary'])) {
  76. $boundary = $headers_array['content-type']['boundary'];
  77. $parts = self::splitMime($body, $boundary);
  78. if(count($parts) >= 2) {
  79. self::splitMessageRaw($parts[0], $s, $j, $journal);
  80. $i = strpos($parts[1], $EOL . $EOL);
  81. $msg = substr($parts[1], $i);
  82. $i = 0;
  83. while(ctype_space($msg[$i])) { $i++; }
  84. if($i > 0) { $msg = substr($msg, $i); }
  85. self::splitMessageRaw($msg, $headers, $j, $body);
  86. }
  87. }
  88. // If the message has a single binary attachment, then drop the body part
  89. if(isset($headers_array['content-type']['type'])) {
  90. foreach(['application/', 'image/'] as $type) {
  91. if(strstr($headers_array['content-type']['type'], $type)) {
  92. $body = '';
  93. break;
  94. }
  95. }
  96. }
  97. }
  98. else {
  99. $headers = $message;
  100. }
  101. }
  102. public static function removeJournal(&$message, $EOL = "\n") {
  103. $has_journal = 0;
  104. $crlfs = substr_count($message, "\r\n");
  105. self::splitMessageRaw($message, $headers, $journal, $body);
  106. if($journal) {
  107. $has_journal = 1;
  108. }
  109. // If the message has >10 CRLF sequences, then we assume
  110. // that we need to restore the removed LF characters
  111. if($crlfs > 10) {
  112. $headers = str_replace("\n", "\r\n", $headers);
  113. $body = str_replace("\n", "\r\n", $body);
  114. $EOL = "\r\n";
  115. }
  116. $message = $headers . $EOL . $EOL . $body;
  117. return $has_journal;
  118. }
  119. public static function splitHeaders($headers) {
  120. $headers = self::headersToArray($headers);
  121. // normalize header names
  122. foreach ($headers as $name => $header) {
  123. $lower = strtolower($name);
  124. if($lower == $name) {
  125. continue;
  126. }
  127. unset($headers[$name]);
  128. if(!isset($headers[$lower])) {
  129. $headers[$lower] = $header;
  130. continue;
  131. }
  132. if(is_array($headers[$lower])) {
  133. $headers[$lower][] = $header;
  134. continue;
  135. }
  136. $headers[$lower] = array($headers[$lower], $header);
  137. }
  138. // Add some default values, if they are missing
  139. if(!isset($headers['content-type'])) { $headers['content-type'] = 'text/plain'; }
  140. // I saw a dumb email (it was a spam, though) having two Date: lines.
  141. // In this case we take the first date, and discard the rest
  142. if(isset($headers[self::HEADER_FIELDS[4]]) && is_array($headers[self::HEADER_FIELDS[4]])) {
  143. $headers[self::HEADER_FIELDS[4]] = $headers[self::HEADER_FIELDS[4]][0];
  144. }
  145. for($i=0; $i<count(self::HEADER_FIELDS); $i++) {
  146. if(!isset($headers[self::HEADER_FIELDS[$i]])) { $headers[self::HEADER_FIELDS[$i]] = ''; }
  147. $headers[self::HEADER_FIELDS[$i]] = preg_replace("/gb2312/i", "GBK", $headers[self::HEADER_FIELDS[$i]]);
  148. $headers[self::HEADER_FIELDS[$i]] = iconv_mime_decode($headers[self::HEADER_FIELDS[$i]], ICONV_MIME_DECODE_CONTINUE_ON_ERROR);
  149. }
  150. $headers['content-type'] = self::splitContentType($headers['content-type']);
  151. $headers['content-type']['type'] = strtolower($headers['content-type']['type']);
  152. return $headers;
  153. }
  154. public static function headersToArray($headers = '') {
  155. $token = '';
  156. $last_token = '';
  157. $result = array();
  158. $headers = explode("\n", $headers);
  159. foreach($headers as $h) {
  160. // Handle cases when there's no whitespace between the header key and value
  161. // eg. Subject:som
  162. $h = preg_replace("/^([\S]+):(\S)/", '${1}: ${2}', $h);
  163. $h = preg_replace("/\s{1,}/", " ", $h);
  164. $line = preg_split("/\s/", $h);
  165. // Skip line if it doesn't have a colon (:) and the 1st character is not a whitespace
  166. if(!ctype_space($h[0]) && !strchr($h, ':')) { continue; }
  167. if($line) {
  168. if(substr($line[0], -1) == ':') {
  169. $token = array_shift($line);
  170. $token = rtrim($token, ':');
  171. $last_token = $token;
  172. }
  173. else {
  174. $token = '';
  175. }
  176. $line_str = implode(" ", $line);
  177. if(!isset($result[$last_token])) {
  178. $result[$last_token] = $line_str;
  179. }
  180. else {
  181. if($token) {
  182. $result[$last_token] .= "\n";
  183. }
  184. $result[$last_token] .= ' ' . $line_str;
  185. }
  186. }
  187. }
  188. foreach($result as $k => $v) {
  189. if(strchr($v, "\n")) {
  190. $result[$k] = explode("\n", $v);
  191. }
  192. }
  193. return $result;
  194. }
  195. public static function splitContentType($field = '') {
  196. $split = array();
  197. $what = 'type';
  198. $field = $what . '=' . $field;
  199. if(!preg_match_all('%([^=\s]+)\s*=\s*("[^"]+"|[^;]+)(;\s*|$)%', $field, $matches)) {
  200. return $split;
  201. }
  202. $split = array();
  203. foreach ($matches[1] as $key => $name) {
  204. $name = strtolower($name);
  205. if($matches[2][$key][0] == '"') {
  206. $split[$name] = substr($matches[2][$key], 1, -1);
  207. } else {
  208. $split[$name] = $matches[2][$key];
  209. }
  210. }
  211. return $split;
  212. }
  213. public static function remove_LF($message = '') {
  214. return str_replace("\r", "", $message);
  215. //return preg_replace("/\r/", "", $message);
  216. }
  217. public static function getBoundary($headers = array()) {
  218. if(isset($headers['content-type']['boundary'])) {
  219. return $headers['content-type']['boundary'];
  220. }
  221. return '';
  222. }
  223. public static function fixMimeBodyPart($headers = array(), $body = '') {
  224. if(isset($headers['content-transfer-encoding'])) {
  225. if(strtolower($headers['content-transfer-encoding']) == 'quoted-printable') {
  226. $body = quoted_printable_decode($body);
  227. }
  228. if(strtolower($headers['content-transfer-encoding']) == 'base64') {
  229. $body = base64_decode($body);
  230. }
  231. }
  232. if(isset($headers['content-type']['charset'])) {
  233. if(strtolower($headers['content-type']['charset']) == 'gb2312') {
  234. $headers['content-type']['charset'] = 'GBK';
  235. }
  236. $body = iconv($headers['content-type']['charset'], 'utf-8' . '//IGNORE', $body);
  237. }
  238. if(strtolower($headers['content-type']['type']) == 'text/plain') {
  239. $body = self::escape_lt_gt_symbols($body);
  240. $body = preg_replace("/\n/", "<br />\n", $body);
  241. $body = "\n" . self::printNicely($body);
  242. }
  243. return $body;
  244. }
  245. public static function escape_lt_gt_symbols($s = '') {
  246. $s = preg_replace("/</", "&lt;", $s);
  247. $s = preg_replace("/>/", "&gt;", $s);
  248. return $s;
  249. }
  250. public static function printNicely($s = '') {
  251. $k = 0;
  252. $nice = "";
  253. $x = explode(" ", $s);
  254. for($i=0; $i<count($x); $i++){
  255. $nice .= $x[$i] . " ";
  256. $k += strlen($x[$i]);
  257. if(strstr($x[$i], "\n")){ $k = 0; }
  258. if($k > 70){ $nice .= "\n"; $k = 0; }
  259. }
  260. return $nice;
  261. }
  262. }