PageRenderTime 58ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/Jyxo/Mail/Parser.php

http://github.com/jyxo/php
PHP | 1354 lines | 871 code | 189 blank | 294 comment | 198 complexity | 509e3548851946619dec216fff3d5451 MD5 | raw file
  1. <?php declare(strict_types = 1);
  2. /**
  3. * Jyxo PHP Library
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file license.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * https://github.com/jyxo/php/blob/master/license.txt
  11. */
  12. namespace Jyxo\Mail;
  13. use Jyxo\Charset;
  14. use Jyxo\Mail\Parser\EmailNotExistException;
  15. use stdClass;
  16. use function array_diff;
  17. use function array_keys;
  18. use function array_search;
  19. use function count;
  20. use function end;
  21. use function explode;
  22. use function imap_base64;
  23. use function imap_fetchbody;
  24. use function imap_fetchheader;
  25. use function imap_fetchstructure;
  26. use function imap_headerinfo;
  27. use function imap_mime_header_decode;
  28. use function imap_msgno;
  29. use function imap_rfc822_parse_adrlist;
  30. use function imap_rfc822_parse_headers;
  31. use function in_array;
  32. use function is_array;
  33. use function is_object;
  34. use function max;
  35. use function preg_match;
  36. use function preg_match_all;
  37. use function quoted_printable_decode;
  38. use function rawurldecode;
  39. use function str_replace;
  40. use function stripos;
  41. use function strlen;
  42. use function strpos;
  43. use function strrpos;
  44. use function strtolower;
  45. use function strtotime;
  46. use function strtr;
  47. use function strval;
  48. use function substr;
  49. use function time;
  50. use function trim;
  51. use const ENC7BIT;
  52. use const ENC8BIT;
  53. use const ENCBASE64;
  54. use const ENCBINARY;
  55. use const ENCOTHER;
  56. use const ENCQUOTEDPRINTABLE;
  57. use const FT_UID;
  58. use const TYPEAPPLICATION;
  59. use const TYPEAUDIO;
  60. use const TYPEIMAGE;
  61. use const TYPEMESSAGE;
  62. use const TYPEMODEL;
  63. use const TYPEMULTIPART;
  64. use const TYPEOTHER;
  65. use const TYPETEXT;
  66. use const TYPEVIDEO;
  67. /**
  68. * Mail parsing class.
  69. * Based on \Mail\IMAPv2 class (c) Copyright 2004-2005 Richard York
  70. *
  71. * @copyright Copyright (c) 2005-2011 Jyxo, s.r.o.
  72. * @license https://github.com/jyxo/php/blob/master/license.txt
  73. * @author Jaroslav Hanslík
  74. */
  75. class Parser
  76. {
  /**
   * Body mode: return the message body, looking for possible alternatives.
   *
   * @see \Jyxo\Mail\Parser::getBody()
   */
  public const BODY = 0;

  /**
   * Body mode: return only the information about the body, without its content.
   *
   * @see \Jyxo\Mail\Parser::getBody()
   */
  public const BODY_INFO = 1;

  /**
   * Body mode: return the body exactly as fetched from the server.
   *
   * @see \Jyxo\Mail\Parser::getBody()
   */
  public const BODY_LITERAL = 2;

  /**
   * Body mode: return the fetched body after transfer-encoding decoding.
   *
   * @see \Jyxo\Mail\Parser::getBody()
   */
  public const BODY_LITERAL_DECODE = 3;

  /**
   * IMAP folder connection handed to the constructor.
   *
   * @var resource
   */
  private $connection;

  /**
   * Id (UID) of the parsed message.
   *
   * @var int
   */
  private $uid;

  /**
   * Parsed message structure; empty until the message is parsed.
   *
   * @var array
   */
  private $structure = [];

  /**
   * Id of the default part; null until it is determined.
   *
   * @var string|null
   */
  private $defaultPid;

  /**
   * Message parts (attachments and inline parts).
   *
   * @var array
   */
  private $parts = [];

  /**
   * Maps the IMAP primary body type constants to their textual names.
   *
   * @var array
   */
  private static $dataTypes = [
      TYPETEXT => 'text',
      TYPEMULTIPART => 'multipart',
      TYPEMESSAGE => 'message',
      TYPEAPPLICATION => 'application',
      TYPEAUDIO => 'audio',
      TYPEIMAGE => 'image',
      TYPEVIDEO => 'video',
      TYPEMODEL => 'model',
      TYPEOTHER => 'other',
  ];

  /**
   * Maps the IMAP transfer encoding constants to their textual names.
   * The extra value 6 is treated the same way as ENCOTHER.
   *
   * @var array
   */
  private static $encodingTypes = [
      ENC7BIT => '7bit',
      ENC8BIT => '8bit',
      ENCBINARY => 'binary',
      ENCBASE64 => 'base64',
      ENCQUOTEDPRINTABLE => 'quoted-printable',
      ENCOTHER => 'other',
      6 => 'other',
  ];
  /**
   * Stores the connection and the message Id; nothing is fetched or parsed here.
   *
   * @param resource $connection IMAP folder connection.
   * @param int $uid Message Id
   */
  public function __construct($connection, int $uid)
  {
      $this->uid = $uid;
      $this->connection = $connection;
  }
  /**
   * Returns parsed headers of the whole message or of one of its parts.
   *
   * Scalar headers are returned under the property names produced by the IMAP extension,
   * address headers (to, cc, bcc, from, reply_to, sender, return_path,
   * disposition_notification_to) as lists of arrays with an additional "email" key,
   * "references" as a list and X-headers under lowercased names with "-" replaced by "_".
   *
   * @param string|null $pid Part Id; null means the headers of the message itself
   * @return array
   * @throws \Jyxo\Mail\Parser\EmailNotExistException If the message does not exist
   */
  public function getHeaders(?string $pid = null): array
  {
      // Parses headers
      $rawHeaders = $this->getRawHeaders($pid);
      if ($pid === null) {
          $msgno = imap_msgno($this->connection, $this->uid);
          if ($msgno === 0) {
              throw new Parser\EmailNotExistException('Email does not exist');
          }
          $headerInfo = imap_headerinfo($this->connection, $msgno);
      } else {
          $headerInfo = imap_rfc822_parse_headers($rawHeaders);
      }

      if ($headerInfo === false) {
          return [];
      }

      // Adds a header that the IMAP extension does not support.
      // The pattern is anchored to the beginning of a line so it cannot match inside another header's value,
      // and it also accepts the end of input because raw part headers come without the terminating empty line.
      if (preg_match("~^Disposition-Notification-To:(.+?)(?=\r?\n(?:\\S|\r?\n)|\\s*\\z)~ims", $rawHeaders, $matches)) {
          $addressList = imap_rfc822_parse_adrlist($matches[1], '');
          // {''} is used because of CS rules
          $headerInfo->{'disposition_notification_toaddress'} = substr(trim($matches[1]), 0, 1024);
          // The list can be empty when the header value contains no address
          if (!empty($addressList)) {
              $headerInfo->{'disposition_notification_to'} = [$addressList[0]];
          }
      }

      $headers = [];

      static $mimeHeaders = [
          'toaddress',
          'ccaddress',
          'bccaddress',
          'fromaddress',
          'reply_toaddress',
          'senderaddress',
          'return_pathaddress',
          'subject',
          'fetchfrom',
          'fetchsubject',
          'disposition_notification_toaddress',
      ];
      foreach ($headerInfo as $key => $value) {
          if (is_object($value) || is_array($value)) {
              // Address lists are processed separately below
              continue;
          }

          $headers[$key] = in_array($key, $mimeHeaders, true)
              ? $this->decodeMimeHeader($value)
              : $this->convertToUtf8((string) $value);
      }

      // Adds "udate" if missing; an unparsable date falls back to the current time as well
      if (!empty($headerInfo->udate)) {
          $headers['udate'] = $headerInfo->udate;
      } else {
          $timestamp = empty($headerInfo->date) ? false : strtotime($headerInfo->date);
          $headers['udate'] = $timestamp !== false ? $timestamp : time();
      }

      // Parses references
      $headers['references'] = isset($headers['references']) ? explode('> <', trim($headers['references'], '<>')) : [];

      static $types = ['to', 'cc', 'bcc', 'from', 'reply_to', 'sender', 'return_path', 'disposition_notification_to'];
      foreach ($types as $type) {
          $headers[$type] = [];
          if (!isset($headerInfo->$type)) {
              continue;
          }

          foreach ($headerInfo->$type as $object) {
              $newHeader = [];
              foreach ($object as $attributeName => $attributeValue) {
                  if (!empty($attributeValue)) {
                      $newHeader[$attributeName] = $attributeName === 'personal'
                          ? $this->decodeMimeHeader($attributeValue)
                          : $this->convertToUtf8($attributeValue);
                  }
              }

              if (empty($newHeader)) {
                  continue;
              }

              if (isset($newHeader['mailbox'], $newHeader['host'])) {
                  $newHeader['email'] = $newHeader['mailbox'] . '@' . $newHeader['host'];
              } elseif (isset($newHeader['mailbox'])) {
                  $newHeader['email'] = $newHeader['mailbox'];
              } else {
                  $newHeader['email'] = 'undisclosed-recipients';
              }

              $headers[$type][] = $newHeader;
          }
      }

      // Adds X-headers (same anchoring and end-of-input handling as above)
      if (preg_match_all("~^(X(?:-\\w+)+):(.+?)(?=\r?\n(?:\\S|\r?\n)|\\s*\\z)~ims", $rawHeaders, $matches) > 0) {
          foreach ($matches[1] as $i => $name) {
              // Converts to the format used by imap_headerinfo()
              $key = str_replace('-', '_', strtolower($name));
              // Removes line endings
              $headers[$key] = strtr(trim($matches[2][$i]), ["\r" => '', "\n" => '', "\t" => ' ']);
          }
      }

      return $headers;
  }
  /**
   * Parses message body.
   *
   * Does nothing when the message has a single part only or when the requested part does not exist.
   *
   * @param string|null $pid Part Id; null means the default part
   * @param string $mimeType Default mime-type
   * @param bool $alternative Should the alternative part be used as well
   * @param bool $all Should all parts get parsed
   * @throws \Jyxo\Mail\Parser\EmailNotExistException If the message does not exist
   */
  public function parseBody(?string $pid = null, string $mimeType = 'text/html', bool $alternative = true, bool $all = false): void
  {
      $this->checkIfParsed();

      if ($pid === null) {
          $pid = $this->defaultPid;
      }

      // If only one part exists, it is already parsed
      if (count($this->structure['pid']) <= 1) {
          return;
      }

      $key = array_search($pid, $this->structure['pid'], true);
      if ($key === false) {
          return;
      }

      if ($all) {
          $this->parseMultiparts($pid, $mimeType, 'all', 2, $alternative);

          return;
      }

      if ($pid === $this->defaultPid) {
          $this->parseMultiparts($pid, $mimeType, 'top', 2, $alternative);
      } elseif ($this->structure['ftype'][$key] === 'message/rfc822') {
          // The type of the requested part decides, not the type of whatever part happens to be stored at index 1
          $this->parseMultiparts($pid, $mimeType, 'subparts', 1, $alternative);
      }
  }
  /**
   * Returns the part Ids stored for attachments, or an empty list when none are stored.
   *
   * @return array
   */
  public function getAttachments(): array
  {
      if (isset($this->parts['attach']['pid'])) {
          return $this->parts['attach']['pid'];
      }

      return [];
  }
  /**
   * Returns the part Ids stored for inline parts, or an empty list when none are stored.
   *
   * @return array
   */
  public function getInlines(): array
  {
      if (isset($this->parts['inline']['pid'])) {
          return $this->parts['inline']['pid'];
      }

      return [];
  }
  /**
   * Returns the direct subparts of the given part.
   *
   * The result is an empty array when nothing matches, otherwise an array with
   * two parallel lists under the keys "pid" and "ftype".
   *
   * @param string $pid Part Id
   * @param array $types List of types to search for
   * @param bool $all Return all types
   * @return array
   * @throws \Jyxo\Mail\Parser\EmailNotExistException If the message does not exist
   */
  public function getRelatedParts(string $pid, array $types, bool $all = false): array
  {
      $this->checkIfParsed();

      $related = [];
      if (empty($this->structure['pid'])) {
          return $related;
      }

      if ($pid === '0') {
          // Deals with multipart/alternative and multipart/report being the first part:
          // they have no real Id and got the fake Id 0 assigned
          $total = count($this->structure['pid']);
          for ($i = 1; $i < $total; $i++) {
              $candidate = $this->structure['pid'][$i];
              // Skipped parts may carry no Id at all
              if ($candidate === null) {
                  continue;
              }

              // Subparts do not contain a dot because they are in the first level
              if (strpos($candidate, '.') !== false) {
                  continue;
              }

              if ($all || in_array($this->structure['ftype'][$i], $types, true)) {
                  $related['pid'][] = $candidate;
                  $related['ftype'][] = $this->structure['ftype'][$i];
              }
          }

          return $related;
      }

      $level = count(explode('.', $pid));
      foreach ($this->structure['pid'] as $i => $rpid) {
          // Skipped parts may carry no Id at all
          if ($rpid === null) {
              continue;
          }

          // Part must be exactly one level deeper ...
          if (count(explode('.', $rpid)) !== $level + 1) {
              continue;
          }

          // ... and everything before its last dot must be the requested Id
          if ($pid !== substr($rpid, 0, strrpos($rpid, '.'))) {
              continue;
          }

          if ($all || in_array($this->structure['ftype'][$i], $types, true)) {
              $related['pid'][] = $rpid;
              $related['ftype'][] = $this->structure['ftype'][$i];
          }
      }

      return $related;
  }
  /**
   * Returns the related parts of the given part regardless of their type.
   *
   * @param string $pid Part Id
   * @return array
   * @throws \Jyxo\Mail\Parser\EmailNotExistException If the message does not exist
   */
  public function getAllRelatedParts(string $pid): array
  {
      // An empty type list combined with $all = true disables the type filter
      return $this->getRelatedParts($pid, [], true);
  }
  /**
   * Returns body of the given part.
   *
   * The result always contains "encoding", "type" and "size" and, when known, "filename",
   * "charset" and "cid". All modes except self::BODY_INFO add "content".
   * In the self::BODY mode a container part (message/rfc822, multipart/related,
   * multipart/alternative, multipart/report) is replaced by its subpart of the requested
   * mime-type; when none is found, the other of text/html and text/plain is tried once.
   *
   * @param string $pid Part Id
   * @param int $mode Body return mode
   * @param string $mimeType Requested mime-type
   * @param int $attempt Number of retries
   * @return array
   * @throws \Jyxo\Mail\Parser\EmailNotExistException If the message does not exist
   * @throws \Jyxo\Mail\Parser\PartNotExistException If the requested part does not exist
   */
  public function getBody(string $pid = '1', int $mode = self::BODY, string $mimeType = 'text/html', int $attempt = 1): array
  {
      $this->checkIfParsed();

      $key = array_search($pid, $this->structure['pid'], true);
      if ($key === false) {
          throw new Parser\PartNotExistException('Requested part does not exist');
      }

      $output = [
          'encoding' => $this->structure['encoding'][$key],
          'type' => $this->structure['ftype'][$key],
          // The size is not stored when the server did not report it
          'size' => $this->structure['fsize'][$key] ?? 0,
      ];
      if (isset($this->structure['fname'][$key])) {
          $output['filename'] = $this->structure['fname'][$key];
      }
      if (isset($this->structure['charset'][$key])) {
          $output['charset'] = $this->structure['charset'][$key];
      }
      if (isset($this->structure['cid'][$key])) {
          $output['cid'] = $this->structure['cid'][$key];
      }

      if ($mode === self::BODY_INFO) {
          return $output;
      }

      if ($mode === self::BODY_LITERAL) {
          $output['content'] = imap_fetchbody($this->connection, $this->uid, $pid, FT_UID);

          return $output;
      }

      if ($mode === self::BODY_LITERAL_DECODE) {
          $output['content'] = self::decodeBody(imap_fetchbody($this->connection, $this->uid, $pid, FT_UID), $output['encoding']);
          // Textual types are converted to UTF-8
          if (strpos($output['type'], 'text/') === 0 || (strpos($output['type'], 'message/') === 0)) {
              $output['content'] = $this->convertToUtf8($output['content'], $output['charset'] ?? '');
          }

          return $output;
      }

      // Get a new part number
      $newPid = null;
      $isContainer = ($this->structure['ftype'][$key] === 'message/rfc822')
          || $this->isPartMultipart($key, 'related')
          || $this->isPartMultipart($key, 'alternative')
          || $this->isPartMultipart($key, 'report');
      if ($isContainer) {
          $newPid = $this->getMultipartPid($pid, $mimeType, 'subparts');

          if (!empty($newPid)) {
              $pid = $newPid;
          } elseif ($attempt === 1) {
              // If no part of the requested type was found, try the other textual type once
              if ($mimeType === 'text/html') {
                  return $this->getBody($pid, $mode, 'text/plain', 2);
              }
              if ($mimeType === 'text/plain') {
                  return $this->getBody($pid, $mode, 'text/html', 2);
              }
          }
      }

      if (!empty($newPid)) {
          $key = array_search($pid, $this->structure['pid'], true);
          if ($key === false) {
              throw new Parser\PartNotExistException('Requested part does not exist');
          }
      }

      $output['encoding'] = $this->structure['encoding'][$key];
      $output['type'] = $this->structure['ftype'][$key];
      $output['size'] = $this->structure['fsize'][$key] ?? 0;
      if (isset($this->structure['fname'][$key])) {
          $output['filename'] = $this->structure['fname'][$key];
      }
      if (isset($this->structure['charset'][$key])) {
          $output['charset'] = $this->structure['charset'][$key];
      }

      $output['content'] = self::decodeBody(imap_fetchbody($this->connection, $this->uid, $pid, FT_UID), $output['encoding']);
      // Textual types are converted to UTF-8
      if (strpos($output['type'], 'text/') === 0 || (strpos($output['type'], 'message/') === 0)) {
          $output['content'] = $this->convertToUtf8($output['content'], $output['charset'] ?? '');
      }

      return $output;
  }
  /**
   * Collects the Ids of all parts whose type equals one of the given types.
   * The result is grouped by the order of $types.
   *
   * @param array $types Part types
   * @return array
   * @throws \Jyxo\Mail\Parser\EmailNotExistException If the message does not exist
   */
  public function getMime(array $types): array
  {
      $this->checkIfParsed();

      if (!is_array($this->structure['ftype'])) {
          return [];
      }

      $found = [];
      foreach ($types as $wanted) {
          foreach ($this->structure['ftype'] as $index => $ftype) {
              if ($ftype === $wanted) {
                  $found[] = $this->structure['pid'][$index];
              }
          }
      }

      return $found;
  }
  /**
   * Collects the Ids of all parts whose type is not among the given types.
   *
   * @param array $exceptTypes Ignored part types
   * @return array
   * @throws \Jyxo\Mail\Parser\EmailNotExistException If the message does not exist
   */
  public function getMimeExcept(array $exceptTypes): array
  {
      $this->checkIfParsed();

      if (!is_array($this->structure['ftype'])) {
          return [];
      }

      $remaining = [];
      // array_diff() keeps the original indexes, so they still point to the matching part Ids
      foreach (array_diff($this->structure['ftype'], $exceptTypes) as $index => $unusedType) {
          $remaining[] = $this->structure['pid'][$index];
      }

      return $remaining;
  }
  /**
   * Reverts the transfer encoding of a body.
   * Only "quoted-printable" and "base64" are decoded; any other encoding returns the body untouched.
   *
   * @param string $body Body
   * @param string $encoding Body encoding
   * @return string
   * @throws \Jyxo\Mail\Parser\BodyNotDecodedException If a base64 body cannot be decoded
   */
  public static function decodeBody(string $body, string $encoding): string
  {
      if ($encoding === 'quoted-printable') {
          return quoted_printable_decode($body);
      }

      if ($encoding !== 'base64') {
          return $body;
      }

      $decoded = imap_base64($body);
      if ($decoded === false) {
          throw new Parser\BodyNotDecodedException('Body cannot be decoded.');
      }

      return $decoded;
  }
  /**
   * Builds the message structure and determines the default part Id, each only when still missing.
   *
   * @throws \Jyxo\Mail\Parser\EmailNotExistException If the message does not exist
   */
  private function checkIfParsed(): void
  {
      $structureMissing = empty($this->structure);
      if ($structureMissing) {
          $this->setStructure();
      }

      if (!empty($this->defaultPid)) {
          return;
      }

      $this->defaultPid = $this->getDefaultPid();
  }
  /**
   * Creates message structure.
   *
   * Called without arguments it fetches the structure of the message and stores it;
   * it then calls itself recursively for every part that has subparts.
   * The result is a set of parallel lists in $this->structure ("pid", "ftype", "encoding",
   * "fsize", "disposition", "fname", "cid", "hasAttach") indexed by the part position,
   * plus one list per MIME parameter found (e.g. "charset").
   *
   * @param array|null $subparts Subparts; null on the first (non-recursive) call
   * @param string|null $parentPartId Parent Id
   * @param bool $skipPart Skip parts
   * @param bool $lastWasSigned Was the parent signed
   * @throws \Jyxo\Mail\Parser\EmailNotExistException If the message does not exist
   */
  private function setStructure(
      ?array $subparts = null,
      ?string $parentPartId = null,
      bool $skipPart = false,
      bool $lastWasSigned = false
  ): void
  {
      // Structure keys written by this method itself. MIME parameter names come from the message,
      // so a parameter with one of these names must not be allowed to overwrite parser data.
      // NOTE(review): extend the list if other methods of the class store further keys in the structure.
      static $reservedKeys = ['obj', 'pid', 'ftype', 'encoding', 'fsize', 'disposition', 'fname', 'cid'];

      // First call - the object returned by the imap_fetchstructure function is stored
      if ($subparts === null) {
          // Stored only after the check, so a failed fetch does not leave a non-empty structure behind
          $fetched = imap_fetchstructure($this->connection, $this->uid, FT_UID);
          if (!$fetched) {
              throw new Parser\EmailNotExistException('Email does not exist');
          }
          $this->structure['obj'] = $fetched;
      }

      // Sometimes (especially in spams) the type is missing
      if (empty($this->structure['obj']->type)) {
          $this->structure['obj']->type = TYPETEXT;
      }

      // For situations when the body is missing but we have attachments
      if (($this->structure['obj']->type !== TYPETEXT)
          && ($this->structure['obj']->type !== TYPEMULTIPART)) {
          $temp = $this->structure['obj'];

          // Don't add a body just create the multipart container because the body wouldn't have an Id
          $this->structure['obj'] = new stdClass();
          $this->structure['obj']->type = TYPEMULTIPART;
          $this->structure['obj']->ifsubtype = 1;
          $this->structure['obj']->subtype = 'MIXED';
          $this->structure['obj']->ifdescription = 0;
          $this->structure['obj']->ifid = '0';
          $this->structure['obj']->bytes = $temp->bytes ?? 0;
          $this->structure['obj']->ifdisposition = 1;
          $this->structure['obj']->disposition = 'inline';
          $this->structure['obj']->ifdparameters = 0;
          $this->structure['obj']->dparameters = [];
          $this->structure['obj']->ifparameters = 0;
          $this->structure['obj']->parameters = [];
          $this->structure['obj']->parts = [$temp];
      }

      // Deals with a multipart/alternative or multipart/report problem when they are the first part
      if (($subparts === null) && ($parentPartId === null)) {
          $ftype = empty($this->structure['obj']->type)
              ? $this->getMajorMimeType(0) . '/' . strtolower($this->structure['obj']->subtype)
              : $this->getMajorMimeType($this->structure['obj']->type) . '/' . strtolower($this->structure['obj']->subtype);

          // As first they do not have any actual Id, assign a fake one 0
          $this->structure['pid'][0] = '0';
          $this->structure['ftype'][0] = $ftype;
          $this->structure['encoding'][0] = !empty($this->structure['obj']->encoding)
              ? self::$encodingTypes[$this->structure['obj']->encoding]
              : self::$encodingTypes[0];
          $this->structure['fsize'][0] = !empty($this->structure['obj']->bytes) ? $this->structure['obj']->bytes : 0;
          $this->structure['disposition'][0] = 'inline';
      }

      // Subparts
      if (isset($this->structure['obj']->parts) || is_array($subparts)) {
          $parts = is_array($subparts) ? $subparts : $this->structure['obj']->parts;

          $count = 1;
          foreach ($parts as $part) {
              // Skips multipart/mixed, following multipart/alternative or multipart/report (if this part is message/rfc822), multipart/related
              // There are more problematic parts but we haven't tested them yet
              $ftype = empty($part->type)
                  ? $this->getMajorMimeType(0) . '/' . strtolower($part->subtype)
                  : $this->getMajorMimeType($part->type) . '/' . strtolower($part->subtype);

              $thisIsSigned = ($ftype === 'multipart/signed');
              $skipNext = ($ftype === 'message/rfc822');

              $no = isset($this->structure['pid']) ? count($this->structure['pid']) : 0;

              // Skip parts fulfilling certain conditions
              if (
                  (($ftype === 'multipart/mixed') && ($lastWasSigned || $skipPart))
                  || ($ftype === 'multipart/signed')
                  || ($skipPart && ($ftype === 'multipart/alternative'))
                  || ($skipPart && ($ftype === 'multipart/report'))
                  || (($ftype === 'multipart/related') && (count($parts) === 1))
              ) {
                  $skipped = true;

                  // Although this part is skipped, save it for later use (as Id we use the parent Id)
                  // NOTE(review): encoding and size are taken from the top-level object here, not from $part - confirm this is intended
                  $this->structure['pid'][$no] = $parentPartId;
                  $this->structure['ftype'][$no] = $ftype;
                  $this->structure['encoding'][$no] = !empty($this->structure['obj']->encoding)
                      ? self::$encodingTypes[$this->structure['obj']->encoding]
                      : self::$encodingTypes[0];
                  $this->structure['fsize'][$no] = !empty($this->structure['obj']->bytes) ? $this->structure['obj']->bytes : 0;
                  $this->structure['disposition'][$no] = 'inline';
              } else {
                  $skipped = false;

                  $this->structure['pid'][$no] = !is_array($subparts) ? strval($count) : $parentPartId . '.' . $count;
                  $this->structure['ftype'][$no] = $ftype;
                  $this->structure['encoding'][$no] = !empty($part->encoding)
                      ? self::$encodingTypes[$part->encoding]
                      : self::$encodingTypes[0];
                  $this->structure['fsize'][$no] = !empty($part->bytes) ? $part->bytes : 0;

                  // Loads parameters (content-type parameters override disposition parameters of the same name)
                  if ($part->ifdparameters) {
                      foreach ($part->dparameters as $param) {
                          $attribute = strtolower($param->attribute);
                          if (in_array($attribute, $reservedKeys, true)) {
                              continue;
                          }
                          $this->structure[$attribute][$no] = strtolower($param->value);
                      }
                  }
                  if ($part->ifparameters) {
                      foreach ($part->parameters as $param) {
                          $attribute = strtolower($param->attribute);
                          if (in_array($attribute, $reservedKeys, true)) {
                              continue;
                          }
                          $this->structure[$attribute][$no] = strtolower($param->value);
                      }
                  }

                  // Builds a part name (can be split into multiple lines)
                  if ($part->ifparameters) {
                      foreach ($part->parameters as $param) {
                          if (stripos($param->attribute, 'name') === 0) {
                              if (!isset($this->structure['fname'][$no])) {
                                  $this->structure['fname'][$no] = $param->value;
                              } else {
                                  $this->structure['fname'][$no] .= $param->value;
                              }
                          }
                      }
                  }
                  // The disposition "filename" is used only when the content-type "name" gave nothing
                  if (
                      $part->ifdparameters
                      && (
                          !isset($this->structure['fname'][$no])
                          || (empty($this->structure['fname'][$no]))
                      )
                  ) {
                      foreach ($part->dparameters as $param) {
                          if (stripos($param->attribute, 'filename') === 0) {
                              if (!isset($this->structure['fname'][$no])) {
                                  $this->structure['fname'][$no] = $param->value;
                              } else {
                                  $this->structure['fname'][$no] .= $param->value;
                              }
                          }
                      }
                  }
                  // If a name exists, decode it
                  if (isset($this->structure['fname'][$no])) {
                      $this->structure['fname'][$no] = $this->decodeFilename($this->structure['fname'][$no]);
                  }

                  // If the given part is message/rfc822, load its headers and use the subject as its name
                  if ($ftype === 'message/rfc822') {
                      $rfcHeader = $this->getHeaders($this->structure['pid'][$no]);
                      $this->structure['fname'][$no] = !empty($rfcHeader['subject']) ? $rfcHeader['subject'] . '.eml' : '';
                  }

                  // Content Id of the part, stored without its first and last character
                  if ($part->ifid) {
                      $this->structure['cid'][$no] = substr($part->id, 1, -1);
                  }

                  // Attachment or inline part (sometimes we do not get the required information from the message or it's nonsense)
                  [$type, $subtype] = explode('/', $ftype);
                  if ($part->ifdisposition && (strtolower($part->disposition) === 'attachment')) {
                      $this->structure['disposition'][$no] = 'attachment';
                  } elseif (isset($this->structure['cid'][$no]) && ($type === 'image')) {
                      $this->structure['disposition'][$no] = 'inline';
                  } elseif (
                      ($type === 'message')
                      || ($type === 'application')
                      || ($type === 'image')
                      || ($type === 'audio')
                      || ($type === 'video')
                      || ($type === 'model')
                      || ($type === 'other')
                  ) {
                      $this->structure['disposition'][$no] = 'attachment';
                  } elseif (($type === 'text') && (($subtype !== 'html') && ($subtype !== 'plain'))) {
                      $this->structure['disposition'][$no] = 'attachment';
                  } elseif (($type === 'text') && (isset($this->structure['fname'][$no]))) {
                      $this->structure['disposition'][$no] = 'attachment';
                  } else {
                      $this->structure['disposition'][$no] = 'inline';
                  }
              }

              if (isset($part->parts) && (is_array($part->parts))) {
                  if (!$skipped) {
                      $this->structure['hasAttach'][$no] = true;
                  }

                  // The Id stored last (that of the current part) becomes the parent Id of the subparts
                  $this->setStructure($part->parts, end($this->structure['pid']), $skipNext, $thisIsSigned);
              } elseif (!$skipped) {
                  $this->structure['hasAttach'][$no] = false;
              }

              $count++;
          }
      } else {
          // No subparts
          $this->structure['pid'][0] = '1';
          $this->structure['ftype'][0] = $this->getMajorMimeType($this->structure['obj']->type) . '/' . strtolower(
              $this->structure['obj']->subtype
          );

          // If the message has only one part it should be text/plain or text/html
          if (($this->structure['ftype'][0] !== 'text/plain') && ($this->structure['ftype'][0] !== 'text/html')) {
              $this->structure['ftype'][0] = 'text/plain';
          }

          if (empty($this->structure['obj']->encoding)) {
              $this->structure['obj']->encoding = 0;
          }

          $this->structure['encoding'][0] = self::$encodingTypes[$this->structure['obj']->encoding];
          if (isset($this->structure['obj']->bytes)) {
              $this->structure['fsize'][0] = $this->structure['obj']->bytes;
          }

          $this->structure['disposition'][0] = 'inline';
          $this->structure['hasAttach'][0] = false;

          // Walks through the remaining parameters
          if (isset($this->structure['obj']->ifparameters) && ($this->structure['obj']->ifparameters)) {
              foreach ($this->structure['obj']->parameters as $param) {
                  $attribute = strtolower($param->attribute);
                  if (in_array($attribute, $reservedKeys, true)) {
                      continue;
                  }
                  $this->structure[$attribute][0] = $param->value;
              }
          }
      }
  }
  /**
   * Returns default part's Id.
   *
   * First looks for an inline text/html or text/plain part in the first level (the requested
   * mime-type is preferred). Then looks for a part of the requested mime-type directly inside
   * the first multipart/alternative, multipart/report or multipart/mixed container.
   * When nothing is found, the search is repeated once for the other textual type
   * and finally '1' is returned.
   *
   * @param string $mimeType Mime-type
   * @param int $attempt Number of retries
   * @return string
   */
  private function getDefaultPid(string $mimeType = 'text/html', int $attempt = 1): string
  {
      $mimeCheck = $mimeType === 'text/html' ? ['text/html', 'text/plain'] : ['text/plain', 'text/html'];

      // Tries to find text/html or text/plain in main parts
      foreach ($mimeCheck as $mime) {
          $parts = array_keys($this->structure['ftype'], $mime, true);
          foreach ($parts as $part) {
              if (($this->structure['disposition'][$part] === 'inline')
                  && (strpos($this->structure['pid'][$part], '.') === false)) {
                  return $this->structure['pid'][$part];
              }
          }
      }

      // There was nothing found in the main parts, try multipart/alternative or multipart/report
      $partLevel = 1;
      $multipartPid = null;
      foreach ($this->structure['pid'] as $partNo => $pid) {
          // Skipped parts may carry no Id at all
          if ($pid === null) {
              continue;
          }

          $level = count(explode('.', $pid));
          if ($multipartPid === null) {
              // Inside a first-level multipart/related the container is searched one level deeper
              if (($level === 1) && (isset($this->structure['ftype'][$partNo])) && ($this->isPartMultipart($partNo, 'related'))) {
                  $partLevel = 2;
                  continue;
              }

              if (
                  ($level === $partLevel)
                  && (isset($this->structure['ftype'][$partNo]))
                  && (
                      $this->isPartMultipart($partNo, 'alternative')
                      || ($this->isPartMultipart($partNo, 'report'))
                      || ($this->isPartMultipart($partNo, 'mixed'))
                  )
              ) {
                  $multipartPid = $pid;
                  continue;
              }
          }

          // The part must be a direct child of the container. Its parent Id is everything before
          // the last dot, which also works for part numbers with more than one digit.
          if (
              ($multipartPid !== null)
              && ($level === $partLevel + 1)
              && ($this->structure['ftype'][$partNo] === $mimeType)
              && ($multipartPid === substr($pid, 0, (int) strrpos($pid, '.')))
          ) {
              return $pid;
          }
      }

      // Nothing was found, try next possible type
      if ($attempt === 1) {
          return $mimeType === 'text/html' ? $this->getDefaultPid('text/plain', 2) : $this->getDefaultPid('text/html', 2);
      }

      // There should be a default part found in every mail; this is because of spams that are often in wrong format
      return '1';
  }
  /**
   * Returns raw headers.
   *
   * For the whole message the header block is fetched directly. For a part the whole part
   * is fetched and everything before the first empty line is returned (without the line
   * ending of the last header). An empty string is returned when the fetch fails or when
   * the part contains no empty line.
   *
   * @param string|null $pid Part Id; null means the headers of the message itself
   * @return string
   */
  private function getRawHeaders(?string $pid = null): string
  {
      if ($pid === null) {
          $messageHeaders = imap_fetchheader($this->connection, $this->uid, FT_UID);

          // A failed fetch returns false, which must not leak through the string return type
          return $messageHeaders === false ? '' : $messageHeaders;
      }

      $rawPart = imap_fetchbody($this->connection, $this->uid, $pid, FT_UID);
      if ($rawPart === false) {
          return '';
      }

      // The headers end at the first empty line, be it LF or CRLF terminated; the earlier one wins
      $lfEnd = strpos($rawPart, "\n\n");
      $crlfEnd = strpos($rawPart, "\n\r\n");
      if ($lfEnd === false) {
          $headersEnd = $crlfEnd;
      } elseif ($crlfEnd === false) {
          $headersEnd = $lfEnd;
      } else {
          $headersEnd = $lfEnd < $crlfEnd ? $lfEnd : $crlfEnd;
      }

      if ($headersEnd === false) {
          return '';
      }

      return substr($rawPart, 0, $headersEnd);
  }
  /**
   * Parses multiple parts.
   *
   * Walks the message structure, picks the parts selected by $lookFor and registers
   * them through addPart() either as inline parts or as attachments.
   *
   * @param string $pid Part Id
   * @param string $mimeType Default mime-type
   * @param string $lookFor What parts to look for ('all', 'subparts'; anything else is handled as 'top')
   * @param int $pidAdd The level of nesting
   * @param bool $getAlternative Should the alternative part be used as well
   */
  private function parseMultiparts(
      string $pid,
      string $mimeType,
      string $lookFor = 'all',
      int $pidAdd = 1,
      bool $getAlternative = true
  ): void
  {
      // The given type is the one to leave out; its text counterpart is searched for inside containers
      $excludeMime = $mimeType;
      $mimeType = $excludeMime === 'text/plain' ? 'text/html' : 'text/plain';

      $partLevel = count(explode('.', $pid));
      $pidLength = strlen($pid);

      foreach (array_keys($this->structure['pid']) as $partNo) {
          $partPid = $this->structure['pid'][$partNo];
          $level = count(explode('.', $partPid));

          if ($lookFor === 'all') {
              $selected = true;
          } elseif ($lookFor === 'subparts') {
              // Direct children of the given part only
              $selected = ($level === $partLevel + 1) && ($pid === substr($partPid, 0, $pidLength));
          } else {
              // 'top' and every unknown mode
              // phpcs:disable SlevomatCodingStandard.ControlStructures.RequireTernaryOperator.TernaryOperatorNotUsed
              $relatedOrMixed = $this->isMultipart('related') || $this->isMultipart('mixed');

              // Top level; in related/mixed messages also the second level below the default part's root
              $selected = strpos($partPid, '.') === false
                  || (
                      $relatedOrMixed
                      && ($level === 2)
                      && substr($this->defaultPid, 0, 1) === substr($partPid, 0, 1)
                  );
          }

          if (!$selected) {
              continue;
          }

          $isContainer = $this->isPartMultipart($partNo, 'alternative')
              || $this->isPartMultipart($partNo, 'report')
              || $this->isPartMultipart($partNo, 'mixed');

          if ($isContainer) {
              $prefixLength = strlen($partPid);

              foreach ($this->structure['pid'] as $candidateNo => $candidatePid) {
                  // Part must begin with the last tested Id and be in the next level
                  if ($this->structure['ftype'][$candidateNo] !== $mimeType) {
                      continue;
                  }
                  if (!$getAlternative) {
                      continue;
                  }
                  if ($level !== $partLevel + $pidAdd) {
                      continue;
                  }
                  if ($pid !== substr($candidatePid, 0, $prefixLength)) {
                      continue;
                  }

                  $this->addPart($partNo, 'inline');
                  break;
              }

              continue;
          }

          $disposition = $this->structure['disposition'][$partNo];

          if ($disposition === 'attachment') {
              // It is an attachment; add to the attachment list
              $this->addPart($partNo, 'attach');
              continue;
          }

          if (
              $disposition !== 'inline'
              || $this->isPartMultipart($partNo, 'related')
              || $this->isPartMultipart($partNo, 'mixed')
          ) {
              continue;
          }

          // It is inline, but not related or mixed type; the given part itself is never added
          if ($pid === $partPid) {
              continue;
          }

          if ($this->structure['ftype'][$partNo] === $excludeMime) {
              // A part of the excluded type is only interesting when it carries a file name
              $wanted = isset($this->structure['fname'][$partNo]);
          } else {
              $wanted = $getAlternative || !$this->isParentAlternative($partNo);
          }

          if ($wanted) {
              $this->addPart($partNo, 'inline');
          }
      }
  }
  /**
   * Returns if the parent is multipart/alternative type.
   *
   * Only text/plain and text/html parts are considered; every other type yields false.
   *
   * @param int $partNo Part number in the structure
   * @return bool
   */
  private function isParentAlternative(int $partNo): bool
  {
      // Multipart/alternative can be a parent of only two types
      $partType = $this->structure['ftype'][$partNo];
      if (($partType !== 'text/plain') && ($partType !== 'text/html')) {
          return false;
      }

      $partId = $this->structure['pid'][$partNo];
      $lastDot = strrpos($partId, '.');

      // An Id without a dot is a top-level part; the first structure entry is tested in that case
      if ($lastDot === false) {
          return $this->isPartMultipart(0, 'alternative');
      }

      $parentId = substr($partId, 0, $lastDot);

      foreach ($this->structure['pid'] as $candidateNo => $candidateId) {
          // There can be multiple parts with the same Id (because we assign parent Id to parts without an own Id)
          if (($parentId === $candidateId) && $this->isPartMultipart($candidateNo, 'alternative')) {
              return true;
          }
      }

      return false;
  }
  /**
   * Tells whether the message contains a multipart/subtype part.
   *
   * @param string $subtype Subtype
   * @return bool
   */
  private function isMultipart(string $subtype): bool
  {
      $wantedTypes = ['multipart/' . $subtype];
      $found = $this->getMime($wantedTypes);

      return count($found) > 0;
  }
  /**
   * Tells whether the given part is of the multipart/subtype type.
   *
   * @param int $partNo Part number in the structure
   * @param string $subtype Subtype
   * @return bool
   */
  private function isPartMultipart(int $partNo, string $subtype): bool
  {
      $expectedType = 'multipart/' . $subtype;
      $actualType = $this->structure['ftype'][$partNo];

      return $actualType === $expectedType;
  }
  /**
   * Copies one structure entry into the list of parts of the given type.
   *
   * Only non-empty fields are copied.
   *
   * @param int $structureNo Part number in the structure
   * @param string $partType Part type
   */
  private function addPart(int $structureNo, string $partType): void
  {
      // All fields of the new entry share one index: the number of Ids already stored for this type
      $position = count($this->parts[$partType]['pid'] ?? []);

      foreach (['fname', 'pid', 'ftype', 'fsize', 'hasAttach', 'charset'] as $field) {
          if (empty($this->structure[$field][$structureNo])) {
              continue;
          }

          $this->parts[$partType][$field][$position] = $this->structure[$field][$structureNo];
      }
  }
  /**
   * Returns a part Id.
   *
   * Searches the structure for a part of the requested mime-type that has no file name,
   * either nested in a multipart/alternative, multipart/report or multipart/mixed part
   * or being an inline part itself.
   *
   * @param string $pid Parent Id
   * @param string $mimeType Requested mime-type
   * @param string $lookFor What to look for ('subparts' or 'multipart'; any other value selects nothing)
   * @return string Part Id, or an empty string if no part matched
   */
  private function getMultipartPid(string $pid, string $mimeType, string $lookFor): string
  {
      $partLevel = count(explode('.', $pid));
      $pidLength = strlen($pid);
      $pidAdd = 1;

      foreach ($this->structure['pid'] as $partNo => $id) {
          $level = count(explode('.', $this->structure['pid'][$partNo]));

          switch ($lookFor) {
              case 'subparts':
                  $condition = (($level === $partLevel + 1) && ($pid === substr($this->structure['pid'][$partNo], 0, $pidLength)));
                  break;
              case 'multipart':
                  // NOTE(review): requires a part one level deeper than $pid that has exactly the same Id,
                  // which looks unsatisfiable - confirm the intended condition
                  $condition = (($level === $partLevel + 1) && ($pid === $this->structure['pid'][$partNo]));
                  break;
              default:
                  $condition = false;
                  break;
          }

          if (!$condition) {
              continue;
          }

          if (
              $this->isPartMultipart($partNo, 'alternative')
              || ($this->isPartMultipart($partNo, 'report'))
              || ($this->isPartMultipart($partNo, 'mixed'))
          ) {
              foreach ($this->structure['pid'] as $multipartNo => $multipartPid) {
                  // Part has to begin with the last tested Id and has to be in the next level
                  if (
                      ($this->structure['ftype'][$multipartNo] === $mimeType)
                      && ($level === $partLevel + $pidAdd)
                      && ($pid === substr($multipartPid, 0, strlen($this->structure['pid'][$partNo])))
                  ) {
                      if (empty($this->structure['fname'][$multipartNo])) {
                          return $this->structure['pid'][$multipartNo];
                      }
                  } elseif ($this->isPartMultipart($multipartNo, 'alternative') || ($this->isPartMultipart($multipartNo, 'report'))) {
                      // Need to match this PID to next level in; this also changes what the outer loop compares against
                      $pid = $this->structure['pid'][$multipartNo];
                      $pidLength = strlen($pid);
                      $partLevel = count(explode('.', $pid));
                      $pidAdd = 2;
                  }
              }
          } elseif (
              ($this->structure['disposition'][$partNo] === 'inline')
              && (!$this->isPartMultipart($partNo, 'related'))
              && (!$this->isPartMultipart($partNo, 'mixed'))
          ) {
              // It is inline, but not related or mixed type
              if (($this->structure['ftype'][$partNo] === $mimeType) && (!isset($this->structure['fname'][$partNo]))) {
                  return $this->structure['pid'][$partNo];
              }
          }
      }

      // Nothing matched; falling off the end of a method declared to return string would raise a TypeError
      return '';
  }
  /**
   * Translates a major mime-type number into its textual name.
   *
   * @param int $mimetypeNo Mime-type number
   * @return string
   */
  private function getMajorMimeType(int $mimetypeNo): string
  {
      // Unknown numbers fall back to the entry with the highest key (type other)
      return self::$dataTypes[$mimetypeNo]
          ?? self::$dataTypes[max(array_keys(self::$dataTypes))];
  }
  /**
   * Decodes a MIME-encoded header and converts every decoded chunk to UTF-8.
   *
   * @param string $header Header contents
   * @return string
   */
  private function decodeMimeHeader(string $header): string
  {
      $chunks = imap_mime_header_decode($header);

      if ($chunks === false) {
          // Decode failed, the header is returned as it came (trimmed)
          return trim($header);
      }

      $decoded = '';
      foreach ($chunks as $chunk) {
          $decoded .= $this->convertToUtf8($chunk->text, $chunk->charset);
      }

      return trim($decoded);
  }
  /**
   * Decodes attachment's name.
   *
   * Handles the charset'lang'url-encoded-name form as well as names that start
   * with a MIME encoded-word; any other name is returned unchanged.
   *
   * @param string $filename Filename
   * @return string
   */
  private function decodeFilename(string $filename): string
  {
      $matches = [];

      if (preg_match('~(?P<charset>[^\']+)\'(?P<lang>[^\']*)\'(?P<filename>.+)~i', $filename, $matches)) {
          $urlDecoded = rawurldecode($matches['filename']);

          return $this->convertToUtf8($urlDecoded, $matches['charset']);
      }

      if (strpos($filename, '=?') === 0) {
          return $this->decodeMimeHeader($filename);
      }

      return $filename;
  }
  /**
   * Converts a string from various encodings to UTF-8.
   *
   * @param string $string Input string
   * @param string $charset String charset
   * @return string
   */
  private function convertToUtf8(string $string, string $charset = ''): string
  {
      // Imap_mime_header_decode returns "default" in case of ASCII, but we make a detection for sure
      $needsDetection = $charset === 'default' || $charset === 'us-ascii' || empty($charset);
      $sourceCharset = $needsDetection ? Charset::detect($string) : $charset;

      return Charset::convert2utf($string, $sourceCharset);
  }
  1165. }