PageRenderTime 52ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/hphp/runtime/ext/mailparse/mime.cpp

http://github.com/facebook/hiphop-php
C++ | 1020 lines | 762 code | 132 blank | 126 comment | 228 complexity | 9e9a05b76bc474fc0bfe8064e0261b58 MD5 | raw file
Possible License(s): LGPL-2.1, BSD-2-Clause, BSD-3-Clause, MPL-2.0-no-copyleft-exception, MIT, LGPL-2.0, Apache-2.0
  1. /*
  2. +----------------------------------------------------------------------+
  3. | HipHop for PHP |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
  6. | Copyright (c) 1997-2010 The PHP Group |
  7. +----------------------------------------------------------------------+
  8. | This source file is subject to version 3.01 of the PHP license, |
  9. | that is bundled with this package in the file LICENSE, and is |
  10. | available through the world-wide-web at the following url: |
  11. | http://www.php.net/license/3_01.txt |
  12. | If you did not receive a copy of the PHP license and are unable to |
  13. | obtain it through the world-wide-web, please send a note to |
  14. | license@php.net so we can mail you a copy immediately. |
  15. +----------------------------------------------------------------------+
  16. */
  17. #include "hphp/runtime/ext/mailparse/mime.h"
  18. #include "hphp/runtime/ext/stream/ext_stream.h"
  19. #include "hphp/runtime/base/array-init.h"
  20. #include "hphp/runtime/base/array-iterator.h"
  21. #include "hphp/runtime/base/builtin-functions.h"
  22. #include "hphp/runtime/base/mem-file.h"
  23. #include "hphp/runtime/base/runtime-error.h"
  24. #define MAXLEVELS 20
  25. #define MAXPARTS 300
  26. namespace HPHP {
  27. ///////////////////////////////////////////////////////////////////////////////
  28. MimePart::MimeHeader::MimeHeader()
  29. : m_empty(true) {
  30. }
  31. MimePart::MimeHeader::MimeHeader(const char *value)
  32. : m_empty(false) {
  33. assertx(value);
  34. m_attributes = Array::CreateDArray();
  35. m_value = String(value, CopyString);
  36. }
  37. MimePart::MimeHeader::MimeHeader(php_rfc822_tokenized_t *toks)
  38. : m_empty(false) {
  39. int i, first_semi, next_semi, comments_before_semi, netscape_bug = 0;
  40. String name_buf;
  41. StringBuffer value_buf;
  42. bool is_rfc2231_name = false;
  43. char *check_name;
  44. int charset_p = 0, prevcharset_p = 0;
  45. bool namechanged = false, currentencoded = false;
  46. m_attributes = Array::CreateDArray();
  47. /* php_rfc822_print_tokens(toks); */
  48. /* look for optional ; which separates optional attributes from the main
  49. value */
  50. for (first_semi = 2; first_semi < toks->ntokens; first_semi++)
  51. if (toks->tokens[first_semi].token == ';') break;
  52. m_value = String(php_rfc822_recombine_tokens
  53. (toks, 2, first_semi - 2,
  54. PHP_RFC822_RECOMBINE_STRTOLOWER |
  55. PHP_RFC822_RECOMBINE_IGNORE_COMMENTS), AttachString);
  56. if (first_semi < toks->ntokens) first_semi++;
  57. /* Netscape Bug: Messenger sometimes omits the semi when wrapping the
  58. * the header.
  59. * That means we have to be even more clever than the spec says that
  60. * we need to :-/
  61. * */
  62. while (first_semi < toks->ntokens) {
  63. /* find the next ; */
  64. comments_before_semi = 0;
  65. for (next_semi = first_semi; next_semi < toks->ntokens; next_semi++) {
  66. if (toks->tokens[next_semi].token == ';') break;
  67. if (toks->tokens[next_semi].token == '(') comments_before_semi++;
  68. }
  69. i = first_semi;
  70. if (i < next_semi) {
  71. i++;
  72. /* ignore comments */
  73. while (i < next_semi && toks->tokens[i].token == '(') {
  74. i++;
  75. }
  76. if (i < next_semi && toks->tokens[i].token == '=') {
  77. /* Here, next_semi --> "name" and i --> "=", so skip "=" sign */
  78. i++;
  79. /* count those tokens; we expect "token = token" (3 tokens); if there
  80. * are more than that, then something is quite possibly wrong
  81. * - Netscape Bug! */
  82. if (next_semi < toks->ntokens && toks->tokens[next_semi].token != ';'
  83. && next_semi - first_semi - comments_before_semi > 3) {
  84. next_semi = i + 1;
  85. netscape_bug = 1;
  86. }
  87. String name(php_rfc822_recombine_tokens
  88. (toks, first_semi, 1,
  89. PHP_RFC822_RECOMBINE_STRTOLOWER|
  90. PHP_RFC822_RECOMBINE_IGNORE_COMMENTS), AttachString);
  91. String value(php_rfc822_recombine_tokens
  92. (toks, i, next_semi - i,
  93. PHP_RFC822_RECOMBINE_IGNORE_COMMENTS), AttachString);
  94. /* support rfc2231 mime parameter value
  95. *
  96. * Parameter Value Continuations:
  97. *
  98. * Content-Type: message/external-body; access-type=URL;
  99. * URL*0="ftp://";
  100. * URL*1="cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar"
  101. *
  102. * is semantically identical to
  103. *
  104. * Content-Type: message/external-body; access-type=URL;
  105. * URL="ftp://cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar"
  106. *
  107. * Original rfc2231 support by IceWarp Ltd. <info@icewarp.com>
  108. */
  109. check_name = const_cast<char*>(strchr(name.data(), '*'));
  110. if (check_name) {
  111. currentencoded = true;
  112. /* Is last char * - charset encoding */
  113. charset_p = (name[name.size() -1] == '*');
  114. /* Leave only attribute name without * */
  115. *check_name = 0;
  116. /* New item or continuous */
  117. if (name_buf.isNull()) {
  118. namechanged = false;
  119. name_buf = name;
  120. } else {
  121. namechanged = (name_buf != name);
  122. if (!namechanged) {
  123. name.clear();
  124. }
  125. }
  126. /* Check if name changed*/
  127. if (!namechanged) {
  128. /* Append string to buffer - check if to be encoded... */
  129. rfc2231_to_mime(value_buf, (char*)value.data(), charset_p,
  130. prevcharset_p);
  131. /* Mark previous */
  132. prevcharset_p = charset_p;
  133. }
  134. is_rfc2231_name = true;
  135. }
  136. /* Last item was encoded */
  137. if (is_rfc2231_name) {
  138. /* Name not null and name differs with new name*/
  139. if (!name.empty() && name_buf != name) {
  140. /* Finalize packet */
  141. rfc2231_to_mime(value_buf, NULL, 0, prevcharset_p);
  142. auto const name_key =
  143. m_attributes.convertKey<IntishCast::Cast>(name_buf);
  144. auto str = value_buf.detach();
  145. m_attributes.set(name_key, make_tv<KindOfString>(str.get()));
  146. value_buf.clear();
  147. prevcharset_p = 0;
  148. is_rfc2231_name = false;
  149. name_buf.clear();
  150. /* New non encoded name*/
  151. if (!currentencoded) {
  152. /* Add string*/
  153. auto const updated_name_key =
  154. m_attributes.convertKey<IntishCast::Cast>(name);
  155. m_attributes.set(updated_name_key,
  156. make_tv<KindOfString>(value.get()));
  157. } else { /* Encoded name changed*/
  158. if (namechanged) {
  159. /* Append string to buffer - check if to be encoded... */
  160. rfc2231_to_mime(value_buf, (char*)value.data(), charset_p,
  161. prevcharset_p);
  162. /* Mark */
  163. is_rfc2231_name = true;
  164. name_buf = name;
  165. prevcharset_p = charset_p;
  166. }
  167. }
  168. namechanged = false;
  169. }
  170. } else {
  171. auto const name_key =
  172. m_attributes.convertKey<IntishCast::Cast>(name);
  173. m_attributes.set(name_key, make_tv<KindOfString>(value.get()));
  174. }
  175. }
  176. }
  177. if (next_semi < toks->ntokens && !netscape_bug) {
  178. next_semi++;
  179. }
  180. first_semi = next_semi;
  181. netscape_bug = 0;
  182. }
  183. if (is_rfc2231_name) {
  184. /* Finalize packet */
  185. rfc2231_to_mime(value_buf, NULL, 0, prevcharset_p);
  186. auto const name_key =
  187. m_attributes.convertKey<IntishCast::Cast>(name_buf);
  188. auto str = value_buf.detach();
  189. m_attributes.set(name_key, make_tv<KindOfString>(str.get()));
  190. }
  191. }
  192. void MimePart::MimeHeader::clear() {
  193. m_empty = true;
  194. m_value.clear();
  195. m_attributes.reset();
  196. }
  197. Variant MimePart::MimeHeader::get(const String& attrname) {
  198. auto const arrkey =
  199. m_attributes.convertKey<IntishCast::Cast>(attrname);
  200. return m_attributes[arrkey];
  201. }
  202. void MimePart::MimeHeader::getAll(Array &ret, const String& valuelabel,
  203. const String& attrprefix) {
  204. for (ArrayIter iter(m_attributes); iter; ++iter) {
  205. String s = attrprefix + iter.first().toString();
  206. auto const arrkey = ret.convertKey<IntishCast::Cast>(s);
  207. ret.set(arrkey, iter.secondVal());
  208. }
  209. /* do this last so that a bogus set of headers like this:
  210. * Content-Type: multipart/related;
  211. * boundary="----=_NextPart_00_0017_01C091F4.1B5EF6B0";
  212. * type="text/html"
  213. *
  214. * doesn't overwrite content-type with the type="text/html"
  215. * value.
  216. * */
  217. auto const arrkey = ret.convertKey<IntishCast::Cast>(valuelabel);
  218. ret.set(arrkey, make_tv<KindOfString>(m_value.get()));
  219. }
  220. void MimePart::MimeHeader::rfc2231_to_mime(StringBuffer &value_buf,
  221. char* value,
  222. int charset_p, int prevcharset_p) {
  223. char *strp, *startofvalue = NULL;
  224. int quotes = 0;
  225. /* Process string, get positions and replace */
  226. /* Set to start of buffer*/
  227. if (charset_p) {
  228. /* Previous charset already set so only convert %nn to =nn*/
  229. if (prevcharset_p) quotes=2;
  230. strp = value;
  231. while (*strp) {
  232. /* Quote handling*/
  233. if (*strp == '\'') {
  234. if (quotes <= 1) {
  235. /* End of charset*/
  236. if (quotes == 0) {
  237. *strp=0;
  238. } else {
  239. startofvalue = strp+1;
  240. }
  241. quotes++;
  242. }
  243. } else {
  244. /* Replace % with = - quoted printable*/
  245. if (*strp == '%' && quotes==2) {
  246. *strp = '=';
  247. }
  248. }
  249. strp++;
  250. }
  251. }
  252. /* If first encoded token*/
  253. if (charset_p && !prevcharset_p && startofvalue) {
  254. value_buf.append("=?");
  255. value_buf.append(value);
  256. value_buf.append("?Q?");
  257. value_buf.append(startofvalue);
  258. }
  259. /* If last encoded token*/
  260. if (prevcharset_p && !charset_p) {
  261. value_buf.append("?=");
  262. }
  263. /* Append value*/
  264. if ((!charset_p || (prevcharset_p && charset_p)) && value) {
  265. value_buf.append(value);
  266. }
  267. }
  268. ///////////////////////////////////////////////////////////////////////////////
  269. MimePart::MimePart()
  270. : m_startpos(0), m_endpos(0), m_bodystart(0), m_bodyend(0),
  271. m_nlines(0), m_nbodylines(0) {
  272. m_headers = Array::CreateDArray();
  273. /* begin in header parsing mode */
  274. m_parsedata.in_header = true;
  275. m_parsedata.is_dummy = false;
  276. m_parsedata.completed = false;
  277. }
  278. ///////////////////////////////////////////////////////////////////////////////
  279. // enumeration
  280. bool MimePart::enumeratePartsImpl(Enumerator *top, Enumerator **child,
  281. PFUNC_ENUMERATOR callback, void *ptr) {
  282. *child = NULL;
  283. if (!(this->*callback)(top, ptr)) return false;
  284. Enumerator next;
  285. *child = &next;
  286. next.id = 1;
  287. if (!strncasecmp(m_content_type.m_value.data(), "multipart/", 10)) {
  288. next.id = 0;
  289. }
  290. for (ArrayIter iter(m_children); iter; ++iter) {
  291. if (next.id) {
  292. auto childpart = cast<MimePart>(iter.second());
  293. if (!childpart->enumeratePartsImpl(top, &next.next, callback, ptr)) {
  294. return false;
  295. }
  296. }
  297. next.id++;
  298. }
  299. return true;
  300. }
  301. void MimePart::enumerateParts(PFUNC_ENUMERATOR callback, void *ptr) {
  302. Enumerator top;
  303. top.id = 1;
  304. enumeratePartsImpl(&top, &top.next, callback, ptr);
  305. }
  306. struct find_part_struct {
  307. const char *searchfor;
  308. MimePart *foundpart;
  309. };
  310. bool MimePart::getStructure(Enumerator *id, void *ptr) {
  311. char intbuf[16];
  312. int len, i = 0;
  313. int buf_size = 1024;
  314. char *buf = (char*)malloc(buf_size);
  315. buf[0] = '\0';
  316. while (id && i < buf_size) {
  317. sprintf(intbuf, "%d", id->id);
  318. len = strlen(intbuf);
  319. if (len > (buf_size-i)) {
  320. raise_warning("too many nested sections in message");
  321. free(buf);
  322. return false;
  323. }
  324. if ((i + len + 1) >= buf_size) {
  325. buf_size = buf_size << 1;
  326. buf = (char*)realloc(buf, buf_size);
  327. if (!buf) {
  328. raise_fatal_error(
  329. folly::sformat("The structure buffer has been exceeded "
  330. "({}). Please try decreasing the nesting "
  331. "depth of messages and report this to the "
  332. "developers.", buf_size).c_str());
  333. }
  334. }
  335. sprintf(&buf[i], "%s%c", intbuf, id->next ? '.' : '\0');
  336. i += len + (id->next ? 1 : 0);
  337. id = id->next;
  338. }
  339. ((Array*)ptr)->append(String(buf, AttachString));
  340. return true;
  341. }
  342. Array MimePart::getStructure() {
  343. Array ret = Array::CreateDArray();
  344. enumerateParts(&MimePart::getStructure, &ret);
  345. return ret;
  346. }
  347. bool MimePart::findPart(Enumerator *id, void *ptr) {
  348. struct find_part_struct *find = (find_part_struct *)ptr;
  349. const unsigned char *num = (const unsigned char*)find->searchfor;
  350. unsigned int n;
  351. while (id) {
  352. if (!isdigit((int)*num)) return true;
  353. /* convert from decimal to int */
  354. n = 0;
  355. while (isdigit((int)*num)) n = (n * 10) + (*num++ - '0');
  356. if (*num) {
  357. if (*num != '.') return true;
  358. num++;
  359. }
  360. if (n != (unsigned int)id->id) return true;
  361. id = id->next;
  362. }
  363. if (*num == 0) find->foundpart = this;
  364. return true;
  365. }
  366. Resource MimePart::findByName(const char *name) {
  367. struct find_part_struct find;
  368. find.searchfor = name;
  369. find.foundpart = NULL;
  370. enumerateParts(&MimePart::findPart, &find);
  371. return Resource{find.foundpart};
  372. }
  373. static int filter_into_work_buffer(int c, void *dat) {
  374. MimePart *part = (MimePart*)dat;
  375. return part->filter(c);
  376. }
  377. int MimePart::filter(int c) {
  378. char buf[] = {(char)c, '\0'};
  379. m_parsedata.workbuf += buf;
  380. if (m_parsedata.workbuf.size() >= 4096) {
  381. (this->*m_extract_func)(m_parsedata.workbuf);
  382. m_parsedata.workbuf.clear();
  383. }
  384. return c;
  385. }
  386. void MimePart::decoderPrepare(bool do_decode) {
  387. enum mbfl_no_encoding from = mbfl_no_encoding_8bit;
  388. if (do_decode && !m_content_transfer_encoding.empty()) {
  389. from = mbfl_name2no_encoding(m_content_transfer_encoding.data());
  390. if (from == mbfl_no_encoding_invalid) {
  391. if (strcasecmp("binary", m_content_transfer_encoding.data()) != 0) {
  392. raise_warning("mbstring doesn't know how to decode %s "
  393. "transfer encoding!",
  394. m_content_transfer_encoding.data());
  395. }
  396. from = mbfl_no_encoding_8bit;
  397. }
  398. }
  399. m_parsedata.workbuf.clear();
  400. if (do_decode) {
  401. if (from == mbfl_no_encoding_8bit || from == mbfl_no_encoding_7bit) {
  402. m_extract_filter = NULL;
  403. } else {
  404. m_extract_filter = mbfl_convert_filter_new(from, mbfl_no_encoding_8bit,
  405. filter_into_work_buffer,
  406. NULL, this);
  407. }
  408. }
  409. }
  410. void MimePart::decoderFinish() {
  411. if (m_extract_filter) {
  412. mbfl_convert_filter_flush(m_extract_filter);
  413. mbfl_convert_filter_delete(m_extract_filter);
  414. }
  415. if (m_extract_func && !m_parsedata.workbuf.empty()) {
  416. (this->*m_extract_func)(m_parsedata.workbuf);
  417. m_parsedata.workbuf.clear();
  418. }
  419. }
  420. void MimePart::decoderFeed(const String& str) {
  421. if (!str.empty()) {
  422. if (m_extract_filter) {
  423. for (int i = 0; i < str.size(); i++) {
  424. if (mbfl_convert_filter_feed(str[i], m_extract_filter) < 0) {
  425. raise_warning("filter conversion failed. Input message is "
  426. "probably incorrectly encoded");
  427. return;
  428. }
  429. }
  430. } else {
  431. (this->*m_extract_func)(str);
  432. }
  433. }
  434. }
  435. const StaticString s_1_pt_0("1.0");
  436. bool MimePart::isVersion1() {
  437. return m_mime_version == s_1_pt_0 || m_parent;
  438. }
  439. const StaticString
  440. s_headers("headers"),
  441. s_starting_pos("starting-pos"),
  442. s_starting_pos_body("starting-pos-body"),
  443. s_ending_pos("ending-pos"),
  444. s_ending_pos_body("ending-pos-body"),
  445. s_line_count("line-count"),
  446. s_body_line_count("body-line-count"),
  447. s_charset("charset"),
  448. s_transfer_encoding("transfer-encoding"),
  449. s_content_type("content-type"),
  450. s_content_("content-"),
  451. s_text_plain_error("text/plain, (error)"),
  452. s_content_disposition("content-disposition"),
  453. s_disposition_("disposition-"),
  454. s_content_location("content-location"),
  455. s_content_base("content-base"),
  456. s_content_boundary("content-boundary"),
  457. s_content_id("content-id"),
  458. s_content_description("content-description"),
  459. s_content_language("content-language"),
  460. s_content_md5("content-md5"),
  461. s_boundary("boundary"),
  462. s_to("to"),
  463. s_cc("cc"),
  464. s_mime_version("mime-version"),
  465. s_content_transfer_encoding("content-transfer-encoding");
  466. Variant MimePart::getPartData() {
  467. Array ret = Array::CreateDArray();
  468. ret.set(s_headers, m_headers);
  469. ret.set(s_starting_pos, m_startpos);
  470. ret.set(s_starting_pos_body, m_bodystart);
  471. if (!m_parent) {
  472. ret.set(s_ending_pos, m_endpos);
  473. ret.set(s_ending_pos_body, m_bodyend);
  474. ret.set(s_line_count, m_nlines);
  475. ret.set(s_body_line_count, m_nbodylines);
  476. } else {
  477. ret.set(s_ending_pos, m_bodyend);
  478. ret.set(s_ending_pos_body, m_bodyend);
  479. ret.set(s_line_count, m_nlines ? m_nlines - 1 : m_nlines);
  480. ret.set(s_body_line_count, m_nbodylines ? m_nbodylines - 1 : m_nbodylines);
  481. }
  482. if (!m_charset.empty()) {
  483. ret.set(s_charset, m_charset);
  484. } else {
  485. ret.set(s_charset, "us-ascii");
  486. }
  487. if (!m_content_transfer_encoding.empty()) {
  488. ret.set(s_transfer_encoding, m_content_transfer_encoding);
  489. } else {
  490. ret.set(s_transfer_encoding, "8bit");
  491. }
  492. if (!m_content_type.empty()) {
  493. m_content_type.getAll(ret, s_content_type, s_content_);
  494. } else {
  495. ret.set(s_content_type, s_text_plain_error);
  496. }
  497. if (!m_content_disposition.empty()) {
  498. m_content_disposition.getAll(ret, s_content_disposition, s_disposition_);
  499. }
  500. if (!m_content_location.empty()) {
  501. ret.set(s_content_location, m_content_location);
  502. }
  503. if (!m_content_base.empty()) {
  504. ret.set(s_content_base, m_content_base);
  505. } else {
  506. ret.set(s_content_base, "/");
  507. }
  508. if (!m_boundary.empty()) {
  509. ret.set(s_content_boundary, m_boundary);
  510. }
  511. /* extract the address part of the content-id only */
  512. Variant contentId = m_headers[s_content_id];
  513. if (!contentId.isNull()) {
  514. php_rfc822_tokenized_t *toks =
  515. php_mailparse_rfc822_tokenize((const char*)contentId.toString().data(),
  516. true);
  517. php_rfc822_addresses_t *addrs =
  518. php_rfc822_parse_address_tokens(toks);
  519. if (addrs->naddrs > 0) {
  520. ret.set(s_content_id, String(addrs->addrs[0].address, CopyString));
  521. }
  522. php_rfc822_free_addresses(addrs);
  523. php_rfc822_tokenize_free(toks);
  524. }
  525. auto copyHeader = [&](const Variant& key) {
  526. if (m_headers.exists(key)) ret.set(key, m_headers[key]);
  527. };
  528. copyHeader(s_content_description);
  529. copyHeader(s_content_language);
  530. copyHeader(s_content_md5);
  531. return ret;
  532. }
  533. bool MimePart::parse(const char *buf, int bufsize) {
  534. while (bufsize > 0) {
  535. /* look for EOL */
  536. int len = 0;
  537. for (; len < bufsize; len++) {
  538. if (buf[len] == '\n') break;
  539. }
  540. if (len < bufsize && buf[len] == '\n') {
  541. ++len;
  542. m_parsedata.workbuf += String(buf, len, CopyString);
  543. if (!ProcessLine(req::ptr<MimePart>(this), m_parsedata.workbuf)) {
  544. // ProcessLine() only returns FAILURE in case the count of children
  545. // have exceeded MAXPARTS at the very beginning, without doing any work.
  546. // Short-circuit since the exceeded state won't change on subsequent
  547. // calls.
  548. return false;
  549. }
  550. m_parsedata.workbuf.clear();
  551. } else {
  552. m_parsedata.workbuf += String(buf, len, CopyString);
  553. }
  554. buf += len;
  555. bufsize -= len;
  556. }
  557. return true;
  558. }
  559. req::ptr<MimePart> MimePart::createChild(int startpos, bool inherit) {
  560. auto child = req::make<MimePart>();
  561. m_parsedata.lastpart = child;
  562. child->m_parent = this;
  563. m_children.append(Resource(child));
  564. child->m_startpos = child->m_endpos = child->m_bodystart =
  565. child->m_bodyend = startpos;
  566. if (inherit) {
  567. child->m_content_transfer_encoding = m_content_transfer_encoding;
  568. child->m_charset = m_charset;
  569. }
  570. return child;
  571. }
  572. bool MimePart::processHeader() {
  573. if (m_parsedata.headerbuf.empty()) {
  574. return true;
  575. }
  576. /* parse the header line */
  577. php_rfc822_tokenized_t *toks =
  578. php_mailparse_rfc822_tokenize(m_parsedata.headerbuf.data(), 0);
  579. /* valid headers consist of at least three tokens,
  580. with the first being a string and the second token being a ':' */
  581. if (toks->ntokens < 2 || toks->tokens[0].token != 0 ||
  582. toks->tokens[1].token != ':') {
  583. m_parsedata.headerbuf.clear();
  584. php_rfc822_tokenize_free(toks);
  585. return false;
  586. }
  587. /* get a lower-case version of the first token */
  588. String header_key(php_rfc822_recombine_tokens
  589. (toks, 0, 1,
  590. PHP_RFC822_RECOMBINE_IGNORE_COMMENTS|
  591. PHP_RFC822_RECOMBINE_STRTOLOWER), AttachString);
  592. const char *header_val = strchr(m_parsedata.headerbuf.data(), ':');
  593. String header_val_stripped(php_rfc822_recombine_tokens
  594. (toks, 2, toks->ntokens-2,
  595. PHP_RFC822_RECOMBINE_IGNORE_COMMENTS|
  596. PHP_RFC822_RECOMBINE_STRTOLOWER), AttachString);
  597. if (header_val) {
  598. header_val++;
  599. while (isspace(*header_val)) {
  600. header_val++;
  601. }
  602. auto const header_arrkey =
  603. m_headers.convertKey<IntishCast::Cast>(header_key);
  604. /* add the header to the hash.
  605. * join multiple To: or Cc: lines together */
  606. if ((header_key == s_to || header_key == s_cc) &&
  607. m_headers.exists(header_arrkey)) {
  608. String newstr = m_headers[header_arrkey].toString();
  609. newstr += ", ";
  610. newstr += header_val;
  611. m_headers.set(header_arrkey, make_tv<KindOfString>(newstr.get()));
  612. } else {
  613. if (m_headers.exists(header_arrkey)) {
  614. auto const zheaderval = m_headers.lval(header_arrkey);
  615. if (isArrayLikeType(zheaderval.type())) {
  616. asArrRef(zheaderval).append(String(header_val, CopyString));
  617. } else {
  618. // Create a nested array if there is more than one of the same header
  619. Array zarr = Array::CreateVArray();
  620. zarr.append(zheaderval.tv());
  621. zarr.append(String(header_val, CopyString));
  622. m_headers.set(header_arrkey, make_array_like_tv(zarr.get()));
  623. }
  624. } else {
  625. String str(header_val, CopyString);
  626. m_headers.set(header_arrkey, make_tv<KindOfString>(str.get()));
  627. }
  628. }
  629. /* if it is useful, keep a pointer to it in the mime part */
  630. if (header_key == s_mime_version) {
  631. m_mime_version = header_val_stripped;
  632. } else if (header_key == s_content_location) {
  633. m_content_location =
  634. String(php_rfc822_recombine_tokens
  635. (toks, 2, toks->ntokens-2,
  636. PHP_RFC822_RECOMBINE_IGNORE_COMMENTS), AttachString);
  637. } else if (header_key == s_content_base) {
  638. m_content_base =
  639. String(php_rfc822_recombine_tokens
  640. (toks, 2, toks->ntokens-2,
  641. PHP_RFC822_RECOMBINE_IGNORE_COMMENTS), AttachString);
  642. } else if (header_key == s_content_transfer_encoding) {
  643. m_content_transfer_encoding = header_val_stripped;
  644. } else if (header_key == s_content_type) {
  645. m_content_type = MimeHeader(toks);
  646. Variant boundary = m_content_type.get(s_boundary);
  647. if (!boundary.isNull()) {
  648. m_boundary = boundary.toString();
  649. }
  650. Variant charset = m_content_type.get(s_charset);
  651. if (!charset.isNull()) {
  652. m_charset = charset.toString();
  653. }
  654. } else if (header_key == s_content_disposition) {
  655. m_content_disposition = MimeHeader(toks);
  656. }
  657. }
  658. php_rfc822_tokenize_free(toks);
  659. m_parsedata.headerbuf.clear();
  660. return true;
  661. }
  662. bool MimePart::ProcessLine(req::ptr<MimePart> workpart, const String& line) {
  663. /* sanity check */
  664. if (workpart->m_children.size() > MAXPARTS) {
  665. raise_warning("MIME message too complex");
  666. return false;
  667. }
  668. const char *c = line.data();
  669. /* strip trailing \r\n -- we always have a trailing \n */
  670. int origcount = line.size();
  671. int linelen = origcount - 1;
  672. if (linelen && c[linelen-1] == '\r') {
  673. --linelen;
  674. }
  675. /* Discover which part we were last working on */
  676. while (workpart->m_parsedata.lastpart) {
  677. auto lastpart = workpart->m_parsedata.lastpart;
  678. if (lastpart->m_parsedata.completed) {
  679. UpdatePositions(workpart, workpart->m_endpos + origcount,
  680. workpart->m_endpos + origcount, 1);
  681. return true;
  682. }
  683. if (workpart->m_boundary.empty() || workpart->m_parsedata.in_header) {
  684. workpart = lastpart;
  685. continue;
  686. }
  687. int bound_len = workpart->m_boundary.size();
  688. /* Look for a boundary */
  689. if (c[0] == '-' && c[1] == '-' && linelen >= 2+bound_len &&
  690. strncasecmp(workpart->m_boundary.data(), c+2, bound_len) == 0) {
  691. /* is it the final boundary ? */
  692. if (linelen >= 4 + bound_len && strncmp(c+2+bound_len, "--", 2) == 0) {
  693. lastpart->m_parsedata.completed = true;
  694. UpdatePositions(workpart, workpart->m_endpos + origcount,
  695. workpart->m_endpos + origcount, 1);
  696. return true;
  697. }
  698. auto newpart =
  699. workpart->createChild(workpart->m_endpos + origcount, true);
  700. UpdatePositions(workpart, workpart->m_endpos + origcount,
  701. workpart->m_endpos + linelen, 1);
  702. newpart->m_mime_version = workpart->m_mime_version;
  703. newpart->m_parsedata.in_header = true;
  704. return true;
  705. }
  706. workpart = lastpart;
  707. }
  708. if (!workpart->m_parsedata.in_header) {
  709. if (!workpart->m_parsedata.completed && !workpart->m_parsedata.lastpart) {
  710. /* update the body/part end positions.
  711. * For multipart messages, the final newline belongs to the boundary.
  712. * Otherwise it belongs to the body
  713. * */
  714. if (workpart->m_parent &&
  715. strncasecmp(workpart->getParent()->m_content_type.m_value.data(),
  716. "multipart/", 10) == 0) {
  717. UpdatePositions(workpart, workpart->m_endpos + origcount,
  718. workpart->m_endpos + linelen, true);
  719. } else {
  720. UpdatePositions(workpart, workpart->m_endpos + origcount,
  721. workpart->m_endpos + origcount, true);
  722. }
  723. }
  724. } else {
  725. if (linelen > 0) {
  726. UpdatePositions(workpart, workpart->m_endpos + origcount,
  727. workpart->m_endpos + linelen, true);
  728. if (*c == ' ' || *c == '\t') {
  729. /* This doesn't technically confirm to rfc2822, as we're replacing
  730. \t with \s, but this seems to fix cases where clients incorrectly
  731. fold by inserting a \t character.
  732. */
  733. workpart->m_parsedata.headerbuf += " ";
  734. c++; linelen--;
  735. } else {
  736. workpart->processHeader();
  737. }
  738. /* save header for possible continuation */
  739. workpart->m_parsedata.headerbuf += String(c, linelen, CopyString);
  740. } else {
  741. /* end of headers */
  742. workpart->processHeader();
  743. /* start of body */
  744. workpart->m_parsedata.in_header = false;
  745. workpart->m_bodystart = workpart->m_endpos + origcount;
  746. UpdatePositions(workpart, workpart->m_bodystart, workpart->m_bodystart,
  747. true);
  748. --workpart->m_nbodylines;
  749. /* some broken mailers include the content-type header but not a
  750. * mime-version header.
  751. * Let's relax and pretend they said they were mime 1.0 compatible */
  752. if (workpart->m_mime_version.empty() &&
  753. !workpart->m_content_type.empty()) {
  754. workpart->m_mime_version = "1.0";
  755. }
  756. if (!workpart->isVersion1()) {
  757. /* if we don't understand the MIME version, discard the content-type
  758. and boundary */
  759. workpart->m_content_disposition.clear();
  760. workpart->m_boundary.clear();
  761. workpart->m_content_type.clear();
  762. workpart->m_content_type = MimeHeader("text/plain");
  763. }
  764. /* if there is no content type, default to text/plain, but use
  765. multipart/digest when in a multipart/rfc822 message */
  766. if (workpart->isVersion1() && workpart->m_content_type.empty()) {
  767. char *def_type = "text/plain";
  768. if (workpart->m_parent &&
  769. strcasecmp(workpart->getParent()->m_content_type.m_value.data(),
  770. "multipart/digest") == 0) {
  771. def_type = "message/rfc822";
  772. }
  773. workpart->m_content_type = MimeHeader(def_type);
  774. }
  775. /* if no charset had previously been set, either through inheritance
  776. * or by an explicit content-type header, default to us-ascii */
  777. if (workpart->m_charset.isNull()) {
  778. workpart->m_charset = "us-ascii";
  779. }
  780. if (strcasecmp(workpart->m_content_type.m_value.data(),
  781. "message/rfc822") == 0) {
  782. workpart = workpart->createChild(workpart->m_bodystart, false);
  783. workpart->m_parsedata.in_header = true;
  784. return true;
  785. }
  786. /* create a section for the preamble that precedes the first boundary */
  787. if (!workpart->m_boundary.empty()) {
  788. workpart = workpart->createChild(workpart->m_bodystart, true);
  789. workpart->m_parsedata.in_header = false;
  790. workpart->m_parsedata.is_dummy = true;
  791. return true;
  792. }
  793. return true;
  794. }
  795. }
  796. return true;
  797. }
  798. void MimePart::UpdatePositions(req::ptr<MimePart> part, int newendpos,
  799. int newbodyend, int deltanlines) {
  800. while (part) {
  801. part->m_endpos = newendpos;
  802. part->m_bodyend = newbodyend;
  803. part->m_nlines += deltanlines;
  804. if (!part->m_parsedata.in_header) {
  805. part->m_nbodylines += deltanlines;
  806. }
  807. part = part->m_parent;
  808. }
  809. }
  810. Variant MimePart::extract(const Variant& filename, const Variant& callbackfunc, int decode,
  811. bool isfile) {
  812. /* filename can be a filename or a stream */
  813. req::ptr<File> file;
  814. if (filename.isResource()) {
  815. file = cast<File>(filename);
  816. } else if (isfile) {
  817. file = File::Open(filename.toString(), "rb");
  818. } else {
  819. /* filename is the actual data */
  820. String data = filename.toString();
  821. file = req::make<MemFile>(data.data(), data.size());
  822. }
  823. if (!file) {
  824. return false;
  825. }
  826. m_extract_context = callbackfunc;
  827. if (callbackfunc.isString() && callbackfunc.toString().empty()) {
  828. m_extract_func = &MimePart::outputToStdout;
  829. } else {
  830. if (callbackfunc.isNull()) {
  831. m_extract_func = &MimePart::outputToString;
  832. } else if (callbackfunc.isResource()) {
  833. m_extract_func = &MimePart::outputToFile;
  834. } else {
  835. m_extract_func = &MimePart::callUserFunc;
  836. }
  837. }
  838. if (extractImpl(decode, file)) {
  839. if (callbackfunc.isNull()) {
  840. return m_extract_context;
  841. }
  842. if (callbackfunc.isResource()) {
  843. return HHVM_FN(stream_get_contents)(callbackfunc.toResource());
  844. }
  845. return true;
  846. }
  847. return init_null();
  848. }
  849. int MimePart::extractImpl(int decode, req::ptr<File> src) {
  850. /* figure out where the message part starts/ends */
  851. int start_pos = (decode & DecodeNoHeaders) ? m_bodystart : m_startpos;
  852. int end = (decode & DecodeNoBody) ?
  853. m_bodystart : (m_parent ? m_bodyend : m_endpos);
  854. decoderPrepare(decode & Decode8Bit);
  855. if (!src->seek(start_pos)) {
  856. raise_warning("unable to seek to section start");
  857. decoderFinish();
  858. return false;
  859. }
  860. while (start_pos < end) {
  861. int n = 4095;
  862. if (n > end - start_pos) n = end - start_pos;
  863. String str = src->read(n);
  864. if (str.empty()) {
  865. raise_warning("error reading from file at offset %d", start_pos);
  866. decoderFinish();
  867. return false;
  868. }
  869. decoderFeed(str);
  870. start_pos += str.size();
  871. }
  872. decoderFinish();
  873. return true;
  874. }
  875. void MimePart::callUserFunc(const String& s) {
  876. vm_call_user_func(m_extract_context, make_vec_array(s));
  877. }
  878. void MimePart::outputToStdout(const String& s) {
  879. g_context->write(s);
  880. }
  881. void MimePart::outputToFile(const String& s) {
  882. cast<File>(m_extract_context)->write(s);
  883. }
  884. void MimePart::outputToString(const String& s) {
  885. m_extract_context = m_extract_context.toString() + s;
  886. }
  887. ///////////////////////////////////////////////////////////////////////////////
  888. }