PageRenderTime 400ms CodeModel.GetById 32ms RepoModel.GetById 0ms app.codeStats 0ms

/application/protected/extensions/vendors/PEAR/File/IMC/Parse.php

https://bitbucket.org/dinhtrung/yiicorecms/
PHP | 686 lines | 224 code | 78 blank | 384 comment | 41 complexity | e47f693e4ac030dad3bdcc2cc7ef7611 MD5 | raw file
Possible License(s): GPL-3.0, BSD-3-Clause, CC0-1.0, BSD-2-Clause, GPL-2.0, LGPL-2.1, LGPL-3.0
  1. <?php
  2. /* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
  3. /**+----------------------------------------------------------------------+
  4. * | PHP version 5 |
  5. * +----------------------------------------------------------------------+
  6. * | Copyright (c) 1997-2008 The PHP Group |
  7. * +----------------------------------------------------------------------+
  8. * | All rights reserved. |
  9. * | |
  10. * | Redistribution and use in source and binary forms, with or without |
  11. * | modification, are permitted provided that the following conditions |
  12. * | are met: |
  13. * | |
  14. * | - Redistributions of source code must retain the above copyright |
  15. * | notice, this list of conditions and the following disclaimer. |
  16. * | - Redistributions in binary form must reproduce the above copyright |
  17. * | notice, this list of conditions and the following disclaimer in the |
  18. * | documentation and/or other materials provided with the distribution. |
  19. * | - Neither the name of the The PEAR Group nor the names of its |
  20. * | contributors may be used to endorse or promote products derived from |
  21. * | this software without specific prior written permission. |
  22. * | |
  23. * | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
  24. * | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
  25. * | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
  26. * | FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
  27. * | COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
  28. * | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, |
  29. * | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
  30. * | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
  31. * | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
  32. * | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN |
  33. * | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
  34. * | POSSIBILITY OF SUCH DAMAGE. |
  35. * +----------------------------------------------------------------------+
  36. *
  37. * @category File_Formats
  38. * @package File_IMC
  39. * @author Paul M. Jones <pmjones@ciaweb.net>
  40. * @license http://www.opensource.org/licenses/bsd-license.php The BSD License
  41. * @version CVS: $Id: Parse.php 309122 2011-03-11 16:33:57Z till $
  42. * @link http://pear.php.net/package/File_IMC
  43. */
  44. /**
  45. * Common parser for IMC files (vCard, vCalendar, iCalendar)
  46. *
  47. * This class provides the methods to parse a file into an array.
  48. * By extending the class, you are able to define functions to handle
  49. * specific elements that need special decoding. For an example, see
  50. * File_IMC_Parse_vCard.
  51. *
  52. * @author Paul M. Jones <pmjones@ciaweb.net>
  53. *
  54. * @category File_Formats
  55. * @package File_IMC
  56. * @author Paul M. Jones <pmjones@ciaweb.net>
  57. * @author Till Klampaeckel <till@php.net>
  58. * @license http://www.opensource.org/licenses/bsd-license.php The BSD License
  59. * @version Release: 0.4.2
  60. * @link http://pear.php.net/package/File_IMC
  61. */
  62. abstract class File_IMC_Parse
  63. {
  64. /**
  65. * Keeps track of the current line being parsed
  66. *
  67. * Starts at -1 so that the first line parsed is 0, since
  68. * _parseBlock() advances the counter by 1 at the beginning
  69. *
  70. * @see self::_parseBlock()
  71. *
  72. * @var int
  73. */
  74. protected $count = -1;
  75. /**
  76. * @var array
  77. */
  78. protected $data;
  79. /**
  80. * Reads a file for parsing, then sends it to $this->fromText()
  81. * and returns the results.
  82. *
  83. * @param string $filename The name of the file to read
  84. *
  85. * @return array An array of information extracted from the file.
  86. * @throws File_IMC_Exception If the file does not exist.
  87. * @throws File_IMC_Exception If the file is not readable.
  88. *
  89. * @see self::fromText()
  90. * @see self::_fromArray()
  91. */
  92. public function fromFile($filename, $decode_qp = true)
  93. {
  94. if (!file_exists($filename)) {
  95. throw new File_IMC_Exception("File {$filename} does not exist.");
  96. }
  97. if (!is_readable($filename)) {
  98. throw new File_IMC_Exception("Could not open {$filename}.");
  99. }
  100. // get the file data
  101. $text = implode('', file($filename));
  102. // dump to, and get return from, the fromText() method.
  103. return $this->fromText($text, $decode_qp);
  104. }
  105. /**
  106. * Prepares a block of text for parsing, then sends it through and
  107. * returns the results from $this->_fromArray().
  108. *
  109. * @param string $text A block of text to read for information.
  110. *
  111. * @return array An array of information extracted from the source text.
  112. *
  113. * @see self::_fromArray()
  114. */
  115. public function fromText($text, $decode_qp = true)
  116. {
  117. // convert all kinds of line endings to Unix-standard and get
  118. // rid of double blank lines.
  119. $text = $this->_convertLineEndings($text);
  120. // unfold lines. concat two lines where line 1 ends in \n and
  121. // line 2 starts with any amount of whitespace. only removes
  122. // the first whitespace character, leaves others in place.
  123. $fold_regex = '(\n)([ |\t])';
  124. $text = preg_replace("/$fold_regex/i", "", $text);
  125. // convert the resulting text to an array of lines
  126. $lines = explode("\n", $text);
  127. // parse the array of lines and return info
  128. $this->data = $this->_fromArray($lines, $decode_qp);
  129. return $this->data;
  130. }
  131. abstract function getVersion();
  132. /**
  133. * Converts line endings in text.
  134. *
  135. * Takes any text block and converts all line endings to UNIX
  136. * standard. DOS line endings are \r\n, Mac are \r, and UNIX is \n.
  137. * As a side-effect, all double-newlines (\n\n) are converted to a
  138. * single-newline.
  139. *
  140. * NOTE: Acts on the text block in-place; does not return a value.
  141. *
  142. * @param string $text The string on which to convert line endings.
  143. *
  144. * @return void
  145. */
  146. protected function _convertLineEndings($text)
  147. {
  148. // first, replace \r\n with \n to fix up from DOS and Mac
  149. $text = str_replace("\r\n", "\n", $text);
  150. $text = str_replace("\r", "\n", $text);
  151. return $text;
  152. }
  153. /**
  154. * Splits a string into an array. Honors backslash-escaped
  155. * delimiters, (i.e., splits at ';' not '\;') and double-quotes
  156. * (will not break inside double-quotes ("")).
  157. *
  158. * @param string $text The string to split into an array.
  159. *
  160. * @param string $delim Character to split string at.
  161. *
  162. * @param bool $recurse If true, recursively parse the entire text
  163. * for all occurrences of the delimiter; if false, only parse for
  164. * the first occurrence. Defaults to true.
  165. *
  166. * @return string|array An array of values, or a single string.
  167. */
  168. public function _splitByDelim($text, $delim, $recurse = true)
  169. {
  170. // where in the string is the delimiter?
  171. $pos = false;
  172. // was the previously-read character a backslash?
  173. // (used for tracking escaped characters)
  174. $prevIsBackslash = false;
  175. // are we currently inside a quoted passage?
  176. $inQuotes = false;
  177. // the length of the text to be parsed
  178. $len = strlen($text);
  179. // go through the text character by character, find the
  180. // first occurrence of the delimiter, save it, and
  181. // recursively parse the rest of the text
  182. for ($i = 0; $i < $len; $i++) {
  183. // if the current char is a double-quote, and the
  184. // previous char was _not_ an escaping backslash,
  185. // then note that we are now inside a quoted passage.
  186. if ($text{$i} == '"' && $prevIsBackslash == false) {
  187. ($inQuotes == true) ? $inQuotes = false : $inQuotes = true;
  188. }
  189. // if the current char is the delimiter, and we are _not_
  190. // inside quotes, and the delimiter has not been backslash-
  191. // escaped, then note the position of the delimiter and
  192. // break out of the loop.
  193. if ($text{$i} == $delim &&
  194. $inQuotes == false &&
  195. $prevIsBackslash == false) {
  196. $pos = $i;
  197. break;
  198. }
  199. // we have not found quotes, or the delimiter.
  200. // is the current char an escaping backslash?
  201. if ($text{$i} == "\\") {
  202. $prevIsBackslash = true;
  203. } else {
  204. $prevIsBackslash = false;
  205. }
  206. }
  207. // have we found the delimiter in the text?
  208. if ($pos === false) {
  209. // we have not found the delimiter anywhere in the
  210. // text. return the text as it is.
  211. return array($text);
  212. }
  213. // find the portions of the text to the left and the
  214. // right of the delimiter
  215. $left = trim(substr($text, 0, $pos));
  216. $right = trim(substr($text, $pos+1, strlen($text)));
  217. // should we recursively parse the rest of the text?
  218. if ($recurse) {
  219. // parse the right portion for the same delimiter, and
  220. // merge the results with the left-portion.
  221. return array_merge(
  222. array($left),
  223. $this->_splitByDelim($right, $delim, $recurse)
  224. );
  225. }
  226. // no recursion
  227. return array($left, $right);
  228. }
  229. /**
  230. * Splits a string into an array at semicolons.
  231. *
  232. * @param string $text The string to split into an array.
  233. *
  234. * @param bool $convertSingle If splitting the string results in a
  235. * single array element, return a string instead of a one-element
  236. * array.
  237. *
  238. * @param bool $recurse If true, recursively parse the entire text
  239. * for all occurrences of the delimiter; if false, only parse for
  240. * the first occurrence. Defaults to true.
  241. *
  242. * @return string|array An array of values, or a single string.
  243. *
  244. * @see self::_splitByDelim()
  245. */
  246. protected function _splitBySemi($text, $recurse = true)
  247. {
  248. return $this->_splitByDelim($text, ";", $recurse);
  249. }
  250. /**
  251. * Splits a string into an array at commas.
  252. *
  253. * @param string $text The string to split into an array.
  254. *
  255. * @param bool $recurse If true, recursively parse the entire text
  256. * for all occurrences of the delimiter; if false, only parse for
  257. * the first occurrence. Defaults to true.
  258. *
  259. * @return string|array An array of values, or a single string.
  260. *
  261. * @see self::_splitByDelim()
  262. */
  263. protected function _splitByComma($text, $recurse = true)
  264. {
  265. return $this->_splitByDelim($text, ",", $recurse);
  266. }
  267. /**
  268. *
  269. * Splits the line into types/parameters and values.
  270. *
  271. * @todo A parameter w/ 1 quote will break everything. Try to
  272. * come up with a good way to fix this.
  273. *
  274. * @param string $text The string to split into an array.
  275. *
  276. * @param bool $recurse If true, recursively parse the entire text
  277. * for all occurrences of the delimiter; if false, only parse for
  278. * the first occurrence. Defaults to false (this is different from
  279. * {@link self::_splitByCommon()} and {@link self::_splitBySemi()}).
  280. *
  281. * @return array The first element contains types and parameters
  282. * (before the colon). The second element contains the line's value
  283. * (after the colon).
  284. */
  285. protected function _splitByColon($text, $recurse = false)
  286. {
  287. return $this->_splitByDelim($text, ":", $recurse);
  288. }
  289. /**
  290. * Used to make string human-readable after being a vCard value.
  291. *
  292. * Converts...
  293. * \; => ;
  294. * \, => ,
  295. * literal \n => newline
  296. *
  297. * @param string|array $text The text to unescape.
  298. *
  299. * @return mixed
  300. */
  301. protected function _unescape($text)
  302. {
  303. if (is_array($text)) {
  304. foreach ($text as $key => $val) {
  305. $text[$key] = $this->_unescape($val);
  306. }
  307. } else {
  308. /*
  309. $text = str_replace('\:', ':', $text);
  310. $text = str_replace('\;', ';', $text);
  311. $text = str_replace('\,', ',', $text);
  312. $text = str_replace('\n', "\n", $text);
  313. */
  314. // combined with \r per #16637
  315. $find = array('\:', '\;', '\,', '\n', '\r');
  316. $replace = array(':', ';', ',', "\n", "\r");
  317. $text = str_replace($find, $replace, $text);
  318. }
  319. return $text;
  320. }
  321. /**
  322. * Parses an array of source lines and returns an array of vCards.
  323. * Each element of the array is itself an array expressing the types,
  324. * parameters, and values of each part of the vCard. Processes both
  325. * 2.1 and 3.0 vCard sources.
  326. *
  327. * @param array $source An array of lines to be read for vCard
  328. * information.
  329. *
  330. * @return array An array of of vCard information extracted from the
  331. * source array.
  332. *
  333. * @todo fix missing colon = skip line
  334. */
  335. protected function _fromArray($source, $decode_qp = true)
  336. {
  337. $parsed = $this->_parseBlock($source);
  338. $parsed = $this->_unescape($parsed);
  339. return $parsed;
  340. }
  341. /**
  342. * Goes through the IMC file, recursively processing BEGIN-END blocks
  343. *
  344. * Handles nested blocks, such as vEvents (BEGIN:VEVENT) and vTodos
  345. * (BEGIN:VTODO) inside vCalendars (BEGIN:VCALENDAR).
  346. *
  347. * @param array Array of lines in the IMC file
  348. *
  349. * @return array
  350. */
  351. protected function _parseBlock(array $source)
  352. {
  353. $max = count($source);
  354. for ($this->count++; $this->count < $max; $this->count++) {
  355. $line = $source[$this->count];
  356. // if the line is blank, skip it.
  357. if (trim($line) == '') {
  358. continue;
  359. }
  360. // get the left and right portions. The part
  361. // to the left of the colon is the type and parameters;
  362. // the part to the right of the colon is the value data.
  363. if (!(list($left, $right) = $this->_splitByColon($line))) {
  364. // colon not found, skip whole line
  365. continue;
  366. }
  367. if (strtoupper($left) == "BEGIN") {
  368. $block[$right][] = $this->_parseBlock($source);
  369. } elseif (strtoupper($left) == "END") {
  370. return $block;
  371. } else {
  372. // we're not on an ending line, so collect info from
  373. // this line into the current card. split the
  374. // left-portion of the line into a type-definition
  375. // (the kind of information) and parameters for the
  376. // type.
  377. $tmp = $this->_splitBySemi($left);
  378. $group = $this->_getGroup($tmp);
  379. $typedef = $this->_getTypeDef($tmp);
  380. $params = $this->_getParams($tmp);
  381. // if we are decoding quoted-printable, do so now.
  382. // QUOTED-PRINTABLE is not allowed in version 3.0,
  383. // but we don't check for versioning, so we do it
  384. // regardless. ;-)
  385. $resp = $this->_decode_qp($params, $right);
  386. $params = $resp[0];
  387. $right = $resp[1];
  388. // now get the value-data from the line, based on
  389. // the typedef
  390. $func = '_parse' . strtoupper($typedef);
  391. if (method_exists($this, $func)) {
  392. $value = $this->$func($right);
  393. } else {
  394. // by default, just grab the plain value. keep
  395. // as an array to make sure *all* values are
  396. // arrays. for consistency. ;-)
  397. $value = array(array($right));
  398. }
  399. // add the type, parameters, and value to the
  400. // current card array. note that we allow multiple
  401. // instances of the same type, which might be dumb
  402. // in some cases (e.g., N).
  403. $block[$typedef][] = array(
  404. 'group' => $group,
  405. 'param' => $params,
  406. 'value' => $value
  407. );
  408. }
  409. }
  410. return $block;
  411. }
  412. /**
  413. * Takes a line and extracts the Group for the line (a group is
  414. * identified as a prefix-with-dot to the Type-Definition; e.g.,
  415. * Group.ADR or Group.ORG).
  416. *
  417. * @param array $text Array containing left side (before colon) split by
  418. * semi-colon from a line.
  419. *
  420. * @return string The group for the line.
  421. *
  422. * @see self::_getTypeDef()
  423. * @see self::_splitBySemi()
  424. */
  425. protected function _getGroup(array $text)
  426. {
  427. // find the first element (the typedef)
  428. $tmp = $text[0];
  429. // find a dot in the typedef
  430. $pos = strpos($tmp, '.');
  431. // is there a '.' in the typedef?
  432. if ($pos === false) {
  433. // no group
  434. return '';
  435. }
  436. // yes, return the group name
  437. return substr($tmp, 0, $pos);
  438. }
  439. /**
  440. * Takes a line and extracts the Type-Definition for the line (not
  441. * including the Group portion; e.g., in Group.ADR, only ADR is
  442. * returned).
  443. *
  444. * @param array $text Array containing left side (before colon) split by
  445. * semi-colon from a line.
  446. *
  447. * @return string The type definition for the line.
  448. *
  449. * @see self::_getGroup()
  450. * @see self::_splitBySemi()
  451. */
  452. protected function _getTypeDef(array $text)
  453. {
  454. // find the first element (the typedef)
  455. $tmp = $text[0];
  456. // find a dot in the typedef
  457. $pos = strpos($tmp, '.');
  458. // is there a '.' in the typedef?
  459. if ($pos === false) {
  460. // no group
  461. return $tmp;
  462. }
  463. // yes, return the typedef without the group name
  464. return substr($tmp, $pos + 1);
  465. }
  466. /**
  467. * Finds the Type-Definition parameters for a line.
  468. *
  469. * @param array Array containing left side (before colon) split by
  470. * semi-colon from a line.
  471. *
  472. * @return array An array of parameters.
  473. *
  474. * @see self::_splitBySemi()
  475. */
  476. protected function _getParams(array $text)
  477. {
  478. // drop the first element of the array (the type-definition)
  479. array_shift($text);
  480. // set up an array to retain the parameters, if any
  481. $params = array();
  482. // loop through each parameter. the params may be in the format...
  483. // "TYPE=type1,type2,type3"
  484. // ...or...
  485. // "TYPE=type1;TYPE=type2;TYPE=type3"
  486. foreach ($text as $full) {
  487. // split the full parameter at the equal sign so we can tell
  488. // the parameter name from the parameter value
  489. $tmp = explode("=", $full, 2);
  490. // the key is the left portion of the parameter (before
  491. // '='). if in 2.1 format, the key may in fact be the
  492. // parameter value, not the parameter name.
  493. $key = strtoupper(trim($tmp[0]));
  494. // get the parameter name by checking to see if it's in
  495. // vCard 2.1 or 3.0 format.
  496. $name = $this->_getParamName($key);
  497. // list of all parameter values
  498. $listall = array_key_exists(1, $tmp) ? trim($tmp[1]) : '';
  499. // if there is a value-list for this parameter, they are
  500. // separated by commas, so split them out too.
  501. $list = $this->_splitByComma($listall);
  502. // now loop through each value in the parameter and retain
  503. // it. if the value is blank, that means it's a 2.1-style
  504. // param, and the key itself is the value.
  505. foreach ($list as $val) {
  506. if (trim($val) != '') {
  507. // 3.0 formatted parameter
  508. $params[$name][] = trim($val);
  509. } else {
  510. // 2.1 formatted parameter
  511. $params[$name][] = $key;
  512. }
  513. }
  514. // if, after all this, there are no parameter values for the
  515. // parameter name, retain no info about the parameter (saves
  516. // ram and checking-time later).
  517. if (count($params[$name]) == 0) {
  518. unset($params[$name]);
  519. }
  520. }
  521. // return the parameters array.
  522. return $params;
  523. }
  524. /**
  525. * Returns the parameter name for parameters given without names.
  526. *
  527. * The vCard 2.1 specification allows parameter values without a
  528. * name. The parameter name is then determined from the unique
  529. * parameter value.
  530. *
  531. * Shamelessly lifted from Frank Hellwig <frank@hellwig.org> and his
  532. * vCard PHP project <http://vcardphp.sourceforge.net>.
  533. *
  534. * @param string $value The first element in a parameter name-value
  535. * pair.
  536. *
  537. * @return string The proper parameter name (TYPE, ENCODING, or
  538. * VALUE).
  539. */
  540. protected function _getParamName($value)
  541. {
  542. static $types = array (
  543. 'DOM', 'INTL', 'POSTAL', 'PARCEL','HOME', 'WORK',
  544. 'PREF', 'VOICE', 'FAX', 'MSG', 'CELL', 'PAGER',
  545. 'BBS', 'MODEM', 'CAR', 'ISDN', 'VIDEO',
  546. 'AOL', 'APPLELINK', 'ATTMAIL', 'CIS', 'EWORLD',
  547. 'INTERNET', 'IBMMAIL', 'MCIMAIL',
  548. 'POWERSHARE', 'PRODIGY', 'TLX', 'X400',
  549. 'GIF', 'CGM', 'WMF', 'BMP', 'MET', 'PMB', 'DIB',
  550. 'PICT', 'TIFF', 'PDF', 'PS', 'JPEG', 'QTIME',
  551. 'MPEG', 'MPEG2', 'AVI',
  552. 'WAVE', 'AIFF', 'PCM',
  553. 'X509', 'PGP'
  554. );
  555. // CONTENT-ID added by pmj
  556. static $values = array (
  557. 'INLINE', 'URL', 'CID', 'CONTENT-ID'
  558. );
  559. // 8BIT added by pmj
  560. static $encodings = array (
  561. '7BIT', '8BIT', 'QUOTED-PRINTABLE', 'BASE64'
  562. );
  563. // changed by pmj to the following so that the name defaults to
  564. // whatever the original value was. Frank Hellwig's original
  565. // code was "$name = 'UNKNOWN'".
  566. $name = $value;
  567. if (in_array($value, $types)) {
  568. $name = 'TYPE';
  569. } elseif (in_array($value, $values)) {
  570. $name = 'VALUE';
  571. } elseif (in_array($value, $encodings)) {
  572. $name = 'ENCODING';
  573. }
  574. return $name;
  575. }
  576. /**
  577. * Looks at a line's parameters; if one of them is
  578. * ENCODING[] => QUOTED-PRINTABLE then decode the text in-place.
  579. *
  580. * @param array $params A parameter array from a vCard line.
  581. *
  582. * @param string $text A right-part (after-the-colon part) from a line.
  583. *
  584. * @return array
  585. *
  586. * @uses quoted_printable_decode()
  587. */
  588. protected function _decode_qp(array $params, $text)
  589. {
  590. // loop through each parameter
  591. foreach ($params as $param_key => $param_val) {
  592. // check to see if it's an encoding param
  593. if (trim(strtoupper($param_key)) != 'ENCODING') {
  594. continue;
  595. }
  596. // loop through each encoding param value
  597. foreach ($param_val as $enc_key => $enc_val) {
  598. // if any of the values are QP, decode the text
  599. // in-place and return
  600. if (trim(strtoupper($enc_val)) == 'QUOTED-PRINTABLE') {
  601. $text = quoted_printable_decode($text);
  602. return array($params, $text);
  603. }
  604. }
  605. }
  606. return array($params, $text);
  607. }
  608. }