PageRenderTime 48ms CodeModel.GetById 13ms RepoModel.GetById 0ms app.codeStats 1ms

/administrator/components/com_csvimproved/classes/excel/reader.php

https://bitbucket.org/dgough/annamaria-daneswood-25102012
PHP | 1050 lines | 593 code | 96 blank | 361 comment | 123 complexity | ba25ccdd46c7094873b3ad0b1e90c1e2 MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.1
  1. <?php
  2. /* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
  3. /**
  4. * A class for reading Microsoft Excel Spreadsheets.
  5. *
  6. * Originally developed by Vadim Tkachenko under the name PHPExcelReader.
  7. * (http://sourceforge.net/projects/phpexcelreader)
  8. * Based on the Java version by Andy Khan (http://www.andykhan.com). Now
  9. * maintained by David Sanders. Reads only Biff 7 and Biff 8 formats.
  10. *
  11. * PHP versions 4 and 5
  12. *
  13. * LICENSE: This source file is subject to version 3.0 of the PHP license
  14. * that is available through the world-wide-web at the following URI:
  15. * http://www.php.net/license/3_0.txt. If you did not receive a copy of
  16. * the PHP License and are unable to obtain it through the web, please
  17. * send a note to license@php.net so we can mail you a copy immediately.
  18. *
  19. * @package CSVImproved
  20. * @subpackage Excelreader
  21. * @author Vadim Tkachenko <vt@apachephp.com>
  22. * @license http://www.php.net/license/3_0.txt PHP License 3.0
  23. * @version CVS: $Id: reader.php 286 2008-06-01 02:51:30Z Suami $
  24. * @link http://pear.php.net/package/Spreadsheet_Excel_Reader
  25. * @see OLE, Spreadsheet_Excel_Writer
  26. */
  27. /**
  28. * Excel reader
  29. */
  // BIFF versions accepted by the reader (version field of a BOF record).
  define('SPREADSHEET_EXCEL_READER_BIFF7', 0x500);
  define('SPREADSHEET_EXCEL_READER_BIFF8', 0x600);

  // Substream types (substream-type field of a BOF record).
  define('SPREADSHEET_EXCEL_READER_WORKBOOKGLOBALS', 0x5);
  define('SPREADSHEET_EXCEL_READER_WORKSHEET', 0x10);

  // Record opcodes, in ascending numeric order.
  define('SPREADSHEET_EXCEL_READER_TYPE_FORMULA2', 0x6);
  define('SPREADSHEET_EXCEL_READER_TYPE_EOF', 0x0a);
  define('SPREADSHEET_EXCEL_READER_TYPE_NAME', 0x18);
  define('SPREADSHEET_EXCEL_READER_TYPE_NOTE', 0x1c);
  define('SPREADSHEET_EXCEL_READER_TYPE_NINETEENFOUR', 0x22);
  define('SPREADSHEET_EXCEL_READER_TYPE_FILEPASS', 0x2f);
  define('SPREADSHEET_EXCEL_READER_TYPE_CONTINUE', 0x3c);
  define('SPREADSHEET_EXCEL_READER_TYPE_RK', 0x7e);
  define('SPREADSHEET_EXCEL_READER_TYPE_BOUNDSHEET', 0x85);
  define('SPREADSHEET_EXCEL_READER_TYPE_MULRK', 0xbd);
  define('SPREADSHEET_EXCEL_READER_TYPE_MULBLANK', 0xbe);
  define('SPREADSHEET_EXCEL_READER_TYPE_DBCELL', 0xd7);
  define('SPREADSHEET_EXCEL_READER_TYPE_XF', 0xe0);
  define('SPREADSHEET_EXCEL_READER_TYPE_MERGEDCELLS', 0xE5);
  define('SPREADSHEET_EXCEL_READER_TYPE_SST', 0xfc);
  define('SPREADSHEET_EXCEL_READER_TYPE_LABELSST', 0xfd);
  define('SPREADSHEET_EXCEL_READER_TYPE_EXTSST', 0xff);
  define('SPREADSHEET_EXCEL_READER_TYPE_TXO', 0x1b6);
  define('SPREADSHEET_EXCEL_READER_TYPE_DIMENSION', 0x200);
  define('SPREADSHEET_EXCEL_READER_TYPE_NUMBER', 0x203);
  define('SPREADSHEET_EXCEL_READER_TYPE_LABEL', 0x204);
  define('SPREADSHEET_EXCEL_READER_TYPE_BOOLERR', 0x205);
  define('SPREADSHEET_EXCEL_READER_TYPE_STRING', 0x207);
  define('SPREADSHEET_EXCEL_READER_TYPE_ROW', 0x208);
  define('SPREADSHEET_EXCEL_READER_TYPE_INDEX', 0x20b);
  define('SPREADSHEET_EXCEL_READER_TYPE_ARRAY', 0x221);
  define('SPREADSHEET_EXCEL_READER_TYPE_RK2', 0x27e);
  define('SPREADSHEET_EXCEL_READER_TYPE_FORMULA', 0x406);
  define('SPREADSHEET_EXCEL_READER_TYPE_FORMAT', 0x41e);
  define('SPREADSHEET_EXCEL_READER_TYPE_BOF', 0x809);
  define('SPREADSHEET_EXCEL_READER_TYPE_UNKNOWN', 0xffff);

  // Date conversion: day offsets subtracted from an Excel date value before it
  // is turned into a timestamp (regular and 1904 date systems).
  define('SPREADSHEET_EXCEL_READER_UTCOFFSETDAYS', 25569);
  define('SPREADSHEET_EXCEL_READER_UTCOFFSETDAYS1904', 24107);
  // Despite the "MS" in its name this is the number of SECONDS in a day (24 * 60 * 60).
  define('SPREADSHEET_EXCEL_READER_MSINADAY', 86400);

  // sprintf() format applied to numbers whose format is unknown.
  // ("%.2f" was a previous default and is left out on purpose.)
  define('SPREADSHEET_EXCEL_READER_DEF_NUM_FORMAT', "%s");
  71. /*
  72. * Place includes, constant defines and $_GLOBAL settings here.
  73. * Make sure they have appropriate docblocks to avoid phpDocumentor
  74. * construing they are documented by the page-level docblock.
  75. */
  76. /**
  77. * A class for reading Microsoft Excel Spreadsheets.
  78. *
  79. * Originally developed by Vadim Tkachenko under the name PHPExcelReader.
  80. * (http://sourceforge.net/projects/phpexcelreader)
  81. * Based on the Java version by Andy Khan (http://www.andykhan.com). Now
  82. * maintained by David Sanders. Reads only Biff 7 and Biff 8 formats.
  83. *
  84. * @category Spreadsheet
  85. * @package Spreadsheet_Excel_Reader
  86. * @author Vadim Tkachenko <vt@phpapache.com>
  87. * @copyright 1997-2005 The PHP Group
  88. * @license http://www.php.net/license/3_0.txt PHP License 3.0
  89. * @version Release: @package_version@
  90. * @link http://pear.php.net/package/PackageName
  91. * @see OLE, Spreadsheet_Excel_Writer
  92. */
  93. class Spreadsheet_Excel_Reader {
  94. /**
  95. * Array of worksheets found
  96. *
  97. * @var array
  98. * @access public
  99. */
  100. var $boundsheets = array();
  101. /**
  102. * Array of format records found
  103. *
  104. * @var array
  105. * @access public
  106. */
  107. var $formatRecords = array();
  108. /**
  109. * Shared string table: the strings read from the workbook's SST record, in file order
  110. *
  111. * @var array
  112. * @access public
  113. */
  114. var $sst = array();
  115. /**
  116. * Array of worksheets
  117. *
  118. * The data is stored in 'cells' and the meta-data is stored in an array
  119. * called 'cellsInfo'
  120. *
  121. * Example:
  122. *
  123. * $sheets --> 'cells' --> row --> column --> Interpreted value
  124. * --> 'cellsInfo' --> row --> column --> 'type' - Can be 'date', 'number', or 'unknown'
  125. * --> 'raw' - The raw data that Excel stores for that data cell
  126. *
  127. * @var array
  128. * @access public
  129. */
  130. var $sheets = array();
  131. /**
  132. * The data returned by OLE
  133. *
  134. * @var string
  135. * @access public
  136. */
  137. var $data;
  138. /**
  139. * OLE object for reading the file
  140. *
  141. * @var OLE object
  142. * @access private
  143. */
  144. var $_ole;
  145. /**
  146. * Default encoding
  147. *
  148. * @var string
  149. * @access private
  150. */
  151. var $_defaultEncoding;
  152. /**
  153. * Default number format
  154. *
  155. * @var string sprintf() format string (defaults to SPREADSHEET_EXCEL_READER_DEF_NUM_FORMAT)
  156. * @access private
  157. */
  158. var $_defaultFormat = SPREADSHEET_EXCEL_READER_DEF_NUM_FORMAT;
  159. /**
  160. * todo
  161. * List of formats to use for each column
  162. *
  163. * @var array
  164. * @access private
  165. */
  166. var $_columnsFormat = array();
  167. /**
  168. * Row offset (default 1); a sheet's 'maxrow' starts at this value minus one
  169. *
  170. * @var integer
  171. * @access private
  172. */
  173. var $_rowoffset = 1;
  174. /**
  175. * Column offset (default 1); a sheet's 'maxcol' starts at this value minus one
  176. *
  177. * @var integer
  178. * @access private
  179. */
  180. var $_coloffset = 1;
  181. /**
  182. * List of default date formats used by Excel
  183. *
  184. * @var array
  185. * @access public
  186. */
  187. var $dateFormats = array (
  188. 0xe => "d/m/Y",
  189. 0xf => "d-M-Y",
  190. 0x10 => "d-M",
  191. 0x11 => "M-Y",
  192. 0x12 => "h:i a",
  193. 0x13 => "h:i:s a",
  194. 0x14 => "H:i",
  195. 0x15 => "H:i:s",
  196. 0x16 => "d/m/Y H:i",
  197. 0x2d => "i:s",
  198. 0x2e => "H:i:s",
  199. 0x2f => "i:s.S");
  200. /**
  201. * Default number formats used by Excel
  202. *
  203. * @var array
  204. * @access public
  205. */
  206. var $numberFormats = array(
  207. 0x1 => "%1.0f", // "0"
  208. 0x2 => "%1.2f", // "0.00",
  209. 0x3 => "%1.0f", //"#,##0",
  210. 0x4 => "%1.2f", //"#,##0.00",
  211. 0x5 => "%1.0f", /*"$#,##0;($#,##0)",*/
  212. 0x6 => '$%1.0f', /*"$#,##0;($#,##0)",*/
  213. 0x7 => '$%1.2f', //"$#,##0.00;($#,##0.00)",
  214. 0x8 => '$%1.2f', //"$#,##0.00;($#,##0.00)",
  215. 0x9 => '%1.0f%%', // "0%"
  216. 0xa => '%1.2f%%', // "0.00%"
  217. 0xb => '%1.2f', // 0.00E00",
  218. 0x25 => '%1.0f', // "#,##0;(#,##0)",
  219. 0x26 => '%1.0f', //"#,##0;(#,##0)",
  220. 0x27 => '%1.2f', //"#,##0.00;(#,##0.00)",
  221. 0x28 => '%1.2f', //"#,##0.00;(#,##0.00)",
  222. 0x29 => '%1.0f', //"#,##0;(#,##0)",
  223. 0x2a => '$%1.0f', //"$#,##0;($#,##0)",
  224. 0x2b => '%1.2f', //"#,##0.00;(#,##0.00)",
  225. 0x2c => '$%1.2f', //"$#,##0.00;($#,##0.00)",
  226. 0x30 => '%1.0f'); //"##0.0E0";
  227. // }}}
  228. // {{{ Spreadsheet_Excel_Reader()
  /**
   * Constructor
   *
   * Loads the OLE reader class, creates the OLE reader instance and selects
   * 'iconv' as the UTF-16 converter.
   *
   * Declared as __construct() so that it also runs on PHP versions that no
   * longer treat a method named after the class as the constructor.
   *
   * @param object $csviregistry Registry object; its GetVar('class_path') value
   *                             is used as the base path of excel/oleread.inc
   * @access public
   */
  function __construct(&$csviregistry)
  {
      require_once($csviregistry->GetVar('class_path').'excel/oleread.inc');
      // Plain assignment: "=& new" is deprecated since PHP 5.3 and a parse
      // error from PHP 7 on.
      $this->_ole = new OLERead();
      $this->setUTFEncoder('iconv');
  }

  /**
   * Old-style (PHP 4) constructor, kept for backward compatibility
   *
   * Still acts as the constructor on PHP 4 and keeps explicit calls such as
   * parent::Spreadsheet_Excel_Reader($registry) working.
   *
   * @param object $csviregistry See __construct()
   * @access public
   */
  function Spreadsheet_Excel_Reader(&$csviregistry)
  {
      $this->__construct($csviregistry);
  }
  240. // }}}
  241. // {{{ setOutputEncoding()
  /**
   * Choose the output encoding
   *
   * The value is only stored here (in $_defaultEncoding); nothing is converted
   * by this call.
   *
   * @param string $encoding Name of the encoding to store
   * @access public
   */
  function setOutputEncoding($encoding)
  {
      $this->_defaultEncoding = $encoding;
  }
  252. // }}}
  253. // {{{ setUTFEncoder()
  /**
   * Select the function used to convert UTF-16LE text
   *
   * 'iconv' selects iconv(), 'mb' selects mb_convert_encoding(). The function
   * name is stored in $_encoderFunction only when that function exists in the
   * running PHP; in every other case (unknown $encoder, function missing) an
   * empty string is stored.
   *
   * @access public
   * @param string $encoder Either 'iconv' or 'mb'
   */
  function setUTFEncoder($encoder = 'iconv')
  {
      $wanted = '';
      switch ($encoder) {
          case 'iconv':
              $wanted = 'iconv';
              break;
          case 'mb':
              $wanted = 'mb_convert_encoding';
              break;
      }

      if ($wanted !== '' && !function_exists($wanted)) {
          $wanted = '';
      }
      $this->_encoderFunction = $wanted;
  }
  273. // }}}
  274. // {{{ setRowColOffset()
  /**
   * Set the row offset and the column offset to the same value
   *
   * @access public
   * @param integer $iOffset Value stored in both $_rowoffset and $_coloffset
   */
  function setRowColOffset($iOffset)
  {
      $this->_rowoffset = $this->_coloffset = $iOffset;
  }
  286. // }}}
  287. // {{{ setDefaultFormat()
  /**
   * Replace the default number format
   *
   * The default format is the one isDate() selects for cells whose format is
   * neither a known date format nor a known number format.
   *
   * @access public
   * @param string $sFormat sprintf() format string
   */
  function setDefaultFormat($sFormat)
  {
      $this->_defaultFormat = $sFormat;
  }
  298. // }}}
  299. // {{{ setColumnFormat()
  /**
   * Pin a number format to one column
   *
   * When a numeric, non-date cell is read, a format registered here for its
   * column replaces the format taken from the cell. The sheet parser looks the
   * column up as "zero-based column index + 1".
   *
   * @access public
   * @param integer $column  Column number (1 = first column)
   * @param string  $sFormat sprintf() format string
   */
  function setColumnFormat($column, $sFormat)
  {
      $this->_columnsFormat[$column] = $sFormat;
  }
  311. // }}}
  312. // {{{ read()
  /**
   * Read the spreadsheet file using OLE, then parse it
   *
   * The workbook stream is stored in $this->data and the parsed result ends up
   * in $this->boundsheets, $this->sst, $this->formatRecords and $this->sheets.
   *
   * Execution is terminated with die() when the OLE reader reports error
   * code 1 (file not readable), exactly as before.
   *
   * @access public
   * @param string $sFileName Path of the Excel file
   * @return boolean True when the workbook was read and parsed, false when the
   *                 OLE reader failed or the workbook could not be parsed
   */
  function read($sFileName)
  {
      $res = $this->_ole->read($sFileName);

      if ($res === false) {
          if ($this->_ole->error == 1) {
              // bad file
              die('The filename ' . $sFileName . ' is not readable');
          }
          // Any other OLE failure: there is no workbook stream worth parsing.
          return false;
      }

      $this->data = $this->_ole->getWorkBook();

      // _parse() signals failure in more than one way; collapse it to a boolean.
      return $this->_parse() === true;
  }
  337. // }}}
  338. // {{{ _parse()
  /**
   * Parse a workbook
   *
   * Checks the leading BOF record of $this->data (BIFF7 or BIFF8, workbook
   * globals substream) and then walks the records up to the EOF record,
   * collecting:
   *  - the shared strings (SST plus its CONTINUE records) in $this->sst,
   *  - FORMAT strings in $this->formatRecords[<format index>],
   *  - one entry per XF record in $this->formatRecords['xfrecords'],
   *  - the NINETEENFOUR flag in $this->nineteenFour,
   *  - name and stream offset of every sheet in $this->boundsheets.
   * Afterwards each bound sheet is passed to _parsesheet().
   *
   * @access private
   * @return bool True on success. False when the stream is too short, is not a
   *              BIFF7/BIFF8 workbook-globals stream, contains a FILEPASS
   *              record, ends without an EOF record, or the shared string
   *              table is not continued by a CONTINUE record where one is
   *              required.
   */
  function _parse()
  {
      // The BOF fields read below occupy the first 8 bytes of the stream.
      if (!is_string($this->data) || strlen($this->data) < 8) {
          return false;
      }
      $dataLength = strlen($this->data);

      $pos = 0;
      $length = ord($this->data[$pos+2]) | ord($this->data[$pos+3])<<8;
      $version = ord($this->data[$pos + 4]) | ord($this->data[$pos + 5])<<8;
      $substreamType = ord($this->data[$pos + 6]) | ord($this->data[$pos + 7])<<8;

      if (($version != SPREADSHEET_EXCEL_READER_BIFF8) &&
          ($version != SPREADSHEET_EXCEL_READER_BIFF7)) {
          return false;
      }
      if ($substreamType != SPREADSHEET_EXCEL_READER_WORKBOOKGLOBALS) {
          return false;
      }

      $pos += $length + 4;

      while (true) {
          // Without this check a stream that lacks an EOF record would be
          // "read" past its end forever (every missing byte reads as 0).
          if ($pos + 4 > $dataLength) {
              return false;
          }

          $code = ord($this->data[$pos]) | ord($this->data[$pos+1])<<8;
          $length = ord($this->data[$pos+2]) | ord($this->data[$pos+3])<<8;

          if ($code == SPREADSHEET_EXCEL_READER_TYPE_EOF) {
              break;
          }

          switch ($code) {
              case SPREADSHEET_EXCEL_READER_TYPE_SST:
                  $spos = $pos + 4;
                  $limitpos = $spos + $length;
                  $uniqueStrings = $this->_GetInt4d($this->data, $spos+4);
                  $spos += 8;

                  for ($i = 0; $i < $uniqueStrings; $i++) {
                      // A string header never straddles records: when the
                      // current record is used up, the next one must be a
                      // CONTINUE record.
                      if ($spos == $limitpos) {
                          $opcode = ord($this->data[$spos]) | ord($this->data[$spos+1])<<8;
                          $conlength = ord($this->data[$spos+2]) | ord($this->data[$spos+3])<<8;
                          if ($opcode != SPREADSHEET_EXCEL_READER_TYPE_CONTINUE) {
                              return false;
                          }
                          $spos += 4;
                          $limitpos = $spos + $conlength;
                      }

                      // Number of characters, then the option flags
                      $numChars = ord($this->data[$spos]) | (ord($this->data[$spos+1]) << 8);
                      $spos += 2;
                      $optionFlags = ord($this->data[$spos]);
                      $spos++;

                      $asciiEncoding = (($optionFlags & 0x01) == 0);
                      $extendedString = (($optionFlags & 0x04) != 0);
                      // See if string contains formatting information
                      $richString = (($optionFlags & 0x08) != 0);

                      if ($richString) {
                          // Read in the crun
                          $formattingRuns = ord($this->data[$spos]) | (ord($this->data[$spos+1]) << 8);
                          $spos += 2;
                      }
                      if ($extendedString) {
                          // Read in cchExtRst
                          $extendedRunLength = $this->_GetInt4d($this->data, $spos);
                          $spos += 4;
                      }

                      $len = ($asciiEncoding) ? $numChars : $numChars*2;

                      if ($spos + $len < $limitpos) {
                          // The whole string lies inside the current record.
                          $retstr = substr($this->data, $spos, $len);
                          $spos += $len;
                      } else {
                          // The string runs into one or more CONTINUE records.
                          $retstr = substr($this->data, $spos, $limitpos - $spos);
                          $bytesRead = $limitpos - $spos;
                          $charsLeft = $numChars - (($asciiEncoding) ? $bytesRead : ($bytesRead / 2));
                          $spos = $limitpos;

                          while ($charsLeft > 0) {
                              $opcode = ord($this->data[$spos]) | ord($this->data[$spos+1])<<8;
                              $conlength = ord($this->data[$spos+2]) | ord($this->data[$spos+3])<<8;
                              if ($opcode != SPREADSHEET_EXCEL_READER_TYPE_CONTINUE) {
                                  return false;
                              }
                              $spos += 4;
                              $limitpos = $spos + $conlength;

                              // Every continued part starts with its own
                              // encoding flag: 0 = one byte per character.
                              $option = ord($this->data[$spos]);
                              $spos += 1;

                              if ($asciiEncoding && ($option == 0)) {
                                  // one byte per character before and after
                                  $len = min($charsLeft, $limitpos - $spos);
                                  $retstr .= substr($this->data, $spos, $len);
                                  $charsLeft -= $len;
                              } elseif (!$asciiEncoding && ($option != 0)) {
                                  // two bytes per character before and after
                                  $len = min($charsLeft * 2, $limitpos - $spos);
                                  $retstr .= substr($this->data, $spos, $len);
                                  $charsLeft -= $len/2;
                              } elseif (!$asciiEncoding && ($option == 0)) {
                                  // Two bytes per character so far, one byte per
                                  // character in this part: widen the new bytes.
                                  $len = min($charsLeft, $limitpos - $spos);
                                  for ($j = 0; $j < $len; $j++) {
                                      $retstr .= $this->data[$spos + $j].chr(0);
                                  }
                                  $charsLeft -= $len;
                              } else {
                                  // One byte per character so far, two bytes per
                                  // character in this part: widen what was read.
                                  // (Appending with ".=" is the fix; plain "="
                                  // kept only the last character.)
                                  $newstr = '';
                                  $readSoFar = strlen($retstr);
                                  for ($j = 0; $j < $readSoFar; $j++) {
                                      $newstr .= $retstr[$j].chr(0);
                                  }
                                  $retstr = $newstr;
                                  $len = min($charsLeft * 2, $limitpos - $spos);
                                  $retstr .= substr($this->data, $spos, $len);
                                  $charsLeft -= $len/2;
                                  $asciiEncoding = false;
                              }
                              $spos += $len;
                          }
                      }

                      $retstr = ($asciiEncoding) ? $retstr : $this->_encodeUTF16($retstr);

                      // Skip the formatting runs (4 bytes each)
                      if ($richString) {
                          $spos += 4 * $formattingRuns;
                      }
                      // For extended strings, skip over the extended string data
                      if ($extendedString) {
                          $spos += $extendedRunLength;
                      }

                      $this->sst[] = $retstr;
                  }
                  break;

              case SPREADSHEET_EXCEL_READER_TYPE_FILEPASS:
                  // A FILEPASS record aborts parsing.
                  return false;

              case SPREADSHEET_EXCEL_READER_TYPE_NAME:
                  // ignored
                  break;

              case SPREADSHEET_EXCEL_READER_TYPE_FORMAT:
                  $indexCode = ord($this->data[$pos+4]) | ord($this->data[$pos+5]) << 8;
                  if ($version == SPREADSHEET_EXCEL_READER_BIFF8) {
                      $numchars = ord($this->data[$pos+6]) | ord($this->data[$pos+7]) << 8;
                      if (ord($this->data[$pos+8]) == 0) {
                          $formatString = substr($this->data, $pos+9, $numchars);
                      } else {
                          $formatString = substr($this->data, $pos+9, $numchars*2);
                      }
                  } else {
                      // BIFF7 stores a byte string with a one-byte length, so
                      // exactly $numchars bytes belong to it (reading twice as
                      // many ran into the following record).
                      $numchars = ord($this->data[$pos+6]);
                      $formatString = substr($this->data, $pos+7, $numchars);
                  }
                  $this->formatRecords[$indexCode] = $formatString;
                  break;

              case SPREADSHEET_EXCEL_READER_TYPE_XF:
                  // Format index this XF record refers to
                  $indexCode = ord($this->data[$pos+6]) | ord($this->data[$pos+7]) << 8;

                  if (array_key_exists($indexCode, $this->dateFormats)) {
                      $this->formatRecords['xfrecords'][] = array(
                          'type' => 'date',
                          'format' => $this->dateFormats[$indexCode]
                      );
                  } elseif (array_key_exists($indexCode, $this->numberFormats)) {
                      $this->formatRecords['xfrecords'][] = array(
                          'type' => 'number',
                          'format' => $this->numberFormats[$indexCode]
                      );
                  } else {
                      $isdate = FALSE;
                      // Start from an empty string for every XF record so a
                      // format left over from an earlier record is never reused.
                      $formatstr = '';
                      if ($indexCode > 0) {
                          if (isset($this->formatRecords[$indexCode])) {
                              $formatstr = $this->formatRecords[$indexCode];
                          }
                          // A custom format made up only of date/time letters
                          // and separators is treated as a date format.
                          if ($formatstr && preg_match("/[^hmsday\/\-:\s]/i", $formatstr) == 0) {
                              $isdate = TRUE;
                              $formatstr = str_replace('mm', 'i', $formatstr);
                              $formatstr = str_replace('h', 'H', $formatstr);
                          }
                      }

                      if ($isdate) {
                          $this->formatRecords['xfrecords'][] = array(
                              'type' => 'date',
                              'format' => $formatstr,
                          );
                      } else {
                          $this->formatRecords['xfrecords'][] = array(
                              'type' => 'other',
                              'format' => '',
                              'code' => $indexCode
                          );
                      }
                  }
                  break;

              case SPREADSHEET_EXCEL_READER_TYPE_NINETEENFOUR:
                  $this->nineteenFour = (ord($this->data[$pos+4]) == 1);
                  break;

              case SPREADSHEET_EXCEL_READER_TYPE_BOUNDSHEET:
                  $rec_offset = $this->_GetInt4d($this->data, $pos+4);
                  $rec_length = ord($this->data[$pos+10]);

                  // $version is BIFF8 or BIFF7 here (checked at the top).
                  if ($version == SPREADSHEET_EXCEL_READER_BIFF8) {
                      $chartype = ord($this->data[$pos+11]);
                      if ($chartype == 0) {
                          $rec_name = substr($this->data, $pos+12, $rec_length);
                      } else {
                          $rec_name = $this->_encodeUTF16(substr($this->data, $pos+12, $rec_length*2));
                      }
                  } elseif ($version == SPREADSHEET_EXCEL_READER_BIFF7) {
                      $rec_name = substr($this->data, $pos+11, $rec_length);
                  }

                  $this->boundsheets[] = array('name'=>$rec_name,
                                               'offset'=>$rec_offset);
                  break;
          }

          $pos += $length + 4;
      }

      // $this->sn tells _parsesheet() which entry of $this->sheets to fill.
      foreach ($this->boundsheets as $key=>$val) {
          $this->sn = $key;
          $this->_parsesheet($val['offset']);
      }

      return true;
  }
  /**
   * Parse a worksheet
   *
   * Reads the worksheet substream that starts at $spos in $this->data and
   * fills $this->sheets[$this->sn]: cell values are handed to addcell(),
   * merged ranges are recorded as 'rowspan'/'colspan' in 'cellsInfo', and
   * 'numRows'/'numCols' come from the DIMENSION record or, when there is none,
   * from 'maxrow'/'maxcol'.
   *
   * Reading stops at the first record whose low opcode byte equals the EOF
   * opcode, or at the end of the data.
   *
   * @access private
   * @param integer $spos Offset of the sheet's BOF record inside $this->data
   * @return integer|null -1 when the BOF record is missing or not BIFF7/BIFF8,
   *                      -2 when the substream is not a worksheet, nothing
   *                      (null) otherwise
   * @todo fix return codes
   */
  function _parsesheet($spos)
  {
      $dataLength = strlen($this->data);

      // The BOF fields read below need 8 bytes.
      if ($spos + 8 > $dataLength) {
          return -1;
      }

      // read BOF
      $length = ord($this->data[$spos+2]) | ord($this->data[$spos+3])<<8;
      $version = ord($this->data[$spos + 4]) | ord($this->data[$spos + 5])<<8;
      $substreamType = ord($this->data[$spos + 6]) | ord($this->data[$spos + 7])<<8;

      if (($version != SPREADSHEET_EXCEL_READER_BIFF8) && ($version != SPREADSHEET_EXCEL_READER_BIFF7)) {
          return -1;
      }
      if ($substreamType != SPREADSHEET_EXCEL_READER_WORKSHEET) {
          return -2;
      }
      $spos += $length + 4;

      // Initialise the running maxima once per sheet. Doing this for every
      // record (as before) threw away whatever had been accumulated as soon as
      // the next record was read.
      $this->sheets[$this->sn]['maxrow'] = $this->_rowoffset - 1;
      $this->sheets[$this->sn]['maxcol'] = $this->_coloffset - 1;

      while (true) {
          // Stop at the end of the data: a sheet without an EOF record would
          // otherwise never terminate (missing bytes read as 0).
          if ($spos + 4 > $dataLength) {
              break;
          }

          $lowcode = ord($this->data[$spos]);
          if ($lowcode == SPREADSHEET_EXCEL_READER_TYPE_EOF) {
              break;
          }
          $code = $lowcode | ord($this->data[$spos+1])<<8;
          $length = ord($this->data[$spos+2]) | ord($this->data[$spos+3])<<8;
          $spos += 4; // $spos now points at the record data

          unset($this->rectype);
          $this->multiplier = 1; // need for format with %

          switch ($code) {
              // 512
              case SPREADSHEET_EXCEL_READER_TYPE_DIMENSION:
                  if (!isset($this->numRows)) {
                      if (($length == 10) || ($version == SPREADSHEET_EXCEL_READER_BIFF7)) {
                          // 2-byte row indexes
                          $this->sheets[$this->sn]['numRows'] = ord($this->data[$spos+2]) | ord($this->data[$spos+3]) << 8;
                          $this->sheets[$this->sn]['numCols'] = ord($this->data[$spos+6]) | ord($this->data[$spos+7]) << 8;
                      } else {
                          // 4-byte row indexes (only the low 16 bits are read)
                          $this->sheets[$this->sn]['numRows'] = ord($this->data[$spos+4]) | ord($this->data[$spos+5]) << 8;
                          $this->sheets[$this->sn]['numCols'] = ord($this->data[$spos+10]) | ord($this->data[$spos+11]) << 8;
                      }
                  }
                  break;

              // 229
              case SPREADSHEET_EXCEL_READER_TYPE_MERGEDCELLS:
                  $cellRanges = ord($this->data[$spos]) | ord($this->data[$spos+1])<<8;
                  for ($i = 0; $i < $cellRanges; $i++) {
                      // Each range: first row, last row, first column, last column
                      $rangePos = $spos + 8*$i;
                      $fr = ord($this->data[$rangePos + 2]) | ord($this->data[$rangePos + 3])<<8;
                      $lr = ord($this->data[$rangePos + 4]) | ord($this->data[$rangePos + 5])<<8;
                      $fc = ord($this->data[$rangePos + 6]) | ord($this->data[$rangePos + 7])<<8;
                      $lc = ord($this->data[$rangePos + 8]) | ord($this->data[$rangePos + 9])<<8;

                      // The span is stored on the top-left cell of the range.
                      if ($lr - $fr > 0) {
                          $this->sheets[$this->sn]['cellsInfo'][$fr+1][$fc+1]['rowspan'] = $lr - $fr + 1;
                      }
                      if ($lc - $fc > 0) {
                          $this->sheets[$this->sn]['cellsInfo'][$fr+1][$fc+1]['colspan'] = $lc - $fc + 1;
                      }
                  }
                  break;

              // 126
              case SPREADSHEET_EXCEL_READER_TYPE_RK:
              // 638
              case SPREADSHEET_EXCEL_READER_TYPE_RK2:
                  $row = ord($this->data[$spos]) | ord($this->data[$spos+1])<<8;
                  $column = ord($this->data[$spos+2]) | ord($this->data[$spos+3])<<8;
                  $rknum = $this->_GetInt4d($this->data, $spos + 6);
                  $numValue = $this->_GetIEEE754($rknum);

                  // isDate() also selects $this->curformat and $this->multiplier.
                  if ($this->isDate($spos)) {
                      list($string, $raw) = $this->createDate($numValue);
                  } else {
                      $raw = $numValue;
                      if (isset($this->_columnsFormat[$column + 1])) {
                          $this->curformat = $this->_columnsFormat[$column + 1];
                      }
                      $string = sprintf($this->curformat, $numValue * $this->multiplier);
                  }
                  $this->addcell($row, $column, $string, $raw);
                  break;

              // 253
              case SPREADSHEET_EXCEL_READER_TYPE_LABELSST:
                  $row = ord($this->data[$spos]) | ord($this->data[$spos+1])<<8;
                  $column = ord($this->data[$spos+2]) | ord($this->data[$spos+3])<<8;
                  $index = $this->_GetInt4d($this->data, $spos + 6);
                  // A string index without an entry yields null (as before),
                  // but without reading an undefined array offset.
                  $text = isset($this->sst[$index]) ? $this->sst[$index] : null;
                  $this->addcell($row, $column, $text);
                  break;

              // 189
              case SPREADSHEET_EXCEL_READER_TYPE_MULRK:
                  $row = ord($this->data[$spos]) | ord($this->data[$spos+1])<<8;
                  $colFirst = ord($this->data[$spos+2]) | ord($this->data[$spos+3])<<8;
                  // The last column index sits in the final two bytes of the record.
                  $colLast = ord($this->data[$spos + $length - 2]) | ord($this->data[$spos + $length - 1])<<8;
                  $columns = $colLast - $colFirst + 1;
                  $tmppos = $spos+4;

                  // One 6-byte entry per column: XF index (2 bytes), RK value (4 bytes)
                  for ($i = 0; $i < $columns; $i++) {
                      $numValue = $this->_GetIEEE754($this->_GetInt4d($this->data, $tmppos + 2));
                      // isDate() reads the XF index at its argument + 4, i.e. at $tmppos.
                      if ($this->isDate($tmppos-4)) {
                          list($string, $raw) = $this->createDate($numValue);
                      } else {
                          $raw = $numValue;
                          if (isset($this->_columnsFormat[$colFirst + $i + 1])) {
                              $this->curformat = $this->_columnsFormat[$colFirst + $i + 1];
                          }
                          $string = sprintf($this->curformat, $numValue * $this->multiplier);
                      }
                      $tmppos += 6;
                      $this->addcell($row, $colFirst + $i, $string, $raw);
                  }
                  break;

              // 515
              case SPREADSHEET_EXCEL_READER_TYPE_NUMBER:
                  $row = ord($this->data[$spos]) | ord($this->data[$spos+1])<<8;
                  $column = ord($this->data[$spos+2]) | ord($this->data[$spos+3])<<8;
                  // unpack('d') uses the machine's byte order
                  $tmp = unpack("ddouble", substr($this->data, $spos + 6, 8));

                  if ($this->isDate($spos)) {
                      list($string, $raw) = $this->createDate($tmp['double']);
                  } else {
                      if (isset($this->_columnsFormat[$column + 1])) {
                          $this->curformat = $this->_columnsFormat[$column + 1];
                      }
                      $raw = $this->createNumber($spos);
                      $string = sprintf($this->curformat, $raw * $this->multiplier);
                  }
                  $this->addcell($row, $column, $string, $raw);
                  break;

              // 1030
              case SPREADSHEET_EXCEL_READER_TYPE_FORMULA:
              // 6
              case SPREADSHEET_EXCEL_READER_TYPE_FORMULA2:
                  $row = ord($this->data[$spos]) | ord($this->data[$spos+1])<<8;
                  $column = ord($this->data[$spos+2]) | ord($this->data[$spos+3])<<8;

                  // Bytes 12 and 13 both 0xFF mark a non-numeric result whose
                  // kind is in byte 6: 0 = string (the value follows in a
                  // STRING record), 1 = boolean, 2 = error, 3 = empty string.
                  // None of these produce a cell here.
                  $nonNumericResult = (ord($this->data[$spos+12]) == 255)
                      && (ord($this->data[$spos+13]) == 255)
                      && (ord($this->data[$spos+6]) <= 3);

                  if (!$nonNumericResult) {
                      // result is a number, so first 14 bytes are just like a _NUMBER record
                      $tmp = unpack("ddouble", substr($this->data, $spos + 6, 8));
                      if ($this->isDate($spos)) {
                          list($string, $raw) = $this->createDate($tmp['double']);
                      } else {
                          if (isset($this->_columnsFormat[$column + 1])) {
                              $this->curformat = $this->_columnsFormat[$column + 1];
                          }
                          $raw = $this->createNumber($spos);
                          $string = sprintf($this->curformat, $raw * $this->multiplier);
                      }
                      $this->addcell($row, $column, $string, $raw);
                  }
                  break;

              // 517
              case SPREADSHEET_EXCEL_READER_TYPE_BOOLERR:
                  $row = ord($this->data[$spos]) | ord($this->data[$spos+1])<<8;
                  $column = ord($this->data[$spos+2]) | ord($this->data[$spos+3])<<8;
                  $string = ord($this->data[$spos+6]);
                  $this->addcell($row, $column, $string);
                  break;

              // 520, 215, 190: nothing to extract
              case SPREADSHEET_EXCEL_READER_TYPE_ROW:
              case SPREADSHEET_EXCEL_READER_TYPE_DBCELL:
              case SPREADSHEET_EXCEL_READER_TYPE_MULBLANK:
                  break;

              // 516
              case SPREADSHEET_EXCEL_READER_TYPE_LABEL:
                  $row = ord($this->data[$spos]) | ord($this->data[$spos+1])<<8;
                  $column = ord($this->data[$spos+2]) | ord($this->data[$spos+3])<<8;
                  $textLength = ord($this->data[$spos + 6]) | ord($this->data[$spos + 7])<<8;
                  $this->addcell($row, $column, substr($this->data, $spos + 8, $textLength));
                  break;

              // An EOF record never gets here: the low-byte check at the top
              // of the loop has already ended the loop for it.
              default:
                  // every other record type is skipped
                  break;
          }

          $spos += $length;
      }

      if (!isset($this->sheets[$this->sn]['numRows'])) {
          $this->sheets[$this->sn]['numRows'] = $this->sheets[$this->sn]['maxrow'];
      }
      if (!isset($this->sheets[$this->sn]['numCols'])) {
          $this->sheets[$this->sn]['numCols'] = $this->sheets[$this->sn]['maxcol'];
      }
  }
  /**
   * Check whether the current record read is a date
   *
   * Reads the 2-byte XF index stored 4 bytes after $spos, looks up the matching
   * entry of $this->formatRecords['xfrecords'] and, as a side effect, sets
   *  - $this->curformat  (format of the XF entry, or the default format),
   *  - $this->rectype    ('date', 'number' or 'unknown'),
   *  - $this->multiplier (100 for number formats that end in a literal percent
   *    sign, 1 otherwise).
   *
   * @param integer $spos Offset in $this->data; the XF index is read from
   *                      $spos + 4 and $spos + 5
   * @return boolean True if date, false otherwise
   */
  function isDate($spos)
  {
      $xfindex = ord($this->data[$spos+4]) | ord($this->data[$spos+5]) << 8;

      // Start from 1 for every cell: one MULRK record carries several cells,
      // and a percent cell must not leak its factor into the cells after it.
      $this->multiplier = 1;

      // A cell may reference an XF index for which no record was collected.
      $xf = isset($this->formatRecords['xfrecords'][$xfindex])
          ? $this->formatRecords['xfrecords'][$xfindex]
          : null;
      $type = ($xf !== null) ? $xf['type'] : '';

      if ($type == 'date') {
          $this->curformat = $xf['format'];
          $this->rectype = 'date';
          return true;
      }

      if ($type == 'number') {
          $this->curformat = $xf['format'];
          $this->rectype = 'number';
          // The percent formats are recognised by their sprintf() format,
          // which ends in "%%". Comparing the XF index with the format codes
          // 0x9/0xa (as before) mixed up two unrelated numbering schemes.
          if (substr($this->curformat, -2) == '%%') {
              $this->multiplier = 100;
          }
      } else {
          $this->curformat = $this->_defaultFormat;
          $this->rectype = 'unknown';
      }

      return false;
  }
  837. //}}}
  838. //{{{ createDate()
    /**
     * Convert the raw Excel date into a human readable format
     *
     * Dates in Excel are stored as a number of days from an epoch, with the
     * fractional part holding the time of day. On Windows, the epoch is
     * 30/12/1899 and on Mac it's 01/01/1904
     *
     * @access private
     * @param float The raw Excel value to convert
     * @return array First element is the converted date, the second element is
     *               a unix timestamp (or the unchanged raw value when the raw
     *               value is 1 or less, i.e. a time without a date)
     */
  849. function createDate($numValue)
  850. {
  851. if ($numValue > 1) {
  852. $utcDays = $numValue - ($this->nineteenFour ? SPREADSHEET_EXCEL_READER_UTCOFFSETDAYS1904 : SPREADSHEET_EXCEL_READER_UTCOFFSETDAYS);
  853. $utcValue = round(($utcDays+1) * SPREADSHEET_EXCEL_READER_MSINADAY);
  854. $string = date ($this->curformat, $utcValue);
  855. $raw = $utcValue;
  856. } else {
  857. $raw = $numValue;
  858. $hours = floor($numValue * 24);
  859. $mins = floor($numValue * 24 * 60) - $hours * 60;
  860. $secs = floor($numValue * SPREADSHEET_EXCEL_READER_MSINADAY) - $hours * 60 * 60 - $mins * 60;
  861. $string = date ($this->curformat, mktime($hours, $mins, $secs));
  862. }
  863. return array($string, $raw);
  864. }
  865. function createNumber($spos)
  866. {
  867. $rknumhigh = $this->_GetInt4d($this->data, $spos + 10);
  868. $rknumlow = $this->_GetInt4d($this->data, $spos + 6);
  869. //for ($i=0; $i<8; $i++) { echo ord($this->data[$i+$spos+6]) . " "; } echo "<br>";
  870. $sign = ($rknumhigh & 0x80000000) >> 31;
  871. $exp = ($rknumhigh & 0x7ff00000) >> 20;
  872. $mantissa = (0x100000 | ($rknumhigh & 0x000fffff));
  873. $mantissalow1 = ($rknumlow & 0x80000000) >> 31;
  874. $mantissalow2 = ($rknumlow & 0x7fffffff);
  875. $value = $mantissa / pow( 2 , (20- ($exp - 1023)));
  876. if ($mantissalow1 != 0) $value += 1 / pow (2 , (21 - ($exp - 1023)));
  877. $value += $mantissalow2 / pow (2 , (52 - ($exp - 1023)));
  878. //echo "Sign = $sign, Exp = $exp, mantissahighx = $mantissa, mantissalow1 = $mantissalow1, mantissalow2 = $mantissalow2<br>\n";
  879. if ($sign) {$value = -1 * $value;}
  880. return $value;
  881. }
  882. function addcell($row, $col, $string, $raw = '')
  883. {
  884. // echo "ADD cel $row-$col $string\n";
  885. $this->sheets[$this->sn]['maxrow'] = max($this->sheets[$this->sn]['maxrow'], $row + $this->_rowoffset);
  886. $this->sheets[$this->sn]['maxcol'] = max($this->sheets[$this->sn]['maxcol'], $col + $this->_coloffset);
  887. $this->sheets[$this->sn]['cells'][$row + $this->_rowoffset][$col + $this->_coloffset] = $string;
  888. if ($raw)
  889. $this->sheets[$this->sn]['cellsInfo'][$row + $this->_rowoffset][$col + $this->_coloffset]['raw'] = $raw;
  890. if (isset($this->rectype))
  891. $this->sheets[$this->sn]['cellsInfo'][$row + $this->_rowoffset][$col + $this->_coloffset]['type'] = $this->rectype;
  892. }
  893. function _GetIEEE754($rknum)
  894. {
  895. if (($rknum & 0x02) != 0) {
  896. $value = $rknum >> 2;
  897. } else {
  898. //mmp
  899. // first comment out the previously existing 7 lines of code here
  900. // $tmp = unpack("d", pack("VV", 0, ($rknum & 0xfffffffc)));
  901. // //$value = $tmp[''];
  902. // if (array_key_exists(1, $tmp)) {
  903. // $value = $tmp[1];
  904. // } else {
  905. // $value = $tmp[''];
  906. // }
  907. // I got my info on IEEE754 encoding from
  908. // http://research.microsoft.com/~hollasch/cgindex/coding/ieeefloat.html
  909. // The RK format calls for using only the most significant 30 bits of the
  910. // 64 bit floating point value. The other 34 bits are assumed to be 0
  911. // So, we use the upper 30 bits of $rknum as follows...
  912. $sign = ($rknum & 0x80000000) >> 31;
  913. $exp = ($rknum & 0x7ff00000) >> 20;
  914. $mantissa = (0x100000 | ($rknum & 0x000ffffc));
  915. $value = $mantissa / pow( 2 , (20- ($exp - 1023)));
  916. if ($sign) {$value = -1 * $value;}
  917. //end of changes by mmp
  918. }
  919. if (($rknum & 0x01) != 0) {
  920. $value /= 100;
  921. }
  922. return $value;
  923. }
  924. function _encodeUTF16($string)
  925. {
  926. $result = $string;
  927. if ($this->_defaultEncoding){
  928. switch ($this->_encoderFunction){
  929. case 'iconv' : $result = iconv('UTF-16LE', $this->_defaultEncoding, $string);
  930. break;
  931. case 'mb_convert_encoding' : $result = mb_convert_encoding($string, $this->_defaultEncoding, 'UTF-16LE' );
  932. break;
  933. }
  934. }
  935. return $result;
  936. }
  937. function _GetInt4d($data, $pos)
  938. {
  939. $value = ord($data[$pos]) | (ord($data[$pos+1]) << 8) | (ord($data[$pos+2]) << 16) | (ord($data[$pos+3]) << 24);
  940. if ($value>=4294967294)
  941. {
  942. $value=-2;
  943. }
  944. return $value;
  945. }
  946. }
  947. /*
  948. * Local variables:
  949. * tab-width: 4
  950. * c-basic-offset: 4
  951. * c-hanging-comment-ender-p: nil
  952. * End:
  953. */
  954. ?>