PageRenderTime 51ms CodeModel.GetById 12ms RepoModel.GetById 0ms app.codeStats 1ms

/inc/Excel2MySQL/reader.php

https://bitbucket.org/anak10thn/tcake
PHP | 1088 lines | 594 code | 103 blank | 391 comment | 123 complexity | 00148d4c37ed515d45bfde6cbe8284d1 MD5 | raw file
  1. <?php
  2. /* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
  3. /**
  4. * A class for reading Microsoft Excel Spreadsheets.
  5. *
  6. * Originally developed by Vadim Tkachenko under the name PHPExcelReader.
  7. * (http://sourceforge.net/projects/phpexcelreader)
  8. * Based on the Java version by Andy Khan (http://www.andykhan.com). Now
  9. * maintained by David Sanders. Reads only Biff 7 and Biff 8 formats.
  10. *
  11. * PHP versions 4 and 5
  12. *
  13. * LICENSE: This source file is subject to version 3.0 of the PHP license
  14. * that is available through the world-wide-web at the following URI:
  15. * http://www.php.net/license/3_0.txt. If you did not receive a copy of
  16. * the PHP License and are unable to obtain it through the web, please
  17. * send a note to license@php.net so we can mail you a copy immediately.
  18. *
  19. * @category Spreadsheet
  20. * @package Spreadsheet_Excel_Reader
  21. * @author Vadim Tkachenko <vt@apachephp.com>
  22. * @license http://www.php.net/license/3_0.txt PHP License 3.0
  23. * @version CVS: $Id: reader.php 19 2007-03-13 12:42:41Z shangxiao $
  24. * @link http://pear.php.net/package/Spreadsheet_Excel_Reader
  25. * @see OLE, Spreadsheet_Excel_Writer
  26. */
  27. //require_once 'PEAR.php';
  28. require_once 'OLERead.php';
  29. //require_once 'OLE.php';
  // BIFF versions accepted by the parser (compared against the BOF record).
  define('SPREADSHEET_EXCEL_READER_BIFF7', 0x0500);
  define('SPREADSHEET_EXCEL_READER_BIFF8', 0x0600);

  // Substream types announced by a BOF record.
  define('SPREADSHEET_EXCEL_READER_WORKBOOKGLOBALS', 0x0005);
  define('SPREADSHEET_EXCEL_READER_WORKSHEET', 0x0010);

  // Record type identifiers, listed in ascending opcode order.
  define('SPREADSHEET_EXCEL_READER_TYPE_FORMULA2', 0x0006);
  define('SPREADSHEET_EXCEL_READER_TYPE_EOF', 0x000a);
  define('SPREADSHEET_EXCEL_READER_TYPE_NAME', 0x0018);
  define('SPREADSHEET_EXCEL_READER_TYPE_NOTE', 0x001c);
  define('SPREADSHEET_EXCEL_READER_TYPE_NINETEENFOUR', 0x0022);
  define('SPREADSHEET_EXCEL_READER_TYPE_FILEPASS', 0x002f);
  define('SPREADSHEET_EXCEL_READER_TYPE_CONTINUE', 0x003c);
  define('SPREADSHEET_EXCEL_READER_TYPE_RK', 0x007e);
  define('SPREADSHEET_EXCEL_READER_TYPE_BOUNDSHEET', 0x0085);
  define('SPREADSHEET_EXCEL_READER_TYPE_MULRK', 0x00bd);
  define('SPREADSHEET_EXCEL_READER_TYPE_MULBLANK', 0x00be);
  define('SPREADSHEET_EXCEL_READER_TYPE_DBCELL', 0x00d7);
  define('SPREADSHEET_EXCEL_READER_TYPE_XF', 0x00e0);
  define('SPREADSHEET_EXCEL_READER_TYPE_MERGEDCELLS', 0x00e5);
  define('SPREADSHEET_EXCEL_READER_TYPE_SST', 0x00fc);
  define('SPREADSHEET_EXCEL_READER_TYPE_LABELSST', 0x00fd);
  define('SPREADSHEET_EXCEL_READER_TYPE_EXTSST', 0x00ff);
  define('SPREADSHEET_EXCEL_READER_TYPE_TXO', 0x01b6);
  define('SPREADSHEET_EXCEL_READER_TYPE_DIMENSION', 0x0200);
  define('SPREADSHEET_EXCEL_READER_TYPE_NUMBER', 0x0203);
  define('SPREADSHEET_EXCEL_READER_TYPE_LABEL', 0x0204);
  define('SPREADSHEET_EXCEL_READER_TYPE_BOOLERR', 0x0205);
  define('SPREADSHEET_EXCEL_READER_TYPE_STRING', 0x0207);
  define('SPREADSHEET_EXCEL_READER_TYPE_ROW', 0x0208);
  define('SPREADSHEET_EXCEL_READER_TYPE_INDEX', 0x020b);
  define('SPREADSHEET_EXCEL_READER_TYPE_ARRAY', 0x0221);
  define('SPREADSHEET_EXCEL_READER_TYPE_RK2', 0x027e);
  define('SPREADSHEET_EXCEL_READER_TYPE_FORMULA', 0x0406);
  define('SPREADSHEET_EXCEL_READER_TYPE_FORMAT', 0x041e);
  define('SPREADSHEET_EXCEL_READER_TYPE_BOF', 0x0809);
  define('SPREADSHEET_EXCEL_READER_TYPE_UNKNOWN', 0xffff);

  // Date conversion constants.
  // NOTE(review): the two offsets are presumably the day counts between the
  // Excel epochs (1900 and 1904 systems) and the Unix epoch - confirm
  // against createDate().
  define('SPREADSHEET_EXCEL_READER_UTCOFFSETDAYS', 25569);
  define('SPREADSHEET_EXCEL_READER_UTCOFFSETDAYS1904', 24107);
  // Despite the "MS" in its name the value is 24 * 60 * 60, i.e. 86400.
  define('SPREADSHEET_EXCEL_READER_MSINADAY', 24 * 60 * 60);

  // sprintf() pattern used for numbers that have no usable format record.
  define('SPREADSHEET_EXCEL_READER_DEF_NUM_FORMAT', '%s');
  71. /*
  72. * Place includes, constant defines and $_GLOBAL settings here.
  73. * Make sure they have appropriate docblocks to avoid phpDocumentor
  74. * construing they are documented by the page-level docblock.
  75. */
  76. /**
  77. * A class for reading Microsoft Excel Spreadsheets.
  78. *
  79. * Originally developed by Vadim Tkachenko under the name PHPExcelReader.
  80. * (http://sourceforge.net/projects/phpexcelreader)
  81. * Based on the Java version by Andy Khan (http://www.andykhan.com). Now
  82. * maintained by David Sanders. Reads only Biff 7 and Biff 8 formats.
  83. *
  84. * @category Spreadsheet
  85. * @package Spreadsheet_Excel_Reader
  86. * @author Vadim Tkachenko <vt@phpapache.com>
  87. * @copyright 1997-2005 The PHP Group
  88. * @license http://www.php.net/license/3_0.txt PHP License 3.0
  89. * @version Release: @package_version@
  90. * @link http://pear.php.net/package/PackageName
  91. * @see OLE, Spreadsheet_Excel_Writer
  92. */
  93. class Spreadsheet_Excel_Reader
  94. {
  /**
   * Worksheets announced by the workbook globals stream.
   *
   * _parse() appends one array('name' => ..., 'offset' => ...) entry per
   * BOUNDSHEET record; 'offset' is the position passed to _parsesheet().
   *
   * @var array
   * @access public
   */
  var $boundsheets = array();
  /**
   * Format information collected by _parse().
   *
   * FORMAT strings are stored under their numeric index code; the
   * 'xfrecords' key holds one descriptor array ('type', 'format' and, for
   * type 'other', 'code') per XF record, in file order.
   *
   * @var array
   * @access public
   */
  var $formatRecords = array();
  /**
   * Strings read from the SST record, in file order.
   *
   * LABELSST cells refer to entries of this list by index.
   *
   * @var array
   * @access public
   */
  var $sst = array();
  /**
   * Array of worksheets
   *
   * The data is stored in 'cells' and the meta-data is stored in an array
   * called 'cellsInfo'
   *
   * Example:
   *
   * $sheets --> 'cells' --> row --> column --> Interpreted value
   *         --> 'cellsInfo' --> row --> column --> 'type' - Can be 'date', 'number', or 'unknown'
   *                                            --> 'raw' - The raw data that Excel stores for that data cell
   *
   * _parsesheet() additionally maintains 'maxrow', 'maxcol', 'numRows' and
   * 'numCols' per sheet, and 'rowspan' / 'colspan' inside 'cellsInfo' for
   * merged cells.
   *
   * @var array
   * @access public
   */
  var $sheets = array();
  /**
   * The workbook stream returned by the OLE reader
   *
   * @var string
   * @access public
   */
  var $data;
  /**
   * OLE object for reading the file (an OLERead instance)
   *
   * @var OLERead
   * @access private
   */
  var $_ole;
  /**
   * Output encoding chosen through setOutputEncoding()
   *
   * @var string
   * @access private
   */
  var $_defaultEncoding;
  /**
   * Name of the conversion function selected by setUTFEncoder():
   * 'iconv', 'mb_convert_encoding' or '' when none is available.
   *
   * Declared explicitly so that assigning it does not create a dynamic
   * property (deprecated since PHP 8.2).
   *
   * @var string
   * @access private
   */
  var $_encoderFunction;
  /**
   * Default number format (an sprintf() pattern)
   *
   * @var string
   * @access private
   */
  var $_defaultFormat = SPREADSHEET_EXCEL_READER_DEF_NUM_FORMAT;
  /**
   * sprintf() patterns forced per column through setColumnFormat(),
   * keyed by column number (the parser looks them up with column + 1).
   *
   * @var array
   * @access private
   */
  var $_columnsFormat = array();
  /**
   * Row offset set by setRowColOffset(); 'maxrow' starts at this value - 1.
   *
   * @var integer
   * @access private
   */
  var $_rowoffset = 1;
  /**
   * Column offset set by setRowColOffset(); 'maxcol' starts at this value - 1.
   *
   * @var integer
   * @access private
   */
  var $_coloffset = 1;
  /**
   * Set by _parse() from the NINETEENFOUR record (true when its flag byte
   * is 1). Stays null when the workbook has no such record.
   *
   * @var boolean
   * @access public
   */
  var $nineteenFour;
  /**
   * Key of the worksheet currently being parsed; indexes $sheets.
   *
   * @var integer
   * @access public
   */
  var $sn;
  /**
   * Format selected by isDate() for the record being processed
   * (a date() pattern for dates, an sprintf() pattern otherwise).
   *
   * @var string
   * @access public
   */
  var $curformat;
  /**
   * Type selected by isDate() for the record being processed: 'date',
   * 'number' or 'unknown'. _parsesheet() unsets it before every record.
   *
   * @var string
   * @access public
   */
  var $rectype;
  /**
   * Factor applied to numeric values before formatting; reset to 1 for
   * every record and raised to 100 by isDate() for percentages.
   *
   * @var integer
   * @access public
   */
  var $multiplier;
  /**
   * List of default date formats used by Excel, keyed by format index code
   * and expressed as PHP date() patterns
   *
   * @var array
   * @access public
   */
  var $dateFormats = array (
      0xe => "d/m/Y",
      0xf => "d-M-Y",
      0x10 => "d-M",
      0x11 => "M-Y",
      0x12 => "h:i a",
      0x13 => "h:i:s a",
      0x14 => "H:i",
      0x15 => "H:i:s",
      0x16 => "d/m/Y H:i",
      0x2d => "i:s",
      0x2e => "H:i:s",
      0x2f => "i:s.S");
  /**
   * Default number formats used by Excel, keyed by format index code and
   * expressed as sprintf() patterns (the Excel pattern is quoted beside
   * each entry)
   *
   * NOTE(review): 0x5 carries no '$' although its Excel pattern comment is
   * the same currency pattern as 0x6 - confirm whether that is intended.
   *
   * @var array
   * @access public
   */
  var $numberFormats = array(
      0x1 => "%1.0f", // "0"
      0x2 => "%1.2f", // "0.00",
      0x3 => "%1.0f", //"#,##0",
      0x4 => "%1.2f", //"#,##0.00",
      0x5 => "%1.0f", /*"$#,##0;($#,##0)",*/
      0x6 => '$%1.0f', /*"$#,##0;($#,##0)",*/
      0x7 => '$%1.2f', //"$#,##0.00;($#,##0.00)",
      0x8 => '$%1.2f', //"$#,##0.00;($#,##0.00)",
      0x9 => '%1.0f%%', // "0%"
      0xa => '%1.2f%%', // "0.00%"
      0xb => '%1.2f', // 0.00E00",
      0x25 => '%1.0f', // "#,##0;(#,##0)",
      0x26 => '%1.0f', //"#,##0;(#,##0)",
      0x27 => '%1.2f', //"#,##0.00;(#,##0.00)",
      0x28 => '%1.2f', //"#,##0.00;(#,##0.00)",
      0x29 => '%1.0f', //"#,##0;(#,##0)",
      0x2a => '$%1.0f', //"$#,##0;($#,##0)",
      0x2b => '%1.2f', //"#,##0.00;(#,##0.00)",
      0x2c => '$%1.2f', //"$#,##0.00;($#,##0.00)",
      0x30 => '%1.0f'); //"##0.0E0";
  228. // }}}
  229. // {{{ Spreadsheet_Excel_Reader()
  /**
   * Legacy (PHP 4 style) constructor
   *
   * Kept so that PHP 4 still constructs the object through it and so that
   * subclasses calling parent::Spreadsheet_Excel_Reader() keep working.
   * It only forwards to __construct().
   */
  function Spreadsheet_Excel_Reader()
  {
      $this->__construct();
  }

  /**
   * Constructor
   *
   * Creates the OLE reader and selects iconv as the UTF-16 converter.
   * Plain assignment replaces the former "=& new", which is a parse error
   * from PHP 7 on.
   */
  function __construct()
  {
      $this->_ole = new OLERead();
      $this->setUTFEncoder('iconv');
  }
  240. // }}}
  241. // {{{ setOutputEncoding()
  /**
   * Choose the output encoding
   *
   * The value is only stored here, in $_defaultEncoding.
   *
   * @access public
   * @param string $encoding Name of the encoding to use
   */
  function setOutputEncoding($encoding)
  {
      $this->_defaultEncoding = $encoding;
  }
  252. // }}}
  253. // {{{ setUTFEncoder()
  /**
   * Select the function used to convert UTF-16LE text to the output encoding
   *
   * 'iconv' selects iconv(), 'mb' selects mb_convert_encoding(). When the
   * requested function does not exist, or the name is not recognised, the
   * encoder is set to '' (an empty string).
   *
   * @access public
   * @param string $encoder Encoder to use. Either 'iconv' or 'mb'
   */
  function setUTFEncoder($encoder = 'iconv')
  {
      switch ($encoder) {
          case 'iconv':
              $converter = 'iconv';
              break;
          case 'mb':
              $converter = 'mb_convert_encoding';
              break;
          default:
              $converter = '';
              break;
      }

      if ($converter != '' && !function_exists($converter)) {
          $converter = '';
      }

      $this->_encoderFunction = $converter;
  }
  273. // }}}
  274. // {{{ setRowColOffset()
  /**
   * Set the row offset and the column offset to the same value
   *
   * NOTE(review): presumably the index given to the first row and the
   * first column of every sheet (both default to 1) - confirm against
   * addcell().
   *
   * @access public
   * @param integer $iOffset Offset stored for both rows and columns
   */
  function setRowColOffset($iOffset)
  {
      $this->_coloffset = $this->_rowoffset = $iOffset;
  }
  286. // }}}
  287. // {{{ setDefaultFormat()
  /**
   * Replace the default number format
   *
   * isDate() falls back to this sprintf() pattern for cells whose format
   * record is neither a date nor a known number format.
   *
   * @access public
   * @param string $sFormat sprintf() pattern to use by default
   */
  function setDefaultFormat($sFormat)
  {
      $this->_defaultFormat = $sFormat;
  }
  298. // }}}
  299. // {{{ setColumnFormat()
  /**
   * Force one column to be rendered with a given format
   *
   * The parser looks the entry up with "column index + 1" when it formats
   * a non-date number, so the format of the record itself is overridden.
   *
   * @access public
   * @param integer $column Column number
   * @param string $sFormat sprintf() pattern for that column
   */
  function setColumnFormat($column, $sFormat)
  {
      $this->_columnsFormat[$column] = $sFormat;
  }
  311. // }}}
  312. // {{{ read()
  /**
   * Read the spreadsheet file using OLE, then parse it
   *
   * The OLE reader loads the file, its workbook stream is stored in
   * $this->data and _parse() fills $boundsheets, $sst, $formatRecords and
   * $sheets from it.
   *
   * When the OLE reader reports error code 1 the script is terminated with
   * die(), as it always was; callers that need a softer failure should
   * check the file themselves before calling read().
   *
   * @access public
   * @param string $sFileName Path of the file to read
   * @return boolean True when the workbook was parsed, false when the OLE
   *                 reader failed or the stream could not be parsed
   */
  function read($sFileName)
  {
      $res = $this->_ole->read($sFileName);

      // oops, something goes wrong (Darko Miljanovic)
      if ($res === false) {
          if ($this->_ole->error == 1) {
              // bad file
              die('The filename ' . $sFileName . ' is not readable');
          }
          // Any other failure: there is no workbook stream worth parsing.
          return false;
      }

      $this->data = $this->_ole->getWorkBook();

      // Strict comparison on purpose: only a real "true" counts as success.
      return $this->_parse() === true;
  }
  375. // }}}
  376. // {{{ _parse()
  /**
   * Parse a workbook
   *
   * Walks the workbook globals substream at the start of $this->data and
   * collects the shared strings (SST), the FORMAT and XF records, the 1904
   * date flag and the BOUNDSHEET entries. Every worksheet announced by a
   * BOUNDSHEET record is then handed to _parsesheet().
   *
   * @access private
   * @return bool False when the stream is not a BIFF7/BIFF8 workbook, is
   *              password protected (FILEPASS), is truncated or holds a
   *              broken SST; true otherwise
   */
  function _parse()
  {
      $dataLength = is_string($this->data) ? strlen($this->data) : 0;
      // The leading BOF record is read up to byte 7 (header, version, type).
      if ($dataLength < 8) {
          return false;
      }

      $pos = 0;
      $length = ord($this->data[$pos + 2]) | ord($this->data[$pos + 3]) << 8;
      $version = ord($this->data[$pos + 4]) | ord($this->data[$pos + 5]) << 8;
      $substreamType = ord($this->data[$pos + 6]) | ord($this->data[$pos + 7]) << 8;

      if (($version != SPREADSHEET_EXCEL_READER_BIFF8) &&
          ($version != SPREADSHEET_EXCEL_READER_BIFF7)) {
          return false;
      }
      if ($substreamType != SPREADSHEET_EXCEL_READER_WORKBOOKGLOBALS) {
          return false;
      }

      $pos += $length + 4;
      while (true) {
          // A stream that ends before its EOF record used to loop forever.
          if ($pos + 4 > $dataLength) {
              return false;
          }
          $code = ord($this->data[$pos]) | ord($this->data[$pos + 1]) << 8;
          $length = ord($this->data[$pos + 2]) | ord($this->data[$pos + 3]) << 8;
          if ($code == SPREADSHEET_EXCEL_READER_TYPE_EOF) {
              break;
          }

          switch ($code) {
              case SPREADSHEET_EXCEL_READER_TYPE_SST:
                  if (!$this->_parseSharedStrings($pos, $length)) {
                      return false;
                  }
                  break;

              case SPREADSHEET_EXCEL_READER_TYPE_FILEPASS:
                  // Protected workbook: give up.
                  return false;

              case SPREADSHEET_EXCEL_READER_TYPE_FORMAT:
                  $indexCode = ord($this->data[$pos + 4]) | ord($this->data[$pos + 5]) << 8;
                  if ($version == SPREADSHEET_EXCEL_READER_BIFF8) {
                      $numchars = ord($this->data[$pos + 6]) | ord($this->data[$pos + 7]) << 8;
                      // Flag byte 0: one byte per character, otherwise two.
                      $byteCount = (ord($this->data[$pos + 8]) == 0) ? $numchars : $numchars * 2;
                      $formatString = substr($this->data, $pos + 9, $byteCount);
                      // NOTE(review): a two-byte format string is stored
                      // undecoded, unlike sheet names - confirm intended.
                  } else {
                      // BIFF7 strings are one byte per character with an
                      // 8-bit length; doubling the length read into the
                      // following record.
                      $numchars = ord($this->data[$pos + 6]);
                      $formatString = substr($this->data, $pos + 7, $numchars);
                  }
                  $this->formatRecords[$indexCode] = $formatString;
                  break;

              case SPREADSHEET_EXCEL_READER_TYPE_XF:
                  $this->_parseXfRecord($pos);
                  break;

              case SPREADSHEET_EXCEL_READER_TYPE_NINETEENFOUR:
                  $this->nineteenFour = (ord($this->data[$pos + 4]) == 1);
                  break;

              case SPREADSHEET_EXCEL_READER_TYPE_BOUNDSHEET:
                  $rec_offset = $this->_GetInt4d($this->data, $pos + 4);
                  $rec_length = ord($this->data[$pos + 10]);
                  if ($version == SPREADSHEET_EXCEL_READER_BIFF8) {
                      if (ord($this->data[$pos + 11]) == 0) {
                          $rec_name = substr($this->data, $pos + 12, $rec_length);
                      } else {
                          $rec_name = $this->_encodeUTF16(substr($this->data, $pos + 12, $rec_length * 2));
                      }
                  } else {
                      // Only BIFF7 can get here (checked above).
                      $rec_name = substr($this->data, $pos + 11, $rec_length);
                  }
                  $this->boundsheets[] = array('name' => $rec_name,
                                               'offset' => $rec_offset);
                  break;
          }

          $pos += $length + 4;
      }

      foreach ($this->boundsheets as $key => $val) {
          $this->sn = $key;
          $this->_parsesheet($val['offset']);
      }
      return true;
  }

  /**
   * Read the shared string table (SST record plus its CONTINUE records)
   *
   * Every decoded string is appended to $this->sst. Strings stored with
   * two bytes per character go through _encodeUTF16().
   *
   * @access private
   * @param integer $pos    Position of the SST record header in $this->data
   * @param integer $length Length of the SST record body
   * @return bool False when an expected CONTINUE record is missing or the
   *              data ends early, true otherwise
   */
  function _parseSharedStrings($pos, $length)
  {
      $dataLength = strlen($this->data);
      $spos = $pos + 4;
      $limitpos = $spos + $length;
      $uniqueStrings = $this->_GetInt4d($this->data, $spos + 4);
      $spos += 8;

      for ($i = 0; $i < $uniqueStrings; $i++) {
          if ($spos == $limitpos) {
              // The next string starts in a CONTINUE record.
              if ($spos + 4 > $dataLength) {
                  return false;
              }
              $opcode = ord($this->data[$spos]) | ord($this->data[$spos + 1]) << 8;
              $conlength = ord($this->data[$spos + 2]) | ord($this->data[$spos + 3]) << 8;
              if ($opcode != SPREADSHEET_EXCEL_READER_TYPE_CONTINUE) {
                  return false;
              }
              $spos += 4;
              $limitpos = $spos + $conlength;
          }
          if ($spos + 3 > $dataLength) {
              return false;
          }

          // String header: character count (2 bytes) and option flags (1 byte).
          $numChars = ord($this->data[$spos]) | (ord($this->data[$spos + 1]) << 8);
          $optionFlags = ord($this->data[$spos + 2]);
          $spos += 3;
          $asciiEncoding = (($optionFlags & 0x01) == 0);
          $extendedString = (($optionFlags & 0x04) != 0);
          // See if string contains formatting information
          $richString = (($optionFlags & 0x08) != 0);
          $formattingRuns = 0;
          $extendedRunLength = 0;
          if ($richString) {
              // Read in the crun
              $formattingRuns = ord($this->data[$spos]) | (ord($this->data[$spos + 1]) << 8);
              $spos += 2;
          }
          if ($extendedString) {
              // Read in cchExtRst
              $extendedRunLength = $this->_GetInt4d($this->data, $spos);
              $spos += 4;
          }

          $len = ($asciiEncoding) ? $numChars : $numChars * 2;
          if ($spos + $len < $limitpos) {
              $retstr = substr($this->data, $spos, $len);
              $spos += $len;
          } else {
              // The string reaches the end of this record; the remainder,
              // if any, is in CONTINUE records.
              $retstr = substr($this->data, $spos, $limitpos - $spos);
              $bytesRead = $limitpos - $spos;
              $charsLeft = $numChars - (($asciiEncoding) ? $bytesRead : ($bytesRead / 2));
              $spos = $limitpos;

              while ($charsLeft > 0) {
                  // CONTINUE header (4 bytes) plus the option byte.
                  if ($spos + 5 > $dataLength) {
                      return false;
                  }
                  $opcode = ord($this->data[$spos]) | ord($this->data[$spos + 1]) << 8;
                  $conlength = ord($this->data[$spos + 2]) | ord($this->data[$spos + 3]) << 8;
                  if ($opcode != SPREADSHEET_EXCEL_READER_TYPE_CONTINUE) {
                      return false;
                  }
                  $spos += 4;
                  $limitpos = $spos + $conlength;
                  $option = ord($this->data[$spos]);
                  $spos += 1;

                  if ($asciiEncoding && ($option == 0)) {
                      // One byte per character before and after the break.
                      $len = min($charsLeft, $limitpos - $spos);
                      $retstr .= substr($this->data, $spos, $len);
                      $charsLeft -= $len;
                  } elseif (!$asciiEncoding && ($option != 0)) {
                      // Two bytes per character before and after the break.
                      $len = min($charsLeft * 2, $limitpos - $spos);
                      $retstr .= substr($this->data, $spos, $len);
                      $charsLeft -= $len / 2;
                  } elseif (!$asciiEncoding && ($option == 0)) {
                      // Bummer - the string starts off as Unicode, but after the
                      // continuation it is in straightforward ASCII encoding
                      $len = min($charsLeft, $limitpos - $spos);
                      for ($j = 0; $j < $len; $j++) {
                          $retstr .= $this->data[$spos + $j] . chr(0);
                      }
                      $charsLeft -= $len;
                  } else {
                      // One byte per character so far, two from here on:
                      // widen what was read already. This must append;
                      // plain assignment kept only the last character.
                      $newstr = '';
                      $readSoFar = strlen($retstr);
                      for ($j = 0; $j < $readSoFar; $j++) {
                          $newstr .= $retstr[$j] . chr(0);
                      }
                      $retstr = $newstr;
                      $len = min($charsLeft * 2, $limitpos - $spos);
                      $retstr .= substr($this->data, $spos, $len);
                      $charsLeft -= $len / 2;
                      $asciiEncoding = false;
                  }
                  $spos += $len;
              }
          }

          $retstr = ($asciiEncoding) ? $retstr : $this->_encodeUTF16($retstr);

          // Skip the formatting runs (4 bytes each) of rich strings
          if ($richString) {
              $spos += 4 * $formattingRuns;
          }
          // For extended strings, skip over the extended string data
          if ($extendedString) {
              $spos += $extendedRunLength;
          }

          $this->sst[] = $retstr;
      }

      return true;
  }

  /**
   * Classify one XF record and append the result to
   * $this->formatRecords['xfrecords']
   *
   * The format index code of the record selects a built-in date format, a
   * built-in number format, or a FORMAT record read earlier. A FORMAT
   * string made only of the characters h, m, s, d, a, y, '/', '-', ':' and
   * whitespace is treated as a date format.
   *
   * @access private
   * @param integer $pos Position of the XF record header in $this->data
   * @return void
   */
  function _parseXfRecord($pos)
  {
      $indexCode = ord($this->data[$pos + 6]) | ord($this->data[$pos + 7]) << 8;

      if (array_key_exists($indexCode, $this->dateFormats)) {
          $this->formatRecords['xfrecords'][] = array(
              'type' => 'date',
              'format' => $this->dateFormats[$indexCode]
          );
          return;
      }
      if (array_key_exists($indexCode, $this->numberFormats)) {
          $this->formatRecords['xfrecords'][] = array(
              'type' => 'number',
              'format' => $this->numberFormats[$indexCode]
          );
          return;
      }

      // Start from an empty string for every record: the variable used to
      // survive from one XF record to the next, so an unknown format code
      // inherited the previous record's format string.
      $formatstr = '';
      if ($indexCode > 0 && isset($this->formatRecords[$indexCode])) {
          $formatstr = $this->formatRecords[$indexCode];
      }

      if ($formatstr && preg_match("/[^hmsday\/\-:\s]/i", $formatstr) == 0) {
          // found day and time format
          $formatstr = str_replace('mm', 'i', $formatstr);
          $formatstr = str_replace('h', 'H', $formatstr);
          $this->formatRecords['xfrecords'][] = array(
              'type' => 'date',
              'format' => $formatstr,
          );
          return;
      }

      $this->formatRecords['xfrecords'][] = array(
          'type' => 'other',
          'format' => '',
          'code' => $indexCode
      );
  }
  /**
   * Parse a worksheet
   *
   * Reads the worksheet substream that starts at $spos and stores what it
   * finds under $this->sheets[$this->sn]: cells through addcell(), the
   * sheet dimensions ('numRows', 'numCols') and merged cell spans
   * ('rowspan' / 'colspan' inside 'cellsInfo').
   *
   * @access private
   * @param integer $spos Position of the worksheet BOF record in $this->data
   * @return integer|null -1 when the substream is not BIFF7/BIFF8 (or the
   *                      offset lies outside the data), -2 when it is not
   *                      a worksheet; nothing is returned after a sheet
   *                      has been read
   * @todo fix return codes
   */
  function _parsesheet($spos)
  {
      $dataLength = strlen($this->data);
      // The BOF record is read up to byte 7; refuse offsets that cannot hold it.
      if ($spos < 0 || $spos + 8 > $dataLength) {
          return -1;
      }

      // read BOF
      $length = ord($this->data[$spos + 2]) | ord($this->data[$spos + 3]) << 8;
      $version = ord($this->data[$spos + 4]) | ord($this->data[$spos + 5]) << 8;
      $substreamType = ord($this->data[$spos + 6]) | ord($this->data[$spos + 7]) << 8;

      if (($version != SPREADSHEET_EXCEL_READER_BIFF8) && ($version != SPREADSHEET_EXCEL_READER_BIFF7)) {
          return -1;
      }
      if ($substreamType != SPREADSHEET_EXCEL_READER_WORKSHEET) {
          return -2;
      }
      $spos += $length + 4;

      // Initialise the running maxima once per sheet. They used to be
      // reset for every record, which discarded whatever had been
      // recorded so far, and were left undefined for a sheet without
      // records.
      // NOTE(review): assumes addcell() is what raises these - confirm.
      $this->sheets[$this->sn]['maxrow'] = $this->_rowoffset - 1;
      $this->sheets[$this->sn]['maxcol'] = $this->_coloffset - 1;

      // Stop at the end of the data so a truncated sheet cannot loop forever.
      while ($spos < $dataLength) {
          // Only the low byte of the record code is compared with EOF, as before.
          $lowcode = ord($this->data[$spos]);
          if ($lowcode == SPREADSHEET_EXCEL_READER_TYPE_EOF) {
              break;
          }
          if ($spos + 4 > $dataLength) {
              break;
          }
          $code = $lowcode | ord($this->data[$spos + 1]) << 8;
          $length = ord($this->data[$spos + 2]) | ord($this->data[$spos + 3]) << 8;
          $spos += 4;

          unset($this->rectype);
          $this->multiplier = 1; // need for format with %

          switch ($code) {
              case SPREADSHEET_EXCEL_READER_TYPE_DIMENSION:
                  // NOTE(review): nothing visible here ever sets
                  // $this->numRows, so this test looks always true - confirm.
                  if (!isset($this->numRows)) {
                      if (($length == 10) || ($version == SPREADSHEET_EXCEL_READER_BIFF7)) {
                          $this->sheets[$this->sn]['numRows'] = ord($this->data[$spos + 2]) | ord($this->data[$spos + 3]) << 8;
                          $this->sheets[$this->sn]['numCols'] = ord($this->data[$spos + 6]) | ord($this->data[$spos + 7]) << 8;
                      } else {
                          $this->sheets[$this->sn]['numRows'] = ord($this->data[$spos + 4]) | ord($this->data[$spos + 5]) << 8;
                          $this->sheets[$this->sn]['numCols'] = ord($this->data[$spos + 10]) | ord($this->data[$spos + 11]) << 8;
                      }
                  }
                  break;

              case SPREADSHEET_EXCEL_READER_TYPE_MERGEDCELLS:
                  $cellRanges = ord($this->data[$spos]) | ord($this->data[$spos + 1]) << 8;
                  for ($i = 0; $i < $cellRanges; $i++) {
                      // Each range is 8 bytes: first row, last row, first column, last column.
                      $base = $spos + 8 * $i;
                      $fr = ord($this->data[$base + 2]) | ord($this->data[$base + 3]) << 8;
                      $lr = ord($this->data[$base + 4]) | ord($this->data[$base + 5]) << 8;
                      $fc = ord($this->data[$base + 6]) | ord($this->data[$base + 7]) << 8;
                      $lc = ord($this->data[$base + 8]) | ord($this->data[$base + 9]) << 8;
                      // NOTE(review): keys use a fixed + 1 rather than the
                      // configured row/column offsets - confirm intended.
                      if ($lr - $fr > 0) {
                          $this->sheets[$this->sn]['cellsInfo'][$fr + 1][$fc + 1]['rowspan'] = $lr - $fr + 1;
                      }
                      if ($lc - $fc > 0) {
                          $this->sheets[$this->sn]['cellsInfo'][$fr + 1][$fc + 1]['colspan'] = $lc - $fc + 1;
                      }
                  }
                  break;

              case SPREADSHEET_EXCEL_READER_TYPE_RK:
              case SPREADSHEET_EXCEL_READER_TYPE_RK2:
                  $row = ord($this->data[$spos]) | ord($this->data[$spos + 1]) << 8;
                  $column = ord($this->data[$spos + 2]) | ord($this->data[$spos + 3]) << 8;
                  $rknum = $this->_GetInt4d($this->data, $spos + 6);
                  $this->_addRkCell($spos, $row, $column, $this->_GetIEEE754($rknum));
                  break;

              case SPREADSHEET_EXCEL_READER_TYPE_LABELSST:
                  $row = ord($this->data[$spos]) | ord($this->data[$spos + 1]) << 8;
                  $column = ord($this->data[$spos + 2]) | ord($this->data[$spos + 3]) << 8;
                  $index = $this->_GetInt4d($this->data, $spos + 6);
                  // An index outside the table yields null, without a notice.
                  $this->addcell($row, $column, isset($this->sst[$index]) ? $this->sst[$index] : null);
                  break;

              case SPREADSHEET_EXCEL_READER_TYPE_MULRK:
                  $row = ord($this->data[$spos]) | ord($this->data[$spos + 1]) << 8;
                  $colFirst = ord($this->data[$spos + 2]) | ord($this->data[$spos + 3]) << 8;
                  // The last column index sits in the final two bytes of the record.
                  $colLast = ord($this->data[$spos + $length - 2]) | ord($this->data[$spos + $length - 1]) << 8;
                  $columns = $colLast - $colFirst + 1;
                  // Each entry is 6 bytes: XF index (2) followed by the RK value (4).
                  $tmppos = $spos + 4;
                  for ($i = 0; $i < $columns; $i++) {
                      $numValue = $this->_GetIEEE754($this->_GetInt4d($this->data, $tmppos + 2));
                      // isDate() reads the XF index at its argument + 4.
                      $this->_addRkCell($tmppos - 4, $row, $colFirst + $i, $numValue);
                      $tmppos += 6;
                  }
                  break;

              case SPREADSHEET_EXCEL_READER_TYPE_NUMBER:
                  $row = ord($this->data[$spos]) | ord($this->data[$spos + 1]) << 8;
                  $column = ord($this->data[$spos + 2]) | ord($this->data[$spos + 3]) << 8;
                  $this->_addNumberCell($spos, $row, $column);
                  break;

              case SPREADSHEET_EXCEL_READER_TYPE_FORMULA:
              case SPREADSHEET_EXCEL_READER_TYPE_FORMULA2:
                  $row = ord($this->data[$spos]) | ord($this->data[$spos + 1]) << 8;
                  $column = ord($this->data[$spos + 2]) | ord($this->data[$spos + 3]) << 8;
                  // Bytes 12 and 13 both 0xff with a result type of 0..3 mark
                  // a string, boolean, error or empty result; those are skipped.
                  $resultType = ord($this->data[$spos + 6]);
                  $nonNumeric = ($resultType <= 3)
                      && (ord($this->data[$spos + 12]) == 255)
                      && (ord($this->data[$spos + 13]) == 255);
                  if (!$nonNumeric) {
                      // result is a number, so first 14 bytes are just like a _NUMBER record
                      $this->_addNumberCell($spos, $row, $column);
                  }
                  break;

              case SPREADSHEET_EXCEL_READER_TYPE_BOOLERR:
                  $row = ord($this->data[$spos]) | ord($this->data[$spos + 1]) << 8;
                  $column = ord($this->data[$spos + 2]) | ord($this->data[$spos + 3]) << 8;
                  $string = ord($this->data[$spos + 6]);
                  $this->addcell($row, $column, $string);
                  break;

              case SPREADSHEET_EXCEL_READER_TYPE_LABEL:
                  $row = ord($this->data[$spos]) | ord($this->data[$spos + 1]) << 8;
                  $column = ord($this->data[$spos + 2]) | ord($this->data[$spos + 3]) << 8;
                  $textLength = ord($this->data[$spos + 6]) | ord($this->data[$spos + 7]) << 8;
                  $this->addcell($row, $column, substr($this->data, $spos + 8, $textLength));
                  break;

              default:
                  // ROW, DBCELL, MULBLANK and every other record are ignored.
                  break;
          }

          $spos += $length;
      }

      if (!isset($this->sheets[$this->sn]['numRows'])) {
          $this->sheets[$this->sn]['numRows'] = $this->sheets[$this->sn]['maxrow'];
      }
      if (!isset($this->sheets[$this->sn]['numCols'])) {
          $this->sheets[$this->sn]['numCols'] = $this->sheets[$this->sn]['maxcol'];
      }
  }

  /**
   * Format an already decoded RK value and store it as a cell
   *
   * @access private
   * @param integer $xfPos    Position handed to isDate(), which reads the XF index at $xfPos + 4
   * @param integer $row      Row index taken from the record
   * @param integer $column   Column index taken from the record
   * @param mixed   $numValue Decoded numeric value
   * @return void
   */
  function _addRkCell($xfPos, $row, $column, $numValue)
  {
      if ($this->isDate($xfPos)) {
          list($string, $raw) = $this->createDate($numValue);
      } else {
          $raw = $numValue;
          // A format forced through setColumnFormat() wins over the record's own.
          if (isset($this->_columnsFormat[$column + 1])) {
              $this->curformat = $this->_columnsFormat[$column + 1];
          }
          $string = sprintf($this->curformat, $numValue * $this->multiplier);
      }
      $this->addcell($row, $column, $string, $raw);
  }

  /**
   * Format the 8 byte number of a NUMBER record (or of a FORMULA record
   * with a numeric result) and store it as a cell
   *
   * @access private
   * @param integer $spos   Position of the record body in $this->data
   * @param integer $row    Row index taken from the record
   * @param integer $column Column index taken from the record
   * @return void
   */
  function _addNumberCell($spos, $row, $column)
  {
      if ($this->isDate($spos)) {
          // unpack('d') is machine dependent
          $tmp = unpack("ddouble", substr($this->data, $spos + 6, 8));
          list($string, $raw) = $this->createDate($tmp['double']);
      } else {
          if (isset($this->_columnsFormat[$column + 1])) {
              $this->curformat = $this->_columnsFormat[$column + 1];
          }
          $raw = $this->createNumber($spos);
          $string = sprintf($this->curformat, $raw * $this->multiplier);
      }
      $this->addcell($row, $column, $string, $raw);
  }
  831. /**
  832. * Check whether the current record read is a date
  833. *
  834. * @param integer $spos Offset into $this->data of the record being examined
  835. * @return boolean True if date, false otherwise
  836. */
  function isDate($spos)
  {
      // The XF index is a little-endian 16-bit value stored at offset 4
      // of the record that starts at $spos.
      $lowByte  = ord($this->data[$spos + 4]);
      $highByte = ord($this->data[$spos + 5]);
      $xfindex  = $lowByte | ($highByte << 8);

      $xfRecord = $this->formatRecords['xfrecords'][$xfindex];
      $kind     = $xfRecord['type'];

      // Date cell: remember its format string and report success.
      if ($kind == 'date') {
          $this->curformat = $xfRecord['format'];
          $this->rectype   = 'date';
          return true;
      }

      // Numeric cell: remember its format string as well.
      if ($kind == 'number') {
          $this->curformat = $xfRecord['format'];
          $this->rectype   = 'number';
          // XF indexes 9 and 10 switch the multiplier to 100
          // (presumably percentage formats -- TODO confirm against the XF table).
          if ($xfindex == 0x9 || $xfindex == 0xa) {
              $this->multiplier = 100;
          }
          return false;
      }

      // Anything else falls back to the reader's default format.
      $this->curformat = $this->_defaultFormat;
      $this->rectype   = 'unknown';
      return false;
  }
  861. //}}}
  862. //{{{ createDate()
  863. /**
  864. * Convert the raw Excel date into a human readable format
  865. *
  866. * Dates in Excel are stored as a number of days from an epoch. On
  867. * Windows, the epoch is 30/12/1899 and on Mac it's 01/01/1904.
  868. *
  869. * @access private
  870. * @param integer The raw Excel value to convert
  871. * @return array First element is the formatted date string; the second element is a Unix timestamp, or the unchanged raw value for time-only values (<= 1)
  872. */
  function createDate($numValue)
  {
      if ($numValue > 1) {
          // Full date: subtract the epoch offset matching the workbook's
          // date system (1900 vs 1904), then scale the day count.
          $epochOffset = $this->nineteenFour
              ? SPREADSHEET_EXCEL_READER_UTCOFFSETDAYS1904
              : SPREADSHEET_EXCEL_READER_UTCOFFSETDAYS;
          $shiftedDays = $numValue - $epochOffset;
          // The scaled value is handed to date() as a timestamp and is
          // also returned as the raw value.
          $timestamp = round(($shiftedDays + 1) * SPREADSHEET_EXCEL_READER_MSINADAY);

          return array(date($this->curformat, $timestamp), $timestamp);
      }

      // Values of 1 or less carry only a time of day, expressed as a
      // fraction of a day. Split it into hours, minutes and seconds.
      $dayInHours = $numValue * 24;
      $hours = floor($dayInHours);
      $mins  = floor($dayInHours * 60) - $hours * 60;
      $secs  = floor($numValue * SPREADSHEET_EXCEL_READER_MSINADAY) - $hours * 60 * 60 - $mins * 60;

      // mktime() is given only the time parts, so it fills in the current date.
      $formatted = date($this->curformat, mktime($hours, $mins, $secs));

      // For time-only values the raw element is the unchanged input.
      return array($formatted, $numValue);
  }
  /**
   * Decode the 8-byte IEEE 754 double stored at offset 6 of a record.
   *
   * The value is rebuilt by hand from its two little-endian 32-bit halves
   * (sign, 11-bit exponent biased by 1023, 52-bit mantissa with an implicit
   * leading 1) instead of relying on unpack().
   *
   * @access private
   * @param integer $spos Offset into $this->data of the record being read
   * @return mixed The decoded numeric value
   */
  function createNumber($spos)
  {
      // Low 32 bits sit at offset 6, high 32 bits at offset 10.
      $lowWord  = $this->_GetInt4d($this->data, $spos + 6);
      $highWord = $this->_GetInt4d($this->data, $spos + 10);

      // High word: sign bit, biased exponent, top 20 mantissa bits.
      $negative    = ($highWord & 0x80000000) >> 31;
      $unbiasedExp = (($highWord & 0x7ff00000) >> 20) - 1023;
      $highBits    = 0x100000 | ($highWord & 0x000fffff);

      // Low word: the top bit is handled separately from the remaining 31
      // bits so that each piece stays a non-negative integer.
      $lowTopBit = ($lowWord & 0x80000000) >> 31;
      $lowRest   = $lowWord & 0x7fffffff;

      // Sum the three mantissa pieces, each scaled to its binary weight.
      $value = $highBits / pow(2, 20 - $unbiasedExp);
      if ($lowTopBit != 0) {
          $value += 1 / pow(2, 21 - $unbiasedExp);
      }
      $value += $lowRest / pow(2, 52 - $unbiasedExp);

      return $negative ? -1 * $value : $value;
  }
  /**
   * Store a cell value in the current sheet and update the sheet extents.
   *
   * The row and column are shifted by the reader's configured offsets
   * before being used as array keys. The raw value is recorded whenever
   * the caller supplies one, including a numeric zero; only the
   * empty-string default and null are treated as "no raw value".
   *
   * @access private
   * @param integer $row    Row index as read from the record
   * @param integer $col    Column index as read from the record
   * @param mixed   $string Display value to store in 'cells'
   * @param mixed   $raw    Optional raw value to store in 'cellsInfo'
   * @return void
   */
  function addcell($row, $col, $string, $raw = '')
  {
      $sn       = $this->sn;
      $sheetRow = $row + $this->_rowoffset;
      $sheetCol = $col + $this->_coloffset;

      // Track the furthest row and column seen so far.
      $this->sheets[$sn]['maxrow'] = max($this->sheets[$sn]['maxrow'], $sheetRow);
      $this->sheets[$sn]['maxcol'] = max($this->sheets[$sn]['maxcol'], $sheetCol);

      $this->sheets[$sn]['cells'][$sheetRow][$sheetCol] = $string;

      // A plain truthiness test would discard a legitimate raw value of
      // 0 or 0.0, so compare against the "not supplied" markers instead.
      if ($raw !== '' && $raw !== null) {
          $this->sheets[$sn]['cellsInfo'][$sheetRow][$sheetCol]['raw'] = $raw;
      }

      // Tag the cell with the record type most recently set on the reader.
      if (isset($this->rectype)) {
          $this->sheets[$sn]['cellsInfo'][$sheetRow][$sheetCol]['type'] = $this->rectype;
      }
  }
  /**
   * Decode a 32-bit RK-encoded number.
   *
   * Bit 1 selects the representation: when set, the upper 30 bits are a
   * signed integer; when clear, they are the most significant 30 bits of
   * an IEEE 754 double whose remaining 34 bits are zero. Bit 0, when set,
   * means the decoded value must be divided by 100.
   *
   * @access private
   * @param integer $rknum The 32-bit RK word, signed or unsigned
   * @return mixed The decoded numeric value
   */
  function _GetIEEE754($rknum)
  {
      // On 64-bit PHP the word may arrive as an unsigned value. Fold it
      // back into the signed 32-bit range so the arithmetic shift below
      // yields correct negative integers. On 32-bit builds the comparison
      // can never be true, so this is a no-op there.
      if ($rknum >= 2147483648) {
          $rknum -= 4294967296;
      }

      if (($rknum & 0x02) != 0) {
          // Integer form: drop the two flag bits, keeping the sign.
          $value = $rknum >> 2;
      } else {
          // Floating-point form: rebuild the double from its sign bit,
          // 11-bit biased exponent and the available 18 mantissa bits
          // (plus the implicit leading 1).
          $negative = ($rknum & 0x80000000) >> 31;
          $exponent = ($rknum & 0x7ff00000) >> 20;
          $mantissa = 0x100000 | ($rknum & 0x000ffffc);
          $value = $mantissa / pow(2, 20 - ($exponent - 1023));
          if ($negative) {
              $value = -1 * $value;
          }
      }

      if (($rknum & 0x01) != 0) {
          $value /= 100;
      }

      return $value;
  }
  /**
   * Convert a UTF-16LE string to the reader's default encoding.
   *
   * The string is returned untouched when no default encoding is set or
   * when the configured encoder function is neither 'iconv' nor
   * 'mb_convert_encoding'.
   *
   * @access private
   * @param string $string UTF-16LE encoded input
   * @return mixed Whatever the selected encoder returns, or the input itself
   */
  function _encodeUTF16($string)
  {
      // No target encoding configured: nothing to do.
      if (!$this->_defaultEncoding) {
          return $string;
      }

      $encoder = $this->_encoderFunction;

      if ($encoder == 'iconv') {
          return iconv('UTF-16LE', $this->_defaultEncoding, $string);
      }

      if ($encoder == 'mb_convert_encoding') {
          return mb_convert_encoding($string, $this->_defaultEncoding, 'UTF-16LE');
      }

      // Unknown encoder: pass the input through unchanged.
      return $string;
  }
  /**
   * Read a little-endian signed 32-bit integer from a binary string.
   *
   * On 64-bit PHP the combined bytes form an unsigned value, so any word
   * with its top bit set is folded back into the signed 32-bit range.
   * This keeps results identical to 32-bit builds, where the shift by 24
   * already wraps into the sign bit.
   *
   * @access private
   * @param string  $data Binary data to read from
   * @param integer $pos  Offset of the first (least significant) byte
   * @return integer Value in the range -2147483648 .. 2147483647
   */
  function _GetInt4d($data, $pos)
  {
      $value = ord($data[$pos])
          | (ord($data[$pos + 1]) << 8)
          | (ord($data[$pos + 2]) << 16)
          | (ord($data[$pos + 3]) << 24);

      // Two's-complement sign extension. On 32-bit builds $value can
      // never reach this threshold, so the branch is a no-op there.
      if ($value >= 2147483648) {
          $value -= 4294967296;
      }

      return $value;
  }
  970. }
  971. /*
  972. * Local variables:
  973. * tab-width: 4
  974. * c-basic-offset: 4
  975. * c-hanging-comment-ender-p: nil
  976. * End:
  977. */
  978. ?>