PageRenderTime 25ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/application/third_party/PHPExcel/Reader/HTML.php

https://gitlab.com/dmsapiens/physicians
PHP | 478 lines | 291 code | 39 blank | 148 comment | 35 complexity | 0508a2c2a11638e1752305157ad04199 MD5 | raw file
  1. <?php
  2. /**
  3. * PHPExcel
  4. *
  5. * Copyright (c) 2006 - 2012 PHPExcel
  6. *
  7. * This library is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * This library is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with this library; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. *
  21. * @category PHPExcel
  22. * @package PHPExcel_Reader
  23. * @copyright Copyright (c) 2006 - 2012 PHPExcel (http://www.codeplex.com/PHPExcel)
  24. * @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt LGPL
  25. * @version ##VERSION##, ##DATE##
  26. */
  /** PHPExcel root directory */
  if (defined('PHPEXCEL_ROOT') === false) {
      /**
       * @ignore
       */
      define('PHPEXCEL_ROOT', dirname(__FILE__) . '/../../');
      // Load Autoloader.php relative to the root that was just defined
      require PHPEXCEL_ROOT . 'PHPExcel/Autoloader.php';
  }
  35. /**
  36. * PHPExcel_Reader_HTML
  37. *
  38. * @category PHPExcel
  39. * @package PHPExcel_Reader
  40. * @copyright Copyright (c) 2006 - 2012 PHPExcel (http://www.codeplex.com/PHPExcel)
  41. */
  42. class PHPExcel_Reader_HTML extends PHPExcel_Reader_Abstract implements PHPExcel_Reader_IReader
  43. {
  /**
   * Input encoding
   *
   * @var string
   */
  private $_inputEncoding = 'ANSI';

  /**
   * Sheet index to read
   *
   * @var int
   */
  private $_sheetIndex = 0;

  /**
   * Formats
   *
   * Style arrays keyed by HTML tag name. The DOM walker looks entries up by
   * node name and hands them to the cell style's applyFromArray().
   *
   * @var array
   */
  private $_formats = array(
      'h1' => array(
          'font' => array(
              'bold' => true,
              'size' => 24,
          ),
      ), // Bold, 24pt
      'h2' => array(
          'font' => array(
              'bold' => true,
              'size' => 18,
          ),
      ), // Bold, 18pt
      'h3' => array(
          'font' => array(
              'bold' => true,
              'size' => 13.5,
          ),
      ), // Bold, 13.5pt
      'h4' => array(
          'font' => array(
              'bold' => true,
              'size' => 12,
          ),
      ), // Bold, 12pt
      'h5' => array(
          'font' => array(
              'bold' => true,
              'size' => 10,
          ),
      ), // Bold, 10pt
      'h6' => array(
          'font' => array(
              'bold' => true,
              'size' => 7.5,
          ),
      ), // Bold, 7.5pt
      'a' => array(
          'font' => array(
              'underline' => true,
              'color' => array(
                  'argb' => PHPExcel_Style_Color::COLOR_BLUE,
              ),
          ),
      ), // Blue underlined
      'hr' => array(
          'borders' => array(
              'bottom' => array(
                  'style' => PHPExcel_Style_Border::BORDER_THIN,
                  // The colour must be keyed by 'argb' (as in the 'a' entry);
                  // a bare list element is not a recognised colour key.
                  'color' => array(
                      'argb' => PHPExcel_Style_Color::COLOR_BLACK,
                  ),
              ),
          ),
      ), // Bottom border
  );
  /**
   * Create a new PHPExcel_Reader_HTML
   *
   * Installs a PHPExcel_Reader_DefaultReadFilter as this reader's read filter.
   */
  public function __construct()
  {
      $defaultFilter = new PHPExcel_Reader_DefaultReadFilter();
      $this->_readFilter = $defaultFilter;
  }
  /**
   * Can the current PHPExcel_Reader_IReader read the file?
   *
   * Samples the first 2 KB of the file and reports TRUE when that sample
   * contains a '<' character and strip_tags() actually removes something
   * from it, i.e. when it looks like it contains markup.
   *
   * @param string $pFilename Path of the file to inspect
   * @return boolean
   * @throws PHPExcel_Reader_Exception If the file does not exist or cannot be opened
   */
  public function canRead($pFilename)
  {
      // Check if file exists
      if (!file_exists($pFilename)) {
          throw new PHPExcel_Reader_Exception("Could not open " . $pFilename . " for reading! File does not exist.");
      }

      // Read sample data (first 2 KB will do)
      $fh = fopen($pFilename, 'r');
      if ($fh === false) {
          throw new PHPExcel_Reader_Exception("Could not open " . $pFilename . " for reading!");
      }
      $data = fread($fh, 2048);
      fclose($fh);

      // Nothing readable (empty file or failed read) cannot be HTML
      if ($data === false || $data === '') {
          return FALSE;
      }

      // strpos() takes the haystack first: look for '<' inside the sample,
      // then confirm that stripping tags really shortens it.
      if ((strpos($data, '<') !== FALSE) &&
          (strlen($data) !== strlen(strip_tags($data)))) {
          return TRUE;
      }

      return FALSE;
  }
  /**
   * Loads PHPExcel from file
   *
   * Creates a fresh PHPExcel object and delegates to loadIntoExisting().
   *
   * @param string $pFilename
   * @return PHPExcel
   * @throws PHPExcel_Reader_Exception
   */
  public function load($pFilename)
  {
      // Populate a brand-new PHPExcel instance with the file's content
      return $this->loadIntoExisting($pFilename, new PHPExcel());
  }
  /**
   * Set input encoding
   *
   * @param string $pValue Input encoding
   * @return PHPExcel_Reader_HTML This reader, to allow call chaining
   */
  public function setInputEncoding($pValue = 'ANSI')
  {
      $this->_inputEncoding = $pValue;

      return $this;
  }
  /**
   * Get input encoding
   *
   * @return string The encoding last set via setInputEncoding() ('ANSI' by default)
   */
  public function getInputEncoding()
  {
      $encoding = $this->_inputEncoding;

      return $encoding;
  }
  /**
   * Data Array used for testing only, should write to PHPExcel object on completion of tests
   *
   * Filled by _flushCell() as $_dataArray[$row][$column].
   *
   * @var array
   */
  private $_dataArray = array();

  /**
   * Current table nesting depth; 0 means not inside any table
   *
   * @var int
   */
  private $_tableLevel = 0;

  /**
   * Start column recorded for each table nesting level, indexed by level
   *
   * @var array
   */
  private $_nestedColumn = array('A');
  /**
   * Enter a table: bump the nesting level and record its start column
   *
   * @param string $column Column the cursor is in when the table opens
   * @return string Start column recorded for the new nesting level
   */
  private function _setTableStartColumn($column)
  {
      // An outermost table always starts in column A, wherever the cursor is
      $startColumn = ($this->_tableLevel == 0) ? 'A' : $column;

      $this->_tableLevel += 1;
      $this->_nestedColumn[$this->_tableLevel] = $startColumn;

      return $startColumn;
  }
  /**
   * Get the start column recorded for the current table nesting level
   *
   * @return string
   */
  private function _getTableStartColumn()
  {
      $level = $this->_tableLevel;

      return $this->_nestedColumn[$level];
  }
  /**
   * Leave a table: drop one nesting level and pop its recorded start column
   *
   * @return string The column popped from the nesting stack
   */
  private function _releaseTableStartColumn()
  {
      $this->_tableLevel -= 1;
      $releasedColumn = array_pop($this->_nestedColumn);

      return $releasedColumn;
  }
  /**
   * Write the accumulated cell content to the sheet and reset the buffer
   *
   * String content is written only when it is non-empty after trimming.
   * Anything else is treated as a rich text run, which is not implemented
   * yet and is only recorded in the test data array. The buffer is reset to
   * an empty string in every case.
   *
   * @param mixed  $sheet       Worksheet object; must provide setCellValue()
   * @param string $column      Column of the target cell
   * @param int    $row         Row of the target cell
   * @param mixed  $cellContent Content buffer, passed by reference and emptied on return
   */
  private function _flushCell($sheet, $column, $row, &$cellContent)
  {
      if (is_string($cellContent)) {
          // Simple String content
          if (trim($cellContent) !== '') {
              // Only actually write it if there's content in the string.
              // The third argument is kept from the original call (its comment
              // says it makes setCellValue() return the cell); the returned
              // value was never used, so it is no longer stored.
              $sheet->setCellValue($column . $row, $cellContent, true);
              $this->_dataArray[$row][$column] = $cellContent;
          }
      } else {
          // We have a Rich Text run
          // TODO
          $this->_dataArray[$row][$column] = 'RICH TEXT: ' . $cellContent;
      }

      $cellContent = '';
  }
  /**
   * Recursively walk a DOM node's children and translate them into cells
   *
   * Text nodes are whitespace-normalised and appended to the content buffer;
   * element nodes are dispatched on their tag name, moving the row/column
   * cursor and flushing the buffer to the sheet as the structure requires.
   * Unknown tags are simply descended into.
   *
   * @param DOMNode $element     Node whose children are processed
   * @param mixed   $sheet       Worksheet object receiving values, styles, hyperlinks and title
   * @param int     $row         Row cursor, passed by reference and advanced while walking
   * @param string  $column      Column cursor, passed by reference and advanced while walking
   * @param mixed   $cellContent Content buffer for the cell being built, passed by reference
   */
  private function _processDomElement(DOMNode $element, $sheet, &$row, &$column, &$cellContent)
  {
      foreach ($element->childNodes as $child) {
          if ($child instanceof DOMText) {
              // Collapse runs of whitespace to single spaces
              $domText = preg_replace('/\s+/', ' ', trim($child->nodeValue));
              if (is_string($cellContent)) {
                  // simply append the text if the cell content is a plain text string
                  $cellContent .= $domText;
              } else {
                  // but if we have a rich text run instead, we need to append it correctly
                  // TODO
              }
          } elseif ($child instanceof DOMElement) {
              // Collect the element's attributes as name => value
              $attributeArray = array();
              foreach ($child->attributes as $attribute) {
                  $attributeArray[$attribute->name] = $attribute->value;
              }

              switch ($child->nodeName) {
                  case 'meta':
                      foreach ($attributeArray as $attributeName => $attributeValue) {
                          switch ($attributeName) {
                              case 'content':
                                  // TODO
                                  // Extract character set, so we can convert to UTF-8 if required
                                  break;
                          }
                      }
                      $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                      break;

                  case 'title':
                      // The document title becomes the worksheet title
                      $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                      $sheet->setTitle($cellContent);
                      $cellContent = '';
                      break;

                  case 'span':
                  case 'div':
                  case 'font':
                  case 'i':
                  case 'em':
                  case 'strong':
                  case 'b':
                      // Inline/styling containers: keep their text in the same
                      // cell, separated from neighbouring text by a space
                      if ($cellContent > '') {
                          $cellContent .= ' ';
                      }
                      $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                      if ($cellContent > '') {
                          $cellContent .= ' ';
                      }
                      break;

                  case 'hr':
                      $this->_flushCell($sheet, $column, $row, $cellContent);
                      ++$row;
                      if (isset($this->_formats[$child->nodeName])) {
                          $sheet->getStyle($column . $row)->applyFromArray($this->_formats[$child->nodeName]);
                      } else {
                          $cellContent = '----------';
                          $this->_flushCell($sheet, $column, $row, $cellContent);
                      }
                      ++$row;
                      // No break: as in the original, control drops through into
                      // the 'br' handling so a line break follows the rule.
                  case 'br':
                      if ($this->_tableLevel > 0) {
                          // If we're inside a table, replace with a \n
                          $cellContent .= "\n";
                      } else {
                          // Otherwise flush our existing content and move the row cursor on
                          $this->_flushCell($sheet, $column, $row, $cellContent);
                          ++$row;
                      }
                      break;

                  case 'a':
                      if (isset($attributeArray['href'])) {
                          // Attach the link target to the current cell and style it as a link
                          $sheet->getCell($column . $row)->getHyperlink()->setUrl($attributeArray['href']);
                          if (isset($this->_formats[$child->nodeName])) {
                              $sheet->getStyle($column . $row)->applyFromArray($this->_formats[$child->nodeName]);
                          }
                      }
                      $cellContent .= ' ';
                      $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                      break;

                  case 'h1':
                  case 'h2':
                  case 'h3':
                  case 'h4':
                  case 'h5':
                  case 'h6':
                  case 'ol':
                  case 'ul':
                  case 'p':
                      if ($this->_tableLevel > 0) {
                          // If we're inside a table, replace with a \n
                          $cellContent .= "\n";
                          $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                      } else {
                          // Flush anything pending and leave a blank row before the block
                          if ($cellContent > '') {
                              $this->_flushCell($sheet, $column, $row, $cellContent);
                              $row += 2;
                          }
                          $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                          $this->_flushCell($sheet, $column, $row, $cellContent);
                          if (isset($this->_formats[$child->nodeName])) {
                              $sheet->getStyle($column . $row)->applyFromArray($this->_formats[$child->nodeName]);
                          }
                          // Leave a blank row after the block and return to the first column
                          $row += 2;
                          $column = 'A';
                      }
                      break;

                  case 'li':
                      if ($this->_tableLevel > 0) {
                          // If we're inside a table, replace with a \n
                          $cellContent .= "\n";
                          $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                      } else {
                          if ($cellContent > '') {
                              $this->_flushCell($sheet, $column, $row, $cellContent);
                          }
                          // Each list entry goes on its own row
                          ++$row;
                          $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                          $this->_flushCell($sheet, $column, $row, $cellContent);
                          $column = 'A';
                      }
                      break;

                  case 'table':
                      $this->_flushCell($sheet, $column, $row, $cellContent);
                      $column = $this->_setTableStartColumn($column);
                      // Nested table: step back one row before descending
                      // (each 'tr' pre-increments the row)
                      if ($this->_tableLevel > 1) {
                          --$row;
                      }
                      $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                      $column = $this->_releaseTableStartColumn();
                      // The level tested here is the one left after the release above
                      if ($this->_tableLevel > 1) {
                          ++$column;
                      } else {
                          ++$row;
                      }
                      break;

                  case 'thead':
                  case 'tbody':
                      $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                      break;

                  case 'tr':
                      // New table row: next sheet row, back to the table's start column
                      ++$row;
                      $column = $this->_getTableStartColumn();
                      $cellContent = '';
                      $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                      break;

                  case 'th':
                  case 'td':
                      // One table cell maps to one sheet cell, then move a column right
                      $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                      $this->_flushCell($sheet, $column, $row, $cellContent);
                      ++$column;
                      break;

                  case 'body':
                      // Start of the document body: reset the cursor to A1.
                      // NOTE(review): the original also assigned an unused local
                      // `$content = ''` here; it has been removed. Possibly the
                      // intent was to reset $cellContent - confirm.
                      $row = 1;
                      $column = 'A';
                      $this->_tableLevel = 0;
                      $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
                      break;

                  default:
                      $this->_processDomElement($child, $sheet, $row, $column, $cellContent);
              }
          }
      }
  }
  /**
   * Loads PHPExcel from file into PHPExcel instance
   *
   * Ensures the workbook has a sheet at the configured sheet index, makes it
   * the active sheet, parses the file as an HTML DOM document and walks that
   * document to populate the sheet.
   *
   * @param string   $pFilename   Path of the HTML file to read
   * @param PHPExcel $objPHPExcel Workbook to load the content into
   * @return PHPExcel The same workbook instance that was passed in
   * @throws PHPExcel_Reader_Exception If the file is missing, is not a regular
   *                                   file, or cannot be loaded as a DOM document
   */
  public function loadIntoExisting($pFilename, PHPExcel $objPHPExcel)
  {
      // Check if file exists
      if (!file_exists($pFilename)) {
          throw new PHPExcel_Reader_Exception("Could not open " . $pFilename . " for reading! File does not exist.");
      }
      if (!is_file($pFilename)) {
          throw new PHPExcel_Reader_Exception("Could not open " . $pFilename . " for reading! The given file is not a regular file.");
      }

      // Make sure the target sheet exists, creating sheets up to the index if needed
      while ($objPHPExcel->getSheetCount() <= $this->_sheetIndex) {
          $objPHPExcel->createSheet();
      }
      $objPHPExcel->setActiveSheetIndex($this->_sheetIndex);

      // Create a new DOM object
      $dom = new domDocument;
      // Load the HTML file into the DOM object
      $loaded = $dom->loadHTMLFile($pFilename);
      if ($loaded === false) {
          // Build ONE message string: with commas the filename would be passed
          // as the exception code and the rest as the previous exception.
          throw new PHPExcel_Reader_Exception('Failed to load ' . $pFilename . ' as a DOM Document');
      }

      // Discard white space
      $dom->preserveWhiteSpace = false;

      // Walk the document starting before the first row, in column A
      $row = 0;
      $column = 'A';
      $content = '';
      $this->_processDomElement($dom, $objPHPExcel->getActiveSheet(), $row, $column, $content);

      // Return
      return $objPHPExcel;
  }
  /**
   * Get sheet index
   *
   * @return int Index of the sheet that loadIntoExisting() loads into
   */
  public function getSheetIndex()
  {
      $index = $this->_sheetIndex;

      return $index;
  }
  /**
   * Set sheet index
   *
   * @param int $pValue Sheet index
   * @return PHPExcel_Reader_HTML This reader, to allow call chaining
   */
  public function setSheetIndex($pValue = 0)
  {
      $this->_sheetIndex = $pValue;

      return $this;
  }
  439. }