PageRenderTime 130ms CodeModel.GetById 30ms RepoModel.GetById 0ms app.codeStats 0ms

/src/DocBlox/Parser.php

https://github.com/androa/Docblox
PHP | 700 lines | 350 code | 55 blank | 295 comment | 26 complexity | 2305056f20f4f75631d11feae8b9c334 MD5 | raw file
Possible License(s): BSD-3-Clause
  1. <?php
  2. /**
  3. * DocBlox
  4. *
  5. * PHP Version 5
  6. *
  7. * @category DocBlox
  8. * @package Parser
  9. * @author Mike van Riel <mike.vanriel@naenius.com>
  10. * @copyright 2010-2011 Mike van Riel / Naenius (http://www.naenius.com)
  11. * @license http://www.opensource.org/licenses/mit-license.php MIT
  12. * @link http://docblox-project.org
  13. */
  14. /**
  15. * Core class responsible for parsing the given files to a structure.xml file.
  16. *
  17. * @category DocBlox
  18. * @package Parser
  19. * @author Mike van Riel <mike.vanriel@naenius.com>
  20. * @license http://www.opensource.org/licenses/mit-license.php MIT
  21. * @link http://docblox-project.org
  22. */
  23. class DocBlox_Parser extends DocBlox_Core_Abstract
  24. {
  /** @var string title shown in the header of the generated output */
  protected $title = '';

  /**
   * @var string[] patterns of the directories/files that are skipped during
   *               parsing; stored after conversion by convertToPregCompliant()
   */
  protected $ignore_patterns = array();

  /**
   * @var DOMDocument|null a structure.xml that was found at the target
   *                       location, kept for comparison
   */
  protected $existing_xml;

  /**
   * @var bool whether a full re-parse is forced, regardless of whether
   *           existing_xml is set
   */
  protected $force = false;

  /** @var bool whether PHPLint is executed on every file */
  protected $validate = false;

  /** @var string[] the markers (i.e. TODO or FIXME) that are collected */
  protected $markers = array('TODO', 'FIXME');

  /** @var string|null root path of the target location */
  protected $path;

  /**
   * Visibility modifiers that are adhered to when the documentation is
   * generated.
   *
   * @var string[]
   */
  protected $visibility = array('public', 'protected', 'private');
  /**
   * Stores the title of this project.
   *
   * @param string $title The title to use for this project.
   *
   * @return void
   */
  public function setTitle($title)
  {
    $this->title = $title;
  }
  /**
   * Returns the title that was stored for this project.
   *
   * @return null|string the stored title; an empty string by default.
   */
  public function getTitle()
  {
    return $this->title;
  }
  /**
   * Stores whether a full parse run of all files is forced.
   *
   * @param bool $forced True to force a full parse.
   *
   * @return void
   */
  public function setForced($forced)
  {
    $this->force = $forced;
  }
  /**
   * Tells whether a full rebuild is required.
   *
   * A rebuild is required when it was explicitly forced, or when an existing
   * structure is present whose 'version' attribute does not equal the
   * current DocBlox version; the latter case is also written to the log.
   *
   * @return bool
   */
  public function isForced()
  {
    $existing = $this->getExistingXml();

    // only compare versions when a previous structure is available
    $version_changed = ($existing)
      && ($existing->documentElement->getAttribute('version')
        != DocBlox_Core_Abstract::VERSION);

    if ($version_changed) {
      $this->log(
        'Version of DocBlox has changed since the last build; '
        . 'forcing a full re-build'
      );
    }

    return $this->force || $version_changed;
  }
  /**
   * Stores whether PHPLint is run on every file.
   *
   * Running PHPLint has a huge performance impact on the execution of
   * DocBlox, which is why it is disabled by default.
   *
   * @param bool $validate True to have every file checked.
   *
   * @return void
   */
  public function setValidate($validate)
  {
    $this->validate = $validate;
  }
  /**
   * Tells whether PHPLint should be run on every file.
   *
   * @return bool
   */
  public function doValidation()
  {
    return $this->validate;
  }
  /**
   * Replaces the list of markers to gather (i.e. TODO, FIXME).
   *
   * @param string[] $markers The markers to gather.
   *
   * @return void
   */
  public function setMarkers(array $markers)
  {
    $this->markers = $markers;
  }
  /**
   * Returns the markers that are gathered.
   *
   * @return string[]
   */
  public function getMarkers()
  {
    return $this->markers;
  }
  /**
   * Imports an existing XML source to enable incremental parsing.
   *
   * A value that does not start with an XML declaration is treated as the
   * path of a file holding the XML; when that file is not readable an empty
   * document is used instead.
   *
   * @param string|null $xml XML contents (or the path of a file containing
   *                         them) if a source exists, otherwise null.
   *
   * @return void
   */
  public function setExistingXml($xml)
  {
    if ($xml === null) {
      $this->existing_xml = null;
      return;
    }

    // no XML declaration up front: interpret the value as a file location
    if (substr(trim($xml), 0, 5) != '<?xml') {
      if (is_readable($xml)) {
        $xml = file_get_contents($xml);
      } else {
        $xml = '<?xml version="1.0" encoding="utf-8"?><docblox></docblox>';
      }
    }

    $document = new DOMDocument();
    $document->loadXML($xml);
    $this->existing_xml = $document;
  }
  /**
   * Returns the previously imported structure, if any.
   *
   * @return DOMDocument|null
   */
  public function getExistingXml()
  {
    return $this->existing_xml;
  }
  /**
   * Registers a pattern that determines which file(s) or directory(s) are
   * skipped while parsing.
   *
   * The pattern is converted with convertToPregCompliant() before it is
   * stored.
   *
   * @param string $pattern glob-like pattern, supports * and ?
   *
   * @return void
   */
  public function addIgnorePattern($pattern)
  {
    // the conversion takes its argument by reference and rewrites it in place
    $this->convertToPregCompliant($pattern);

    $this->ignore_patterns[] = $pattern;
  }
  /**
   * Replaces all ignore patterns at once.
   *
   * Any pattern registered before is discarded; every given pattern is then
   * added through addIgnorePattern().
   *
   * @param string[] $patterns list of glob-like patterns.
   *
   * @see self::addIgnorePattern()
   *
   * @return void
   */
  public function setIgnorePatterns(array $patterns)
  {
    $this->ignore_patterns = array();

    foreach ($patterns as $glob) {
      $this->addIgnorePattern($glob);
    }
  }
  /**
   * Returns the registered ignore patterns.
   *
   * @return string[]
   */
  public function getIgnorePatterns()
  {
    return $this->ignore_patterns;
  }
  /**
   * Stores the base path of the files that will be parsed.
   *
   * @param string $path The base path; must be an absolute path.
   *
   * @return void
   */
  public function setPath($path)
  {
    $this->path = $path;
  }
  /**
   * Sets the visibility of the methods/properties that should be documented.
   *
   * Whitespace around the individual modifiers is stripped and empty entries
   * (caused by stray commas) are dropped, so that "public, protected" is
   * understood the same way as "public,protected". When nothing usable
   * remains the current setting is left untouched.
   *
   * @param string $visibility Comma separated string of visibility modifiers
   *
   * @return void
   */
  public function setVisibility($visibility)
  {
    if ('' == $visibility) {
      return;
    }

    // array_filter() preserves keys; re-index so the list stays sequential
    $modifiers = array_values(
      array_filter(array_map('trim', explode(',', $visibility)), 'strlen')
    );

    if (count($modifiers) > 0) {
      $this->visibility = $modifiers;
    }
  }
  /**
   * Returns the filename, relative to the root of the project directory.
   *
   * As many leading characters as the project path is long are cut off the
   * filename, after which any leading slashes are removed.
   *
   * @param string $filename The filename to make relative.
   *
   * @throws InvalidArgumentException if nothing remains of the filename after
   *                                  the project path has been stripped.
   *
   * @return string
   */
  public function getRelativeFilename($filename)
  {
    $without_root = substr($filename, strlen($this->path));
    $relative = ltrim($without_root, '/');

    if ($relative !== '') {
      return $relative;
    }

    throw new InvalidArgumentException(
      'File is not present in the given project path: ' . $filename
    );
  }
  /**
   * Runs a file through the static reflectors, generates an XML file element
   * and returns it.
   *
   * Files matching one of the ignore patterns are skipped. When an existing
   * structure is available and no full re-parse is forced, the definition of
   * a file whose path and hash are both found in that structure is re-used
   * instead of processing the file again.
   *
   * @param string $filename The filename to parse.
   *
   * @return string|bool The XML element or false if none could be made.
   */
  function parseFile($filename)
  {
    // check whether this file is ignored; we do this in two steps:
    // 1. Determine whether this is a relative or absolute pattern, if the
    //    pattern does not start with *, ?, / or \ then we assume that it is
    //    a relative one and compare it with the relative filename
    // 2. check whether the pattern matches the filename as given
    foreach ($this->getIgnorePatterns() as $pattern) {
      // an empty pattern has no first character to inspect; skip it
      if ($pattern === '') {
        continue;
      }

      $is_relative = !in_array(
        $pattern[0], array('*', '?', '/', '\\'), true
      );
      $regex = '/^' . $pattern . '$/';

      if (($is_relative
          && preg_match($regex, $this->getRelativeFilename($filename)))
        || preg_match($regex, $filename)
      ) {
        $this->log(
          '-- File "' . $filename . '" matches ignore pattern, skipping'
        );
        return false;
      }
    }

    $this->log('Starting to parse file: ' . $filename);
    $this->debug('Starting to parse file: ' . $filename);
    $this->resetTimer();
    $result = null;

    try {
      $file = new DocBlox_Reflection_File($filename, $this->doValidation());
      $file->setMarkers($this->getMarkers());

      $relative_name = $this->getRelativeFilename($filename);
      $file->setFilename($relative_name);
      $file->setName($relative_name);

      // if an existing structure exists and we do not force re-generation,
      // re-use the old definition when the path and hash still match
      if (($this->getExistingXml() !== null) && (!$this->isForced())) {
        $xpath = new DOMXPath($this->getExistingXml());

        // try to find the file with its hash
        /** @var DOMNodeList|false $qry */
        $qry = $xpath->query(
          '/project/file[@path=\'' . ltrim($file->getName(), './')
          . '\' and @hash=\'' . $file->getHash() . '\']'
        );

        // the query yields false when the expression is malformed (e.g. a
        // file name containing a single quote); such a file is re-parsed
        if (($qry !== false) && ($qry->length > 0)) {
          $new_dom = new DOMDocument('1.0', 'utf-8');
          $new_dom->appendChild($new_dom->importNode($qry->item(0), true));
          $result = $new_dom->saveXML();

          $this->log(
            '>> File has not changed since last build, re-using the '
            . 'old definition'
          );
        }
      }

      // if no result has been obtained; process the file
      if ($result === null) {
        $file->process();
        $result = $file->__toXml();
      }
    } catch (Exception $e) {
      $this->log(
        '>> Unable to parse file, an error was detected: '
        . $e->getMessage(),
        Zend_Log::ALERT
      );
      $this->debug(
        'Unable to parse file "' . $filename . '", an error was detected: '
        . $e->getMessage()
      );
      $result = false;
    }

    $this->debug(
      '>> Memory after processing of file: '
      . number_format(memory_get_usage()) . ' bytes'
    );
    $this->debugTimer('>> Parsed file');

    return $result;
  }
  /**
   * Builds a hierarchical array of namespaces, keyed by their singular name,
   * from a flat list of fully qualified namespace names.
   *
   * The names are sorted and split on the backslash; every part becomes a
   * key whose value is the array holding its sub-namespaces.
   *
   * @param array $namespaces the list of namespaces as retrieved from the xml.
   *
   * @return array
   */
  protected function generateNamespaceTree($namespaces)
  {
    sort($namespaces);

    $tree = array();
    foreach ($namespaces as $full_name) {
      // walk down from the root, creating the levels that are still missing
      $cursor = &$tree;
      foreach (explode('\\', $full_name) as $segment) {
        if (!isset($cursor[$segment])) {
          $cursor[$segment] = array();
        }
        $cursor = &$cursor[$segment];
      }
    }

    return $tree;
  }
  /**
   * Recursively creates a hierarchical set of 'namespace' nodes in the dom.
   *
   * @param array[]    $namespaces     the namespaces to process, keyed by
   *                                   name with their sub-namespaces as value.
   * @param DOMElement $parent_element the node that receives an element for
   *                                   every entry of the above list.
   *
   * @return void
   */
  protected function generateNamespaceElements($namespaces, $parent_element)
  {
    foreach ($namespaces as $name => $children) {
      // attach the element first; the attribute is set once it is in the tree
      $element = new DOMElement('namespace');
      $parent_element->appendChild($element);
      $element->setAttribute('name', $name);

      $this->generateNamespaceElements($children, $element);
    }
  }
  /**
   * Iterates through the given files and builds the structure.xml contents.
   *
   * Every file is parsed with parseFile() and merged into one 'project'
   * document, after which the package, namespace and marker overviews are
   * added and the elements that do not match the configured visibility are
   * removed.
   *
   * @param string[] $files A list of filenames to parse.
   *
   * @return bool|string the resulting XML.
   */
  public function parseFiles(array $files)
  {
    $this->log('Starting to process ' . count($files) . ' files');
    $timer = microtime(true);

    $dom = new DOMDocument('1.0', 'utf-8');
    $dom->formatOutput = true;

    // the title ends up inside an XML attribute and must be XML-escaped;
    // quotes, ampersands and angle brackets would otherwise break the load
    $dom->loadXML(
      '<project version="' . DocBlox_Core_Abstract::VERSION . '" '
      . 'title="'
      . htmlspecialchars((string) $this->getTitle(), ENT_QUOTES, 'UTF-8')
      . '"></project>'
    );

    foreach ($files as $file) {
      $xml = $this->parseFile($file);
      if ($xml === false) {
        continue;
      }

      $dom_file = new DOMDocument();
      $dom_file->loadXML(trim($xml));

      // merge generated XML document into the main document
      $xpath = new DOMXPath($dom_file);
      $qry = $xpath->query('/*');
      for ($i = 0; $i < $qry->length; $i++) {
        $dom->documentElement->appendChild(
          $dom->importNode($qry->item($i), true)
        );
      }
    }

    $this->buildPackageTree($dom);
    $this->buildNamespaceTree($dom);
    $this->buildMarkerList($dom);

    $xml = $dom->saveXML();

    // Visibility rules
    $this->log('--');
    $this->log('Applying visibility rules');

    // serialise and load the document again before filtering it
    $dom = new DOMDocument();
    $dom->loadXML($xml);

    // without any modifier no valid expression can be built; skip filtering
    if (count($this->visibility) > 0) {
      $visibility_rules = array();
      $access_rules = array();
      foreach ($this->visibility as $vis) {
        $visibility_rules[] = '(@visibility!=\'' . $vis . '\')';
        $access_rules[] = '@description!=\'' . $vis . '\'';
      }

      // select everything whose visibility, or whose @access tag, is not
      // one of the allowed modifiers
      $qry = '(//*[' . implode(' and ', $visibility_rules) . ']) | '
        . '(//tag[@name=\'access\' and ('
        . implode(' and ', $access_rules) . ')])';

      $xpath = new DOMXPath($dom);
      $nodes = $xpath->query($qry);

      if ($nodes !== false) {
        foreach ($nodes as $node) {
          if ($node->nodeName == 'tag'
            && $node->parentNode->parentNode->parentNode
          ) {
            // an @access tag: remove the element that owns the docblock
            $remove = $node->parentNode->parentNode;
            $node->parentNode->parentNode->parentNode->removeChild($remove);
          } else {
            $node->parentNode->removeChild($node);
          }
        }
      }
    }

    $xml = $dom->saveXML();

    $this->log('--');
    $this->log(
      'Elapsed time to parse all files: '
      . round(microtime(true) - $timer, 2) . 's'
    );
    $this->log(
      'Peak memory usage: '
      . round(memory_get_peak_usage() / 1024 / 1024, 2) . 'M'
    );

    return $xml;
  }
  /**
   * Collects all packages and subpackages, and appends a 'package' element
   * for each of them to the document root to provide an overview.
   *
   * A package with an empty name is always added. Packages are taken from
   * the 'package' docblock tags of files, classes and interfaces; the
   * subpackages of a package are the 'subpackage' tags found in docblocks
   * carrying that package.
   *
   * @param DOMDocument $dom Packages are extracted from, and the summary is
   *                         inserted into, this object.
   *
   * @return void
   */
  protected function buildPackageTree(DOMDocument $dom)
  {
    // collect all packages and store them in the XML
    $this->log('Collecting all packages');
    $packages = array('' => '');

    // at least insert a default package
    $default_package = new DOMElement('package');
    $dom->documentElement->appendChild($default_package);
    $default_package->setAttribute('name', '');

    $xpath = new DOMXPath($dom);
    $package_tags = $xpath->query(
      '/project/file/class/docblock/tag[@name="package"]'
      . '|/project/file/interface/docblock/tag[@name="package"]'
      . '|/project/file/docblock/tag[@name="package"]'
    );

    foreach ($package_tags as $package_tag) {
      $package_name = $package_tag->attributes
        ->getNamedItem('description')->nodeValue;

      // every package is only handled the first time it is encountered
      if (isset($packages[$package_name])) {
        continue;
      }

      // find all subpackages
      $subpackages = array();
      $subpackage_tags = $xpath->query(
        '//docblock/tag[@name="package" and @description="'
        . $package_name . '"]/../tag[@name="subpackage"]'
      );
      for ($index = 0; $index < $subpackage_tags->length; $index++) {
        $subpackages[] = $subpackage_tags->item($index)->attributes
          ->getNamedItem('description')->nodeValue;
      }
      $packages[$package_name] = array_unique($subpackages);

      // create package XML and subpackages
      $package = new DOMElement('package');
      $dom->documentElement->appendChild($package);
      $package->setAttribute('name', $package_name);
      foreach ($packages[$package_name] as $subpackage) {
        $package->appendChild(new DOMElement('subpackage', $subpackage));
      }
    }
  }
  /**
   * Collects all namespaces and sub-namespaces, and adds nested 'namespace'
   * elements to the document root to provide an overview.
   *
   * The namespaces are the distinct values of every 'namespace' attribute
   * found in the document.
   *
   * @param DOMDocument $dom Namespaces are extracted from, and the summary is
   *                         inserted into, this object.
   *
   * @return void
   */
  protected function buildNamespaceTree(DOMDocument $dom)
  {
    $this->log('Collecting all namespaces');

    $xpath = new DOMXPath($dom);

    // using the value as key makes every namespace occur only once
    $found = array();
    foreach ($xpath->query('//@namespace') as $attribute) {
      $found[$attribute->nodeValue] = true;
    }

    $tree = $this->generateNamespaceTree(array_keys($found));
    $this->generateNamespaceElements($tree, $dom->documentElement);
  }
  /**
   * Adds a 'marker' element for every configured marker type to the document
   * root for easy referencing; the marker name is written in lower case.
   *
   * @param DOMDocument $dom The summary is inserted into this object.
   *
   * @return void
   */
  protected function buildMarkerList(DOMDocument $dom)
  {
    $this->log('Collecting all marker types');

    $root = $dom->documentElement;
    foreach ($this->getMarkers() as $marker) {
      $element = new DOMElement('marker', strtolower($marker));
      $root->appendChild($element);
    }
  }
  /**
   * Converts $string into a string that can be used with preg_match.
   *
   * The wildcards ? and * are translated to their regular expression
   * counterparts; every other character with a special meaning in a regular
   * expression (including grouping, quantifier, anchor and alternation
   * characters) is escaped so that it only matches itself. Forward slashes
   * are replaced with the directory separator of the platform first.
   *
   * A pattern ending in a directory separator is expanded so that it matches
   * that directory anywhere in a path, as well as at its start.
   *
   * The result carries no delimiters; it is meant to be embedded in an
   * expression that uses '/' as delimiter.
   *
   * @param string &$string Glob-like pattern with wildcards ? and *; it is
   *                        replaced with the converted pattern.
   *
   * @author Greg Beaver <cellog@php.net>
   * @author mike van Riel <mike.vanriel@naenius.com>
   *
   * @see PhpDocumentor/phpDocumentor/Io.php
   *
   * @return void
   */
  function convertToPregCompliant(&$string)
  {
    // the directory separator in its escaped, regular expression form
    $separator = (DIRECTORY_SEPARATOR == '\\') ? '\\\\' : '\/';

    $string = str_replace('/', DIRECTORY_SEPARATOR, $string);

    // strtr() with an array never re-scans replaced text, so the escapes
    // that are introduced here are not escaped a second time
    $x = strtr(
      $string,
      array(
        '?' => '.',
        '*' => '.*',
        '.' => '\\.',
        '\\' => '\\\\',
        '/' => '\\/',
        '[' => '\\[',
        ']' => '\\]',
        '-' => '\\-',
        '(' => '\\(',
        ')' => '\\)',
        '{' => '\\{',
        '}' => '\\}',
        '+' => '\\+',
        '^' => '\\^',
        '$' => '\\$',
        '|' => '\\|',
      )
    );

    // a pattern ending in a separator denotes a directory
    if ((strpos($string, DIRECTORY_SEPARATOR) !== false)
      && (strrpos($string, DIRECTORY_SEPARATOR) === strlen($string) - 1)
    ) {
      $x = "(?:.*{$separator}{$x}?.*|{$x}.*)";
    }

    $string = $x;
  }
  /**
   * Finds the common path of all passed paths.
   *
   * Every path is resolved with realpath() once; the first path is then
   * walked part by part for as long as each of the resolved paths starts
   * with the parts gathered so far. The result, when not empty, ends in a
   * directory separator.
   *
   * @param array $paths list of paths to check.
   *
   * @return string the common path; an empty string when no paths are given
   *                or when the paths have nothing in common.
   */
  public function getCommonPath(array $paths)
  {
    if (count($paths) === 0) {
      return '';
    }

    // resolve each path only once; a path that cannot be resolved becomes
    // an empty string and thus never shares a base with the others
    $resolved = array();
    foreach ($paths as $path) {
      $resolved[] = (string) realpath($path);
    }

    $base = '';
    foreach (explode(DIRECTORY_SEPARATOR, $resolved[0]) as $part) {
      $candidate = $base . $part . DIRECTORY_SEPARATOR;
      $length = strlen($candidate);

      foreach ($resolved as $real_path) {
        if (strncmp($real_path, $candidate, $length) !== 0) {
          return $base;
        }
      }

      $base = $candidate;
    }

    return $base;
  }
  618. }