PageRenderTime 41ms CodeModel.GetById 6ms RepoModel.GetById 0ms app.codeStats 0ms

/classes/PDParser.php

https://github.com/DerManoMann/phpdoccer
PHP | 483 lines | 346 code | 36 blank | 101 comment | 144 complexity | a9e04fed432af0ac805170925e3d3c3b MD5 | raw file
  1. <?php
  2. /**
  3. * A PHP doc parser based on PhpDoctor.
  4. *
  5. * <p>Needs PHP5 to run.</p>
  6. *
  7. * @todo Support for PHP native packages
  8. */
  9. class PDParser {
  10. private $mediator_;
  11. private $logger_;
  12. private $tagFactory_;
  13. private $tokenizer_;
  14. /*
  15. * Parse flags.
  16. */
  17. private $open_curly_braces_;
  18. private $in_parsed_string_;
  19. private $curly_level_;
  20. // current container
  21. private $currentContainer_;
  22. // current doc comment
  23. private $currentDocComment_;
  24. // details extracted from code rather than doc
  25. private $currentCodeInfo_;
  26. // container stack
  27. private $containerStack_;
  28. private $lineNumber_;
  29. /**
  30. * Create new parser.
  31. *
  32. * @param PDmediator mediator A mediator.
  33. */
  34. public function __construct(PDMediator $mediator) {
  35. $this->mediator_ = $mediator;
  36. $this->logger_ = $mediator->getLogger();
  37. $this->tagFactory_ = $mediator->getTagFactory();
  38. }
  39. /**
  40. * Parse a doc comment into a doc comment array.
  41. *
  42. * @param string comment The comment.
  43. * @return array Doc and tag data.
  44. */
  45. protected function parseDocComment($comment) {
  46. if ('/**' != substr(trim($comment), 0, 3)) {
  47. return array();
  48. }
  49. $data = array(
  50. 'docComment' => $comment,
  51. 'tags' => array()
  52. /*
  53. '@package' => null,
  54. '@fieldType' => null,
  55. '@visibility' => null,
  56. '@abstract' => false,
  57. '@final' => false,
  58. '@static' => false
  59. )
  60. */
  61. );
  62. // split into token
  63. $commentToken = preg_split('/[\n|\r][ \r\n\t\/]*\*[ \t]*@/', "\n".$comment);
  64. // match text
  65. preg_match_all('/^[ \t\/*]*\*\/? ?(.*)[ \t\/*]*$/m', array_shift($commentToken), $matches);
  66. if (isset($matches[1])) {
  67. // plain text
  68. $data['tags']['@text'] = array($this->tagFactory_->createTag('@text', trim(join("\n", $matches[1])), $data, $this->mediator_));
  69. }
  70. // process tags
  71. foreach ($commentToken as $tag) {
  72. // strip whitespace and asterix's from beginning
  73. $tag = preg_replace('/(^[\s\n\r\*]+|\s*\*\/$)/m', ' ', $tag);
  74. $tag = preg_replace('/[\r\n]+/', '', $tag);
  75. $pos = strpos($tag, ' ');
  76. if (false !== $pos) {
  77. $name = trim(substr($tag, 0, $pos));
  78. $text = trim(substr($tag, $pos + 1), "\n\r \t");
  79. } else {
  80. $name = $tag;
  81. $text = null;
  82. }
  83. switch ($name) {
  84. case 'package':
  85. // place current element in package
  86. $data['tags']['@package'] = $text;
  87. break;
  88. case 'var':
  89. // set variable type
  90. $data['tags']['@fieldType'] = $text;
  91. break;
  92. case 'access':
  93. // set access permission
  94. $data['tags']['@visibility'] = $text;
  95. break;
  96. case 'final':
  97. // element is final
  98. $data['tags']['@final'] = true;
  99. break;
  100. case 'abstract':
  101. // element is abstract
  102. $data['tags']['@abstract'] = true;
  103. break;
  104. case 'static':
  105. // element is static
  106. $data['tags']['@static'] = true;
  107. break;
  108. default:
  109. // other tag
  110. $tagName = '@'.$name;
  111. if (!array_key_exists($tagName, $data['tags'])) {
  112. $data['tags'][$tagName] = array();
  113. }
  114. $data['tags'][$tagName][] = $this->tagFactory_->createTag($tagName, $text, $data, $this->mediator_);
  115. }
  116. }
  117. return $data;
  118. }
  119. /**
  120. * Create new container.
  121. *
  122. * @param string type The container type.
  123. * @param string name Optional name.
  124. */
  125. protected function createContainer($type, $name=null) {
  126. $class = ucwords($type).'Container';
  127. if (!class_exists($class)) {
  128. require_once dirname(__FILE__) . DIRECTORY_SEPARATOR . 'container'.DIRECTORY_SEPARATOR . $class . '.php';
  129. }
  130. $container = new $class($this->currentDocComment_, $this->currentCodeInfo_);
  131. $container->set('name', $name);
  132. $this->currentDocComment_ = array();
  133. // same defaults as docData
  134. $this->currentCodeInfo_ = array(
  135. '@visibility' => null,
  136. '@abstract' => false,
  137. '@final' => false,
  138. '@static' => false
  139. );
  140. $container->set('filename', $this->tokenizer_->getFilename());
  141. $container->set('lineNumber', $this->lineNumber_);
  142. $container->setMediator($this->mediator_);
  143. return $container;
  144. }
  145. /**
  146. * Reset parser.
  147. */
  148. protected function reset() {
  149. $this->open_curly_braces_ = false;
  150. $this->in_parsed_string_ = false;
  151. $this->curly_level_ = 0;
  152. // current container
  153. $this->currentContainer_ = $this->createContainer('file');
  154. // current doc comment
  155. $this->currentDocComment_ = array();
  156. // details extracted from code rather than doc
  157. $this->currentCodeInfo_ = array();
  158. // container stack
  159. $this->containerStack_ = array();
  160. $this->lineNumber_ = 1;
  161. }
  162. /**
  163. * Get the next token and do some other stuff.
  164. *
  165. * @return mixed A token.
  166. */
  167. protected function nextToken() {
  168. $token = $this->tokenizer_->next();
  169. if (is_array($token)) {
  170. $this->lineNumber_ = $token[2];
  171. }
  172. return $token;
  173. }
  174. /**
  175. * Parse a given unit (file).
  176. *
  177. * @param PDTokenizer tokenizer The tokenizer.
  178. * @return ZMDocContainer The global container for this parser run.
  179. * @todo set filename on containers
  180. * @todo line numbers
  181. * @todo make flags and stacks and stuff class members and have some methods to create container, reset, etc.
  182. */
  183. public function parse(PDTokenizer $tokenizer) {
  184. $this->tokenizer_ = $tokenizer;
  185. $this->reset();
  186. $this->logger_->message('Parsing ...');
  187. while ($tokenizer->hasNext()) {
  188. $token = $this->nextToken();
  189. if (!$this->in_parsed_string_ && is_array($token)) {
  190. switch ($token[0]) {
  191. case T_COMMENT:
  192. // read comment
  193. case T_DOC_COMMENT:
  194. // catch PHP5 doc comment token too
  195. $this->currentDocComment_ = $this->parseDocComment($token[1]);
  196. break;
  197. case T_CLASS:
  198. // read class
  199. if (null != $this->currentContainer_) {
  200. $this->currentContainer_->add($this->currentContainer_);
  201. $this->containerStack_[] = $this->currentContainer_;
  202. }
  203. $this->currentContainer_ = $this->createContainer('class', $tokenizer->peekNext(T_STRING));
  204. break;
  205. case T_INTERFACE:
  206. // read interface
  207. if (null != $this->currentContainer_) {
  208. $this->currentContainer_->add($this->currentContainer_);
  209. $this->containerStack_[] = $this->currentContainer_;
  210. }
  211. $this->currentContainer_ = $this->createContainer('interface', $tokenizer->peekNext(T_STRING));
  212. break;
  213. case T_EXTENDS:
  214. // get extends clause
  215. $this->currentContainer_->set('parentName', $tokenizer->peekNext(T_STRING));
  216. break;
  217. case T_IMPLEMENTS:
  218. // get implements clause
  219. $offset = 0;
  220. while (null !== ($peekToken = $tokenizer->peekOffset(++$offset))) {
  221. if (is_string($peekToken) && '{' == $peekToken) {
  222. break;
  223. }
  224. if ($peekToken[0] == T_STRING) {
  225. $interface = $peekToken[1];
  226. $this->currentContainer_->add('implements', $peekToken[1]);
  227. }
  228. }
  229. break;
  230. case T_THROW:
  231. // do not just assume that exceptions are created via new
  232. if (null !== ($peekToken = $tokenizer->peekNext(array(T_NEW, T_STRING), 0, 2)) && 'new' == $peekToken) {
  233. $this->currentContainer_->add('throws', $tokenizer->peekNext(T_STRING));
  234. }
  235. break;
  236. // the following six need to be stored somewhere
  237. case T_PRIVATE:
  238. $this->currentCodeInfo_['visibility'] = 'private';
  239. break;
  240. case T_PROTECTED:
  241. $this->currentCodeInfo_['visibility'] = 'protected';
  242. break;
  243. case T_PUBLIC:
  244. $this->currentCodeInfo_['visibility'] = 'public';
  245. break;
  246. case T_ABSTRACT:
  247. $this->currentCodeInfo_['abstract'] = true;
  248. break;
  249. case T_FINAL:
  250. $this->currentCodeInfo_['final'] = true;
  251. break;
  252. case T_STATIC:
  253. $this->currentCodeInfo_['static'] = true;
  254. break;
  255. case T_FUNCTION:
  256. // read function
  257. if (null != $this->currentContainer_) {
  258. $nextContainer = $this->createContainer('function', $tokenizer->peekNext(T_STRING));
  259. $this->currentContainer_->add($nextContainer);
  260. $this->containerStack_[] = $this->currentContainer_;
  261. $this->currentContainer_ = $nextContainer;
  262. }
  263. break;
  264. case T_CURLY_OPEN:
  265. case T_DOLLAR_OPEN_CURLY_BRACES:
  266. // we must catch this so we don't accidently step out of the current block
  267. $this->open_curly_braces_ = true;
  268. break;
  269. case T_STRING:
  270. $peekTokenSub1 = $tokenizer->peekOffset(-1);
  271. $peekTokenSub2 = $tokenizer->peekOffset(-2);
  272. $peekTokenAdd2 = $tokenizer->peekOffset(2);
  273. if ('define' == $token[1] && T_CONSTANT_ENCAPSED_STRING == $peekTokenAdd2[0]) {
  274. // read global constant
  275. $newContainer = $this->createContainer('const', trim($peekTokenAdd2[1], '\''));
  276. // skip the token we peeked adhead earlier
  277. $tokenizer->skip(3);
  278. $value = '';
  279. while (';' != ($token = $this->nextToken())) {
  280. $value .= (is_array($token) ? $token[1] : $token);
  281. }
  282. $newContainer->set('value', trim($value, ' ()'));
  283. if (0 < count($this->containerStack_)) {
  284. $this->containerStack_[0]->add($newContainer);
  285. } else {
  286. $this->currentContainer_->add($newContainer);
  287. }
  288. } else if (T_WHITESPACE == $peekTokenSub1[0] && T_CONST == $peekTokenSub2[0]) {
  289. // member constant
  290. unset($value);
  291. do {
  292. $token = $this->nextToken();
  293. if ('=' == $token) {
  294. $value = '';
  295. } else if (',' == $token || ';' == $token) {
  296. $newContainer = $this->createContainer('const', $tokenizer->peekPrev(array(T_VARIABLE, T_STRING)));
  297. $newContainer->set('value', $value);
  298. $newContainer->set('fieldType', 'const');
  299. $this->currentContainer_->add($newContainer);
  300. unset ($value);
  301. } else if (isset($value)) {
  302. // we've hit a '=' before
  303. if (is_array($token)) {
  304. $value .= $token[1];
  305. } else {
  306. $value .= $token;
  307. }
  308. }
  309. } while (';' != $token);
  310. } else if ('function' == $this->currentContainer_->getType() && 1 == $this->curly_level_) {
  311. // function parameter
  312. unset($newContainer);
  313. do {
  314. $token = $this->nextToken();
  315. if (',' == $token || '}' == $token) {
  316. unset($newContainer);
  317. } else if (is_array($token)) {
  318. if (T_VARIABLE == $token[0] && !isset($newContainer)) {
  319. $newContainer = $this->createContainer('parameter', $token[1]);
  320. $this->currentContainer_->add($newContainer);
  321. // is there a type hint?
  322. $offset = 0;
  323. do {
  324. $peekToken = $tokenizer->peekOffset(--$offset);
  325. if (is_array($peekToken) && T_STRING == $peekToken[0]) {
  326. $newContainer->set('typeHint', $peekToken[1]);
  327. }
  328. } while ('(' != $peekToken && ',' != $peekToken);
  329. } else if (isset($newContainer) && (T_STRING == $token[0] || T_CONSTANT_ENCAPSED_STRING == $token[0])) {
  330. // set value
  331. $newContainer->set('defaultValue', $token[1]);
  332. }
  333. }
  334. } while (')' != $token);
  335. // get parent back
  336. $this->currentContainer_ = array_pop($this->containerStack_);
  337. }
  338. break;
  339. case T_VARIABLE:
  340. if ('global' == $this->currentContainer_->getType()) {
  341. // global var
  342. $newContainer = $this->createContainer('field', $token[1]);
  343. // try for var type
  344. $lastToken = $tokenizer->peekOffset(-1);
  345. $secondLastToken = $tokenizer->peekOffset(-1);
  346. if (isset($lastToken[0]) && isset($secondLastToken[0]) && T_STRING == $secondLastToken[0] && T_WHITESPACE == $lastToken[0]) {
  347. $newContainer->set('fieldType', $secondLastToken[1]);
  348. }
  349. // fish for default value
  350. while ($tokenizer->hasNext()) {
  351. $token = $this->nextToken();
  352. if ('=' == $token || ';' == $token) {
  353. break;
  354. }
  355. }
  356. if ('=' == $token) {
  357. $default = '';
  358. $offset = 1;
  359. do {
  360. $peekToken = $tokenizer->peekOffset($offset);
  361. if (is_array($peekToken)) {
  362. if ('=' != $peekToken[1]) {
  363. $default .= $peekToken[1];
  364. }
  365. } else {
  366. if ('=' != $peekToken) {
  367. $default .= $peekToken;
  368. }
  369. }
  370. ++$offset;
  371. } while (isset($peekToken) && ';' != $peekToken && ',' != $peekToken && ')' != $peekToken);
  372. $newContainer->set('fieldDefault', trim($default, ' ()'));
  373. }
  374. $this->currentContainer_->add($newContainer);
  375. } else if (2 > $this->curly_level_) {
  376. // member var
  377. unset($value);
  378. $peekTokenSub1 = $tokenizer->peekOffset(-1);
  379. $peekTokenSub2 = $tokenizer->peekOffset(-2);
  380. do {
  381. $token = $this->nextToken();
  382. if ('=' == $token) {
  383. $value = '';
  384. } else if (',' == $token || ';' == $token) {
  385. $newContainer = $this->createContainer('field', $tokenizer->peekPrev(T_VARIABLE));
  386. if (isset($value)) {
  387. $newContainer->set('defaultValue', trim($value));
  388. }
  389. $this->currentContainer_->add($newContainer);
  390. if (T_WHITESPACE == $peekTokenSub1[0] && T_VAR == $peekTokenSub2[0]) {
  391. $newContainer->set('fieldType', 'var');
  392. }
  393. unset($value);
  394. } else if (isset($value)) {
  395. if (is_array($token)) {
  396. $value .= $token[1];
  397. } else {
  398. $value .= $token;
  399. }
  400. }
  401. } while (';' != $token);
  402. }
  403. break;
  404. } // switch
  405. } else {
  406. // plain text token
  407. switch ($token) {
  408. case '{':
  409. // keep track of blocks to ignore body vars
  410. if (!$this->in_parsed_string_) {
  411. ++$this->curly_level_;
  412. }
  413. break;
  414. case '}':
  415. // keep track of blocks to ignore body vars
  416. if (!$this->in_parsed_string_) {
  417. if ($this->open_curly_braces_) {
  418. $this->open_curly_braces_ = false;
  419. } else {
  420. --$this->curly_level_;
  421. if (0 == $this->curly_level_ && 0 < count($this->containerStack_)) {
  422. $this->logger_->verbose('leaving ' . $this->currentContainer_->get('name'));
  423. array_pop($this->containerStack_);
  424. }
  425. }
  426. }
  427. break;
  428. case '"':
  429. // catch parsed strings so as to ignore tokens within
  430. $this->in_parsed_string_ = !$this->in_parsed_string_;
  431. break;
  432. }
  433. }
  434. }
  435. if (0 < count($this->containerStack_)) {
  436. $fileContainer = array_pop($this->containerStack_);
  437. } else {
  438. $fileContainer = $this->currentContainer_;
  439. }
  440. // make sure we do not lose the last active container
  441. if (null != $this->currentContainer_) {
  442. $fileContainer->add($this->currentContainer_);
  443. }
  444. return $fileContainer;
  445. }
  446. }
  447. ?>