PageRenderTime 25ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/maintenance/findHooks.php

https://bitbucket.org/andersus/querytalogo
PHP | 353 lines | 219 code | 31 blank | 103 comment | 26 complexity | 7cc89b5af9b2eebb68708fd3b9eb4d9d MD5 | raw file
Possible License(s): LGPL-3.0, MPL-2.0-no-copyleft-exception, JSON, MIT, CC0-1.0, BSD-3-Clause, Apache-2.0, BSD-2-Clause, LGPL-2.1, GPL-2.0
  1. <?php
  2. /**
  3. * Simple script that tries to find documented hooks and hooks actually
  4. * in the code, and shows what's missing.
  5. *
  6. * This script assumes that:
  7. * - hook names in hooks.txt are at the beginning of a line and single quoted.
  8. * - hook names in code are the first parameter of wfRunHooks() or Hooks::run().
  9. *
  10. * If the --online option is passed, the script will compare the hooks in the code
  11. * with the ones at https://www.mediawiki.org/wiki/Manual:Hooks
  12. *
  13. * Any instance of wfRunHooks that doesn't meet these assumptions will be noted.
  14. *
  15. * Copyright © Antoine Musso
  16. *
  17. * This program is free software; you can redistribute it and/or modify
  18. * it under the terms of the GNU General Public License as published by
  19. * the Free Software Foundation; either version 2 of the License, or
  20. * (at your option) any later version.
  21. *
  22. * This program is distributed in the hope that it will be useful,
  23. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  24. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  25. * GNU General Public License for more details.
  26. *
  27. * You should have received a copy of the GNU General Public License along
  28. * with this program; if not, write to the Free Software Foundation, Inc.,
  29. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  30. * http://www.gnu.org/copyleft/gpl.html
  31. *
  32. * @file
  33. * @ingroup Maintenance
  34. * @author Antoine Musso <hashar at free dot fr>
  35. */
  36. require_once __DIR__ . '/Maintenance.php';
  37. /**
  38. * Maintenance script that compares documented hooks with the hooks actually present in the code and reports mismatches.
  39. *
  40. * @ingroup Maintenance
  41. */
  42. class FindHooks extends Maintenance {
  43. const FIND_NON_RECURSIVE = 0;
  44. const FIND_RECURSIVE = 1;
  45. /*
  46. * Hooks that are ignored
  47. */
  48. protected static $ignore = [ 'Test' ];
  49. public function __construct() {
  50. parent::__construct();
  51. $this->addDescription( 'Find hooks that are undocumented, missing, or just plain wrong' );
  52. $this->addOption( 'online', 'Check against MediaWiki.org hook documentation' );
  53. }
  54. public function getDbType() {
  55. return Maintenance::DB_NONE;
  56. }
  57. public function execute() {
  58. global $IP;
  59. $documentedHooks = $this->getHooksFromDoc( $IP . '/docs/hooks.txt' );
  60. $potentialHooks = [];
  61. $badHooks = [];
  62. $recurseDirs = [
  63. "$IP/includes/",
  64. "$IP/mw-config/",
  65. "$IP/languages/",
  66. "$IP/maintenance/",
  67. // Omit $IP/tests/phpunit as it contains hook tests that shouldn't be documented
  68. "$IP/tests/parser",
  69. "$IP/tests/phpunit/suites",
  70. ];
  71. $nonRecurseDirs = [
  72. "$IP/",
  73. ];
  74. $extraFiles = [
  75. "$IP/tests/phpunit/MediaWikiTestCase.php",
  76. ];
  77. foreach ( $recurseDirs as $dir ) {
  78. $ret = $this->getHooksFromDir( $dir, self::FIND_RECURSIVE );
  79. $potentialHooks = array_merge( $potentialHooks, $ret['good'] );
  80. $badHooks = array_merge( $badHooks, $ret['bad'] );
  81. }
  82. foreach ( $nonRecurseDirs as $dir ) {
  83. $ret = $this->getHooksFromDir( $dir );
  84. $potentialHooks = array_merge( $potentialHooks, $ret['good'] );
  85. $badHooks = array_merge( $badHooks, $ret['bad'] );
  86. }
  87. foreach ( $extraFiles as $file ) {
  88. $potentialHooks = array_merge( $potentialHooks, $this->getHooksFromFile( $file ) );
  89. $badHooks = array_merge( $badHooks, $this->getBadHooksFromFile( $file ) );
  90. }
  91. $documented = array_keys( $documentedHooks );
  92. $potential = array_keys( $potentialHooks );
  93. $potential = array_unique( $potential );
  94. $badHooks = array_diff( array_unique( $badHooks ), self::$ignore );
  95. $todo = array_diff( $potential, $documented, self::$ignore );
  96. $deprecated = array_diff( $documented, $potential, self::$ignore );
  97. // Check parameter count and references
  98. $badParameterCount = $badParameterReference = [];
  99. foreach ( $potentialHooks as $hook => $args ) {
  100. if ( !isset( $documentedHooks[$hook] ) ) {
  101. // Not documented, but that will also be in $todo
  102. continue;
  103. }
  104. $argsDoc = $documentedHooks[$hook];
  105. if ( $args === 'unknown' || $argsDoc === 'unknown' ) {
  106. // Could not get parameter information
  107. continue;
  108. }
  109. if ( count( $argsDoc ) !== count( $args ) ) {
  110. $badParameterCount[] = $hook . ': Doc: ' . count( $argsDoc ) . ' vs. Code: ' . count( $args );
  111. } else {
  112. // Check if & is equal
  113. foreach ( $argsDoc as $index => $argDoc ) {
  114. $arg = $args[$index];
  115. if ( ( $arg[0] === '&' ) !== ( $argDoc[0] === '&' ) ) {
  116. $badParameterReference[] = $hook . ': References different: Doc: ' . $argDoc .
  117. ' vs. Code: ' . $arg;
  118. }
  119. }
  120. }
  121. }
  122. // Print the results
  123. $this->printArray( 'Undocumented', $todo );
  124. $this->printArray( 'Documented and not found', $deprecated );
  125. $this->printArray( 'Unclear hook calls', $badHooks );
  126. $this->printArray( 'Different parameter count', $badParameterCount );
  127. $this->printArray( 'Different parameter reference', $badParameterReference );
  128. if ( !$todo && !$deprecated && !$badHooks
  129. && !$badParameterCount && !$badParameterReference
  130. ) {
  131. $this->output( "Looks good!\n" );
  132. } else {
  133. $this->error( 'The script finished with errors.', 1 );
  134. }
  135. }
  136. /**
  137. * Get the hook documentation, either locally or from MediaWiki.org
  138. * @param string $doc
  139. * @return array Array: key => hook name; value => array of arguments or string 'unknown'
  140. */
  141. private function getHooksFromDoc( $doc ) {
  142. if ( $this->hasOption( 'online' ) ) {
  143. return $this->getHooksFromOnlineDoc();
  144. } else {
  145. return $this->getHooksFromLocalDoc( $doc );
  146. }
  147. }
  148. /**
  149. * Get hooks from a local file (for example docs/hooks.txt)
  150. * @param string $doc Filename to look in
  151. * @return array Array: key => hook name; value => array of arguments or string 'unknown'
  152. */
  153. private function getHooksFromLocalDoc( $doc ) {
  154. $m = [];
  155. $content = file_get_contents( $doc );
  156. preg_match_all(
  157. "/\n'(.*?)':.*((?:\n.+)*)/",
  158. $content,
  159. $m,
  160. PREG_SET_ORDER
  161. );
  162. // Extract the documented parameter
  163. $hooks = [];
  164. foreach ( $m as $match ) {
  165. $args = [];
  166. if ( isset( $match[2] ) ) {
  167. $n = [];
  168. if ( preg_match_all( "/\n(&?\\$\w+):.+/", $match[2], $n ) ) {
  169. $args = $n[1];
  170. }
  171. }
  172. $hooks[$match[1]] = $args;
  173. }
  174. return $hooks;
  175. }
  176. /**
  177. * Get hooks from www.mediawiki.org using the API
  178. * @return array Array: key => hook name; value => string 'unknown'
  179. */
  180. private function getHooksFromOnlineDoc() {
  181. $allhooks = $this->getHooksFromOnlineDocCategory( 'MediaWiki_hooks' );
  182. $removed = $this->getHooksFromOnlineDocCategory( 'Removed_hooks' );
  183. return array_diff_key( $allhooks, $removed );
  184. }
  185. /**
  186. * @param string $title
  187. * @return array
  188. */
  189. private function getHooksFromOnlineDocCategory( $title ) {
  190. $params = [
  191. 'action' => 'query',
  192. 'list' => 'categorymembers',
  193. 'cmtitle' => "Category:$title",
  194. 'cmlimit' => 500,
  195. 'format' => 'json',
  196. 'continue' => '',
  197. ];
  198. $retval = [];
  199. while ( true ) {
  200. $json = Http::get(
  201. wfAppendQuery( 'http://www.mediawiki.org/w/api.php', $params ),
  202. [],
  203. __METHOD__
  204. );
  205. $data = FormatJson::decode( $json, true );
  206. foreach ( $data['query']['categorymembers'] as $page ) {
  207. if ( preg_match( '/Manual\:Hooks\/([a-zA-Z0-9- :]+)/', $page['title'], $m ) ) {
  208. // parameters are unknown, because that needs parsing of wikitext
  209. $retval[str_replace( ' ', '_', $m[1] )] = 'unknown';
  210. }
  211. }
  212. if ( !isset( $data['continue'] ) ) {
  213. return $retval;
  214. }
  215. $params = array_replace( $params, $data['continue'] );
  216. }
  217. }
  218. /**
  219. * Get hooks from a PHP file
  220. * @param string $filePath Full file path to the PHP file.
  221. * @return array Array: key => hook name; value => array of arguments or string 'unknown'
  222. */
  223. private function getHooksFromFile( $filePath ) {
  224. $content = file_get_contents( $filePath );
  225. $m = [];
  226. preg_match_all(
  227. // All functions which runs hooks
  228. '/(?:wfRunHooks|Hooks\:\:run)\s*\(\s*' .
  229. // First argument is the hook name as string
  230. '([\'"])(.*?)\1' .
  231. // Comma for second argument
  232. '(?:\s*(,))?' .
  233. // Second argument must start with array to be processed
  234. '(?:\s*(?:array\s*\(|\[)' .
  235. // Matching inside array - allows one deep of brackets
  236. '((?:[^\(\)\[\]]|\((?-1)\)|\[(?-1)\])*)' .
  237. // End
  238. '[\)\]])?/',
  239. $content,
  240. $m,
  241. PREG_SET_ORDER
  242. );
  243. // Extract parameter
  244. $hooks = [];
  245. foreach ( $m as $match ) {
  246. $args = [];
  247. if ( isset( $match[4] ) ) {
  248. $n = [];
  249. if ( preg_match_all( '/((?:[^,\(\)]|\([^\(\)]*\))+)/', $match[4], $n ) ) {
  250. $args = array_map( 'trim', $n[1] );
  251. // remove empty entries from trailing spaces
  252. $args = array_filter( $args );
  253. }
  254. } elseif ( isset( $match[3] ) ) {
  255. // Found a parameter for Hooks::run,
  256. // but could not extract the hooks argument,
  257. // because there are given by a variable
  258. $args = 'unknown';
  259. }
  260. $hooks[$match[2]] = $args;
  261. }
  262. return $hooks;
  263. }
  264. /**
  265. * Get bad hooks (where the hook name could not be determined) from a PHP file
  266. * @param string $filePath Full filename to the PHP file.
  267. * @return array Array of bad wfRunHooks() lines
  268. */
  269. private function getBadHooksFromFile( $filePath ) {
  270. $content = file_get_contents( $filePath );
  271. $m = [];
  272. // We want to skip the "function wfRunHooks()" one. :)
  273. preg_match_all( '/(?<!function )wfRunHooks\(\s*[^\s\'"].*/', $content, $m );
  274. $list = [];
  275. foreach ( $m[0] as $match ) {
  276. $list[] = $match . "(" . $filePath . ")";
  277. }
  278. return $list;
  279. }
  280. /**
  281. * Get hooks from a directory of PHP files.
  282. * @param string $dir Directory path to start at
  283. * @param int $recursive Pass self::FIND_RECURSIVE
  284. * @return array Array: key => hook name; value => array of arguments or string 'unknown'
  285. */
  286. private function getHooksFromDir( $dir, $recurse = 0 ) {
  287. $good = [];
  288. $bad = [];
  289. if ( $recurse === self::FIND_RECURSIVE ) {
  290. $iterator = new RecursiveIteratorIterator(
  291. new RecursiveDirectoryIterator( $dir, RecursiveDirectoryIterator::SKIP_DOTS ),
  292. RecursiveIteratorIterator::SELF_FIRST
  293. );
  294. } else {
  295. $iterator = new DirectoryIterator( $dir );
  296. }
  297. foreach ( $iterator as $info ) {
  298. // Ignore directories, work only on php files,
  299. if ( $info->isFile() && in_array( $info->getExtension(), [ 'php', 'inc' ] )
  300. // Skip this file as it contains text that looks like a bad wfRunHooks() call
  301. && $info->getRealPath() !== __FILE__
  302. ) {
  303. $good = array_merge( $good, $this->getHooksFromFile( $info->getRealPath() ) );
  304. $bad = array_merge( $bad, $this->getBadHooksFromFile( $info->getRealPath() ) );
  305. }
  306. }
  307. return [ 'good' => $good, 'bad' => $bad ];
  308. }
  309. /**
  310. * Nicely sort an print an array
  311. * @param string $msg A message to show before the value
  312. * @param array $arr
  313. */
  314. private function printArray( $msg, $arr ) {
  315. asort( $arr );
  316. foreach ( $arr as $v ) {
  317. $this->output( "$msg: $v\n" );
  318. }
  319. }
  320. }
  // Name the class defined in this file, then include the file named by
  // RUN_MAINTENANCE_IF_MAIN. That constant is defined outside this file;
  // presumably it points at the runner that executes $maintClass when this
  // script is the entry point. TODO confirm in Maintenance.php.
  321. $maintClass = 'FindHooks';
  322. require_once RUN_MAINTENANCE_IF_MAIN;