PageRenderTime 45ms CodeModel.GetById 18ms RepoModel.GetById 1ms app.codeStats 0ms

/wwwroot/mediawiki/extensions/SpamBlacklist/SpamBlacklist_body.php

https://github.com/spring/spring-website
PHP | 176 lines | 105 code | 15 blank | 56 comment | 12 complexity | 7b9fd21475bbfafba65606a1678ed370 MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.1, Apache-2.0, LGPL-3.0, BSD-3-Clause
  1. <?php
  // Entry-point guard: this file is only meant to be loaded by MediaWiki,
  // which defines the MEDIAWIKI constant. Stop immediately on direct access.
  if ( defined( 'MEDIAWIKI' ) === false ) {
      exit();
  }
  /**
   * Link filter built on BaseBlacklist.
   *
   * filter() checks the URLs being added to a page against the regexes returned
   * by getBlacklists(), after blanking out everything matched by the regexes
   * returned by getWhitelists(). Hits can optionally be recorded in Special:Log.
   */
  class SpamBlacklist extends BaseBlacklist {

      /**
       * Returns the code for the blacklist implementation.
       *
       * @return string Always 'spam'
       */
      protected function getBlacklistType() {
          return 'spam';
      }

      /**
       * Apply some basic anti-spoofing to a link before it gets filtered,
       * see bug 12896.
       *
       * Currently this only maps U+FF0E FULLWIDTH FULL STOP to an ASCII '.',
       * so that a host name written with the fullwidth dot is matched by the
       * same regexes as the plain spelling.
       *
       * @param string $text
       *
       * @return string $text with fullwidth full stops replaced by '.'
       */
      protected function antiSpoof( $text ) {
          // The character is spelled as its UTF-8 byte sequence on purpose: a
          // literal fullwidth dot is easily normalised to '.' by editors and
          // tooling, which silently turns this replacement into a no-op.
          return str_replace( "\xEF\xBC\x8E", '.', $text );
      }

      /**
       * Check a set of links against the blacklist.
       *
       * @param string[] $links An array of links to check against the blacklist
       * @param Title $title The title of the page to which the filter shall be applied.
       *               This is used to load the old links already on the page, so
       *               the filter is only applied to links that got added. If not given,
       *               the filter is applied to all $links.
       * @param boolean $preventLog Whether to prevent logging of hits. Set to true when
       *               the action is testing the links rather than attempting to save them
       *               (e.g. the API spamblacklist action)
       *
       * @return array|false Matched text(s) if the edit should not be allowed, false otherwise
       */
      public function filter( array $links, Title $title = null, $preventLog = false ) {
          global $wgRequest;

          $fname = 'wfSpamBlacklistFilter';
          wfProfileIn( $fname );

          $blacklists = $this->getBlacklists();
          $whitelists = $this->getWhitelists();

          $matched = false;
          $hits = array();

          if ( count( $blacklists ) ) {
              // poor man's anti-spoof, see bug 12896
              $newLinks = array_map( array( $this, 'antiSpoof' ), $links );

              $oldLinks = array();
              if ( $title !== null ) {
                  // Only links that are not already on the page are checked.
                  $oldLinks = $this->getCurrentLinks( $title );
                  $addedLinks = array_diff( $newLinks, $oldLinks );
              } else {
                  // can't load old links, so treat all links as added.
                  $addedLinks = $newLinks;
              }

              wfDebugLog( 'SpamBlacklist', "Old URLs: " . implode( ', ', $oldLinks ) );
              wfDebugLog( 'SpamBlacklist', "New URLs: " . implode( ', ', $newLinks ) );
              wfDebugLog( 'SpamBlacklist', "Added URLs: " . implode( ', ', $addedLinks ) );

              // From here on the candidate links are one newline-separated string.
              $haystack = implode( "\n", $addedLinks );

              # Strip whitelisted URLs from the match
              if ( is_array( $whitelists ) ) {
                  wfDebugLog( 'SpamBlacklist', "Excluding whitelisted URLs from " . count( $whitelists ) .
                      " regexes: " . implode( ', ', $whitelists ) . "\n" );
                  foreach ( $whitelists as $regex ) {
                      wfSuppressWarnings();
                      $stripped = preg_replace( $regex, '', $haystack );
                      wfRestoreWarnings();
                      // preg_replace() yields a non-string on a regex error; in that
                      // case the broken whitelist entry is ignored.
                      if ( is_string( $stripped ) ) {
                          $haystack = $stripped;
                      }
                  }
              }

              # Do the match
              wfDebugLog( 'SpamBlacklist', "Checking text against " . count( $blacklists ) .
                  " regexes: " . implode( ', ', $blacklists ) . "\n" );
              foreach ( $blacklists as $regex ) {
                  $matches = array();
                  wfSuppressWarnings();
                  $check = ( preg_match_all( $regex, $haystack, $matches ) > 0 );
                  wfRestoreWarnings();
                  if ( !$check ) {
                      // No match (or a regex error, which is deliberately ignored).
                      continue;
                  }

                  wfDebugLog( 'SpamBlacklist', "Match!\n" );
                  $ip = $wgRequest->getIP();
                  $imploded = implode( ' ', $matches[0] );
                  wfDebugLog( 'SpamBlacklistHit', "$ip caught submitting spam: $imploded\n" );
                  if ( !$preventLog ) {
                      $this->logFilterHit( $title, $imploded ); // Log it
                  }

                  // Group 1 is the capture opened by getRegexStart() and closed
                  // by getRegexEnd(): the blacklisted fragment itself.
                  $matched = true;
                  $hits = array_merge( $hits, $matches[1] );
              }
          }

          $retVal = $matched ? array_unique( $hits ) : false;

          wfProfileOut( $fname );
          return $retVal;
      }

      /**
       * Look up the links currently in the article, so we can
       * ignore them on a second run.
       *
       * WARNING: because filter() drops every link that is already on the page,
       * more copies *of the same link* can be added with no problem.
       *
       * @param Title $title
       * @return array Values of externallinks.el_to for the page
       */
      public function getCurrentLinks( $title ) {
          $dbr = wfGetDB( DB_SLAVE );
          $id = $title->getArticleID(); // should be zero queries
          $res = $dbr->select(
              'externallinks',
              array( 'el_to' ),
              array( 'el_from' => $id ),
              __METHOD__
          );

          $links = array();
          foreach ( $res as $row ) {
              $links[] = $row->el_to;
          }
          return $links;
      }

      /**
       * Returns the start of the regex for matches.
       *
       * The fragment ends with an opening parenthesis; the matching closing
       * parenthesis is supplied by getRegexEnd().
       *
       * @return string
       */
      public function getRegexStart() {
          return '/(?:https?:)?\/\/+[a-z0-9_\-.]*(';
      }

      /**
       * Returns the end of the regex for matches.
       *
       * Closes the group opened by getRegexStart() and appends the parent
       * implementation's regex end.
       *
       * @param $batchSize Passed through unchanged to the parent implementation
       * @return string
       */
      public function getRegexEnd( $batchSize ) {
          return ')' . parent::getRegexEnd( $batchSize );
      }

      /**
       * Logs the filter hit to Special:Log if
       * $wgLogSpamBlacklistHits is enabled.
       *
       * Nothing is logged when no Title is available.
       *
       * @param Title|null $title Page the user attempted to edit
       * @param string $url URL that the user attempted to add
       */
      public function logFilterHit( $title, $url ) {
          global $wgUser, $wgLogSpamBlacklistHits;

          if ( !$wgLogSpamBlacklistHits ) {
              return;
          }
          if ( !( $title instanceof Title ) ) {
              // filter() may be called without a title (its $title defaults to
              // null); there is then no page to attach the log entry to, so skip
              // logging rather than passing null to setTarget().
              return;
          }

          $logEntry = new ManualLogEntry( 'spamblacklist', 'hit' );
          $logEntry->setPerformer( $wgUser );
          $logEntry->setTarget( $title );
          $logEntry->setParameters( array(
              '4::url' => $url,
          ) );
          $logid = $logEntry->insert();
          $logEntry->publish( $logid, "rc" );
      }
  }