PageRenderTime 59ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/classes/page.php

https://github.com/Dispositif/addbot
PHP | 882 lines | 643 code | 56 blank | 183 comment | 116 complexity | c65eab7c9d3396ae75775a070cc6b1b9 MD5 | raw file
  1. <?
  2. require 'parser.php';
  3. class Page {
  /**
   * Build a page object and immediately load its wikitext from the wiki.
   *
   * @param string $page Page title; underscores are converted to spaces.
   * @param object $wiki Wiki API wrapper (getpage() is called during construction).
   */
  public function __construct($page,$wiki) {
      $this->page = str_replace("_"," ",$page);
      $this->wiki = $wiki;
      $this->parseNamespace();
      $this->loadText();//load the wikitext from page
      // The original statement here was a bare "$this->hadMI;" which does nothing;
      // null means "not checked yet" and is what multipleIssues() tests for.
      $this->hadMI = null;
      $this->runMI = 0; //the number of times MI has been run
      $this->skip = false;
      // Start with an empty summary / no significant change so later string and
      // regex operations never receive null.
      $this->summary = "";
      $this->sigchange = false;
  }
  14. // instance state
  15. private $page;// page title with underscores replaced by spaces (e.g. "User:Addshore")
  16. private $text;// current wikitext; fetched by loadText() and rewritten by the fix*/add*/remove* methods
  17. private $checktext; //copy of the fetched text with <nowiki> blocks and HTML comments removed, used for matching only
  18. private $namespace;// namespace prefix without the colon; "" means the article namespace
  19. private $wiki;// wiki API wrapper passed to the constructor
  20. private $parser;// parser instance created by parse()
  21. private $parsed;// result returned by the parser's parse() call
  22. private $sigchange;//set to true by addSummary() when a change is significant enough to edit
  23. private $summary;//edit summary fragments accumulated by addSummary()
  24. private $hadMI; //did the page have an MI tag when multipleIssues() first looked (null = not checked yet)
  25. private $runMI; //the number of times multipleIssues() has been run
  26. public $skip; //set to true by multipleIssues() when it finds a hatnote whose raw code is shorter than 4 characters
  // accessors

  // page title (underscores already replaced by spaces)
  public function getName()
  {
      return $this->page;
  }

  // current wikitext
  public function getText()
  {
      return $this->text;
  }

  // wikitext with <nowiki> blocks and HTML comments stripped
  public function getcheckText()
  {
      return $this->checktext;
  }

  // replace the current wikitext
  public function setText($text)
  {
      $this->text = $text;
  }

  // namespace prefix without colon, computed on demand if not set yet
  public function getNamespace()
  {
      if (isset($this->namespace)) {
          return $this->namespace;
      }
      $this->parseNamespace();
      return $this->namespace;
  }

  // has a significant change been recorded via addSummary()?
  public function hasSigchange()
  {
      return $this->sigchange;
  }
  34. // public functions
  // Run a fresh parser over the current title and text, remember both the
  // parser and its output, and hand the output back to the caller.
  public function parse()
  {
      $this->parser = new parser($this->getName(),$this->getText());
      $result = $this->parser->parse();
      $this->parsed = $result;
      return $this->parsed;
  }
  37. // private functions
  // Fetch the page's wikitext from the wiki and store two copies:
  // the raw text, and a "check" copy with <nowiki> blocks and comments removed.
  private function loadText()
  {
      $fetched = $this->wiki->getpage($this->getName());
      $this->checktext = preg_replace("/(<nowiki>.*?<\/nowiki>|<!--.*?-->)/is","",$fetched);
      $this->text = $fetched;
  }
  // Save the current text back to the wiki using the accumulated edit summary.
  private function postPage()
  {
      $title = $this->getName();
      $body = $this->getText();
      $summary = $this->getSummary();
      $this->wiki->edit($title,$body,$summary,true);
  }
  /**
   * Derive the namespace from the page title and store it in $this->namespace.
   *
   * The prefix must be followed by a colon. The original pattern made the colon
   * optional, so article titles such as "Talking Heads" or "Help!" were
   * classified as Talk / Help pages. Titles without a recognised prefix get ""
   * (the article namespace). The legacy "Image" prefix is reported as "File".
   */
  private function parseNamespace()
  {
      $found = preg_match("/^((Talk|User|Wikipedia|File|Image|Mediawiki|Template|Help|Category|Portal|Book|Education( |_)program|TimedText)(( |_)talk)?):/i",$this->page,$matches);
      // no match (or a regex failure) => article namespace
      $this->namespace = ($found === 1) ? $matches[1] : "";
      // the match is case-insensitive, so compare the alias the same way
      if(strcasecmp($this->namespace,"Image") === 0)
      {
          $this->namespace = "File";
      }
  }
  /**
   * Add a fragment to the edit summary.
   *
   * The first word of $sum (e.g. "Adding", "Removing", "Dating") acts as a
   * group key: if that word is not in the summary yet the fragment is appended,
   * otherwise the first occurrence of the word is replaced by the whole
   * fragment, which inserts the new item at the front of the existing group.
   *
   * Plain string functions are used instead of building a regex from $sum:
   * the original interpolated the first word into a pattern unescaped and
   * used $sum as a regex replacement, so characters such as "[", "/" or "$1"
   * in a summary corrupted the result.
   *
   * @param string $sum Summary fragment.
   * @param bool   $sig When true, mark the page as significantly changed.
   */
  public function addSummary($sum,$sig)
  {
      $current = (string)$this->summary; // the summary may still be null before the first call
      $parts = explode(" ",$sum,2);
      $verb = $parts[0];
      // an empty key can never be "found"; just append in that case
      $pos = ($verb === "") ? false : stripos($current,$verb);
      if($pos === false)
      {
          $this->summary = $current.$sum." ";
      }
      else
      {
          $this->summary = substr_replace($current,$sum,$pos,strlen($verb));
      }
      if($sig)
      {
          $this->sigchange = true;
      }
  }
  // Wrap the accumulated summary fragments in the bot's standard prefix and
  // error-report suffix.
  public function getSummary()
  {
      return sprintf("[[User:Addbot|Bot:]] %s([[User talk:Addbot|Report Errors]])",$this->summary);
  }
  77. // //
  78. // Main bot functions //
  79. // //
  // Convenience wrapper: run $regex against the current page text and return
  // preg_match()'s result unchanged.
  public function matches($regex)
  {
      $subject = $this->getText();
      return preg_match($regex,$subject);
  }
  /**
   * Return a rough word count for the article body.
   *
   * Sections titled External links / References / Notes / See also /
   * Bibliography are dropped first, then templates, headings, list bullets,
   * external links, category/file/interwiki links, references, comments and
   * tables are stripped, and each remaining wikilink counts as one word.
   *
   * Fixes over the original:
   *  - both headers are passed through preg_quote() with the "/" delimiter
   *    (a header containing "/" used to invalidate the pattern and null the text);
   *  - the following header is matched with a lookahead instead of being
   *    consumed, so two unwanted sections in a row are both removed.
   *
   * @return int estimated number of words
   */
  public function wordcount()
  {
      //get a temp copy of the text to work with
      $text = $this->getText();
      //find every section header
      preg_match_all('/(={2,7})([^=]+)\1/',$text,$sections);
      foreach($sections[0] as $key => $header)
      {
          //only sections we do not want to count are removed
          if(!preg_match('/(External links?|References?|Notes?|See also|Bibliography)/i',$sections[2][$key]))
          {
              continue;
          }
          if(isset($sections[0][$key+1]))
          {
              //cut from this header up to (but not including) the next header
              $text = preg_replace('/'.preg_quote($header,'/').'.*?(?='.preg_quote($sections[0][$key+1],'/').')/is',"",$text);
          }
          else
          {
              //last section: keep only what precedes the header
              $parts = explode($header,$text);
              $text = $parts[0];
          }
      }
      //remove templates, cats, interwikis and extlinks and refs
      $text = preg_replace("/(\{\{[^\}]*?\}\}|={1,6}[^=]*?={1,6}|\n\*{1,2} ?|\[https?[^\]]*?\]|\[\[(Category|Image|File|[a-z]{2,6}):[^\]]*?\]\]|\<references ?\/\>|<ref>.*?<\/ref>|<!--.*?-->|\{\|.*?\|-.*?\|.*?\|})/is","",$text);
      //fill all links in with a single word
      $text = preg_replace("/\[\[([^]:]*)\]\]/","WORD",$text);
      return str_word_count(trim($text));
  }
  // Rough word count of the lead only (everything before the first section
  // header), after stripping templates, headings, bullets, external links,
  // category/file/interwiki links, refs, comments and tables. Each remaining
  // wikilink counts as one word.
  public function wordcountlead()
  {
      //everything before the first header is the lead
      $chunks = preg_split('/(={2,7})([^=]+)\1/',$this->getText());
      $lead = $chunks[0];
      //remove templates, cats, interwikis and extlinks and refs
      $lead = preg_replace("/(\{\{[^\}]*?\}\}|={1,6}[^=]*?={1,6}|\n\*{1,2} ?|\[https?[^\]]*?\]|\[\[(Category|Image|File|[a-z]{2,6}):[^\]]*?\]\]|\<references ?\/\>|<ref>.*?<\/ref>|<!--.*?-->|\{\|.*?\|-.*?\|.*?\|})/is","",$lead);
      //every plain wikilink becomes one placeholder word
      $lead = preg_replace("/\[\[([^]:]*)\]\]/","WORD",$lead);
      return str_word_count(trim($lead));
  }
  // Returns true when the text contains a #REDIRECT or a soft redirect
  // template; returns null otherwise.
  public function isRedirect()
  {
      $hit = $this->matches('/(# ?REDIRECT ?\[\[.*?\]\]|\{\{Soft ?(redir(ect)?|link)\|)/i');
      if(!$hit)
      {
          return null;
      }
      return true;
  }
  /**
   * Decide whether the page needs a "sections" maintenance tag.
   *
   * Counts the lead plus every level 2/3 section that is not See also /
   * External links / References / Notes / Bibliography / Further reading, and
   * tracks the byte length of the largest counted section.
   *
   * Thresholds come from the global $config['Sections'] array:
   *  - false when there are at least 'remsections' sections and the largest is
   *    at most 'remlargest' bytes;
   *  - true when there are at most 'addsections' sections and the largest is at
   *    least 'addlargest' bytes;
   *  - true when only the lead exists and wordcount() >= 'addleadonly';
   *  - null otherwise.
   *
   * Fixes over the original: $text was never assigned (no header was ever
   * found), the lead-only branch assigned to $sectioncount instead of
   * comparing, and the 'addsections'/'addlargest' keys were swapped.
   *
   * @return bool|null
   */
  public function needsSections()
  {
      global $config;
      $text = $this->getText();
      $headerRegex = '/\n==(=)? ?.* ?===?/i';
      //find the section headers and the text between them
      preg_match_all($headerRegex,$text, $sections, PREG_PATTERN_ORDER);
      $split = preg_split($headerRegex,$text);
      $largestsection = 0;
      $sectioncount = 0;
      foreach($split as $id => $section){
          //index 0 is the lead; any later chunk belongs to header $id-1
          if($id > 0 && preg_match('/See ?also|(external( links)?|references|notes|bibliography|further( reading)?)/i',$sections[0][$id-1])){
              continue;//ignored section
          }
          $sectioncount++;
          $largestsection = max($largestsection,strlen($section));
      }
      //enough sections and none of them too large
      if($sectioncount >= $config['Sections']['remsections'] && $largestsection <= $config['Sections']['remlargest']){
          return false;
      }
      //few sections and one of them very large
      if($sectioncount <= $config['Sections']['addsections'] && $largestsection >= $config['Sections']['addlargest']){
          return true;
      }
      //lead only, but a long one
      if($sectioncount == 1 && $this->wordcount() >= $config['Sections']['addleadonly']){
          return true;
      }
      return null;
  }
  /**
   * Count the <ref> tags on the page, ignoring anything inside HTML comments.
   *
   * Opening and closing tags are both matched by the pattern, so a
   * "<ref>…</ref>" pair contributes 2 to the result.
   *
   * Fixes over the original: it returned count($matches), which is the number
   * of capture groups (always 1), and its comment filter used the character
   * class "[^(-->)]" which stopped at the first "-", "(", ")" or ">" inside a
   * comment, so many commented-out refs were still counted.
   *
   * @return int|null number of ref tags found, or null when there are none
   */
  public function isReferenced()
  {
      //drop all HTML comments so refs inside them are not counted
      $temp = preg_replace('/<!--.*?-->/s',"",$this->getText());
      //preg_match_all returns the number of full matches (0 or false when none)
      $found = preg_match_all('/<\/?ref[^\/]*?>/is',$temp,$matches);
      return $found ? $found : null;
  }
  /**
   * Guess from the page's categories whether it is a biography of a living person.
   *
   * Returns false when a category shows the subject is dead (a deaths
   * category, "… of death missing/unknown", or a births category older than
   * $config['BLP']['presumedead'] years); true when a "Living people" or
   * "<year> births" category is present; null when nothing can be concluded.
   *
   * Fixes over the original: $config was used without "global $config", so the
   * presumed-dead threshold was effectively the current year and nearly every
   * dated biography was reported as not-BLP; and "(place|year|date)of death"
   * lacked the space needed to match a category such as "Year of death missing".
   *
   * @return bool|null
   */
  public function isBLP()
  {
      global $config;
      $cats = $this->wiki->categories($this->getName());
      //first pass: anything that proves the subject is not living
      foreach ($cats as $cat)
      {
          if(preg_match('/^Category:(Dead people$|[0-9]{0,4}(s BC)? deaths$|(place|year|date) of death (missing|unknown))/i',$cat))
          {
              return false;
          }
          //born too long ago, per [[Wikipedia:Blp#Recently_dead_or_probably_dead]]
          if(preg_match("/Category:([0-9]{0,4}) births/i",$cat,$matches))
          {
              if($matches[1] < date("Y")-$config['BLP']['presumedead'])
              {
                  return false;
              }
          }
      }
      //second pass: anything that suggests a living person
      foreach ($cats as $cat)
      {
          if (preg_match('/^Category:(Living people$|[0-9]{0,4} births$)/i',$cat))
          {
              return true;
          }
      }
      //Otherwise we cant guess
      return null;
  }
  // True when the wiki reports $category among this page's categories
  // (loose == comparison, as before); false otherwise.
  public function inCategory($category)
  {
      $cats = $this->wiki->categories($this->getName());
      $found = false;
      foreach ($cats as $cat)
      {
          if ($cat == $category)
          {
              $found = true;
              break;
          }
      }
      return $found;
  }
  /**
   * Remove interwiki links that Wikidata already provides, and log leftovers.
   *
   * Skipped entirely when the page carries {{noexternallanglinks}} or when the
   * wiki does not return exactly one Wikidata entity for the page. Each
   * sitelink of the entity is turned into "\n[[lang:Title]]" and removed from
   * the text if present. Any interwiki links still left afterwards are written
   * to the 'wikidata' log when the page has no Wikidata item or more than four
   * links remain.
   *
   * Fixes over the original:
   *  - leftovers were tested with count($matches), which counts capture groups
   *    (always 3) rather than links found;
   *  - $needlog was reset to false right after the "more than 4" check;
   *  - the singular and plural summary branches were identical;
   *  - a non-array sitelinks result is now ignored instead of passed to count().
   *
   * @return void
   */
  public function interwikilinks()
  {
      //pages that opt out of Wikidata links are left alone
      if(preg_match('/\{\{noexternallanglinks\}\}/',$this->getText()))
      {
          return;
      }
      $r = $this->wiki->wikidatasitelinks($this->getName());
      //anything other than exactly 1 entity means the wikidata side is missing or broken
      if(!is_array($r) || count($r) != 1)
      {
          return;
      }
      $counter = 0;
      $id = "";
      foreach($r as $ent)
      {
          $id = $ent['id'];
          if(!isset($ent['sitelinks']))
          {
              continue;
          }
          foreach ($ent['sitelinks'] as $l)
          {
              //site ids look like "zh_min_nanwiki"; interwiki prefixes use hyphens
              $lang = str_replace("_","-",str_replace("wiki","",$l['site']));
              $link = "\n[[".$lang.":".$l['title']."]]";
              if(strpos($this->getText(),$link) !== false)
              {
                  $this->setText(str_replace($link,"",$this->getText()));
                  $counter++;
              }
          }
      }
      if($counter > 0)
      {
          $noun = ($counter == 1) ? "link" : "links";
          $this->addSummary("Migrating $counter interwiki $noun, now provided by [[Wikipedia:Wikidata|Wikidata]] on [[d:$id]]",true);
      }
      //Now we want to log any links left over
      preg_match_all('/\[\[(nostalgia|ten|test|aa|ab|ace|af|ak|als|am|an|ang|ar|arc|arz|as|ast|av|ay|az|ba|bar|bat-smg|bcl|be|be-x-old|bg|bh|bi|bjn|bm|bn|bo|bpy|br|bs|bug|bxr|ca|cbk-zam|cdo|ce|ceb|ch|cho|chr|chy|ckb|co|cr|crh|cs|csb|cu|cv|cy|da|de|diq|dsb|dv|dz|ee|el|eml|en|eo|es|et|eu|ext|fa|ff|fi|fiu-vro|fj|fo|fr|frp|frr|fur|fy|ga|gag|gan|gd|gl|glk|gn|got|gu|gv|ha|hak|haw|he|hi|hif|ho|hr|hsb|ht|hu|hy|hz|ia|id|ie|ig|ii|ik|ilo|io|is|it|iu|ja|jbo|jv|ka|kaa|kab|kbd|kg|ki|kj|kk|kl|km|kn|ko|koi|kr|krc|ks|ksh|ku|kv|kw|ky|la|lad|lb|lbe|lez|lg|li|lij|lmo|ln|lo|lt|ltg|lv|map-bms|mdf|mg|mh|mhr|mi|min|mk|ml|mn|mo|mr|mrj|ms|mt|mus|mwl|my|myv|mzn|na|nah|nap|nds|nds-nl|ne|new|ng|nl|nn|no|nov|nrm|nso|nv|ny|oc|om|or|os|pa|pag|pam|pap|pcd|pdc|pfl|pi|pih|pl|pms|pnb|pnt|ps|pt|qu|rm|rmy|rn|ro|roa-rup|roa-tara|ru|rue|rw|sa|sah|sc|scn|sco|sd|se|sg|sh|si|simple|sk|sl|sm|sn|so|sq|sr|srn|ss|st|stq|su|sv|sw|szl|ta|te|tet|tg|th|ti|tk|tl|tn|to|tpi|tr|ts|tt|tum|tw|ty|udm|ug|uk|ur|ve|vec|vep|vi|vls|vo|wa|war|wo|wuu|xal|xh|xmf|yi|yo|za|zea|zh|zh-classical|zh-min-nan|zh-yue|zu):([^\]]+)\]\]/i',$this->getText(),$matches);
      $leftover = count($matches[0]);
      if($leftover == 0)
      {
          return;
      }
      //log when many links remain ...
      $needlog = ($leftover > 4);
      $tolog = "";
      if($id == "")
      {
          //... or when there is no wikidata item to attach them to
          $tolog .= "=== ".$this->getName()." [https://www.wikidata.org/wiki/Special:CreateItem UNSET] ===\n";
          $needlog = true;
      }
      else
      {
          $tolog .= "=== [[d:$id]] ===\n";
      }
      $tolog .= "* en is [[".$this->getName()."]]\n";
      foreach($matches[0] as $key => $match)
      {
          $tolog .= "** ".$matches[1][$key]." -> ".$matches[2][$key]." [http://www.wikidata.org/w/index.php?title=Special%3AItemByTitle&site=".urlencode($matches[1][$key])."wiki&page=".urlencode($matches[2][$key])." check]\n";
      }
      if($needlog)
      {
          $this->logevent('wikidata',$tolog);
      }
  }
  //TODO ADD THIS TO WIKI CLASS
  //Log helper: posts output to User:<bot user>/log/<page configured for $type>.
  //$what is appended after the existing page text, and the first line of
  //$what is used in the edit summary. Nothing is written when the type is not
  //configured, when the existing log page has 2 or fewer characters, or when
  //the new text would reach $config['Log']['wikidatamax'] characters.
  //Uses the global $wiki instance rather than $this->wiki.
  private function logevent ($type,$what)
  {
      global $config,$wiki;
      if(!isset($config['Log'][$type]))
      {
          return;
      }
      $text = $wiki->getpage('User:'.$config['user'].'/log/'.$config['Log'][$type],null,true);// get previous page
      if(strlen($text) <= 2)
      {
          return;
      }
      $text = $text."\n".$what;// add our stuff
      $lines = explode("\n",$what);
      if(strlen($text) >= $config['Log']['wikidatamax'])
      {
          return;
      }
      $wiki->edit('User:'.$config['user'].'/log/'.$config['Log'][$type],$text,"Adding ".$lines[0],true,true,null,true,$config['General']['maxlag']);// save the page
  }
  // Orphan check based on mainspace links to this page.
  // returns true  - the number of incoming links equals $config['Orphans']['maxlinks']
  //                 and this page is not a list/index/disambiguation/redirect/set index
  // returns false - at least one incoming link comes from a page that is not a
  //                 list/index/disambiguation/redirect (and this page is not a set index)
  // returns null  - neither of the above could be established
  public function isOrphan()
  {
      global $config;
      //titles that never count (lists, indexes, disambiguation pages)
      $titleFilter = "/((List|Index) of|\(disambig(uation)?\))/i";
      //page contents that never count (disambiguation, redirects, set indexes ...)
      $contentFilter = '/(may refer to ?\:|# ?REDIRECT|\{\{Soft ?(Redir(ect)?|link)|\{\{.*((dis(amb?(ig(uation( page)?)?)?)?)(\-cleanup)?|d(big|ab|mbox)|given( |_)name|sia|set index( articles)?)(\|([0-9a-zA-Z _]*?)( ?= ?[0-9a-zA-Z _]*?)){0,6}\}\})/i';
      $setIndexCat = "Category:All set index articles";
      //get the links to the page
      $links = $this->wiki->whatlinkshere($this->getName(),"&blnamespace=0");
      //no (or the configured number of) links: orphan, provided the tag is allowed here
      if(count($links) == $config['Orphans']['maxlinks']
          && !preg_match($titleFilter,$this->getName())
          && !preg_match($contentFilter,$this->getText())
          && !$this->inCategory($setIndexCat))
      {
          return true;
      }
      //otherwise look for one link that really counts
      foreach($links as $link){
          if(preg_match($titleFilter,$link))
          {
              continue;
          }
          if(preg_match($contentFilter,$this->wiki->getpage($link)))
          {
              continue;
          }
          //note: this tests the category of the current page, not of $link
          if($this->inCategory($setIndexCat))
          {
              continue;
          }
          //a genuine incoming link exists
          return false;
      }
      return null;
  }
  // Counts wikilinks in the check text (comments and nowiki removed) that
  // point at articles. Returns the count when at least one is found, or true
  // when there are none (i.e. the page is a dead end).
  public function isDeadend()
  {
      preg_match_all('/\[\[([^]]+)\]\]/i',$this->getcheckText(), $found, PREG_PATTERN_ORDER);
      $articleLinks = 0;
      foreach($found[1] as $target){
          //for piped links such as [[User:Addbot|Bot]] keep only the target
          $pieces = explode("|",$target);
          $target = $pieces[0];
          //links into other namespaces do not count
          if (preg_match('/((Talk|User|Wikipedia|File|Image|Mediawiki|Template|Help|Category|Portal|Book|Education( |_)program|TimedText)(( |_)talk)?):/i',$target)){
              continue;
          }
          //must look like an article link: leading ":", leading "w:", or no colon at all
          if(!preg_match('/(^:|^w:|^[^:]+$)/i',$target))
          {
              continue;
          }
          $articleLinks++;
      }
      return ($articleLinks == 0) ? true : $articleLinks;
  }
  // returns true when the page has no categories, or only stub /
  // proposed-deletion categories
  // returns false as soon as any other category is found
  public function isUncat()
  {
      // get cats for this page
      $cats = $this->wiki->categories($this->getName(),false);
      if(count($cats) == 0)
      {
          return true;
      }
      foreach($cats as $cat)
      {
          $isMaintenanceCat = preg_match('/^Category:(.*?Proposed (for )?deletion.*?|(|.*? )stubs$)/i',$cat);
          if(!$isMaintenanceCat)
          {
              //a real category: the page is categorised
              return false;
          }
      }
      //nothing but stub / deletion categories
      return true;
  }
  // Returns true when the page title ends in ".pdf" (case-insensitive),
  // null otherwise.
  public function isPdf()
  {
      $hasPdfSuffix = preg_match("/\.pdf$/i",$this->getName());
      return $hasPdfSuffix ? true : null;
  }
  /**
   * Add a maintenance template to the page unless it is already there.
   *
   * $template is one of the $config['mitag'] objects (e.g. orphan). It is
   * asked for regexTemplate(), regexTempIssues(), regexNotif(), getPost()
   * and getName().
   *
   * With $section set, the tag is placed directly under the first header
   * "==$section==" (optional single spaces inside the equals signs); if no
   * such header exists a new one is created at the top of the page. Without
   * $section the tag goes to the top of the page.
   *
   * Fixes over the original: the matched header text is now preserved (it was
   * replaced by "====" because the int result of preg_match() was indexed),
   * $section is escaped before being used in the pattern, and text following a
   * second occurrence of the same header is no longer dropped.
   *
   * @param object      $template Tag definition object.
   * @param string|null $section  Optional section title to place the tag under.
   * @return false|null false when the tag (or its notification) is already
   *                    present; null after the tag has been added.
   */
  public function addTag($template,$section=null)
  {
      $text = $this->getText();
      //make sure the tag is not already on the page
      if(preg_match('/'.$template->regexTemplate().'/i',$text) || preg_match('/'.$template->regexTempIssues().'/i',$text)){ return false; }
      //make sure the template's notif is not on the page
      $notif = $template->regexNotif();
      if($notif != false && preg_match("/".$notif."/i",$text)){ return false; }
      if($section)
      {
          $headerRegex = "/== ?".preg_quote($section,'/')." ?==/i";
          if(preg_match($headerRegex,$text,$found,PREG_OFFSET_CAPTURE))
          {
              //insert right after the first matching header, keeping the header as written
              $insertAt = $found[0][1] + strlen($found[0][0]);
              $this->text = substr($text,0,$insertAt)."\n".$template->getPost()." ".substr($text,$insertAt);
          }
          else // else we can just make the section
          {
              $this->text = "==".$section."==\n".$template->getPost()."\n" .$text;
          }
      }
      else// else just add it to the top of the page
      {
          $this->text = $template->getPost()."\n" .$text;
      }
      // add to the summary for the edit
      $this->addSummary("Adding {{".$template->getName()."}}",true);
  }
  // Remove every occurrence of the given tag from the page.
  // $template is one of the $config['mitag'] objects (e.g. orphan); its
  // regexTemplate() supplies the pattern and getName() the summary text.
  public function removeTag($template)
  {
      $pattern = '/'.$template->regexTemplate().'/i';
      $note = "Removing {{".$template->getName()."}}";
      $this->removeRegex($pattern,$note);
  }
  // Delete everything matching $regex from the page text. The replacement is
  // applied twice in a row, exactly as before. When $summary is given (and is
  // not loosely equal to null) it is added to the edit summary as a
  // significant change. Does nothing if the regex does not match.
  public function removeRegex($regex,$summary = null)
  {
      $before = $this->getText();
      if(!preg_match($regex,$before))
      {
          return;
      }
      $firstPass = preg_replace($regex,"",$before);
      $this->setText(preg_replace($regex,"",$firstPass));
      if($summary != null)
      {
          $this->addSummary($summary,true);
      }
  }
  // Returns true when this page's title equals the 'name' of one of the
  // sandboxes listed in $config['sandbox']; returns null otherwise.
  public function isSandbox()
  {
      global $config;
      $title = $this->getName();
      foreach($config['sandbox'] as $candidate)
      {
          if($candidate['name'] != $title)
          {
              continue;
          }
          return true;
      }
      return null;
  }
  /**
   * Make sure the sandbox header is at the very top of the page.
   *
   * The expected header is fetched from the wiki using the 'name' and 'id' of
   * this page's entry in $config['sandbox'] (keyed by page title). If the page
   * text does not start with it, every existing copy of the header is removed
   * (case-insensitively) and a single copy is put at the top.
   *
   * Fix over the original: the header is now quoted with the "/" delimiter.
   * Without it any header containing "/" produced an invalid pattern, the
   * "already present" test failed and the page text was replaced by the
   * header alone.
   *
   * @return true|null true when the header was restored, null when it was
   *                   already in place.
   */
  public function restoreHeader()
  {
      global $config;
      $sandbox = $config['sandbox'][$this->getName()];
      //get the shouldbe header
      $shouldbe = $this->wiki->getpage($sandbox['name'],$sandbox['id']);
      $quoted = preg_quote($shouldbe,'/');
      //nothing to do when the header is already at the top of the page
      if(preg_match('/^'.$quoted.'/s',$this->getText()))
      {
          return null;
      }
      //Post it to the top removing any other match of it
      $this->setText($shouldbe."\n".preg_replace('/'.$quoted.'/is',"",$this->getText()));
      $this->addSummary("Restoring sandbox header",true);
      return true;
  }
  501. //parse MI tag, add tags to MI, remove MI if not needed
  // Collects maintenance tags and leading hatnotes from the page and rebuilds the
  // top of the page as: hatnotes, then either a single tag or a {{Multiple issues}}
  // wrapper around two or more tags, then the remaining text.
  // Walks every template reported by the parser: an existing MI template is
  // unpacked (old style "tag = date" arguments are converted to {{tag|date=...}}),
  // hatnotes above the first section are set aside, and any template matching one
  // of $config['mitag'] (without a section parameter) is pulled out of the text.
  // Edit-summary entries for adding/removing {{Multiple issues}} are only made when
  // this is the second run ($this->runMI == 2).
  // Returns null in all cases (early when an empty MI template is found).
  502. public function multipleIssues()
  503. {
  504. global $config;
  505. $this->runMI = $this->runMI + 1;
  // running total of characters already cut out of the text, used to correct offsets below
  506. $removed = 0;
  507. $hat = "";//for storing nay hat notes in
  508. $mi = "";//this will be used to store what we want to add to the page
  // NOTE(review): this local is never read; the checks below use the property $this->hadMI
  509. $hadMI = null;//did we have MI tag before this check?
  510. //parse the page
  511. $this->parse(); // work with $this->parsed;
  512. //for each template on the page
  513. foreach($this->parsed['wikObject_templates'] as $x)
  514. {
  515. //make sure the template is not surrounded by comment tags
  516. if(!preg_match('/<!--.*?'.preg_quote($x->rawCode,'/').'.*?-->/is',$this->getText()))
  517. {
  518. //does it match the MI template
  519. if(preg_match('/^(Multiple issues|Article issues|Issues|MI|Many Issues|Multiple|Multipleissues)$/i',$x->name))
  520. {
  521. //Update hadMI if not already set
  522. if($this->hadMI === null){$this->hadMI = true;}
  523. //IS the MI tag empty?
  524. if(preg_match('/\{\{(Multiple issues|Article issues|Issues|MI|Many Issues|Multiple|Multipleissues)\|?\s*?\}\}/is',$x->rawCode))
  525. {
  526. //remove and stop
  527. $this->text = preg_replace('/\{\{(Multiple issues|Article issues|Issues|MI|Many Issues|Multiple|Multipleissues)\|?\s*?\}\}/is',"",$this->getText());
  528. if($this->hadMI === true && $this->runMI == 2)
  529. {
  530. $this->addSummary("Removing {{Multiple issues}}",true);
  531. }
  // early exit: tags and hatnotes collected so far are not written back to the text
  532. return null;
  533. }
  534. else//else we must be a new MI style (or a mixture of both)
  535. {
  536. //the parse accordingly
  537. foreach($x->arguments as $tagarg)
  538. {
  539. if(!preg_match('/\{/',$tagarg))//if the arg is old style, add it correctly
  540. {
  541. //if the tagarg doesnt have a date i.e. 'expert = canada' we need to get the date in the 'date = <DATE>' parameter
  542. if(!preg_match('/(.*?) ?= ?((January|February|March|April|May|June|July|August|September|October|November|December) ?20[0-9][0-9])/i',$tagarg))
  543. {
  544. //add it with a <DATE> placeholder
  545. $mi = $mi."{{".trim(preg_replace('/ ?= ?/','|',$tagarg))."|date=<DATE>}}\n";
  546. }
  547. else//else it does have a date
  548. {
  549. //if it is not just a date
  550. if(!preg_match('/^date ?= ?/i',$tagarg))
  551. {
  552. //add it normally
  553. $mi = $mi."{{".trim(preg_replace('/ ?= ?/','|date=',$tagarg))."}}\n";
  554. }
  555. //else it must be a date
  556. else
  557. {
  558. //dont add the date but replace <DATE> in the string with the date
  // only placeholders already in $mi are filled, so this relies on the date argument coming after the tags it dates
  559. $mi = str_replace("<DATE>",trim(preg_replace('/date ?= ?/','',$tagarg)),$mi);
  560. }
  561. }
  562. }
  563. else
  564. {
  565. //just add it
  566. //After a lot of research and testing it turns out the MI tag is allowed 1 parameter with templates in
  567. //Although this can be in the same MI tag as non template 'old style' paramemters
  568. $mi = $mi.$tagarg;
  569. }
  570. }
  571. $removed = $removed + $x->attributes['length'];
  572. $this->text = str_replace($x->rawCode,'',$this->getText());
  573. }
  574. $mi = preg_replace("/\n/","",$mi);//get rid of new lines
  575. }
  576. //else do we match any hatnotes
  577. elseif(preg_match('/^(Template:)?(Hatnote|Reflink|Main(( |_)list)?|Details3?|See( |_)also2?|Further2?|About|Other( |_)uses-section|For|((Two|Three) )?Other( |_)uses|Other uses of|Redirect[0-1]?[0-9]|Redirect(-|_| )(synomym|text|distinguish2?)|Consider( |_)disambiguation|Other( |_)(uses|people|places|hurricanes|ships|)[1-5]?|(Redirect-)?Distinguish|Selfref|Category( |_)(see also|explanation|pair)|Cat( |_)main|cat(preceding|succeeding)|contrast|This( |_)user( |_)talk)/i',$x->name))
  578. {
  579. //make sure the hat note is not under a section
  // NOTE(review): $x->name is placed in the pattern without preg_quote()
  580. if(!preg_match('/\n==.*?{{'.$x->name.'/is',$this->getText()))
  581. {
  582. if(strlen($x->rawCode) < 4)
  583. {
  584. //skip page
  585. $this->skip = true;
  586. }
  587. //remember our hatnotes
  588. $hat = $hat.$x->rawCode."\n";
  589. //remove the hatnote matched (we will re add later)
  590. $removed = $removed + $x->attributes['length'];
  591. $this->text = str_replace($x->rawCode,'',$this->getText());
  592. }
  593. }
  594. else// else if we match a tag to go in MI
  595. {
  596. //check for all of our defined tags
  597. foreach($config['mitag'] as $tag)
  598. {
  599. //if it is one of our tags
  600. if(preg_match("/^".$tag->regexName()."$/i",$x->name) == true)
  601. {
  602. //if we have a section param ignore the tag
  603. if(preg_match("/\|(sections|sect?)/i",$x->rawCode) == false)
  604. {
  605. //remove the tag from page and add to our output
  606. $mi = $mi.$x->rawCode;
  // cut by position: the parser's start offset is shifted back by what has been removed so far
  // NOTE(review): assumes attributes['start'] is a 1-based offset into the text as originally parsed - confirm against parser.php
  607. $this->text = substr_replace($this->getText(),"",$x->attributes['start']-$removed-1,$x->attributes['length']);
  608. $removed = $removed + $x->attributes['length'];
  609. }
  610. }
  611. }
  612. }
  613. }
  614. }
  615. //Update hadMI if not already set
  616. if($this->hadMI === null){$this->hadMI = false;}
  617. //crappy way to make sure we split at every tag
  618. $mi = preg_replace('/\}\}/',"}}\n",$mi);
  619. //split into each tag (might be joined if from MI)
  620. $split = preg_split("/\n/",$mi,0,PREG_SPLIT_NO_EMPTY);
  621. //If there is at least 2 tags
  622. if(count($split) > 1)
  623. {
  624. //add them to a MI tag
  625. $mi = "{{Multiple issues|\n";//start mi
  626. foreach ($split as $tag)
  627. {
  628. //Check the exact same thing is not already there
  629. if(!strstr($mi,$tag))
  630. {
  631. //TODO: now we want to make sure the same thing is not there just with a different date (also account for redirects)
  632. $mi = $mi.$tag."\n";//add each tag
  633. }
  634. }
  635. $mi = $mi."}}";//add the end of the tag
  636. if($this->hadMI === false && $this->runMI == 2)
  637. {
  638. //if we have already had a sig change or we have more than 2 tags
  639. if($this->sigchange == true || count($split) > 2)
  640. {
  641. //we can edit
  642. $this->addSummary("Adding {{Multiple issues}}",true);
  643. }
  644. }
  645. }
  646. //if only 1 we dont want to use multiple issues
  647. elseif(count($split) == 1)
  648. {
  649. //just add the single tag
  650. $mi = $split[0];
  651. if($this->hadMI === true && $this->runMI == 2)
  652. {
  653. $this->addSummary("Removing {{Multiple issues}}",true);
  654. }
  655. }
  656. //add to origional text with any hatnotes
  // when no tags were collected $mi is "" here, so the text gains only the hatnotes and a newline
  657. $this->text = $hat.$mi."\n".$this->getText();
  658. }
  659. //merge duplicate tags inside a {{Multiple issues}} template, keeping the oldest date
  // For each MI template found by the parser (and not inside an HTML comment),
  // every line of every argument is read as {{name|params|date=Month 20YY}}.
  // A name seen before is merged into its first entry (the older date wins), except
  // for "expert"-type tags, which are always kept as separate entries.
  // The template is rewritten, with a "Removing Duplicate tags" summary, only when
  // the rebuilt version is more than 10 characters shorter than the original.
  // Has no explicit return value.
  660. public function multipleIssuesDupes()
  661. {
  662. global $config;
  663. //parse the page
  // parallel arrays, indexed by $c: template name, date and remaining parameters
  // they are not reset between MI templates, so entries carry over to later ones
  664. $mi['name'] = Array();
  665. $mi['date'] = Array();
  666. $mi['params'] = Array();
  667. $this->parse(); // work with $this->parsed;
  668. //for each template on the page
  669. foreach($this->parsed['wikObject_templates'] as $x)
  670. {
  671. //make sure the template is not surrounded by comment tags
  672. if(!preg_match('/<!--.*?'.preg_quote($x->rawCode,'/').'.*?-->/is',$this->getText()))
  673. {
  674. //does it match the MI template
  675. if(preg_match('/^(Multiple issues|Article issues|Issues|MI|Many Issues|Multiple|Multipleissues)/i',$x->name))
  676. {
  677. $c = 0;
  678. //the parse accordingly
  679. foreach($x->arguments as $tagarg)
  680. {
  681. $each = explode("\n",$tagarg);
  682. foreach ($each as $tag)
  683. {
  // groups: 1 = template name, 2 = params before the date, 3 = date, 5 = params after the date
  684. if(preg_match('/\{\{([^\|]+)(.*?)\|date ?= ?((January|February|March|April|May|June|July|August|September|October|November|December) ?20[0-9][0-9])(.*?)\}\}/i',$tag,$matches))
  685. {
  686. //if its not already in the arry or it matches a template to ignore
  687. if(!in_array($matches[1],$mi['name']) || preg_match('/((cleanup-)?expert(_| |-|)(attention|subject|article|portal|verify|))/i',$matches[1]))
  688. {
  689. //add it
  690. $mi['name'][$c] = $matches[1];
  691. $mi['date'][$c] = $matches[3];
  692. $mi['params'][$c] = $matches[2].$matches[5];
  693. $c++;
  694. }
  695. else
  696. {
  697. $search = array_search($matches[1],$mi['name']);
  698. //find which date is oldest and keep it
  699. if(strtotime($matches[3]) < strtotime($mi['date'][$search]))
  700. {
  701. //change the date
  702. $mi['date'][$search] = $matches[3];
  703. //and if no parameters were given before
  704. if($mi['params'][$search] == "")
  705. {
  706. //give ours
  707. $mi['params'][$search] = $matches[2].$matches[5];
  708. }
  709. }
  710. }
  711. }
  712. //else it doesnt match a nice date format so just ignore it for now
  713. else
  714. {
  // NOTE(review): this branch runs when the pattern above did NOT match, so $matches
  // holds no captures here and indexes 1, 2, 3 and 5 are undefined; the entry added is
  // empty rather than a copy of the unmatched line - confirm the intended behaviour
  715. //add it
  716. $mi['name'][$c] = $matches[1];
  717. $mi['date'][$c] = $matches[3];
  718. $mi['params'][$c] = $matches[2].$matches[5];
  719. $c++;
  720. }
  721. }
  722. }
  723. //find our new MI tag
  724. $torep = "{{Multiple issues|\n";
  725. foreach($mi['name'] as $key => $name)
  726. {
  727. $torep.= "{{".$mi['name'][$key].$mi['params'][$key]."|date=".$mi['date'][$key]."}}\n";
  728. }
  729. $torep .= "}}";
  730. //replace the old with the new
  // only rewrite when the rebuilt template is more than 10 characters shorter than the original
  731. if(strlen($x->rawCode)-10 > strlen($torep))
  732. {
  733. $this->setText(str_replace($x->rawCode,$torep,$this->getText()));
  734. $this->addSummary("Removing Duplicate tags",true);
  735. }
  736. }
  737. }
  738. }
  739. }
  740. //http://en.wikipedia.org/w/index.php?title=Wikipedia:AutoEd/whitespace.js&action=raw&ctype=text/javascript
  // Normalise blank lines: runs of three or more newlines become exactly two,
  // then up to five leading newline / carriage-return characters are removed.
  public function fixWhitespace()
  {
      $collapsed = preg_replace('/(\n\n)\n+/',"$1", $this->getText() );
      //remove leading white space
      $this->text = preg_replace('/^(\n|\r){0,5}/',"", $collapsed );
  }
  // Apply a fixed series of template clean-ups to the page text, in order:
  // normalise disambiguation / Hndis names, subst a few templates, tidy
  // {{Article issues}} parameters (duplicates, empty values, empty template),
  // convert {{Commons|Category:…}} and rename citation-needed aliases.
  // Each rule is a [pattern, replacement] pair applied to the result of the
  // previous one.
  public function fixTemplates()
  {
      $rules = array(
          array( '/\{\{(?:Template:)?(Dab|Disamb|Disambiguation)\}\}/iS', "{{Disambig}}" ),
          array( '/\{\{(?:Template:)?(Bio-dab|Hndisambig)/iS', "{{Hndis" ),
          array( '/\{\{(?:Template:)?(Prettytable|Prettytable100)\}\}/iS', "{{subst:Prettytable}}" ),
          array( '/\{\{(?:[Tt]emplate:)?((?:BASE)?PAGENAMEE?\}\}|[Ll]ived\||[Bb]io-cats\|)/iS', "{{subst:$1" ),
          array( '/({{\s*[Aa]rticle ?issues\s*(?:\|[^{}]*|\|)\s*[Dd]o-attempt\s*=\s*)[^{}\|]+\|\s*att\s*=\s*([^{}\|]+)(?=\||}})/iS', "$1$2" ),
          array( '/({{\s*[Aa]rticle ?issues\s*(?:\|[^{}]*|\|)\s*[Cc]opyedit\s*)for\s*=\s*[^{}\|]+\|\s*date(\s*=[^{}\|]+)(?=\||}})/iS', "$1$2" ),
          array( '/\{\{[Aa]rticle ?issues(?:\s*\|\s*(?:section|article)\s*=\s*[Yy])?\s*\}\}/iS', "" ),
          array( '/\{\{[Cc]ommons\|\s*[Cc]ategory:\s*([^{}]+?)\s*\}\}/iS', "{{Commons category|$1}}" ),
          array( '/(?!{{[Cc]ite wikisource)(\{\{\s*(?:[Cc]it[ae]|[Aa]rticle ?issues)[^{}]*)\|\s*(\}\}|\|)/iS', "$1$2" ),
          array( '/({{\s*[Aa]rticle ?issues[^{}]*\|\s*)(\w+)\s*=\s*([^\|}{]+?)\s*\|((?:[^{}]*?\|)?\s*)\2(\s*=\s*)\3(\s*(\||\}\}))/iS', "$1$4$2$5$3$6" ),
          array( '/(\{\{\s*[Aa]rticle ?issues[^{}]*\|\s*)(\w+)(\s*=\s*[^\|}{]+(?:\|[^{}]+?)?)\|\s*\2\s*=\s*(\||\}\})/iS', "$1$2$3$4" ),
          array( '/(\{\{\s*[Aa]rticle ?issues[^{}]*\|\s*)(\w+)\s*=\s*\|\s*((?:[^{}]+?\|)?\s*\2\s*=\s*[^\|}{\s])/iS', "$1$3" ),
          array( '/{{\s*(?:[Cc]n|[Ff]act|[Pp]roveit|[Cc]iteneeded|[Uu]ncited)(?=\s*[\|}])/S', "{{Citation needed" ),
      );
      foreach ($rules as $rule)
      {
          $this->text = preg_replace( $rule[0], $rule[1], $this->text );
      }
  }
  // Add "|date=<current month and year>" to every configured maintenance tag
  // that appears on the page with no parameters at all. The change is kept
  // (with a "Dating Tags" summary and a "+" echoed to output) only when the
  // text grew by more than 5 characters.
  public function fixDateTags()
  {
      global $config;
      //current month and year, e.g. "May 2012"
      $stamp = date("F Y");
      //work on a copy so the length comparison below sees the original
      $updated = $this->getText();
      foreach ($config['mitag'] as $tag)
      {
          //skip tags that are not on the page in any form
          if(!$this->matches('/\{\{(Template:)?'.$tag->regexName().'/i'))
          {
              continue;
          }
          //date tags with no args at all
          $updated = preg_replace('/\{\{(Template:)?'.$tag->regexName().'\}\}/i',"{{".$tag->getName()."|date=$stamp}}",$updated);
      }
      //nothing (or next to nothing) changed
      if(strlen($updated) <= strlen($this->getText())+5)
      {
          return;
      }
      $this->text = $updated;
      echo "+";
      $this->addSummary("Dating Tags",true);
  }
  // For every configured maintenance tag - except the unreferenced /
  // emptysection / refimprove / unsourced / footnotes / uncategorized family -
  // that appears somewhere after a "==" header, rewrite the tag under its
  // canonical name with a trailing "|section" parameter.
  public function fixSectionTags()
  {
      global $config;
      foreach ($config['mitag'] as $tag)
      {
          //these tags are left alone wherever they are
          if(preg_match('/(unreferenced|emptysection|refimprove|unsourced|footnotes|uncategorized)/i',$tag->getName()))
          {
              continue;
          }
          //is the tag (without a sect/section parameter right after its name) below a header?
          if(!$this->matches('/(==.*?)\{\{(Template:)?'.$tag->regexName().'(?!sect(ions?)?)[^}]*?\}\}/is'))//todo
          {
              continue;
          }
          //add the section parameter to the template
          $rewritten = preg_replace('/(==.*?)\{\{(Template:)?'.$tag->regexName().'((?!sect(ions?)?)[^}]*?)\}\}/is',"$1{{".$tag->getName()."$4|section}}",$this->getText());
          $this->setText($rewritten);
      }
  }
  // General clean-up:
  //  - {{<unreferenced tag>|section}} becomes {{Unreferenced section}}
  //  - <references/> becomes {{reflist}} unless the page already uses {{reflist
  // TODO: header normalisation (External links / Further reading) is still disabled.
  public function fixGeneral()
  {
      global $config;
      //Templates
      $unrefSection = '/\{\{'.$config['mitag']['unreferenced']->regexName().'\|section\}\}/i';
      $this->text = preg_replace($unrefSection, "{{Unreferenced section}}", $this->text );
      if($this->matches('/\{\{reflist/i'))
      {
          return;
      }
      $this->text = preg_replace('/<references ?\/>/i',"{{reflist}}", $this->text );
  }
  // Strip leftover bot comments before other checks run: the SoxBot
  // "automatically added" marker and any <!-- interwiki(s) (link(s)) --> comment.
  public function preChecks()
  {
      $withoutSoxbot = str_ireplace("<!-- Automatically added by User:SoxBot. If this is an error, please contact User:Soxred93 -->","",$this->text);
      $this->text = preg_replace("/<!-- ?interwikis?( links?)? ?-->/i","",$withoutSoxbot);
  }
  825. }
  826. ?>