/classes/page.php
PHP | 882 lines | 643 code | 56 blank | 183 comment | 116 complexity | c65eab7c9d3396ae75775a070cc6b1b9 MD5 | raw file
- <?
- require 'parser.php';
- class Page {
- // construct the page (you probably want to call load after this)
- public function __construct($page,$wiki) {
- $this->page = preg_replace("/_/"," ",$page);
- $this->wiki = $wiki;
- $this->parseNamespace();
- $this->loadText();//load the wikitext from page
- $this->hadMI; //set default until checked
- $this->runMI = 0; //the number of time MI has been run
- $this->skip = false;
- }
-
- // variables
- private $page;// page name (e.g. "User:Addshore")
- private $text;// page text
- private $checktext; //this is a temporary copy of the text that can be checked against with comments and nowiki content removed.
- private $namespace;// page namespace (No colon)
- private $wiki;// instance of wiki we are using
- private $parser;// instance of the parser.php class
- private $parsed;
- private $sigchange;//has a significant change happened to the page (enough to edit)?
- private $summary;//summary if edited
- private $hadMI; //did the page have an MI tag when we loaded it
- private $runMI; //the number of times MI has been run
- public $skip; //have we found something that means we should skip?
-
- // getters and setters
- public function getName() { return $this->page; }
- public function getText() { return $this->text;}
- public function getcheckText() { return $this->checktext;}
- public function setText($text) { $this->text = $text;}
- public function getNamespace() { if(!isset($this->namespace)){$this->parseNamespace();} return $this->namespace;}
- public function hasSigchange() { return $this->sigchange; }
-
- // public functions
- // create instance of parser class and parse
- public function parse() { $this->parser = new parser($this->getName(),$this->getText()); $this->parsed = $this->parser->parse(); return $this->parsed;}
-
- // private functions
- private function loadText() {
- $text = $this->wiki->getpage($this->getName());
- $this->text = $text;//our actual text
- $this->checktext = preg_replace("/(<nowiki>.*?<\/nowiki>|<!--.*?-->)/is","",$text); //text with nonwiki but wiki elements removed
- } // load the text from the wiki
- private function postPage() { $this->wiki->edit($this->getName(),$this->getText(),$this->getSummary(),true);}
- private function parseNamespace()
- {
- $result = preg_match("/^((Talk|User|Wikipedia|File|Image|Mediawiki|Template|Help|Category|Portal|Book|Education( |_)program|TimedText)(( |_)talk)?):?/i",$this->page,$matches);
- if($result == 0){ $this->namespace = "";}// default to article namespace
- else{$this->namespace = $matches[1];}
- if($this->namespace == "Image"){ $this->namespace = "File";}// default Image namespace to file
- }
-
- public function addSummary($sum,$sig)
- {
- //split the summary
- $split = explode(" ",$sum,2);
- //if we CANNOT find the first bit (Adding, Removing, Dating) already in the summary
- if(!preg_match('/'.$split[0].'/i',$this->summary))
- {
- //just add the summary
- $this->summary = $this->summary.$sum." ";
- }
- //else we first bit is already there so we want to insert our second bit
- else
- {
- //replace the first bit in the summary with the first bit and the second bit (a sort of insert)
- $this->summary = preg_replace('/'.$split[0].'/i',$sum,$this->summary);
- }
- //if we want this to be a sig change say so
- if($sig)
- {
- $this->sigchange = true;//if we have a summary it muse be a sig change
- }
- }
-
- //returns the edit summary
- public function getSummary(){
- return "[[User:Addbot|Bot:]] ".$this->summary."([[User talk:Addbot|Report Errors]])";
- }
-
- // //
- // Main bot functions //
- // //
- //make matching easier
- public function matches($regex){return preg_match($regex,$this->getText());}
-
- //return a restricted estimate of words in an article
- public function wordcount()
- {
- //get a temp copy of the text to work with
- $text = $this->getText();
- //remove everything in sections that we dont want
- preg_match_all('/(={2,7})([^=]+)\1/',$text,$sections);
- foreach($sections[0] as $key => $header)
- {
- //if we match a section we dont want then remove it
- if(preg_match('/(External links?|References?|Notes?|See also|Bibliography)/i',$sections[2][$key]))
- {
- if(isset($sections[0][$key+1]))
- {
- $text = preg_replace('/'.preg_quote($header).'.*?'.$sections[0][$key+1].'/is',"",$text);
- }
- else
- {
- $parts = explode($header,$text);
- $text = $parts[0];
- }
- }
-
- }
- //remove templates, cats, interwikis and extlinks and refs
- $text = preg_replace("/(\{\{[^\}]*?\}\}|={1,6}[^=]*?={1,6}|\n\*{1,2} ?|\[https?[^\]]*?\]|\[\[(Category|Image|File|[a-z]{2,6}):[^\]]*?\]\]|\<references ?\/\>|<ref>.*?<\/ref>|<!--.*?-->|\{\|.*?\|-.*?\|.*?\|})/is","",$text);
- //fill all links in with a single word
- $text = preg_replace("/\[\[([^]:]*)\]\]/","WORD",$text);
- $text = trim($text);
- //return
- return str_word_count($text);
- }
-
- //return a restricted estimate of words in an article
- public function wordcountlead()
- {
- //get a temp copy of the text to work with
- $text = $this->getText();
- $split = preg_split('/(={2,7})([^=]+)\1/',$text);
- $text = $split[0];
- //remove templates, cats, interwikis and extlinks and refs
- $text = preg_replace("/(\{\{[^\}]*?\}\}|={1,6}[^=]*?={1,6}|\n\*{1,2} ?|\[https?[^\]]*?\]|\[\[(Category|Image|File|[a-z]{2,6}):[^\]]*?\]\]|\<references ?\/\>|<ref>.*?<\/ref>|<!--.*?-->|\{\|.*?\|-.*?\|.*?\|})/is","",$text);
- //fill all links in with a single word
- $text = preg_replace("/\[\[([^]:]*)\]\]/","WORD",$text);
- $text = trim($text);
- //return
- return str_word_count($text);
- }
-
- //returns if the page is a redirect or not
- public function isRedirect()
- {
- if($this->matches('/(# ?REDIRECT ?\[\[.*?\]\]|\{\{Soft ?(redir(ect)?|link)\|)/i'))
- {
- return true;
- }
- }
-
- // returns false if the largest section size is smaller than 5000 chars (excluding certain sections)
- public function needsSections()
- {
- global $config;
- //init some vars
- $largestsection = 0;
- $sectioncount = 0;
- //find the sections
- preg_match_all('/\n==(=)? ?.* ?===?/i',$text, $sections, PREG_PATTERN_ORDER);
- $split = preg_split('/\n==(=)? ?.* ?===?/i',$text);
-
- //for each section found
- foreach($split as $id => $section){
- //if we are the lead
- if($id == 0){
- $largestsection = strlen($section);
- $sectioncount++;
- }
- //else we must have a name
- else{
- //make sure we ignore the sections below
- if (preg_match('/See ?also|(external( links)?|references|notes|bibliography|further( reading)?)/i',$sections[0][$id-1]) == 0){
- //if the length of this section is longer than our current largest
- if(strlen($section) > $largestsection){
- //then set it
- $largestsection = strlen($section);
- }
- //increment the section count
- $sectioncount++;
- }
- }
- }
- //if the page has 4+ sections and a largest section of 5000- then return false
- if($sectioncount >= $config['Sections']['remsections'] && $largestsection <= $config['Sections']['remlargest']){//was 2750 for AVG
- return false;
- }elseif($sectioncount <= $config['Sections']['addlargest']/*10000*/ && $largestsection >= $config['Sections']['addsections']/*2*/){//was 2750 for AVG
- return true;
- }elseif ($sectioncount = 1 && $this->wordcount() >= $config['Sections']['addleadonly']/*1000*/){
- return true;
- }
- }
-
- //returns the number of references that we have
- public function isReferenced()
- {
- $temp = $this->getText();
- //remove all ref tags in comments
- $temp = preg_replace('/<!--[^(-->)]*?(<\/?ref[^\/]*?>.*?<\/ref>).*?-->/is',"",$temp);
- //if we match a ref tag after the ones in comments have been ignored
- if(preg_match_all('/<\/?ref[^\/]*?>/is',$temp,$matches))
- {
- return count($matches);
- }
- return null;
- }
-
- //checks if a page is in a BLP category
- public function isBLP()
- {
- $cats = $this->wiki->categories($this->getName());
- foreach ($cats as $cat)
- {
- //Regex to match cats that are definatly NOT BLP
- if(preg_match('/^Category:(Dead people$|[0-9]{0,4}(s BC)? deaths$|(place|year|date)of death (missing|unknown))/i',$cat))
- {
- return false;
- }
- //check if we were born over 115 years ago (return false if we are) per [[Wikipedia:Blp#Recently_dead_or_probably_dead]]
- if(preg_match("/Category:([0-9]{0,4}) births/i",$cat,$matches))
- {
- if($matches[1] < date("Y")-$config['BLP']['presumedead'])
- {
- return false;
- }
- }
- }
- foreach ($cats as $cat)
- {
- //If we are still running see if there is a BLP cat
- if (preg_match('/^Category:(Living people$|[0-9]{0,4} births$)/i',$cat))
- {
- return true;
- }
- }
- //Otherwise we cant guess
- return null;
- }
-
- //returns true if page is in a given category
- public function inCategory($category)
- {
- $cats = $this->wiki->categories($this->getName());
- foreach ($cats as $cat)
- {
- if ($cat == $category)
- {
- return true;
- }
- }
- return false;
- }
-
- public function interwikilinks()
- {
- //Make sure wikidta is used
- if(!preg_match('/\{\{noexternallanglinks\}\}/',$this->getText()))
- {
- //get the other links
- $r = $this->wiki->wikidatasitelinks($this->getName());
- $counter = 0;
- $id = "";
-
- //if there is only 1 entity (i.e. the wikidata stuff isnt broken somewhere)
- if(count($r) == 1)
- {
- //foreach entitiy found
- foreach($r as $ent)
- {
- $id = $ent['id'];
- //Check if we have site links
- if(isset($ent['sitelinks']))
- {
- //for each sitelink in the entity
- foreach ($ent['sitelinks'] as $l)
- {
- $lang = str_replace("_","-",str_replace("wiki","",$l['site']));
- //echo $lang.":".$l['title']."\n";
- $link = "\n[[".$lang.":".$l['title']."]]";
- if(preg_match('/'.preg_quote($link,'/').'/',$this->getText()))
- {
- //remove the link
- $this->setText(str_replace($link,"",$this->getText()));
- //incrememnt the counter
- $counter++;
- }
- }
- }
- }
- if($counter > 1)
- {
- $this->addSummary("Migrating $counter interwiki links, now provided by [[Wikipedia:Wikidata|Wikidata]] on [[d:$id]]",true);
- }
- elseif($counter > 0)
- {
- $this->addSummary("Migrating $counter interwiki links, now provided by [[Wikipedia:Wikidata|Wikidata]] on [[d:$id]]",true);
- }
-
- //Now we want to log any links left over
- preg_match_all('/\[\[(nostalgia|ten|test|aa|ab|ace|af|ak|als|am|an|ang|ar|arc|arz|as|ast|av|ay|az|ba|bar|bat-smg|bcl|be|be-x-old|bg|bh|bi|bjn|bm|bn|bo|bpy|br|bs|bug|bxr|ca|cbk-zam|cdo|ce|ceb|ch|cho|chr|chy|ckb|co|cr|crh|cs|csb|cu|cv|cy|da|de|diq|dsb|dv|dz|ee|el|eml|en|eo|es|et|eu|ext|fa|ff|fi|fiu-vro|fj|fo|fr|frp|frr|fur|fy|ga|gag|gan|gd|gl|glk|gn|got|gu|gv|ha|hak|haw|he|hi|hif|ho|hr|hsb|ht|hu|hy|hz|ia|id|ie|ig|ii|ik|ilo|io|is|it|iu|ja|jbo|jv|ka|kaa|kab|kbd|kg|ki|kj|kk|kl|km|kn|ko|koi|kr|krc|ks|ksh|ku|kv|kw|ky|la|lad|lb|lbe|lez|lg|li|lij|lmo|ln|lo|lt|ltg|lv|map-bms|mdf|mg|mh|mhr|mi|min|mk|ml|mn|mo|mr|mrj|ms|mt|mus|mwl|my|myv|mzn|na|nah|nap|nds|nds-nl|ne|new|ng|nl|nn|no|nov|nrm|nso|nv|ny|oc|om|or|os|pa|pag|pam|pap|pcd|pdc|pfl|pi|pih|pl|pms|pnb|pnt|ps|pt|qu|rm|rmy|rn|ro|roa-rup|roa-tara|ru|rue|rw|sa|sah|sc|scn|sco|sd|se|sg|sh|si|simple|sk|sl|sm|sn|so|sq|sr|srn|ss|st|stq|su|sv|sw|szl|ta|te|tet|tg|th|ti|tk|tl|tn|to|tpi|tr|ts|tt|tum|tw|ty|udm|ug|uk|ur|ve|vec|vep|vi|vls|vo|wa|war|wo|wuu|xal|xh|xmf|yi|yo|za|zea|zh|zh-classical|zh-min-nan|zh-yue|zu):([^\]]+)\]\]/i',$this->getText(),$matches);
- //if there are still links left over
- if(count($matches) > 0)
- {
- if(count($matches) > 4){$needlog = true;}
- $tolog = "";
- $needlog = false;
- if($id == ""){$tolog .= "=== ".$this->getName()." [https://www.wikidata.org/wiki/Special:CreateItem UNSET] ===\n";$needlog = true;}
- else{$tolog .= "=== [[d:$id]] ===\n";}
- $tolog .= "* en is [[".$this->getName()."]]\n";
- foreach($matches[0] as $key => $match)
- {
- $tolog .= "** ".$matches[1][$key]." -> ".$matches[2][$key]." [http://www.wikidata.org/w/index.php?title=Special%3AItemByTitle&site=".urlencode($matches[1][$key])."wiki&page=".urlencode($matches[2][$key])." check]\n";
- }
- //Log
- if($needlog)
- {
- $this->logevent('wikidata',$tolog);
- }
- }
- }
- }
- }
- //TODO ADD THIS TO WIKI CLASS
- //Create log function
- //This can be used to post output to User:Addbot/log/<PARAM>
- //Data will be added to the top of the page in a bulleted list
- private function logevent ($type,$what)
- {
- global $config,$wiki;
- //if we are set to log this type
- if(isset($config['Log'][$type]))
- {
- $text = $wiki->getpage('User:'.$config['user'].'/log/'.$config['Log'][$type],null,true);// get previous page
- if(strlen($text) > 2)
- {
- $text = $text."\n".$what;// add our stuff
- $split = explode("\n",$what);
- if(strlen($text) < $config['Log']['wikidatamax'])
- {
- $wiki->edit('User:'.$config['user'].'/log/'.$config['Log'][$type],$text,"Adding ".$split[0],true,true,null,true,$config['General']['maxlag']);// save the page
- }
- }
- }
- }
-
- // returns true if there are 0 links to the page from the mainspace
- // returns false if there is at least 1 link that fits the criteria
- public function isOrphan()
- {
- global $config;
- //get the links to the page
- $links = $this->wiki->whatlinkshere($this->getName(),"&blnamespace=0");
- //if there are no links (i.e. is orphan)
- if(count($links) == $config['Orphans']['maxlinks']/*0*/) {
- //check the tag is allowed on such a page
- if(preg_match("/((List|Index) of|\(disambig(uation)?\))/i",$this->getName()) == FALSE)
- {
- if (preg_match('/(may refer to ?\:|# ?REDIRECT|\{\{Soft ?(Redir(ect)?|link)|\{\{.*((dis(amb?(ig(uation( page)?)?)?)?)(\-cleanup)?|d(big|ab|mbox)|given( |_)name|sia|set index( articles)?)(\|([0-9a-zA-Z _]*?)( ?= ?[0-9a-zA-Z _]*?)){0,6}\}\})/i',$this->getText()) == FALSE)
- {
- if(!$this->inCategory("Category:All set index articles"))
- {
- return true;
- }
- }
- }
- }
- //if there are links then check them
- foreach($links as $link){
- //regex names of links to ignore
- if(!preg_match("/((List|Index) of|\(disambig(uation)?\))/i",$link))
- {
- //regex of contents of pages to ignore
- if (!preg_match('/(may refer to ?\:|# ?REDIRECT|\{\{Soft ?(Redir(ect)?|link)|\{\{.*((dis(amb?(ig(uation( page)?)?)?)?)(\-cleanup)?|d(big|ab|mbox)|given( |_)name|sia|set index( articles)?)(\|([0-9a-zA-Z _]*?)( ?= ?[0-9a-zA-Z _]*?)){0,6}\}\})/i',$this->wiki->getpage($link)))
- {
- //Make sure the page is not in cat "All set index articles"
- if(!$this->inCategory("Category:All set index articles"))
- {
- //if we got this far it isnt an orphaned page
- return false;
- }
- }
- }
- }
- return null;
- }
-
- // If blue links are found it returns the number of blue links
- // returns true if 0 links are found
- public function isDeadend()
- {
- $count = 0;
- // match links to articles
- preg_match_all('/\[\[([^]]+)\]\]/i',$this->getcheckText(), $links, PREG_PATTERN_ORDER);
- foreach($links[1] as $link){
- //if this link has been renammed i.e. [[User:Addbot|Bot]]
- if(preg_match('/\|/',$link) != 0){
- // get the link rather than text name
- $split = preg_split('/\|/',$link);
- $link = $split[0];
- }
- //if it doesnt linked to another namespace
- if (!preg_match('/((Talk|User|Wikipedia|File|Image|Mediawiki|Template|Help|Category|Portal|Book|Education( |_)program|TimedText)(( |_)talk)?):/i',$link)){
- //and it does look like something we would expect
- if(preg_match('/(^:|^w:|^[^:]+$)/i',$link))
- {
- //count it
- $count++;
- }
- }
- }
- if($count == 0){return true;}
- else{return $count;}
- }
-
- // returns true is 0 categories are found
- // returns false if more than one is found
- public function isUncat()
- {
- // get cats for this page
- $cats = $this->wiki->categories($this->getName(),false);
-
- if(count($cats) == 0)
- {
- //if not cats at all then TRUE (IS UNCAT)
- return true;
- }
- else
- {
- foreach($cats as $cat)
- {
- if(!preg_match('/^Category:(.*?Proposed (for )?deletion.*?|(|.*? )stubs$)/i',$cat))
- {
- //if it is not a stub cat return FALSE (NOT UNCAT)
- return false;
- }
- }
- //If we haven't hit anything else then we must be uncat
- return true;
- }
- }
-
- //return true if the page is appended by .pdf
- public function isPdf()
- {
- if( preg_match("/\.pdf$/i",$this->getName()))
- {
- return true;
- }
- }
-
- //add the given template from the page if it doesnt already exist
- //passed $config['mitag']['TEMPLATECODE'] (i.e. orphan)
- public function addTag($template,$section=null)
- {
- //make sure the tag is not already on the page
- if(preg_match('/'.$template->regexTemplate().'/i',$this->getText()) || preg_match('/'.$template->regexTempIssues().'/i',$this->getText())){ return false; }
- //make sure the template's notif is not on the page
- if($template->regexNotif() != false) {if(preg_match("/".$template->regexNotif()."/i",$this->getText())){return false;}}
- //check if we want to add the tag below a section
- if($section)
- {
- //does the section exist?
- if(preg_match ("/== ?".$section." ?==/i",$this->text))
- {
- //then add the tag
- $matches = preg_match ("/== ?".$section." ?==/i",$this->getText());
- $pieces = preg_split("/== ?".$section." ?==/i",$this->getText());
- $this->text = $pieces[0]."==".$matches[1]."==\n".$template->getPost()." ".$pieces[1];
- }
- else // else we can just make the section
- {
- $this->text = "==".$section."==\n".$template->getPost()."\n" .$this->getText();
- }
- }
- else// else just add it to the top of the page
- {
- $this->text = $template->getPost()."\n" .$this->getText();
- }
- // add to the summary for the edit
- $this->addSummary("Adding {{".$template->getName()."}}",true);
- }
-
- //passed $config['mitag']['TEMPLATECODE'] (i.e. orphan)
- public function removeTag($template)
- {
- $this->removeRegex('/'.$template->regexTemplate().'/i',"Removing {{".$template->getName()."}}");
- }
-
- //remove the regex match from the page
- //if summary is set then add to edit summary
- public function removeRegex($regex,$summary = null)
- {
- if(preg_match($regex,$this->getText()))//make sure the regex is actually there
- {//if it is remove and say so
- $this->setText(preg_replace($regex,"",preg_replace($regex,"",$this->getText())));
- if($summary != null)
- {//if summary not null then we can add a summary
- $this->addSummary($summary,true);
- }
- }
- }
-
- //checks if a page is a sandbox
- public function isSandbox()
- {
- global $config;
- //check for each sandbox defined
- foreach($config['sandbox'] as $sandbox)
- {
- //if we hit one of our sandboxes
- if($sandbox['name'] == $this->getName())
- {
- return true;
- }
- }
- }
-
- //restores the header of a sandbox
- public function restoreHeader()
- {
- global $config;
- $sandbox = $config['sandbox'][$this->getName()];
- //get the shouldbe header
- $shouldbe = $this->wiki->getpage($sandbox['name'],$sandbox['id']);
- //If the required header is not at the top of the page
- if(!preg_match('/^'.preg_quote($shouldbe).'/s',$this->getText()))
- {
- //Post it to the top removing any other match of it
- $this->setText($shouldbe."\n".preg_replace('/'.preg_quote($shouldbe).'/is',"",$this->getText()));
- $this->addSummary("Restoring sandbox header",true);
- return true;
- }
- }
-
- //parse MI tag, add tags to MI, remove MI if not needed
- public function multipleIssues()
- {
- global $config;
- $this->runMI = $this->runMI + 1;
- $removed = 0;
- $hat = "";//for storing nay hat notes in
- $mi = "";//this will be used to store what we want to add to the page
- $hadMI = null;//did we have MI tag before this check?
- //parse the page
- $this->parse(); // work with $this->parsed;
- //for each template on the page
- foreach($this->parsed['wikObject_templates'] as $x)
- {
- //make sure the template is not surrounded by comment tags
- if(!preg_match('/<!--.*?'.preg_quote($x->rawCode,'/').'.*?-->/is',$this->getText()))
- {
-
- //does it match the MI template
- if(preg_match('/^(Multiple issues|Article issues|Issues|MI|Many Issues|Multiple|Multipleissues)$/i',$x->name))
- {
- //Update hadMI if not already set
- if($this->hadMI === null){$this->hadMI = true;}
-
- //IS the MI tag empty?
- if(preg_match('/\{\{(Multiple issues|Article issues|Issues|MI|Many Issues|Multiple|Multipleissues)\|?\s*?\}\}/is',$x->rawCode))
- {
- //remove and stop
- $this->text = preg_replace('/\{\{(Multiple issues|Article issues|Issues|MI|Many Issues|Multiple|Multipleissues)\|?\s*?\}\}/is',"",$this->getText());
- if($this->hadMI === true && $this->runMI == 2)
- {
- $this->addSummary("Removing {{Multiple issues}}",true);
- }
- return null;
- }
- else//else we must be a new MI style (or a mixture of both)
- {
- //the parse accordingly
- foreach($x->arguments as $tagarg)
- {
- if(!preg_match('/\{/',$tagarg))//if the arg is old style, add it correctly
- {
- //if the tagarg doesnt have a date i.e. 'expert = canada' we need to get the date in the 'date = <DATE>' parameter
- if(!preg_match('/(.*?) ?= ?((January|February|March|April|May|June|July|August|September|October|November|December) ?20[0-9][0-9])/i',$tagarg))
- {
- //add it with a <DATE> placeholder
- $mi = $mi."{{".trim(preg_replace('/ ?= ?/','|',$tagarg))."|date=<DATE>}}\n";
- }
- else//else it does have a date
- {
- //if it is not just a date
- if(!preg_match('/^date ?= ?/i',$tagarg))
- {
- //add it normally
- $mi = $mi."{{".trim(preg_replace('/ ?= ?/','|date=',$tagarg))."}}\n";
- }
- //else it must be a date
- else
- {
- //dont add the date but replace <DATE> in the string with the date
- $mi = str_replace("<DATE>",trim(preg_replace('/date ?= ?/','',$tagarg)),$mi);
- }
- }
- }
- else
- {
- //just add it
- //After a lot of research and testing it turns out the MI tag is allowed 1 parameter with templates in
- //Although this can be in the same MI tag as non template 'old style' paramemters
- $mi = $mi.$tagarg;
- }
- }
-
- $removed = $removed + $x->attributes['length'];
- $this->text = str_replace($x->rawCode,'',$this->getText());
- }
- $mi = preg_replace("/\n/","",$mi);//get rid of new lines
- }
- //else do we match any hatnotes
- elseif(preg_match('/^(Template:)?(Hatnote|Reflink|Main(( |_)list)?|Details3?|See( |_)also2?|Further2?|About|Other( |_)uses-section|For|((Two|Three) )?Other( |_)uses|Other uses of|Redirect[0-1]?[0-9]|Redirect(-|_| )(synomym|text|distinguish2?)|Consider( |_)disambiguation|Other( |_)(uses|people|places|hurricanes|ships|)[1-5]?|(Redirect-)?Distinguish|Selfref|Category( |_)(see also|explanation|pair)|Cat( |_)main|cat(preceding|succeeding)|contrast|This( |_)user( |_)talk)/i',$x->name))
- {
- //make sure the hat note is not under a section
- if(!preg_match('/\n==.*?{{'.$x->name.'/is',$this->getText()))
- {
- if(strlen($x->rawCode) < 4)
- {
- //skip page
- $this->skip = true;
- }
- //remember our hatnotes
- $hat = $hat.$x->rawCode."\n";
- //remove the hatnote matched (we will re add later)
- $removed = $removed + $x->attributes['length'];
- $this->text = str_replace($x->rawCode,'',$this->getText());
- }
- }
- else// else if we match a tag to go in MI
- {
- //check for all of our defined tags
- foreach($config['mitag'] as $tag)
- {
- //if it is one of our tags
- if(preg_match("/^".$tag->regexName()."$/i",$x->name) == true)
- {
- //if we have a section param ignore the tag
- if(preg_match("/\|(sections|sect?)/i",$x->rawCode) == false)
- {
- //remove the tag from page and add to our output
- $mi = $mi.$x->rawCode;
- $this->text = substr_replace($this->getText(),"",$x->attributes['start']-$removed-1,$x->attributes['length']);
- $removed = $removed + $x->attributes['length'];
- }
- }
- }
- }
- }
- }
-
- //Update hadMI if not already set
- if($this->hadMI === null){$this->hadMI = false;}
-
- //crappy way to make sure we split at every tag
- $mi = preg_replace('/\}\}/',"}}\n",$mi);
- //split into each tag (might be joined if from MI)
- $split = preg_split("/\n/",$mi,0,PREG_SPLIT_NO_EMPTY);
- //If there is at least 2 tags
- if(count($split) > 1)
- {
- //add them to a MI tag
- $mi = "{{Multiple issues|\n";//start mi
- foreach ($split as $tag)
- {
- //Check the exact same thing is not already there
- if(!strstr($mi,$tag))
- {
- //TODO: now we want to make sure the same thing is not there just with a different date (also account for redirects)
- $mi = $mi.$tag."\n";//add each tag
- }
- }
- $mi = $mi."}}";//add the end of the tag
- if($this->hadMI === false && $this->runMI == 2)
- {
- //if we have already had a sig change or we have more than 2 tags
- if($this->sigchange == true || count($split) > 2)
- {
- //we can edit
- $this->addSummary("Adding {{Multiple issues}}",true);
- }
- }
- }
- //if only 1 we dont want to use multiple issues
- elseif(count($split) == 1)
- {
- //just add the single tag
- $mi = $split[0];
- if($this->hadMI === true && $this->runMI == 2)
- {
- $this->addSummary("Removing {{Multiple issues}}",true);
- }
-
- }
- //add to origional text with any hatnotes
- $this->text = $hat.$mi."\n".$this->getText();
- }
-
- //parse MI tag, add tags to MI, remove MI if not needed
- public function multipleIssuesDupes()
- {
- global $config;
- //parse the page
- $mi['name'] = Array();
- $mi['date'] = Array();
- $mi['params'] = Array();
- $this->parse(); // work with $this->parsed;
- //for each template on the page
- foreach($this->parsed['wikObject_templates'] as $x)
- {
- //make sure the template is not surrounded by comment tags
- if(!preg_match('/<!--.*?'.preg_quote($x->rawCode,'/').'.*?-->/is',$this->getText()))
- {
- //does it match the MI template
- if(preg_match('/^(Multiple issues|Article issues|Issues|MI|Many Issues|Multiple|Multipleissues)/i',$x->name))
- {
- $c = 0;
- //the parse accordingly
- foreach($x->arguments as $tagarg)
- {
- $each = explode("\n",$tagarg);
- foreach ($each as $tag)
- {
- if(preg_match('/\{\{([^\|]+)(.*?)\|date ?= ?((January|February|March|April|May|June|July|August|September|October|November|December) ?20[0-9][0-9])(.*?)\}\}/i',$tag,$matches))
- {
- //if its not already in the arry or it matches a template to ignore
- if(!in_array($matches[1],$mi['name']) || preg_match('/((cleanup-)?expert(_| |-|)(attention|subject|article|portal|verify|))/i',$matches[1]))
- {
- //add it
- $mi['name'][$c] = $matches[1];
- $mi['date'][$c] = $matches[3];
- $mi['params'][$c] = $matches[2].$matches[5];
- $c++;
- }
- else
- {
- $search = array_search($matches[1],$mi['name']);
- //find which date is oldest and keep it
- if(strtotime($matches[3]) < strtotime($mi['date'][$search]))
- {
- //change the date
- $mi['date'][$search] = $matches[3];
- //and if no parameters were given before
- if($mi['params'][$search] == "")
- {
- //give ours
- $mi['params'][$search] = $matches[2].$matches[5];
- }
- }
- }
- }
- //else it doesnt match a nice date format so just ignore it for now
- else
- {
- //add it
- $mi['name'][$c] = $matches[1];
- $mi['date'][$c] = $matches[3];
- $mi['params'][$c] = $matches[2].$matches[5];
- $c++;
- }
- }
- }
- //find our new MI tag
- $torep = "{{Multiple issues|\n";
- foreach($mi['name'] as $key => $name)
- {
- $torep.= "{{".$mi['name'][$key].$mi['params'][$key]."|date=".$mi['date'][$key]."}}\n";
- }
- $torep .= "}}";
- //replace the old with the new
- if(strlen($x->rawCode)-10 > strlen($torep))
- {
- $this->setText(str_replace($x->rawCode,$torep,$this->getText()));
- $this->addSummary("Removing Duplicate tags",true);
- }
- }
- }
- }
- }
-
- //http://en.wikipedia.org/w/index.php?title=Wikipedia:AutoEd/whitespace.js&action=raw&ctype=text/javascript
- public function fixWhitespace()
- {
- $this->text = preg_replace('/(\n\n)\n+/',"$1", $this->getText() );
- //remove leading white space
- $this->text = preg_replace('/^(\n|\r){0,5}/',"", $this->getText() );
- }
-
- public function fixTemplates()
- {
- $this->text = preg_replace( '/\{\{(?:Template:)?(Dab|Disamb|Disambiguation)\}\}/iS', "{{Disambig}}", $this->text );
- $this->text = preg_replace( '/\{\{(?:Template:)?(Bio-dab|Hndisambig)/iS', "{{Hndis", $this->text );
- $this->text = preg_replace( '/\{\{(?:Template:)?(Prettytable|Prettytable100)\}\}/iS', "{{subst:Prettytable}}", $this->text );
- $this->text = preg_replace( '/\{\{(?:[Tt]emplate:)?((?:BASE)?PAGENAMEE?\}\}|[Ll]ived\||[Bb]io-cats\|)/iS', "{{subst:$1", $this->text );
- $this->text = preg_replace( '/({{\s*[Aa]rticle ?issues\s*(?:\|[^{}]*|\|)\s*[Dd]o-attempt\s*=\s*)[^{}\|]+\|\s*att\s*=\s*([^{}\|]+)(?=\||}})/iS', "$1$2", $this->text );
- $this->text = preg_replace( '/({{\s*[Aa]rticle ?issues\s*(?:\|[^{}]*|\|)\s*[Cc]opyedit\s*)for\s*=\s*[^{}\|]+\|\s*date(\s*=[^{}\|]+)(?=\||}})/iS', "$1$2", $this->text );
- $this->text = preg_replace( '/\{\{[Aa]rticle ?issues(?:\s*\|\s*(?:section|article)\s*=\s*[Yy])?\s*\}\}/iS', "", $this->text );
- $this->text = preg_replace( '/\{\{[Cc]ommons\|\s*[Cc]ategory:\s*([^{}]+?)\s*\}\}/iS', "{{Commons category|$1}}", $this->text );
- $this->text = preg_replace( '/(?!{{[Cc]ite wikisource)(\{\{\s*(?:[Cc]it[ae]|[Aa]rticle ?issues)[^{}]*)\|\s*(\}\}|\|)/iS', "$1$2", $this->text );
- $this->text = preg_replace( '/({{\s*[Aa]rticle ?issues[^{}]*\|\s*)(\w+)\s*=\s*([^\|}{]+?)\s*\|((?:[^{}]*?\|)?\s*)\2(\s*=\s*)\3(\s*(\||\}\}))/iS', "$1$4$2$5$3$6", $this->text );
- $this->text = preg_replace( '/(\{\{\s*[Aa]rticle ?issues[^{}]*\|\s*)(\w+)(\s*=\s*[^\|}{]+(?:\|[^{}]+?)?)\|\s*\2\s*=\s*(\||\}\})/iS', "$1$2$3$4", $this->text );
- $this->text = preg_replace( '/(\{\{\s*[Aa]rticle ?issues[^{}]*\|\s*)(\w+)\s*=\s*\|\s*((?:[^{}]+?\|)?\s*\2\s*=\s*[^\|}{\s])/iS', "$1$3", $this->text );
- $this->text = preg_replace( '/{{\s*(?:[Cc]n|[Ff]act|[Pp]roveit|[Cc]iteneeded|[Uu]ncited)(?=\s*[\|}])/S', "{{Citation needed", $this->text );
- }
-
- public function fixDateTags()
- {
- global $config;
- //get a copy of the text to change
- $text = $this->getText();
- //get the current month and year
- $date = date("F Y");
- $month = date("F");
-
- //check each tag we have to see if it needs to be dated
- foreach ($config['mitag'] as $tag)
- {
- //if the tag can be found without a date
- if($this->matches('/\{\{(Template:)?'.$tag->regexName().'/i'))
- {
- //date tags with no args at all
- $text = preg_replace('/\{\{(Template:)?'.$tag->regexName().'\}\}/i',"{{".$tag->getName()."|date=$date}}",$text);
- //$this->setText(preg_replace('/\{\{(Template:)?'.$tag->regexName().'([^}]*?)\}\}/is',"{{".$tag->getName()."|date=$date$3}}",$text));
- }
- }
-
- //If a tag has been dated
- if(strlen($text) > strlen($this->getText())+5)
- {
- $this->text = $text;
- echo "+";
- $this->addSummary("Dating Tags",true);
- }
- }
-
- public function fixSectionTags()
- {
- global $config;
- //check each tag we have to see if it needs to be dated
- foreach ($config['mitag'] as $tag)
- {
- //if it matches something that can be under a section and not matter
- if(!preg_match('/(unreferenced|emptysection|refimprove|unsourced|footnotes|uncategorized)/i',$tag->getName()))
- {
- //if the tag can be found below a section
- if($this->matches('/(==.*?)\{\{(Template:)?'.$tag->regexName().'(?!sect(ions?)?)[^}]*?\}\}/is'))//todo
- {
- //add the section parameter to the template if it isnt there already
- $this->setText(preg_replace('/(==.*?)\{\{(Template:)?'.$tag->regexName().'((?!sect(ions?)?)[^}]*?)\}\}/is',"$1{{".$tag->getName()."$4|section}}",$this->getText()));
- }
- }
- }
- }
-
- public function fixGeneral()
- {
- global $config;
- //Fix headers
- //TODO FIX THESE
- //$this->text = preg_replace('/== ?External ?links? ?==/i', "==External links==", $this->text );
- //$this->text = preg_replace('/== ?Further ?readings? ?==/i', "==Further reading==", $this->text );
- //Templates
- $this->text = preg_replace('/\{\{'.$config['mitag']['unreferenced']->regexName().'\|section\}\}/i', "{{Unreferenced section}}", $this->text );
-
- if(!$this->matches('/\{\{reflist/i'))
- {$this->text = preg_replace('/<references ?\/>/i',"{{reflist}}", $this->text );}
-
- }
-
- public function preChecks()
- {
- $this->text = str_ireplace("<!-- Automatically added by User:SoxBot. If this is an error, please contact User:Soxred93 -->","",$this->text);
- $this->text = preg_replace("/<!-- ?interwikis?( links?)? ?-->/i","",$this->text);
- }
- }
-
- ?>