addbot /classes/page.php

Language PHP Lines 882
MD5 Hash c65eab7c9d3396ae75775a070cc6b1b9
Repository https://github.com/Dispositif/addbot.git View Raw File
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
<?
require 'parser.php';

class Page {

	// construct the page (you probably want to call load after this)
	// Build a Page for the given title and immediately load its wikitext.
	// $page: page title; underscores are converted to spaces.
	// $wiki: wiki API object; loadText() calls its getpage() method.
	public function __construct($page,$wiki) {
		$this->page = str_replace("_"," ",$page);
		$this->wiki = $wiki;
		$this->parseNamespace();
		$this->loadText();//load the wikitext from page
		//was a bare "$this->hadMI;" expression (a no-op); null means "not checked yet",
		//which is what multipleIssues() tests for with === null
		$this->hadMI = null;
		$this->runMI = 0; //the number of time MI has been run
		$this->skip = false;
	}
	
	// variables
	private $page;// page name with underscores replaced by spaces (e.g. "User:Addshore")
	private $text;// current page text; modified in place by the tagging/fixing methods
	private $checktext; //copy of the loaded text with <nowiki> blocks and <!-- --> comments removed, used for checks only
	private $namespace;// page namespace (No colon); "" means the article namespace
	private $wiki;// instance of wiki we are using
	private $parser;// instance of the parser.php class
	private $parsed;// value returned by the last parse() call
	private $sigchange;//has a significant change happened to the page (enough to edit)?
	private $summary;//summary if edited (built up by addSummary)
	private $hadMI; //did the page have an MI tag when we loaded it (null until multipleIssues has looked)
	private $runMI; //the number of times MI has been run
	public $skip; //have we found something that means we should skip?
	
	// getters and setters
	// Page title as stored by the constructor.
	public function getName()
	{
		return $this->page;
	}

	// Current working copy of the page text.
	public function getText()
	{
		return $this->text;
	}

	// Copy of the loaded text with nowiki blocks and comments stripped.
	public function getcheckText()
	{
		return $this->checktext;
	}

	// Replace the working copy of the page text.
	public function setText($text)
	{
		$this->text = $text;
	}

	// Namespace of the page; computed on first use if it has not been set yet.
	public function getNamespace()
	{
		if(isset($this->namespace))
		{
			return $this->namespace;
		}
		$this->parseNamespace();
		return $this->namespace;
	}

	// Whether a change significant enough to justify an edit has been recorded.
	public function hasSigchange()
	{
		return $this->sigchange;
	}
	
	// public functions
	// create instance of parser class and parse
	// Run a fresh parser over the current title and text.
	// Keeps the parser in $this->parser, its output in $this->parsed, and returns that output.
	public function parse()
	{
		$fresh = new parser($this->getName(),$this->getText());
		$this->parser = $fresh;
		$this->parsed = $this->parser->parse();
		return $this->parsed;
	}
	
	// private functions
	// Fetch the page from the wiki and keep two copies: the raw text, and a "check"
	// copy with <nowiki>...</nowiki> blocks and <!-- --> comments removed.
	private function loadText()
	{
		$fetched = $this->wiki->getpage($this->getName());
		$stripped = preg_replace("/(<nowiki>.*?<\/nowiki>|<!--.*?-->)/is","",$fetched);
		$this->text = $fetched;
		$this->checktext = $stripped;
	}
	// Save the working text back to the wiki under the assembled edit summary.
	private function postPage()
	{
		$title = $this->getName();
		$body = $this->getText();
		$note = $this->getSummary();
		$this->wiki->edit($title,$body,$note,true);
	}
	// Derive the namespace from the page title and store it (without the colon) in
	// $this->namespace. Titles without a recognised "Namespace:" prefix get "" (article
	// namespace). "Image" is normalised to "File".
	private function parseNamespace()
	{
		// The colon is required. It used to be optional (":?"), so ordinary articles whose
		// title merely starts with a namespace word ("Portal (video game)", "Talking Heads",
		// "Book of ...") were classified as non-article pages.
		$result = preg_match("/^((Talk|User|Wikipedia|File|Image|Mediawiki|Template|Help|Category|Portal|Book|Education( |_)program|TimedText)(( |_)talk)?):/i",$this->page,$matches);
		if($result !== 1){ $this->namespace = "";}// default to article namespace (also on regex failure)
		else{$this->namespace = $matches[1];}
		if($this->namespace == "Image"){ $this->namespace = "File";}// default Image namespace to file
	}
	
	// Add a fragment to the edit summary.
	// $sum: fragment such as "Adding {{orphan}}". Its first word (Adding, Removing, Dating...)
	//       is the grouping key: if that word is already in the summary, the new fragment is
	//       spliced in where the word occurs instead of being appended.
	// $sig: when truthy, the page is flagged as having a significant change.
	public function addSummary($sum,$sig)
	{
		$current = (string)$this->summary;
		//split the summary
		$split = explode(" ",$sum,2);
		//quote the first word so characters like ( [ / cannot break or alter the regex
		$verb = '/'.preg_quote($split[0],'/').'/i';
		//if there is no first word, or we CANNOT find it already in the summary
		if($split[0] === "" || !preg_match($verb,$current))
		{
			//just add the summary
			$this->summary = $current.$sum." ";
		}
		//else the first bit is already there so we want to insert our second bit
		else
		{
			//replace the first bit with the whole fragment (a sort of insert);
			//escape \ and $ so the fragment is inserted literally rather than being
			//interpreted as backreferences by preg_replace
			$this->summary = preg_replace($verb,addcslashes($sum,'\\$'),$current);
		}
		//if we want this to be a sig change say so
		if($sig)
		{
			$this->sigchange = true;
		}
	}
	
	//returns the edit summary
	// Full edit summary: bot prefix, the accumulated fragments, then the error-report link.
	public function getSummary()
	{
		$prefix = "[[User:Addbot|Bot:]] ";
		$suffix = "([[User talk:Addbot|Report Errors]])";
		return $prefix.$this->summary.$suffix;
	}
	
//	                  //
// Main bot functions //
//                    //

	//make matching easier
	// Apply $regex to the current page text and hand back preg_match()'s result.
	public function matches($regex)
	{
		$subject = $this->getText();
		return preg_match($regex,$subject);
	}
	
	//return a restricted estimate of words in an article
	// Estimate the number of prose words on the page.
	// Sections headed External links / References / Notes / See also / Bibliography are
	// dropped, then templates, headings, list bullets, external links, category/file/
	// interwiki links, refs, comments and tables are stripped, and every remaining
	// [[link]] without a colon counts as one word. Returns str_word_count() of what is left.
	public function wordcount()
	{
		//get a temp copy of the text to work with
		$text = $this->getText();
		//remove everything in sections that we dont want
		preg_match_all('/(={2,7})([^=]+)\1/',$text,$sections);
		foreach($sections[0] as $key => $header)
		{
			//only sections we dont want are removed
			if(!preg_match('/(External links?|References?|Notes?|See also|Bibliography)/i',$sections[2][$key]))
			{
				continue;
			}
			if(isset($sections[0][$key+1]))
			{
				//Remove from this header up to (not including) the next header. Both headers are
				//quoted with the "/" delimiter so a heading such as "== Notes/References ==" cannot
				//break the pattern, and the lookahead leaves the next header in place so that a
				//following unwanted section can still be found and removed on its own turn.
				$pattern = '/'.preg_quote($header,'/').'.*?(?='.preg_quote($sections[0][$key+1],'/').')/is';
				$text = preg_replace($pattern,"",$text);
			}
			else
			{
				//last section: keep only what comes before its header
				$parts = explode($header,$text);
				$text = $parts[0];
			}
		}
		//remove templates, cats, interwikis and extlinks and refs
		$text = preg_replace("/(\{\{[^\}]*?\}\}|={1,6}[^=]*?={1,6}|\n\*{1,2} ?|\[https?[^\]]*?\]|\[\[(Category|Image|File|[a-z]{2,6}):[^\]]*?\]\]|\<references ?\/\>|<ref>.*?<\/ref>|<!--.*?-->|\{\|.*?\|-.*?\|.*?\|})/is","",$text);
		//fill all links in with a single word
		$text = preg_replace("/\[\[([^]:]*)\]\]/","WORD",$text);
		$text = trim($text);
		//return
		return str_word_count($text);
	}
	
		//return a restricted estimate of words in an article
	// Word estimate for the lead only: everything before the first heading, with the
	// same markup stripping as wordcount() and each plain [[link]] counted as one word.
	public function wordcountlead()
	{
		//everything before the first heading is the lead
		$pieces = preg_split('/(={2,7})([^=]+)\1/',$this->getText());
		$lead = $pieces[0];

		//strip templates, cats, interwikis, extlinks, refs, comments and tables
		$lead = preg_replace("/(\{\{[^\}]*?\}\}|={1,6}[^=]*?={1,6}|\n\*{1,2} ?|\[https?[^\]]*?\]|\[\[(Category|Image|File|[a-z]{2,6}):[^\]]*?\]\]|\<references ?\/\>|<ref>.*?<\/ref>|<!--.*?-->|\{\|.*?\|-.*?\|.*?\|})/is","",$lead);
		//each remaining link stands for one word
		$lead = preg_replace("/\[\[([^]:]*)\]\]/","WORD",$lead);

		return str_word_count(trim($lead));
	}
	
	//returns if the page is a redirect or not
	// True when the text contains a #REDIRECT [[...]] or a {{Soft redirect|...}}-style
	// template, false otherwise (previously fell off the end and returned null).
	public function isRedirect()
	{
		return $this->matches('/(# ?REDIRECT ?\[\[.*?\]\]|\{\{Soft ?(redir(ect)?|link)\|)/i') === 1;
	}
	
	// returns false if the largest section size is smaller than 5000 chars (excluding certain sections)
	// Decide whether the page needs a {{sections}}-style tag.
	// Uses $config['Sections']: remsections / remlargest (enough sections and none too large:
	// returns false), addsections / addlargest (few sections and one very large: returns true)
	// and addleadonly (lead-only page whose wordcount() reaches the limit: returns true).
	// Returns null when none of the rules apply.
	public function needsSections()
	{
		global $config;
		//was never assigned, so the section split always ran on an undefined variable
		$text = $this->getText();
		//init some vars
		$largestsection = 0;
		$sectioncount = 0;
		//find the sections
		preg_match_all('/\n==(=)? ?.* ?===?/i',$text, $sections, PREG_PATTERN_ORDER);
		$split = preg_split('/\n==(=)? ?.* ?===?/i',$text);

		//for each section found
		foreach($split as $id => $section){
			//if we are the lead
			if($id == 0){
				$largestsection = strlen($section);
				$sectioncount++;
				continue;
			}
			//else we must have a heading; $sections[0][$id-1] is the heading that precedes this piece
			//sections with these headings are neither measured nor counted
			if (preg_match('/See ?also|(external( links)?|references|notes|bibliography|further( reading)?)/i',$sections[0][$id-1]) != 0){
				continue;
			}
			//if the length of this section is longer than our current largest then set it
			if(strlen($section) > $largestsection){
				$largestsection = strlen($section);
			}
			//increment the section count
			$sectioncount++;
		}
		//enough sections and the largest is small enough: tag not needed
		if($sectioncount >= $config['Sections']['remsections'] && $largestsection <= $config['Sections']['remlargest']){//was 2750 for AVG
			return false;
		}
		//few sections and a very large one: tag needed.
		//The two config keys were swapped (count compared with addlargest, size with addsections).
		if($sectioncount <= $config['Sections']['addsections']/*2*/ && $largestsection >= $config['Sections']['addlargest']/*10000*/){//was 2750 for AVG
			return true;
		}
		//only a lead, but a long one ("==" here; it used to be an assignment)
		if ($sectioncount == 1 && $this->wordcount() >= $config['Sections']['addleadonly']/*1000*/){
			return true;
		}
		return null;
	}
	
	//returns the number of references that we have
	// Count <ref ...> and </ref> tags in the page text, after removing comments that
	// contain a ref. Both opening and closing tags are counted; tags whose attributes
	// contain a "/" (e.g. self-closing <ref name="x"/>) are not matched by the pattern.
	// Returns the number of tags found, or null when there are none.
	public function isReferenced()
	{
		$temp = $this->getText();
		//remove all ref tags in comments
		$temp = preg_replace('/<!--[^(-->)]*?(<\/?ref[^\/]*?>.*?<\/ref>).*?-->/is',"",$temp);
		//if we match a ref tag after the ones in comments have been ignored
		if(preg_match_all('/<\/?ref[^\/]*?>/is',$temp,$matches))
		{
			//$matches is indexed by capture group, so count($matches) was always 1;
			//the full matches live in $matches[0]
			return count($matches[0]);
		}
		return null;
	}
	
	//checks if a page is in a BLP category 
	// Guess from the page's categories whether it is a biography of a living person.
	// Returns false when a category shows the subject is dead or was born more than
	// $config['BLP']['presumedead'] years ago, true when a "Living people" or
	// "<year> births" category is present, and null when the categories say nothing.
	public function isBLP()
	{
		//needed for the presumed-dead threshold below; without this line $config was an
		//undefined local, the threshold was 0 and every "births" category returned false
		global $config;
		$cats = $this->wiki->categories($this->getName());
		foreach ($cats as $cat)
		{
			//Regex to match cats that are definitely NOT BLP
			//("of death" needs the space before it to match e.g. "Year of death missing")
			if(preg_match('/^Category:(Dead people$|[0-9]{0,4}(s BC)? deaths$|(place|year|date) ?of death (missing|unknown))/i',$cat))
			{
				return false;
			}
			//check if we were born too long ago (return false if we are) per [[Wikipedia:Blp#Recently_dead_or_probably_dead]]
			if(preg_match("/Category:([0-9]{0,4}) births/i",$cat,$matches))
			{
				if($matches[1] < date("Y")-$config['BLP']['presumedead'])
				{
					return false;
				}
			}
		}
		foreach ($cats as $cat)
		{
			//If we are still running see if there is a BLP cat
			if (preg_match('/^Category:(Living people$|[0-9]{0,4} births$)/i',$cat))
			{
				return true;
			}
		}
		//Otherwise we cant guess
		return null;
	}
	
	//returns true if page is in a given category
	// Ask the wiki for this page's categories and report whether $category
	// (compared with ==) is among them.
	public function inCategory($category)
	{
		$found = false;
		foreach ($this->wiki->categories($this->getName()) as $candidate)
		{
			if ($candidate == $category)
			{
				$found = true;
				break;
			}
		}
		return $found;
	}
	
	// Remove interwiki links that Wikidata already provides, and log the ones left over.
	// Does nothing when the page carries {{noexternallanglinks}} or when the sitelink
	// lookup does not return exactly one entity. Each "\n[[lang:Title]]" line that matches
	// a Wikidata sitelink is removed from the text and counted for the edit summary.
	// Leftover interwiki links are written to the 'wikidata' log only when the page has
	// no Wikidata item id.
	public function interwikilinks()
	{
		//Make sure wikidata is used
		if(preg_match('/\{\{noexternallanglinks\}\}/',$this->getText()))
		{
			return;
		}
		//get the other links
		$r = $this->wiki->wikidatasitelinks($this->getName());
		//only continue if there is exactly 1 entity (i.e. the wikidata stuff isnt broken somewhere)
		if(!is_array($r) || count($r) != 1)
		{
			return;
		}
		$counter = 0;
		$id = "";

		foreach($r as $ent)
		{
			//an entity without an id means there is no item for this page yet
			$id = isset($ent['id']) ? $ent['id'] : "";
			//Check if we have site links
			if(!isset($ent['sitelinks']))
			{
				continue;
			}
			//for each sitelink in the entity
			foreach ($ent['sitelinks'] as $l)
			{
				$lang = str_replace("_","-",str_replace("wiki","",$l['site']));
				$link = "\n[[".$lang.":".$l['title']."]]";
				if(preg_match('/'.preg_quote($link,'/').'/',$this->getText()))
				{
					//remove the link
					$this->setText(str_replace($link,"",$this->getText()));
					//increment the counter
					$counter++;
				}
			}
		}
		if($counter > 0)
		{
			//the two original branches produced the same text; use the singular for one link
			$noun = ($counter == 1) ? "link" : "links";
			$this->addSummary("Migrating $counter interwiki $noun, now provided by [[Wikipedia:Wikidata|Wikidata]] on [[d:$id]]",true);
		}

		//Now we want to log any links left over
		$leftover = preg_match_all('/\[\[(nostalgia|ten|test|aa|ab|ace|af|ak|als|am|an|ang|ar|arc|arz|as|ast|av|ay|az|ba|bar|bat-smg|bcl|be|be-x-old|bg|bh|bi|bjn|bm|bn|bo|bpy|br|bs|bug|bxr|ca|cbk-zam|cdo|ce|ceb|ch|cho|chr|chy|ckb|co|cr|crh|cs|csb|cu|cv|cy|da|de|diq|dsb|dv|dz|ee|el|eml|en|eo|es|et|eu|ext|fa|ff|fi|fiu-vro|fj|fo|fr|frp|frr|fur|fy|ga|gag|gan|gd|gl|glk|gn|got|gu|gv|ha|hak|haw|he|hi|hif|ho|hr|hsb|ht|hu|hy|hz|ia|id|ie|ig|ii|ik|ilo|io|is|it|iu|ja|jbo|jv|ka|kaa|kab|kbd|kg|ki|kj|kk|kl|km|kn|ko|koi|kr|krc|ks|ksh|ku|kv|kw|ky|la|lad|lb|lbe|lez|lg|li|lij|lmo|ln|lo|lt|ltg|lv|map-bms|mdf|mg|mh|mhr|mi|min|mk|ml|mn|mo|mr|mrj|ms|mt|mus|mwl|my|myv|mzn|na|nah|nap|nds|nds-nl|ne|new|ng|nl|nn|no|nov|nrm|nso|nv|ny|oc|om|or|os|pa|pag|pam|pap|pcd|pdc|pfl|pi|pih|pl|pms|pnb|pnt|ps|pt|qu|rm|rmy|rn|ro|roa-rup|roa-tara|ru|rue|rw|sa|sah|sc|scn|sco|sd|se|sg|sh|si|simple|sk|sl|sm|sn|so|sq|sr|srn|ss|st|stq|su|sv|sw|szl|ta|te|tet|tg|th|ti|tk|tl|tn|to|tpi|tr|ts|tt|tum|tw|ty|udm|ug|uk|ur|ve|vec|vep|vi|vls|vo|wa|war|wo|wuu|xal|xh|xmf|yi|yo|za|zea|zh|zh-classical|zh-min-nan|zh-yue|zu):([^\]]+)\]\]/i',$this->getText(),$matches);
		//Only go on if links really are left over. The old test was count($matches) > 0, which is
		//always true because $matches holds one array per capture group, so pages without an
		//item were logged even when they had no interwiki links at all.
		if(!$leftover)
		{
			return;
		}
		//NOTE(review): the original also had "if(count($matches) > 4){$needlog = true;}" right
		//before resetting $needlog to false, so it never had any effect. If logging pages with
		//more than 4 leftover links is wanted, set $needlog = ($leftover > 4) here.
		$needlog = false;
		$tolog = "";
		if($id == ""){$tolog .= "=== ".$this->getName()." [https://www.wikidata.org/wiki/Special:CreateItem UNSET] ===\n";$needlog = true;}
		else{$tolog .= "=== [[d:$id]] ===\n";}
		$tolog .= "* en is  [[".$this->getName()."]]\n";
		foreach($matches[0] as $key => $match)
		{
			$tolog .= "** ".$matches[1][$key]." ->  ".$matches[2][$key]." [http://www.wikidata.org/w/index.php?title=Special%3AItemByTitle&site=".urlencode($matches[1][$key])."wiki&page=".urlencode($matches[2][$key])." check]\n";
		}
		//Log
		if($needlog)
		{
			$this->logevent('wikidata',$tolog);
		}
	}

//TODO ADD THIS TO WIKI CLASS	
//Create log function
//This can be used to post output to User:Addbot/log/<PARAM>
//The new entry is appended to the end of the existing log page text
// Append $what to the bot's log page for $type (User:<user>/log/<configured name>).
// Nothing is written when the type has no entry in $config['Log'], when the existing
// log page text is 2 characters or shorter, or when the combined text would reach
// $config['Log']['wikidatamax'] characters. The first line of $what becomes the summary.
private function logevent ($type,$what)
{
	global $config,$wiki;
	//only log types that are configured
	if(!isset($config['Log'][$type]))
	{
		return;
	}
	$logpage = 'User:'.$config['user'].'/log/'.$config['Log'][$type];
	$existing = $wiki->getpage($logpage,null,true);// get previous page
	if(strlen($existing) <= 2)
	{
		return;
	}
	$combined = $existing."\n".$what;// add our stuff
	$lines = explode("\n",$what);
	if(strlen($combined) < $config['Log']['wikidatamax'])
	{
		$wiki->edit($logpage,$combined,"Adding ".$lines[0],true,true,null,true,$config['General']['maxlag']);// save the page
	}
}
	
	// returns true if there are 0 links to the page from the mainspace
	// returns false if there is at least 1 link that fits the criteria
	// Decide whether the page is an orphan, based on mainspace links pointing to it.
	// Returns true  when the number of links equals $config['Orphans']['maxlinks'] and the
	//               page itself is not a list/index/disambiguation/redirect/set index page;
	// returns false when at least one linking page is a "real" article (not a list, index,
	//               disambiguation or redirect) and this page is not a set index article;
	// returns null  otherwise.
	public function isOrphan()
	{
		global $config;
		//titles that never count: lists, indexes and disambiguation pages
		$skipname = "/((List|Index) of|\(disambig(uation)?\))/i";
		//page contents that never count: disambiguation text, redirects, dab/sia templates
		$skiptext = '/(may refer to ?\:|# ?REDIRECT|\{\{Soft ?(Redir(ect)?|link)|\{\{.*((dis(amb?(ig(uation( page)?)?)?)?)(\-cleanup)?|d(big|ab|mbox)|given( |_)name|sia|set index( articles)?)(\|([0-9a-zA-Z _]*?)( ?= ?[0-9a-zA-Z _]*?)){0,6}\}\})/i';
		//The set-index check is about THIS page, so its answer cannot change between links.
		//It is looked up at most once (it is a remote call) instead of once per inbound link.
		//NOTE(review): the in-loop check may have been meant for the linking page - confirm.
		$setindex = null;

		//get the links to the page
		$links = $this->wiki->whatlinkshere($this->getName(),"&blnamespace=0");
		//if there are no links (i.e. is orphan)
		if(count($links) == $config['Orphans']['maxlinks']/*0*/) {
			//check the tag is allowed on such a page
			if(preg_match($skipname,$this->getName()) == FALSE
				&& preg_match($skiptext,$this->getText()) == FALSE)
			{
				$setindex = $this->inCategory("Category:All set index articles");
				if(!$setindex)
				{
					return true;
				}
			}
		}
		//if there are links then check them
		foreach($links as $link){
			//regex names of links to ignore
			if(preg_match($skipname,$link))
			{
				continue;
			}
			//regex of contents of pages to ignore
			if(preg_match($skiptext,$this->wiki->getpage($link)))
			{
				continue;
			}
			//Make sure the page is not in cat "All set index articles"
			if($setindex === null)
			{
				$setindex = $this->inCategory("Category:All set index articles");
			}
			if(!$setindex)
			{
				//if we got this far it isnt an orphaned page
				return false;
			}
		}
		return null;
	}
	
	// If blue links are found it returns the number of blue links
	// returns true if 0 links are found
	// Count the article links in the check text (comments and nowiki already removed).
	// A link counts when its target (the part before any "|") is not in a listed
	// namespace and either has no colon or starts with ":" or "w:".
	// Returns true when nothing counts, otherwise the number of counted links.
	public function isDeadend()
	{
		preg_match_all('/\[\[([^]]+)\]\]/i',$this->getcheckText(), $found, PREG_PATTERN_ORDER);
		$bluelinks = 0;
		foreach($found[1] as $target){
			//piped link i.e. [[User:Addbot|Bot]]: keep the target, drop the label
			if(preg_match('/\|/',$target) != 0){
				$parts = preg_split('/\|/',$target);
				$target = $parts[0];
			}
			//links into other namespaces are ignored
			if (preg_match('/((Talk|User|Wikipedia|File|Image|Mediawiki|Template|Help|Category|Portal|Book|Education( |_)program|TimedText)(( |_)talk)?):/i',$target)){
				continue;
			}
			//anything else with a colon (interwiki etc) is ignored unless it is ":..." or "w:..."
			if(!preg_match('/(^:|^w:|^[^:]+$)/i',$target))
			{
				continue;
			}
			$bluelinks++;
		}
		return ($bluelinks == 0) ? true : $bluelinks;
	}
	
	// returns true is 0 categories are found
	// returns false if more than one is found
	// True when the page has no categories, or only "Proposed deletion" / stub categories.
	// False as soon as any other category is found.
	public function isUncat()
	{
		// get cats for this page
		$cats = $this->wiki->categories($this->getName(),false);

		//no cats at all then TRUE (IS UNCAT)
		if(count($cats) == 0)
		{
			return true;
		}
		foreach($cats as $cat)
		{
			//deletion and stub cats do not count as real categories
			if(preg_match('/^Category:(.*?Proposed (for )?deletion.*?|(|.*? )stubs$)/i',$cat))
			{
				continue;
			}
			//a real category: NOT UNCAT
			return false;
		}
		//If we haven't hit anything else then we must be uncat
		return true;
	}
	
	//return true if the page is appended by .pdf
	// True when the page name ends in ".pdf" (any case), false otherwise
	// (previously returned null for non-PDF names).
	public function isPdf()
	{
		return preg_match("/\.pdf$/i",$this->getName()) === 1;
	}
	
	//add the given template from the page if it doesnt already exist
	//passed $config['mitag']['TEMPLATECODE'] (i.e. orphan)
	// Add $template's tag to the page unless it (or its notification) is already there.
	// $template: tag object; this method calls regexTemplate(), regexTempIssues(),
	//            regexNotif(), getPost() and getName() on it.
	// $section:  optional section name. If a "== name ==" heading exists the tag is placed
	//            right under its first occurrence; otherwise a new section with the tag is
	//            put at the top of the page. Without $section the tag goes to the top.
	// Returns false when the tag was not added because it is already present.
	public function addTag($template,$section=null)
	{
		//make sure the tag is not already on the page
		if(preg_match('/'.$template->regexTemplate().'/i',$this->getText()) || preg_match('/'.$template->regexTempIssues().'/i',$this->getText())){ return false; }
		//make sure the template's notif is not on the page
		if($template->regexNotif() != false) {if(preg_match("/".$template->regexNotif()."/i",$this->getText())){return false;}}
		//check if we want to add the tag below a section
		if($section)
		{
			//the section name is literal text, so quote it before putting it in a regex
			$heading = "/== ?".preg_quote($section,'/')." ?==/i";
			//does the section exist?
			if(preg_match($heading,$this->getText(),$found))
			{
				//Split around the first heading only and put the heading back exactly as it was
				//written. The old code read $matches[1] from preg_match's integer return value,
				//which produced an empty "====" heading, and an unlimited split threw away
				//everything after a second matching heading.
				$pieces = preg_split($heading,$this->getText(),2);
				$this->text = $pieces[0].$found[0]."\n".$template->getPost()." ".$pieces[1];
			}
			else // else we can just make the section
			{
				$this->text = "==".$section."==\n".$template->getPost()."\n" .$this->getText();
			}
		}
		else// else just add it to the top of the page
		{
			$this->text = $template->getPost()."\n" .$this->getText();
		}
		// add to the summary for the edit
		$this->addSummary("Adding {{".$template->getName()."}}",true);
	}
	
	//passed $config['mitag']['TEMPLATECODE'] (i.e. orphan)
	// Strip every match of the template's regex from the page and note it in the summary.
	public function removeTag($template)
	{
		$pattern = '/'.$template->regexTemplate().'/i';
		$note = "Removing {{".$template->getName()."}}";
		$this->removeRegex($pattern,$note);
	}
	
	//remove the regex match from the page
	//if summary is set then add to edit summary
	// Delete everything matching $regex from the page text (two passes).
	// When something matched and $summary is given, it is added to the edit summary
	// as a significant change.
	public function removeRegex($regex,$summary = null)
	{
		//nothing to do unless the regex is actually there
		if(!preg_match($regex,$this->getText()))
		{
			return;
		}
		//the replacement is applied twice, as before
		$firstpass = preg_replace($regex,"",$this->getText());
		$secondpass = preg_replace($regex,"",$firstpass);
		$this->setText($secondpass);
		if($summary != null)
		{
			$this->addSummary($summary,true);
		}
	}
	
	//checks if a page is a sandbox
	// True when the page name equals the 'name' of one of the sandboxes in
	// $config['sandbox'], false otherwise (previously returned null when nothing matched).
	public function isSandbox()
	{
		global $config;
		//check for each sandbox defined
		foreach($config['sandbox'] as $sandbox)
		{
			//if we hit one of our sandboxes
			if($sandbox['name'] == $this->getName())
			{
				return true;
			}
		}
		return false;
	}
	
	//restores the header of a sandbox
	// Put the standard header back at the top of a sandbox page.
	// The expected header is fetched with getpage() using the 'name' and 'id' from this
	// page's entry in $config['sandbox']. If the page text does not start with it, any
	// copies of the header are removed, the header is prepended and true is returned.
	// Returns false when the page has no sandbox entry or the header is already in place.
	public function restoreHeader()
	{
		global $config;
		$name = $this->getName();
		//not one of our sandboxes: nothing to restore
		if(!isset($config['sandbox'][$name]))
		{
			return false;
		}
		$sandbox = $config['sandbox'][$name];
		//get the shouldbe header
		$shouldbe = $this->wiki->getpage($sandbox['name'],$sandbox['id']);
		//quote with the "/" delimiter: wikitext headers routinely contain "/" (closing tags,
		//links) which used to end the pattern early and make both regexes fail
		$quoted = preg_quote($shouldbe,'/');
		//If the required header is already at the top of the page we are done
		if(preg_match('/^'.$quoted.'/s',$this->getText()))
		{
			return false;
		}
		//Post it to the top removing any other match of it
		$this->setText($shouldbe."\n".preg_replace('/'.$quoted.'/is',"",$this->getText()));
		$this->addSummary("Restoring sandbox header",true);
		return true;
	}
	
	//parse MI tag, add tags to MI, remove MI if not needed
	// Rebuild the maintenance tags at the top of the page.
	// Walks every template returned by parse(): the contents of an existing
	// {{Multiple issues}} (or alias), top-of-page hatnotes, and any tag listed in
	// $config['mitag'] are cut out of the text. The collected tags are then written back
	// at the top, after the hatnotes, wrapped in {{Multiple issues|...}} when there are two
	// or more and left bare when there is exactly one.
	// Summary entries are only added on the second run ($this->runMI == 2).
	// Returns null early when an empty MI tag was found and removed; otherwise returns nothing.
	public function multipleIssues()
	{
		global $config;
		$this->runMI = $this->runMI + 1;
		$removed = 0;//running total of removed template lengths, used to correct offsets below
		$hat = "";//for storing any hat notes in
		$mi = "";//this will be used to store what we want to add to the page
		$hadMI = null;//NOTE(review): this local is never read; the code below uses $this->hadMI
		//parse the page
		$this->parse(); // work with $this->parsed;
		//for each template on the page
		foreach($this->parsed['wikObject_templates'] as $x)
		{
			//make sure the template is not surrounded by comment tags
			if(!preg_match('/<!--.*?'.preg_quote($x->rawCode,'/').'.*?-->/is',$this->getText()))
			{
			
				//does it match the MI template
				if(preg_match('/^(Multiple issues|Article issues|Issues|MI|Many Issues|Multiple|Multipleissues)$/i',$x->name))
				{
					//Update hadMI if not already set
					if($this->hadMI === null){$this->hadMI = true;}
					
					//IS the MI tag empty?
					if(preg_match('/\{\{(Multiple issues|Article issues|Issues|MI|Many Issues|Multiple|Multipleissues)\|?\s*?\}\}/is',$x->rawCode))
					{
						//remove and stop
						$this->text = preg_replace('/\{\{(Multiple issues|Article issues|Issues|MI|Many Issues|Multiple|Multipleissues)\|?\s*?\}\}/is',"",$this->getText());
						if($this->hadMI === true && $this->runMI == 2)
						{
							$this->addSummary("Removing {{Multiple issues}}",true);
						}
						return null;
					}
					else//else we must be a new MI style (or a mixture of both)
					{
						//then parse accordingly
						foreach($x->arguments as $tagarg)
						{
							if(!preg_match('/\{/',$tagarg))//if the arg is old style (name = value, no braces), convert it to a template
							{
								//if the tagarg doesnt have a date i.e. 'expert = canada' we need to get the date in the 'date = <DATE>' parameter
								if(!preg_match('/(.*?) ?= ?((January|February|March|April|May|June|July|August|September|October|November|December) ?20[0-9][0-9])/i',$tagarg))
								{
									//add it with a <DATE> placeholder
									$mi = $mi."{{".trim(preg_replace('/ ?= ?/','|',$tagarg))."|date=<DATE>}}\n";
								}
								else//else it does have a date
								{
									//if it is not just a date
									if(!preg_match('/^date ?= ?/i',$tagarg))
									{
										//add it normally
										$mi = $mi."{{".trim(preg_replace('/ ?= ?/','|date=',$tagarg))."}}\n";
									}
									//else it must be a date
									else
									{
										//dont add the date but replace <DATE> in the string with the date
										//(only placeholders already collected in $mi are filled in)
										$mi = str_replace("<DATE>",trim(preg_replace('/date ?= ?/','',$tagarg)),$mi);
									}
								}
							}
							else
							{
								//just add it 
								//After a lot of research and testing it turns out the MI tag is allowed 1 parameter with templates in
								//Although this can be in the same MI tag as non template 'old style' parameters
								$mi = $mi.$tagarg;
							}
						}
						
						$removed = $removed + $x->attributes['length'];
						$this->text = str_replace($x->rawCode,'',$this->getText());
					}
					$mi = preg_replace("/\n/","",$mi);//get rid of new lines
				}
				//else do we match any hatnotes
				elseif(preg_match('/^(Template:)?(Hatnote|Reflink|Main(( |_)list)?|Details3?|See( |_)also2?|Further2?|About|Other( |_)uses-section|For|((Two|Three) )?Other( |_)uses|Other uses of|Redirect[0-1]?[0-9]|Redirect(-|_| )(synomym|text|distinguish2?)|Consider( |_)disambiguation|Other( |_)(uses|people|places|hurricanes|ships|)[1-5]?|(Redirect-)?Distinguish|Selfref|Category( |_)(see also|explanation|pair)|Cat( |_)main|cat(preceding|succeeding)|contrast|This( |_)user( |_)talk)/i',$x->name))
				{
					//make sure the hat note is not under a section
					//NOTE(review): $x->name goes into this regex unquoted
					if(!preg_match('/\n==.*?{{'.$x->name.'/is',$this->getText()))
					{
						if(strlen($x->rawCode) < 4)
						{
							//skip page
							$this->skip = true;
						}
						//remember our hatnotes
						$hat = $hat.$x->rawCode."\n";
						//remove the hatnote matched (we will re add later)
						$removed = $removed + $x->attributes['length'];
						$this->text = str_replace($x->rawCode,'',$this->getText());
					}
				}
				else// else if we match a tag to go in MI
				{
					//check for all of our defined tags
					foreach($config['mitag'] as $tag)
					{
						//if it is one of our tags
						if(preg_match("/^".$tag->regexName()."$/i",$x->name) == true)
						{
							//if we have a section param ignore the tag
							if(preg_match("/\|(sections|sect?)/i",$x->rawCode) == false)
							{
								//remove the tag from page and add to our output
								$mi = $mi.$x->rawCode;
								//cut by position: the parsed start offset is shifted back by what was already removed
								$this->text = substr_replace($this->getText(),"",$x->attributes['start']-$removed-1,$x->attributes['length']);
								$removed = $removed + $x->attributes['length'];
							}
						}
					}
				}
			}
		}
		
		//Update hadMI if not already set
		if($this->hadMI === null){$this->hadMI = false;}
		
		//crappy way to make sure we split at every tag
		$mi = preg_replace('/\}\}/',"}}\n",$mi);
		//split into each tag (might be joined if from MI)
		$split = preg_split("/\n/",$mi,0,PREG_SPLIT_NO_EMPTY);
		//If there is at least 2 tags
		if(count($split) > 1)
		{
			//add them to a MI tag
			$mi = "{{Multiple issues|\n";//start mi
			foreach ($split as $tag)
			{
				//Check the exact same thing is not already there
				if(!strstr($mi,$tag))
				{
					//TODO: now we want to make sure the same thing is not there just with a different date (also account for redirects)
						$mi = $mi.$tag."\n";//add each tag
				}
			}
			$mi = $mi."}}";//add the end of the tag
			if($this->hadMI === false && $this->runMI == 2)
			{
				//if we have already had a sig change or we have more than 2 tags
				if($this->sigchange == true || count($split) > 2)
				{
					//we can edit
					$this->addSummary("Adding {{Multiple issues}}",true);
				}
			}
		}
		//if only 1 we dont want to use multiple issues
		elseif(count($split) == 1)
		{
			//just add the single tag
			$mi = $split[0];
			if($this->hadMI === true && $this->runMI == 2)
			{
				$this->addSummary("Removing {{Multiple issues}}",true);
			}
			
		}

		//add to original text with any hatnotes
		$this->text = $hat.$mi."\n".$this->getText();

	}
	
	//parse MI tag, add tags to MI, remove MI if not needed
	// Collapse duplicate dated tags inside each {{Multiple issues}} template.
	// Every line of every MI argument is examined. A line holding a template with a
	// "date = <Month> 20xx" parameter is keyed by template name: the first occurrence is
	// kept, later ones only contribute an older date (and their parameters, if the kept
	// entry has none). Templates whose name matches the "expert" family are never merged.
	// Lines that do not parse as a dated template are kept verbatim.
	// The MI template is only rewritten when the result is more than 10 characters shorter.
	public function multipleIssuesDupes()
	{
		$dated = '/\{\{([^\|]+)(.*?)\|date ?= ?((January|February|March|April|May|June|July|August|September|October|November|December) ?20[0-9][0-9])(.*?)\}\}/i';
		//parse the page
		$this->parse(); // work with $this->parsed;
		//for each template on the page
		foreach($this->parsed['wikObject_templates'] as $x)
		{
			//skip templates surrounded by comment tags
			if(preg_match('/<!--.*?'.preg_quote($x->rawCode,'/').'.*?-->/is',$this->getText()))
			{
				continue;
			}
			//only the MI template is of interest
			if(!preg_match('/^(Multiple issues|Article issues|Issues|MI|Many Issues|Multiple|Multipleissues)/i',$x->name))
			{
				continue;
			}
			//Start clean for every MI template. The collected tags used to live in one array
			//for the whole page while the write index restarted at 0, so a second MI template
			//was mixed up with the first.
			$entries = array();//output lines, in order
			$firstseen = array();//template name => index in $entries of its first dated entry
			foreach($x->arguments as $tagarg)
			{
				foreach (explode("\n",$tagarg) as $tag)
				{
					if(!preg_match($dated,$tag,$matches))
					{
						//No usable date: keep the line exactly as written. The old code read
						//$matches here even though the match had failed, which emitted an
						//empty "{{|date=}}" entry for every such line (blank lines included).
						if(trim($tag) != "")
						{
							$entries[] = array('raw' => $tag);
						}
						continue;
					}
					$name = $matches[1];
					$params = $matches[2].$matches[5];
					$known = array_key_exists($name,$firstseen);
					//if its not already collected or it matches a template to ignore
					if(!$known || preg_match('/((cleanup-)?expert(_| |-|)(attention|subject|article|portal|verify|))/i',$name))
					{
						//add it
						$entries[] = array('name' => $name,'date' => $matches[3],'params' => $params);
						if(!$known)
						{
							$firstseen[$name] = count($entries)-1;
						}
						continue;
					}
					//duplicate: find which date is oldest and keep it
					$kept = $firstseen[$name];
					if(strtotime($matches[3]) < strtotime($entries[$kept]['date']))
					{
						//change the date
						$entries[$kept]['date'] = $matches[3];
						//and if no parameters were given before give ours
						if($entries[$kept]['params'] == "")
						{
							$entries[$kept]['params'] = $params;
						}
					}
				}
			}
			//build our new MI tag
			$torep = "{{Multiple issues|\n";
			foreach($entries as $entry)
			{
				if(isset($entry['raw']))
				{
					$torep .= $entry['raw']."\n";
				}
				else
				{
					$torep .= "{{".$entry['name'].$entry['params']."|date=".$entry['date']."}}\n";
				}
			}
			$torep .= "}}";
			//replace the old with the new, but only when it got noticeably shorter
			if(strlen($x->rawCode)-10 > strlen($torep))
			{
				$this->setText(str_replace($x->rawCode,$torep,$this->getText()));
				$this->addSummary("Removing Duplicate tags",true);
			}
		}
	}
	
	//http://en.wikipedia.org/w/index.php?title=Wikipedia:AutoEd/whitespace.js&action=raw&ctype=text/javascript
	// Whitespace tidy-up: runs of three or more newlines become exactly two, then up to
	// five leading newline / carriage-return characters are removed from the start.
	public function fixWhitespace()
	{
		$collapsed = preg_replace('/(\n\n)\n+/','$1', $this->getText() );
		$this->text = $collapsed;
		//remove leading white space
		$trimmed = preg_replace('/^(\n|\r){0,5}/','', $this->getText() );
		$this->text = $trimmed;
	}
	
	/**
	 * Apply a fixed series of template clean-up substitutions to the page text.
	 *
	 * The rules are applied in the order listed, each one on the output of the
	 * previous one, and the result is stored back in $this->text.
	 *
	 * preg_replace() returns null when PCRE fails (for example when the
	 * backtrack limit is hit). Such a rule is skipped so that a single failing
	 * pattern cannot replace the whole page text with null.
	 */
	public function fixTemplates()
	{
		//each entry is array(pattern, replacement)
		$rules = array(
			//parameterless Dab / Disamb / Disambiguation become Disambig
			array('/\{\{(?:Template:)?(Dab|Disamb|Disambiguation)\}\}/iS', "{{Disambig}}"),
			//Bio-dab / Hndisambig are renamed to Hndis (only the opening is rewritten)
			array('/\{\{(?:Template:)?(Bio-dab|Hndisambig)/iS', "{{Hndis"),
			//Prettytable and Prettytable100 are both replaced by a substituted Prettytable
			array('/\{\{(?:Template:)?(Prettytable|Prettytable100)\}\}/iS', "{{subst:Prettytable}}"),
			//(BASE)PAGENAME(E), Lived| and Bio-cats| get a subst: prefix
			array('/\{\{(?:[Tt]emplate:)?((?:BASE)?PAGENAMEE?\}\}|[Ll]ived\||[Bb]io-cats\|)/iS', "{{subst:$1"),
			//Article issues: "do-attempt=X|att=Y" is folded into "do-attempt=Y"
			array('/({{\s*[Aa]rticle ?issues\s*(?:\|[^{}]*|\|)\s*[Dd]o-attempt\s*=\s*)[^{}\|]+\|\s*att\s*=\s*([^{}\|]+)(?=\||}})/iS', "$1$2"),
			//Article issues: "copyedit for=X|date=Y" is folded into "copyedit=Y"
			array('/({{\s*[Aa]rticle ?issues\s*(?:\|[^{}]*|\|)\s*[Cc]opyedit\s*)for\s*=\s*[^{}\|]+\|\s*date(\s*=[^{}\|]+)(?=\||}})/iS', "$1$2"),
			//an Article issues tag with no issues (at most section=y / article=y) is removed
			array('/\{\{[Aa]rticle ?issues(?:\s*\|\s*(?:section|article)\s*=\s*[Yy])?\s*\}\}/iS', ""),
			//{{Commons|Category:X}} becomes {{Commons category|X}}
			array('/\{\{[Cc]ommons\|\s*[Cc]ategory:\s*([^{}]+?)\s*\}\}/iS', "{{Commons category|$1}}"),
			//drop a pipe followed only by whitespace before "}}" or another pipe in cite/cita/Article issues templates
			array('/(?!{{[Cc]ite wikisource)(\{\{\s*(?:[Cc]it[ae]|[Aa]rticle ?issues)[^{}]*)\|\s*(\}\}|\|)/iS', "$1$2"),
			//Article issues: a parameter repeated with the same name and value keeps only the later copy
			array('/({{\s*[Aa]rticle ?issues[^{}]*\|\s*)(\w+)\s*=\s*([^\|}{]+?)\s*\|((?:[^{}]*?\|)?\s*)\2(\s*=\s*)\3(\s*(\||\}\}))/iS', "$1$4$2$5$3$6"),
			//Article issues: a later empty repeat of a parameter that already has a value is dropped
			array('/(\{\{\s*[Aa]rticle ?issues[^{}]*\|\s*)(\w+)(\s*=\s*[^\|}{]+(?:\|[^{}]+?)?)\|\s*\2\s*=\s*(\||\}\})/iS', "$1$2$3$4"),
			//Article issues: an empty parameter is dropped when the same name is given a value later on
			array('/(\{\{\s*[Aa]rticle ?issues[^{}]*\|\s*)(\w+)\s*=\s*\|\s*((?:[^{}]+?\|)?\s*\2\s*=\s*[^\|}{\s])/iS', "$1$3"),
			//Cn / Fact / Proveit / Citeneeded / Uncited are renamed to Citation needed
			array('/{{\s*(?:[Cc]n|[Ff]act|[Pp]roveit|[Cc]iteneeded|[Uu]ncited)(?=\s*[\|}])/S', "{{Citation needed"),
		);

		foreach ($rules as $rule)
		{
			$result = preg_replace($rule[0], $rule[1], $this->text);
			//null signals a PCRE error: keep the text as it was and move on
			if ($result !== null)
			{
				$this->text = $result;
			}
		}
	}
	
	/**
	 * Add the current month and year to maintenance tags used without arguments.
	 *
	 * For every tag object in $config['mitag'], occurrences of the form
	 * {{Name}} (optionally prefixed with "Template:") are rewritten to
	 * {{<getName()>|date=<Month Year>}}. Tags that already carry arguments
	 * are not touched.
	 *
	 * The page text is only updated, "+" echoed and the "Dating Tags" summary
	 * added when at least one replacement was actually made. The number of
	 * replacements is used for that decision rather than a comparison of text
	 * lengths, because replacing a long alias by a shorter canonical name can
	 * leave the text barely longer than before.
	 */
	public function fixDateTags()
	{
		global $config;
		//work on a copy so the page is only changed when something was dated
		$text = $this->getText();
		//current month and year, e.g. "January 2013"
		$date = date("F Y");
		//how many tags have been dated so far
		$dated = 0;

		foreach ($config['mitag'] as $tag)
		{
			//skip tags that matches() does not find at all
			//NOTE(review): matches() is assumed to test the pattern against the page text - confirm
			if(!$this->matches('/\{\{(Template:)?'.$tag->regexName().'/i'))
			{
				continue;
			}

			//date tags with no args at all
			$count = 0;
			$result = preg_replace('/\{\{(Template:)?'.$tag->regexName().'\}\}/i',"{{".$tag->getName()."|date=$date}}",$text,-1,$count);
			//preg_replace() gives null on a PCRE error; keep the previous text in that case
			if($result !== null)
			{
				$text = $result;
				$dated += $count;
			}
			//TODO tags that already have arguments but no date are not handled yet
		}

		//If a tag has been dated
		if($dated > 0)
		{
			$this->text = $text;
			echo "+";
			$this->addSummary("Dating Tags",true);
		}
	}
	
	/**
	 * Append a "|section" argument to maintenance tags placed below a heading.
	 *
	 * Every tag object in $config['mitag'] is considered, except those whose
	 * name contains unreferenced, emptysection, refimprove, unsourced,
	 * footnotes or uncategorized. A tag counts as "below a heading" when it
	 * appears anywhere after a "==" in the page text.
	 *
	 * Tags whose arguments already contain a "sect" / "section" parameter are
	 * left alone; without that check the same tag would receive another
	 * "|section" each time this method runs.
	 */
	public function fixSectionTags()
	{
		global $config;
		//rejects a tag whose arguments already hold a sect/section parameter;
		//only non-capturing groups are used so the group numbers below do not shift
		$noSectionParam = '(?![^}]*\|\s*sect(?:ion)?\s*(?:\||=|\}))';

		//check each tag we have to see if it needs the section parameter
		foreach ($config['mitag'] as $tag)
		{
			//tags with these names are skipped
			if(preg_match('/(unreferenced|emptysection|refimprove|unsourced|footnotes|uncategorized)/i',$tag->getName()))
			{
				continue;
			}

			//if the tag can be found below a section
			if($this->matches('/(==.*?)\{\{(Template:)?'.$tag->regexName().'(?!sect(ions?)?)'.$noSectionParam.'[^}]*?\}\}/is'))//todo
			{
				//add the section parameter to the template if it isnt there already
				//NOTE(review): "$4" is the argument group only if regexName() contributes exactly one capture group - confirm
				$updated = preg_replace('/(==.*?)\{\{(Template:)?'.$tag->regexName().'((?!sect(ions?)?)'.$noSectionParam.'[^}]*?)\}\}/is',"$1{{".$tag->getName()."$4|section}}",$this->getText());
				//preg_replace() gives null on a PCRE error; do not overwrite the page with it
				if($updated !== null)
				{
					$this->setText($updated);
				}
			}
		}
	}
	
	/**
	 * General clean-up of the page text.
	 *
	 * Rewrites the "unreferenced" tag used with a "|section" argument to
	 * {{Unreferenced section}} and, when the page has no {{reflist, turns
	 * <references /> tags into {{reflist}}.
	 */
	public function fixGeneral()
	{
		global $config;

		//TODO header fixes are switched off until these patterns are fixed
		//$this->text = preg_replace('/== ?External ?links? ?==/i', "==External links==", $this->text );
		//$this->text = preg_replace('/== ?Further ?readings? ?==/i', "==Further reading==", $this->text );

		//the unreferenced tag with "|section" gets its dedicated section template
		$unreferencedSection = '/\{\{'.$config['mitag']['unreferenced']->regexName().'\|section\}\}/i';
		$this->text = preg_replace($unreferencedSection, "{{Unreferenced section}}", $this->text);

		//a page that already has {{reflist keeps its <references /> tags
		if($this->matches('/\{\{reflist/i'))
		{
			return;
		}

		$this->text = preg_replace('/<references ?\/>/i', "{{reflist}}", $this->text);
	}
	
	/**
	 * Remove leftover HTML comments from the page text: the SoxBot
	 * "Automatically added" notice (matched case-insensitively) and
	 * "interwiki(s) (link(s))" marker comments.
	 */
	public function preChecks()
	{
		$soxbotNotice = "<!-- Automatically added by User:SoxBot. If this is an error, please contact User:Soxred93 -->";
		$interwikiMarker = "/<!-- ?interwikis?( links?)? ?-->/i";

		$cleaned = str_ireplace($soxbotNotice, "", $this->text);
		$cleaned = preg_replace($interwikiMarker, "", $cleaned);

		$this->text = $cleaned;
	}
}
	 
?>
Back to Top