PageRenderTime 68ms CodeModel.GetById 36ms RepoModel.GetById 0ms app.codeStats 0ms

/enwiki/list.php

https://github.com/Dispositif/addbot
PHP | 180 lines | 125 code | 22 blank | 33 comment | 17 complexity | 22bddd2cf0c6ded3d129c26af5158aad MD5 | raw file
  1. <?
  2. //TODO: Enable to be passed just 1 page name to be added to the list
  3. // options for running the file
  4. $shortopts = "";
  5. $shortopts .= "r::"; // recursive, use if we DONT want recursion when getting cats
  6. $longopts = array(
  7. "trigger::",// trigger if needed for web
  8. "namespace::",// whichnamespace to use
  9. "method:",// method to be used in terms of source
  10. "source:",// source to be used in method
  11. );
  12. // get the options the file was run with
  13. $option = getopt($shortopts, $longopts);
  14. $namespace = -1;
  15. if(!isset($option['source']) || !isset($option['method'])){
  16. echo "Invalid Input";
  17. die();
  18. }
  19. //try and parse the namespace
  20. if(isset($option['namespace']))
  21. {
  22. try {
  23. $namespace = intval($option['namespace']);
  24. } catch (Exception $e) {
  25. echo 'Caught exception in namespace: ', $e->getMessage(), "\n";
  26. }
  27. }
  28. echo "loading...\n";
  29. sleep(1);
  30. // load the classes and stuff
  31. require '/data/project/addbot/classes/botclasses.php';
  32. require '/data/project/addbot/classes/database.php';
  33. require '/data/project/addbot/classes/template.php';
  34. require 'config.php';
  35. // initialise the wiki
  36. $wiki = new wikipedia;
  37. $wiki->url = 'http://'.$config['url'].'/w/api.php';
  38. global $wiki;
  39. // perform the login
  40. $wiki->login($config['user'],$config['password']);
  41. unset($config['password']);
  42. echo "done";
  43. echo "Get articles from ".$option['source']." using ".$option['method']."\n";
  44. sleep(1);
  45. // get via category members
  46. if(preg_match("/^cat(egory(( |_|-)?members)?)?/i",$option['method'])){
  47. if(!isset($option['r'])){$recursive = true;}else{$recursive = false;}; // default recursion to true
  48. $list = $wiki->categorymembers($option['source'],$recursive);
  49. }
  50. // get via a list on a page
  51. elseif(preg_match("/^(page|list)/i",$option['method'])){
  52. $text = $wiki->getpage($option['source']); // get the page content
  53. $text = preg_replace("/(\* ?|\[\[|\]\])/","",$text); // remove all square brackets (wikilinks)
  54. $list = explode("\n",$text); // explode into an array we can use
  55. if($option['source'] == "User:Addbot/check"){
  56. $wiki->edit("User:Addbot/check","","[[User:Addbot|Bot:]] has added the list to the database",true);} // blank the list if it is our check page
  57. }
  58. // get via transclusions of the source
  59. elseif(preg_match("/^(template|trans(clusions?)?)/i",$option['method'])){
  60. $list = $wiki->getTransclusions($option['source'],null); // sleep for 10 between requests
  61. }
  62. // get via a web list
  63. elseif(preg_match("/^(web|html)/i",$option['method'])){
  64. if(isset($option['trigger'])){ echo "Using ".$option['trigger']."\n"; file_get_contents($option['trigger']); } // if set get the trigger file
  65. sleep(30); // sleep for 30 seconds to make sure the page is updated
  66. $text = file_get_contents($option['source']); // get the content url
  67. $text = preg_replace("/(\[\[|\]\])/","",$text); // remove all square brackets (wikilinks)
  68. $list = explode("\n",$text); // explode into an array we can use
  69. }
  70. // add only the one article given
  71. elseif(preg_match("/^(single|only)/i",$option['method'])){
  72. $list = array($option['source']);
  73. }
  74. else{// our regex didnt match a source
  75. echo "No preset source found\n";
  76. }
  77. // check if the list has been generated and we need to process the stuff below
  78. if(isset($list))
  79. {
  80. echo "List has been generated, processing...\n";
  81. sleep(1);
  82. // after the list has been generated
  83. $final = array_unique($list); // make sure all of the elements is unique
  84. /* Used for reference (en.wikipedia)
  85. 0 Main Talk 1
  86. 2 User User talk 3
  87. 4 Wikipedia Wikipedia talk 5
  88. 6 File File talk 7
  89. 8 MediaWiki MediaWiki talk 9
  90. 10 Template Template talk 11
  91. 12 Help Help talk 13
  92. 14 Category Category talk 15
  93. 100 Portal Portal talk 101
  94. 108 Book Book talk 109
  95. 446 Education Program Education Program talk 447
  96. 710 TimedText TimedText talk 711
  97. */
  98. //restrict the namespace depending on $namespace (presume article namespace only if not set)
  99. switch($namespace) {
  100. // case 0 is different (if an article matches case 0 it will not make it to the list
  101. case 0:$namespaceregex = "(User|Wikipedia|File|Image|MediaWiki|Template|Help|Category|Portal|Book|Education( |_)Program|TimedText)(( |_)talk)?";break;
  102. // from here on we are looking to match the articles we want
  103. case 1:$namespaceregex = "Talk";break;
  104. case 2:$namespaceregex = "User";break;
  105. case 3:$namespaceregex = "User( |_)talk";break;
  106. case 4:$namespaceregex = "Wikipedia";break;
  107. case 5:$namespaceregex = "Wikipedia( |_)talk";break;
  108. case 6:$namespaceregex = "(File|Image)";break;
  109. case 7:$namespaceregex = "(File|Image)( |_)talk";break;
  110. case 10:$namespaceregex = "Template";break;
  111. case 11:$namespaceregex = "Template( |_)talk";break;
  112. case 14:$namespaceregex = "Category";break;
  113. case 15:$namespaceregex = "Category( |_)talk";break;
  114. }
  115. $final = array(); //define a blank array for our final list
  116. foreach($list as $item) // for every item we have collected for the list
  117. {
  118. usleep(100);/*00*/
  119. if($namespace != -1) //-1 is we dont care
  120. {
  121. if($namespace != 0) // if it is not specificly the main namespace
  122. {
  123. if(preg_match("/^".$namespaceregex.":/i",$item)) // get those that match the namespace we want
  124. {
  125. array_push($final,$item); // push our article to the final array
  126. }
  127. }
  128. else // we much = 0 (mainspace)
  129. {
  130. if(!preg_match("/^".$namespaceregex.":/i",$item)) // get those that dont match any other namespace
  131. {
  132. array_push($final,$item); // push our article to the final array
  133. }
  134. }
  135. }
  136. else
  137. {
  138. array_push($final,$item);
  139. }
  140. }
  141. echo "Connecting to DB...\n";
  142. // connect to the database
  143. $db = new Database( $config['dbhost'], $config['dbport'], $config['dbuser'], $config['dbpass'], $config['dbname'], false);
  144. foreach($final as $item) // for each item
  145. {
  146. usleep(1000);
  147. $res = $db->insert($config['tblist'],array('article' => $item,) ); // inset to database table
  148. if( !$res ){echo $db->errorStr()."\n";} // if no result then break as we have an error ($db->errorStr())
  149. else{echo "Added ".$item." to database\n";}
  150. }
  151. }
  152. else
  153. {
  154. echo "Getting list failed!\n";
  155. }
  156. echo "Done\n";
  157. // write to a logfile saying what has happend in regards to the list
  158. ?>