PageRenderTime 1563ms CodeModel.GetById 28ms RepoModel.GetById 0ms app.codeStats 0ms

/webapp/plugins/twitterrealtime/model/class.StreamMasterCollect.php

https://github.com/devsatish/ThinkUp
PHP | 315 lines | 193 code | 20 blank | 102 comment | 40 complexity | 02bcc11395ade01478810339db7cbe6a MD5 | raw file
  1. <?php
/**
 *
 * ThinkUp/webapp/plugins/twitterrealtime/model/class.StreamMasterCollect.php
 *
 * Copyright (c) 2011 Amy Unruh
 *
 * LICENSE:
 *
 * This file is part of ThinkUp (http://thinkupapp.com).
 *
 * ThinkUp is free software: you can redistribute it and/or modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any
 * later version.
 *
 * ThinkUp is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License along with ThinkUp. If not, see
 * <http://www.gnu.org/licenses/>.
 *
 *
 * Stream Master Collect
 * Initiates pulling in Twitter UserStream data from the command line, for asynchronous processing,
 * given valid command line credentials.
 *
 * @license http://www.gnu.org/licenses/gpl.html
 * @copyright Amy Unruh
 * @author Amy Unruh
 */
  /**
   * Starts, supervises and stops the background stream2.php consumer processes (one per active Twitter
   * instance), using the StreamProcDAO table as the record of which process serves which owner/instance.
   */
  class StreamMasterCollect {
      /**
       * Directory that contains stream2.php; launchStreams() changes into it before starting a consumer.
       * @var string
       */
      protected $streaming_dir;
      /**
       * Directory that receives one "<email>_<instance id>.log" file per launched consumer.
       * @var string
       */
      protected $log_dir;
      /**
       * Value of the 'php_path' option of the twitterrealtime plugin, or null when the option is not set.
       * @var string
       */
      protected $php_path;
      /**
       * @var StreamProcDao
       */
      protected $stream_proc_dao;
      /**
       * @var InstanceDAO
       */
      protected $instance_dao;
      /**
       * @var OwnerDAO
       */
      protected $owner_dao;
      /**
       * Max # of instances for which we will try to open twitter streams.
       * @const int
       */
      const MAX_INSTANCES = 5;
      /**
       * Elapsed time in seconds since 'last report' before we conclude that a process is dead.
       * @const int
       */
      const GAP_TIME = 600;

      /**
       * Constructor. Resolves the streaming/log directories from the 'source_root_path' config value, reads
       * the php interpreter path from the twitterrealtime plugin options and obtains the DAOs used later.
       * @return StreamMasterCollect
       */
      public function __construct() {
          $config = Config::getInstance();
          $source_root = $config->getValue('source_root_path');
          $this->streaming_dir = $source_root . 'webapp/plugins/twitterrealtime/streaming';
          // @TODO -- get this from plugin information now
          $this->log_dir = $source_root . 'logs';

          $plugin_option_dao = DAOFactory::getDAO('PluginOptionDAO');
          $options = $plugin_option_dao->getOptionsHash('twitterrealtime', true);
          // path to the php interp on the user's system; stays null when the plugin option is missing
          $this->php_path = isset($options['php_path']) ? $options['php_path']->option_value : null;

          $this->stream_proc_dao = DAOFactory::getDAO('StreamProcDAO');
          $this->instance_dao = DAOFactory::getDAO('InstanceDAO');
          $this->owner_dao = DAOFactory::getDAO('OwnerDAO');
      }

      /**
       * Kills every streaming process recorded in the database and removes its record.
       * @return void
       */
      public function shutdownStreams() {
          $logger = Logger::getInstance('stream_log_location');
          $logger->logInfo("killing all running streaming processes", __METHOD__.','.__LINE__);
          $this->killAllStreamingPIDs();
      }

      /**
       * Makes sure a stream2.php consumer is running for up to MAX_INSTANCES active Twitter instances.
       *
       * For each owner/instance pair: a recorded process whose 'last_report' is newer than GAP_TIME seconds is
       * left alone (and counted); a recorded process that is older is killed, removed from the database and
       * replaced; an instance with no recorded process gets a new one.
       *
       * @throws StreamingException if a new process yields no PID or its record cannot be inserted
       * @return void
       */
      public function launchStreams() {
          $logger = Logger::getInstance('stream_log_location');
          if (!$this->php_path) {
              $logger->logError("php path is not set: check Twitter Realtime plugin configuration",
              __METHOD__.','.__LINE__);
              return;
          }
          // get information from database about all streams. This data is indexed by email + instance id.
          $stream_hash = $this->stream_proc_dao->getAllStreamProcesses();
          $owners = $this->owner_dao->getAllOwners();
          $count = 0;

          foreach ($owners as $owner) {
              if ($count >= self::MAX_INSTANCES) {
                  break; // only open user stream process for up to MAX_INSTANCES instances
              }
              // the last argument in the following causes only active instances to be retrieved.
              $instances = $this->instance_dao->getByOwnerAndNetwork($owner, 'twitter', true, true);
              foreach ($instances as $instance) {
                  $owner_email = $owner->email;
                  if (!isset($owner_email)) {
                      $logger->logError("error: email info not available. not launching stream for instance "
                      . $instance->id, __METHOD__.','.__LINE__);
                      continue;
                  }

                  $idx = $owner_email . "_" . $instance->id;
                  $start_new_proc = true;
                  if (isset($stream_hash[$idx]) && ($stream_hash[$idx]['email'] == $owner_email) &&
                  $stream_hash[$idx]['instance_id'] == $instance->id) {
                      $proc = $stream_hash[$idx];
                      if (strtotime($proc['last_report']) < (time() - self::GAP_TIME)) {
                          // recorded process stopped reporting: replace it
                          $logger->logInfo("killing process " . $proc['process_id'] .
                          " -- it has not updated recently", __METHOD__.','.__LINE__);
                          $this->psKill($proc['process_id']);
                          $this->stream_proc_dao->deleteProcess($proc['process_id']);
                      } else {
                          // a 'live' process for that user is already running: take no action
                          $logger->logInfo("process " . $proc['process_id'] .
                          " listed with recent update time for instance with $owner_email and " .
                          $proc['instance_id'] . "-- not starting another one",
                          __METHOD__.','.__LINE__);
                          $count++; // include this proc in the count of running processes
                          $start_new_proc = false;
                      }
                  }

                  if ($start_new_proc && $this->startStreamProcess($owner_email, $instance->id)) {
                      $count++;
                  }
                  if ($count >= self::MAX_INSTANCES) {
                      break; // only open user stream process for up to MAX_INSTANCES instances
                  }
              } // end foreach instance
          } // end foreach owner
      }

      /**
       * Launches one background stream2.php process for the given owner/instance and records its PID.
       *
       * @param string $owner_email
       * @param int $instance_id
       * @throws StreamingException if no PID is reported or the process record cannot be inserted
       * @return bool true if a process was started, false if the password or php path was unavailable
       */
      private function startStreamProcess($owner_email, $instance_id) {
          $logger = Logger::getInstance('stream_log_location');
          $logger->logInfo("starting new process for " . "$owner_email and " .
          $instance_id, __METHOD__.','.__LINE__);

          $pass = $this->owner_dao->getPass($owner_email);
          if (!$pass || !isset($this->php_path)) {
              $logger->logError("error: not launching stream for $owner_email-- error " .
              "with specified password or php path", __METHOD__.','.__LINE__);
              return false;
          }

          // @TODO - check that the dir paths are set properly
          $logfile = $this->log_dir . '/' . $owner_email . '_' . $instance_id . '.log';
          // Every interpolated value is shell-quoted: the email and stored password can contain characters
          // such as '$', ';' or spaces that the shell would otherwise expand or treat as syntax.
          // php_path is deliberately left as configured so an existing setting that carries interpreter
          // flags keeps working.
          // 'cd ...;' (not '&&') is kept so that '&' backgrounds only the php command and $! is its PID.
          // NOTE(review): the password value is passed on the command line and is therefore visible to other
          // local users via ps; changing that requires changing how stream2.php receives it.
          $cmd = 'cd ' . escapeshellarg($this->streaming_dir) . '; ' . $this->php_path .
          ' stream2.php ' . escapeshellarg((string) $instance_id) . ' ' .
          escapeshellarg($owner_email) . ' ' . escapeshellarg($pass) .
          ' > ' . escapeshellarg($logfile) . ' 2>&1 & echo $!';

          // shell_exec() yields null/false/empty output on failure; only a numeric PID is acceptable.
          $pid = trim((string) shell_exec($cmd));
          if (!ctype_digit($pid)) {
              throw new StreamingException(
              "error: could not obtain PID when starting stream2 process.");
          }
          // insert PID and email/instance id information into the database.
          $res = $this->stream_proc_dao->insertProcessInfo($pid, $owner_email, $instance_id);
          if (!$res) {
              throw new StreamingException(
              "error: issue inserting process information into database.");
          }
          $logger->logInfo("started pid " . $pid . " for $owner_email and instance id " .
          $instance_id, __METHOD__.','.__LINE__);
          return true;
      }

      /**
       * Converts a PID given as int or digit string into a positive int.
       *
       * @param mixed $pid
       * @return int|null the PID, or null when the value is not a positive integer
       */
      private function normalizePid($pid) {
          if (!is_int($pid)) {
              $pid = is_string($pid) ? trim($pid) : '';
              if (!ctype_digit($pid)) {
                  return null;
              }
              $pid = (int) $pid;
          }
          return ($pid > 0) ? $pid : null;
      }

      /**
       * Reports whether `ps ax` lists a process with the given PID.
       * currently unused
       *
       * @param int|string $pid
       * @return bool
       */
      private function psExists($pid) {
          $wanted = $this->normalizePid($pid);
          if ($wanted === null) {
              return false;
          }
          $output = array();
          exec('ps ax | grep ' . escapeshellarg((string) $wanted) . ' 2>&1', $output);
          foreach ($output as $row) {
              // ps right-aligns the PID column, so strip the padding before taking the first field
              $columns = preg_split('/\s+/', trim($row));
              if (ctype_digit($columns[0]) && (int) $columns[0] === $wanted) {
                  return true;
              }
          }
          return false;
      }

      /**
       * Collects PIDs from "<pid>.pid" files found in the streaming directory.
       * currently unused (information stored in database instead)
       *
       * @throws Exception if the streaming directory cannot be opened
       * @return array list of PID strings
       */
      private function getExistingPIDs() {
          $logger = Logger::getInstance('stream_log_location');
          $dh = @opendir($this->streaming_dir);
          if (!$dh) {
              throw new Exception("Cannot open directory " . $this->streaming_dir);
          }
          $pids = array();
          while (($file = readdir($dh)) !== false) {
              if ($file == '.' || $file == '..') {
                  continue;
              }
              $pos = strpos($file, '.pid');
              // require at least one character before '.pid'
              if ($pos !== false && $pos > 0) {
                  $pid = substr($file, 0, $pos);
                  $logger->logInfo("found pid $pid", __METHOD__.','.__LINE__);
                  $pids[] = $pid;
              }
          }
          closedir($dh);
          return $pids;
      }

      /**
       * Parses one line of `ps` output describing a stream2.php process.
       * hah. this method courtesy of txt2re.com, a genius service.
       * (sadly,) currently not used.
       *
       * @param string $psline
       * @return array keys 'login', 'psid', 'id', 'email' on a match; empty array otherwise
       */
      private function isSoCool($psline) {
          $logger = Logger::getInstance('stream_log_location');
          $res = array();
          $re1='((?:[a-z][a-z]+))';   // login name
          $re2='.*?';
          $re3='(\\d+)';              // process id
          $re4='.*?';
          $re5='(stream2\\.php)';
          $re6='(\\s+)';
          $re7='(\\d+)';              // instance id argument
          $re8='(\\s+)';
          // '-' is placed last in each character class: PCRE2 (PHP 7.3+) rejects "[\w-+]" as an invalid range
          $re9='([\\w+-]+(?:\\.[\\w+-]+)*@(?:[\\w-]+\\.)+[a-zA-Z]{2,7})'; // email argument
          if (preg_match_all("/".$re1.$re2.$re3.$re4.$re5.$re6.$re7.$re8.$re9."/is", $psline, $matches)) {
              $word1=$matches[1][0];
              $int1=$matches[2][0];
              $file1=$matches[3][0];
              $ws1=$matches[4][0];
              $int2=$matches[5][0];
              $ws2=$matches[6][0];
              $email1=$matches[7][0];
              $logger->logInfo("($word1) ($int1) ($file1) ($ws1) ($int2) ($ws2) ($email1)", __METHOD__.','.__LINE__);
              $res = array('login' => $word1 , 'psid' => $int1, 'id' => $int2, 'email' => $email1);
          }
          return $res;
      }

      /**
       * Scans `ps` output for stream2.php processes started with the configured php path.
       * currently not used (information stored in the db is used instead)
       *
       * @return array parsed process info (see isSoCool()) keyed by "<email>_<instance id>"
       */
      private function findAllRunningStreams() {
          $cmd = "ps auxwww | grep stream2.php | grep " . escapeshellarg((string) $this->php_path);
          $output = array();
          exec($cmd, $output, $returnValue);
          $found = array();
          foreach ($output as $psline) {
              $res = $this->isSoCool($psline);
              if ($res) {
                  $found[$res['email'] . "_" . $res['id']] = $res;
              }
          }
          return $found;
      }

      /**
       * Kills every process listed by the StreamProcDAO and deletes its database record.
       * @return void
       */
      private function killAllStreamingPIDs() {
          $logger = Logger::getInstance('stream_log_location');
          $pid_data = $this->stream_proc_dao->getAllStreamProcessIDs();
          foreach ($pid_data as $pid_row) {
              $pid = $pid_row['process_id'];
              $logger->logInfo("killing: $pid", __METHOD__.','.__LINE__);
              $this->psKill($pid);
              // now delete it from the database
              $this->stream_proc_dao->deleteProcess($pid);
          }
      }

      /**
       * Sends SIGKILL to the given process.
       *
       * Values that are not a positive integer are refused: an empty, zero or negative argument to kill
       * would address a process group (or fail) rather than the single intended process.
       *
       * @param int|string $pid
       * @return void
       */
      private function psKill($pid) {
          $logger = Logger::getInstance('stream_log_location');
          $target = $this->normalizePid($pid);
          if ($target === null) {
              $logger->logError("not killing invalid pid: " . var_export($pid, true), __METHOD__.','.__LINE__);
              return;
          }
          $output = array();
          $returnValue = -1;
          exec("kill -9 $target 2>&1", $output, $returnValue);
          if ($returnValue === 0) {
              $logger->logInfo("killed: $target", __METHOD__.','.__LINE__);
          } else {
              // typically the process had already exited
              $logger->logInfo("kill of $target returned $returnValue: " . implode(' ', $output),
              __METHOD__.','.__LINE__);
          }
      }
  }