PageRenderTime 57ms CodeModel.GetById 11ms RepoModel.GetById 1ms app.codeStats 0ms

/FetLife.php

https://github.com/meitar/libFetLife
PHP | 1002 lines | 636 code | 90 blank | 276 comment | 44 complexity | a3e624e07f052ef29e34c6d756a95ba2 MD5 | raw file
  1. <?php
  2. /**
  3. * Class to interface with FetLife.
  4. *
  5. * PHP version 5
  6. *
  7. * LICENCE: This program is free software: you can redistribute it and/or modify
  8. * it under the terms of the GNU Affero General Public License as published by
  9. * the Free Software Foundation, either version 3 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU Affero General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Affero General Public License
  18. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  19. *
  20. * @author Meitar Moscovitz <meitar@maymay.net>
  21. * @copyright 2012 Meitar Moscovitz
  22. * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0
  23. * @link http://maymay.net/
  24. */
  25. // Uncomment for minimal debugging.
  26. //ini_set('log_errors', true);
  27. //ini_set('error_log', '/tmp/php_errors.log');
  28. /**
  29. * Base class.
  30. */
  31. class FetLife {
  32. const base_url = 'https://fetlife.com'; // No trailing slash!
  33. }
  34. /**
  35. * Handles network connections, logins, logouts, etc.
  36. */
  37. class FetLifeConnection extends FetLife {
  38. var $usr; // Associated FetLifeUser object.
  39. var $cookiejar; // File path to cookies for this user's connection.
  40. var $csrf_token; // The current CSRF authenticity token to use for doing HTTP POSTs.
  41. var $cur_page; // Source code of the last page retrieved.
  42. var $proxy_url; // The url of the proxy to use.
  43. var $proxy_type; // The type of the proxy to use.
  44. function __construct ($usr) {
  45. $this->usr = $usr;
  46. // Initialize cookiejar (session store), etc.
  47. $dir = dirname(__FILE__) . '/fl_sessions';
  48. if (!file_exists($dir)) {
  49. if (!mkdir($dir, 0700)) {
  50. die("Failed to create FetLife Sessions store directory at $dir");
  51. }
  52. } else {
  53. $this->cookiejar = "$dir/{$this->usr->nickname}";
  54. }
  55. }
  56. private function scrapeProxyURL () {
  57. $ch = curl_init(
  58. 'http://www.xroxy.com/proxylist.php?port=&type=Anonymous&ssl=ssl&country=&latency=&reliability=5000'
  59. );
  60. curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
  61. $html = curl_exec($ch);
  62. curl_close($ch);
  63. $dom = new DOMDocument();
  64. @$dom->loadHTML($html);
  65. $rows = $dom->getElementsByTagName('tr');
  66. $urls = array();
  67. foreach ($rows as $row) {
  68. if (0 === strpos($row->getAttribute('class'), 'row')) {
  69. $str = $row->getElementsByTagName('a')->item(0)->getAttribute('href');
  70. parse_str($str);
  71. $urls[] = array('host' => $host, 'port' => $port);
  72. }
  73. }
  74. $n = mt_rand(0, count($urls) - 1); // choose a random proxy from the scraped list
  75. $p = parse_url("https://{$urls[$n]['host']}:{$urls[$n]['port']}");
  76. return array(
  77. 'url' => "{$p['host']}:{$p['port']}",
  78. 'type' => ('socks' === $p['scheme']) ? CURLPROXY_SOCKS5 : CURLPROXY_HTTP
  79. );
  80. }
  81. // A flag to pass to curl_setopt()'s proxy settings.
  82. public function setProxy ($url, $type = CURLPROXY_HTTP) {
  83. if ('auto' === $url) {
  84. $p = $this->scrapeProxyURL();
  85. $url = $p['url'];
  86. $type = $p['type'];
  87. }
  88. $this->proxy_url = $url;
  89. $this->proxy_type = $type;
  90. }
  91. /**
  92. * Log in to FetLife.
  93. *
  94. * @param object $usr A FetLifeUser to log in as.
  95. * @return bool True if successful, false otherwise.
  96. */
  97. public function logIn () {
  98. // Grab FetLife login page HTML to get CSRF token.
  99. $ch = curl_init(self::base_url . '/login');
  100. curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
  101. if ($this->proxy_url) {
  102. curl_setopt($ch, CURLOPT_PROXY, $this->proxy_url);
  103. curl_setopt($ch, CURLOPT_PROXYTYPE, $this->proxy_type);
  104. }
  105. $this->setCsrfToken($this->findCsrfToken(curl_exec($ch)));
  106. curl_close($ch);
  107. // Set up login credentials.
  108. $post_data = http_build_query(array(
  109. 'nickname_or_email' => $this->usr->nickname,
  110. 'password' => $this->usr->password,
  111. 'authenticity_token' => $this->csrf_token,
  112. 'commit' => 'Login+to+FetLife' // Emulate pushing the "Login to FetLife" button.
  113. ));
  114. // Log in to FetLife.
  115. return $this->doHttpPost('/session', $post_data);
  116. }
  117. /**
  118. * Calls doHttpRequest with the POST option set.
  119. */
  120. public function doHttpPost ($url_path, $data = '') {
  121. return $this->doHttpRequest($url_path, $data, 'POST');
  122. }
  123. /**
  124. * Calls doHttpRequest with the GET option set.
  125. */
  126. public function doHttpGet ($url_path, $data = '') {
  127. return $this->doHttpRequest($url_path, $data); // 'GET' is the default.
  128. }
  129. /**
  130. * Generic HTTP request function.
  131. *
  132. * @param string $url_path The request URI to send to FetLife. E.g., "/users/1".
  133. * @param string $data Parameters to send in the HTTP request. Recommended to use http_build_query().
  134. * @param string $method The HTTP method to use, like GET (default), POST, etc.
  135. * @return array $r The result of the HTTP request.
  136. */
  137. private function doHttpRequest ($url_path, $data, $method = 'GET') {
  138. //var_dump($this->csrf_token);
  139. if (!empty($data) && 'GET' === $method) {
  140. $url_path += "?$data";
  141. }
  142. $ch = curl_init(self::base_url . $url_path);
  143. if ('POST' === $method) {
  144. curl_setopt($ch, CURLOPT_POST, true);
  145. curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
  146. }
  147. curl_setopt($ch, CURLOPT_COOKIEFILE, $this->cookiejar); // use session cookies
  148. curl_setopt($ch, CURLOPT_COOKIEJAR, $this->cookiejar); // save session cookies
  149. curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, true);
  150. curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
  151. curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
  152. curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
  153. curl_setopt($ch, CURLINFO_HEADER_OUT, true);
  154. if ($this->proxy_url) {
  155. curl_setopt($ch, CURLOPT_PROXY, $this->proxy_url);
  156. curl_setopt($ch, CURLOPT_PROXYTYPE, $this->proxy_type);
  157. }
  158. $r = array();
  159. $this->cur_page = $r['body'] = curl_exec($ch); // Grab FetLife response body.
  160. $this->setCsrfToken($this->findCsrfToken($r['body'])); // Update on each request.
  161. $r['curl_info'] = curl_getinfo($ch);
  162. curl_close($ch);
  163. return $r;
  164. }
  165. /**
  166. * Given some HTML from FetLife, this finds the current user ID.
  167. *
  168. * @param string $str Some raw HTML expected to be from FetLife.com.
  169. * @return mixed User ID on success. False on failure.
  170. */
  171. public function findUserId ($str) {
  172. $matches = array();
  173. preg_match('/var currentUserId = ([0-9]+);/', $str, $matches);
  174. return $matches[1];
  175. }
  176. /**
  177. * Given some HTML from FetLife, this finds a user's nickname.
  178. *
  179. * @param string $str Some raw HTML expected to be from FetLife.com.
  180. * @return mixed User nickname on Success. False on failure.
  181. */
  182. public function findUserNickname ($str) {
  183. $matches = array();
  184. preg_match('/<title>([-_A-Za-z0-9]+) - Kinksters - FetLife<\/title>/', $str, $matches);
  185. return $matches[1];
  186. }
  187. /**
  188. * Given some HTML from FetLife, this finds the current CSRF Token.
  189. *
  190. * @param string $str Some raw HTML expected to be form FetLife.com.
  191. * @return mixed CSRF Token string on success. False on failure.
  192. */
  193. private function findCsrfToken ($str) {
  194. $matches = array();
  195. preg_match('/<meta name="csrf-token" content="([+a-zA-Z0-9&#;=-]+)"\/>/', $str, $matches);
  196. // Decode numeric HTML entities if there are any. See also:
  197. // http://www.php.net/manual/en/function.html-entity-decode.php#104617
  198. $r = preg_replace_callback(
  199. '/(&#[0-9]+;)/',
  200. create_function(
  201. '$m',
  202. 'return mb_convert_encoding($m[1], \'UTF-8\', \'HTML-ENTITIES\');'
  203. ),
  204. $matches[1]
  205. );
  206. return $r;
  207. }
  208. private function setCsrfToken ($csrf_token) {
  209. $this->csrf_token = $csrf_token;
  210. }
  211. }
  212. /**
  213. * A FetLife User. This class mimics the logged-in user, performing actions, etc.
  214. */
  215. class FetLifeUser extends FetLife {
  216. var $nickname;
  217. var $password;
  218. var $id;
  219. var $email_address;
  220. var $connection; // A FetLifeConnection object to handle network requests.
  221. var $friends; // An array (eventually, of FetLifeProfile objects).
  222. function __construct ($nickname, $password) {
  223. $this->nickname = $nickname;
  224. $this->password = $password;
  225. $this->connection = new FetLifeConnection($this);
  226. }
  227. /**
  228. * Logs in to FetLife as the given user.
  229. *
  230. * @return bool True if login was successful, false otherwise.
  231. */
  232. function logIn () {
  233. $response = $this->connection->logIn();
  234. if ($this->id = $this->connection->findUserId($response['body'])) {
  235. return true;
  236. } else {
  237. return false;
  238. }
  239. }
  240. /**
  241. * Translates a FetLife user's nickname to their numeric ID.
  242. */
  243. function getUserIdByNickname ($nickname = NULL) {
  244. if (!$nickname) {
  245. $nickname = $this->nickname;
  246. }
  247. if ($nickname === $this->nickname && !empty($this->id)) {
  248. return $this->id;
  249. } else {
  250. $result = $this->connection->doHttpGet("/$nickname");
  251. $url_parts = parse_url($result['curl_info']['url']);
  252. return current(array_reverse(explode('/', $url_parts['path'])));
  253. }
  254. }
  255. /**
  256. * Translates a FetLife user's ID to their nickname.
  257. */
  258. function getUserNicknameById ($id = NULL) {
  259. if (isset($this->id) && !$id) {
  260. $id = $this->id;
  261. }
  262. $result = $this->connection->doHttpGet("/users/$id");
  263. return $this->connection->findUserNickname($result['body']);
  264. }
  265. function getUserProfile ($who = NULL) {
  266. $id = $this->resolveWho($who);
  267. $profile = new FetLifeProfile(array(
  268. 'usr' => $this,
  269. 'id' => $id
  270. ));
  271. $profile->populate();
  272. return $profile;
  273. }
  274. /**
  275. * Retrieves a user's friend list.
  276. *
  277. * @param mixed $who User whose friends list to search. If a string, treats it as a FetLife nickname and resolves to a numeric ID. If an integer, uses that ID. By default, the logged-in user.
  278. * @param int $pages How many pages to retrieve. By default, retrieves all (0).
  279. * @return array $friends Array of FetLifeProfile objects.
  280. */
  281. function getFriendsOf ($who = NULL, $pages = 0) {
  282. $id = $this->resolveWho($who);
  283. return $this->getUsersInListing("/users/$id/friends", $pages);
  284. }
  285. /**
  286. * Helper function to resolve "$who" we're dealing with.
  287. *
  288. * @param mixed $who The entity to resolve. If a string, assumes a nickname and resolves to an ID. If an integer, uses that.
  289. * @return int The FetLife user's numeric ID.
  290. */
  291. private function resolveWho ($who) {
  292. switch (gettype($who)) {
  293. case 'NULL':
  294. return $this->id;
  295. case 'integer':
  296. return $who;
  297. case 'string':
  298. // Double-check that an integer wasn't passed a string.
  299. if (ctype_digit($who)) {
  300. return (int)$who; // If it was, coerce type appropriately.
  301. } else {
  302. return $this->getUserIdByNickname($who);
  303. }
  304. }
  305. }
  306. /**
  307. * Helper function to determine whether we've been bounced to the "Home" page.
  308. * This might happen if the Profile page we're trying to load doesn't exist.
  309. *
  310. * TODO: Is there a more elegant way for handling this kind of "error"?
  311. */
  312. function isHomePage ($str) {
  313. return (preg_match('/<title>Home - FetLife<\/title>/', $str)) ? true: false;
  314. }
  315. /**
  316. * Helper function to determine whether we've gotten back an HTTP error page.
  317. *
  318. * TODO: Is there a more elegant way for handling this kind of "error"?
  319. */
  320. function isHttp500ErrorPage ($str) {
  321. return (preg_match('/<p class="error_code">500 Internal Server Error<\/p>/', $str)) ? true: false;
  322. }
  323. /**
  324. * Retrieves a user's Writings.
  325. *
  326. * @param mixed $who User whose FetLife Writings to fetch. If a string, treats it as a FetLife nickname and resolves to a numeric ID. If an integer, uses that ID. By default, the logged-in user.
  327. * @param int $pages How many pages to retrieve. By default, retrieves all (0).
  328. * @return array $writings Array of FetLifeWritings objects.
  329. */
  330. function getWritingsOf ($who = NULL, $pages = 0) {
  331. $id = $this->resolveWho($who);
  332. $items = $this->getItemsInListing('//article', "/users/$id/posts", $pages);
  333. $ret = array();
  334. foreach ($items as $v) {
  335. $x = array();
  336. $x['title'] = $v->getElementsByTagName('h2')->item(0)->nodeValue;
  337. $x['category'] = trim($v->getElementsByTagName('strong')->item(0)->nodeValue);
  338. $author_url = $v->getElementsByTagName('a')->item(0)->attributes->getNamedItem('href')->value;
  339. $author_id = (int) current(array_reverse(explode('/', $author_url)));
  340. $author_avatar = $v->getElementsByTagName('img')->item(0)->attributes->getNamedItem('src')->value;
  341. $x['creator'] = new FetLifeProfile(array(
  342. 'id' => $author_id,
  343. 'avatar_url' => $author_avatar
  344. ));
  345. $x['url'] = $v->getElementsByTagName('a')->item(1)->attributes->getNamedItem('href')->value;
  346. $x['id'] = (int) current(array_reverse(explode('/', $x['url'])));
  347. $x['dt_published'] = $v->getElementsByTagName('time')->item(0)->attributes->getNamedItem('datetime')->value;
  348. $x['content'] = $v->getElementsByTagName('div')->item(1); // save the DOMElement object
  349. $x['usr'] = $this;
  350. $ret[] = new FetLifeWriting($x);
  351. }
  352. return $ret;
  353. }
  354. /**
  355. * Retrieves a user's Pictures.
  356. */
  357. function getPicturesOf ($who = NULL, $pages = 0) {
  358. $id = $this->resolveWho($who);
  359. $items = $this->getItemsInListing('//ul[contains(@class, "page")]/li', "/users/$id/pictures", $pages);
  360. $ret = array();
  361. foreach ($items as $v) {
  362. $x = array();
  363. $x['url'] = $v->getElementsByTagName('a')->item(0)->attributes->getNamedItem('href')->value;
  364. $x['id'] = (int) current(array_reverse(explode('/', $x['url'])));
  365. $x['thumb_src'] = $v->getElementsByTagName('img')->item(0)->attributes->getNamedItem('src')->value;
  366. $x['src'] = preg_replace('/_110\.(jpg|jpeg|gif|png)$/', '_720.$1', $x['thumb_src']); // This is a good guess.
  367. $x['content'] = $v->getElementsByTagName('img')->item(0)->attributes->getNamedItem('alt')->value;
  368. $x['creator'] = new FetLifeProfile(array('id' => $id));
  369. $x['usr'] = $this;
  370. $ret[] = new FetLifePicture($x);
  371. }
  372. return $ret;
  373. }
  374. /**
  375. * Retrieves list of group members.
  376. *
  377. * @param int $group_id The ID of the group.
  378. * @param int $pages How many pages to retrieve. By default, retrieve all (0).
  379. * @return array $members Array of DOMElement objects from the group's "user_in_list" elements.
  380. */
  381. function getMembersOfGroup ($group_id, $pages = 0) {
  382. return $this->getUsersInListing("/groups/$group_id/group_memberships", $pages);
  383. }
  384. function getKinkstersWithFetish($fetish_id, $pages = 0) {
  385. return $this->getUsersInListing("/fetishes/$fetish_id/kinksters", $pages);
  386. }
  387. function getKinkstersGoingToEvent($event_id, $pages = 0) {
  388. return $this->getUsersInListing("/events/$event_id/rsvps", $pages);
  389. }
  390. function getKinkstersMaybeGoingToEvent($event_id, $pages = 0) {
  391. return $this->getUsersInListing("/events/$event_id/rsvps/maybe", $pages);
  392. }
  393. /**
  394. * Gets a single event.
  395. *
  396. * @param int $id The event ID to fetch.
  397. * @param mixed $populate True to populate all data, integer to retrieve that number of RSVP pages, false (default) to do nothing.
  398. */
  399. function getEventById ($id, $populate = false) {
  400. $event = new FetLifeEvent(array(
  401. 'usr' => $this,
  402. 'id' => $id,
  403. ));
  404. $event->populate($populate);
  405. return $event;
  406. }
  407. /**
  408. * Retrieves list of events.
  409. *
  410. * TODO: Create an automated way of translating place names to place URL strings.
  411. * @param string $loc_str The "Place" URL part. For instance, "cities/5898" is "Baltimore, Maryland, United States".
  412. * @param int $pages How many pages to retrieve. By default, retrieve all (0).
  413. */
  414. function getUpcomingEventsInLocation ($loc_str, $pages = 0) {
  415. return $this->getEventsInListing("/$loc_str/events", $pages);
  416. }
  417. /**
  418. * Loads a specific page from a paginated list.
  419. *
  420. * @param string $url The URL of the paginated set.
  421. * @param int $page The number of the page in the set.
  422. * @return array The result of the HTTP request.
  423. * @see FetLifeConnection::doHttpRequest
  424. */
  425. private function loadPage ($url, $page = 1) {
  426. if ($page > 1) {
  427. $url .= "?page=$page";
  428. }
  429. return $this->connection->doHttpGet($url);
  430. }
  431. /**
  432. * Counts number of pages in a paginated listing.
  433. *
  434. * @param DOMDocument $doc The page to look for paginated numbering in.
  435. * @return int Number of pages.
  436. */
  437. private function countPaginatedPages ($doc) {
  438. $result = $this->doXPathQuery('//a[@class="next_page"]/../a', $doc); // get all pagination elements
  439. if (0 === $result->length) {
  440. // This is the first (and last) page.
  441. $num_pages = 1;
  442. } else {
  443. $num_pages = (int) $result->item($result->length - 2)->textContent;
  444. }
  445. return $num_pages;
  446. }
  447. // Helper function to return the results of an XPath query.
  448. public function doXPathQuery ($x, $doc) {
  449. $xpath = new DOMXPath($doc);
  450. return $xpath->query($x);
  451. }
  452. /**
  453. * Iterates through a listing of users, such as a friends list or group membership list.
  454. *
  455. * @param string $url_base The base URL for the listing pages.
  456. * @param int $pages The number of pages to iterate through.
  457. * @return array Array of FetLifeProfile objects from the listing's "user_in_list" elements.
  458. */
  459. private function getUsersInListing ($url_base, $pages) {
  460. $items = $this->getItemsInListing('//*[contains(@class, "user_in_list")]', $url_base, $pages);
  461. $ret = array();
  462. foreach ($items as $v) {
  463. $u = array();
  464. $u['nickname'] = $v->getElementsByTagName('img')->item(0)->attributes->getNamedItem('alt')->value;
  465. $u['avatar_url'] = $v->getElementsByTagName('img')->item(0)->attributes->getNamedItem('src')->value;
  466. $u['url'] = $v->getElementsByTagName('a')->item(0)->attributes->getNamedItem('href')->value;
  467. $u['id'] = current(array_reverse(explode('/', $u['url'])));
  468. list(, $u['age'], $u['gender'], $u['role']) = $this->parseAgeGenderRole($v->getElementsByTagName('span')->item(1)->nodeValue);
  469. $u['location'] = $v->getElementsByTagName('em')->item(0)->nodeValue;
  470. $ret[] = new FetLifeProfile($u);
  471. }
  472. return $ret;
  473. }
  474. // TODO: Perhaps these utility functions ought go in their own parser class?
  475. /**
  476. * Helper function to parse some info from a FetLife "profile_header" block.
  477. *
  478. * @param DOMDocument $doc The DOMDocument representing the page we're parsing.
  479. * @return FetLifeProfile A FetLifeProfile object.
  480. */
  481. function parseProfileHeader ($doc) {
  482. $hdr = $doc->getElementById('profile_header');
  483. $el = $hdr->getElementsByTagName('img')->item(0);
  484. $author_name = $el->attributes->getNamedItem('alt')->value;
  485. $author_avatar = $el->attributes->getNamedItem('src')->value;
  486. $author_url = $hdr->getElementsByTagName('a')->item(0)->attributes->getNamedItem('href')->value;
  487. $author_id = (int) current(array_reverse(explode('/', $author_url)));
  488. list(, $author_age,
  489. $author_gender,
  490. $author_role) = $this->parseAgeGenderRole($this->doXPathQuery('//*[@class="age_gender_role"]', $doc)->item(0)->nodeValue);
  491. // substr() is used to remove the parenthesis around the location here.
  492. $author_location = substr($this->doXPathQuery('//*[@class="location"]', $doc)->item(0)->nodeValue, 1, -1);
  493. return new FetLifeProfile(array(
  494. 'nickname' => $author_name,
  495. 'avatar_url' => $author_avatar,
  496. 'id' => $author_id,
  497. 'age' => $author_age,
  498. 'gender' => $author_gender,
  499. 'role' => $author_role,
  500. 'location' => $author_location
  501. ));
  502. }
  503. function parseAgeGenderRole ($str) {
  504. $m = array();
  505. preg_match('/^([0-9]{2})(\S+)? (\S+)?$/', $str, $m);
  506. return $m;
  507. }
  508. /**
  509. * Helper function to parse any comments section on the page.
  510. *
  511. * @param DOMDocument $doc The DOMDocument representing the page we're parsing.
  512. * @return Array An Array of FetLifeComment objects.
  513. */
  514. function parseComments ($doc) {
  515. $ret = array();
  516. $comments = $doc->getElementById('comments')->getElementsByTagName('article');
  517. foreach ($comments as $comment) {
  518. $commenter_el = $comment->getElementsByTagName('a')->item(0);
  519. $commenter_url = $commenter_el->attributes->getNamedItem('href')->value;
  520. $ret[] = new FetLifeComment(array(
  521. 'id' => (int) current(array_reverse(explode('_', $comment->getAttribute('id')))),
  522. 'creator' => new FetLifeProfile(array(
  523. 'url' => $commenter_url,
  524. 'id' => (int) current(array_reverse(explode('/', $commenter_url))),
  525. 'avatar_url' => $commenter_el->getElementsByTagName('img')->item(0)->attributes->getNamedItem('src')->value,
  526. 'nickname' => $commenter_el->getElementsByTagName('img')->item(0)->attributes->getNamedItem('alt')->value
  527. )),
  528. 'dt_published' => $comment->getElementsByTagName('time')->item(0)->attributes->getNamedItem('datetime')->value,
  529. 'content' => $comment->getElementsByTagName('div')->item(0)
  530. ));
  531. }
  532. return $ret;
  533. }
  534. /**
  535. * Iterates through a set of events from a given multi-page listing.
  536. *
  537. * @param string $url_base The base URL for the listing pages.
  538. * @param int $pages The number of pages to iterate through.
  539. * @return array Array of FetLifeEvent objects from the listed set.
  540. */
  541. private function getEventsInListing ($url_base, $pages) {
  542. $items = $this->getItemsInListing('//*[contains(@class, "event_listings")]/li', $url_base, $pages);
  543. $ret = array();
  544. foreach ($items as $v) {
  545. $e = array();
  546. $e['title'] = $v->getElementsByTagName('a')->item(0)->nodeValue;
  547. $e['url'] = $v->getElementsByTagName('a')->item(0)->attributes->getNamedItem('href')->value;
  548. $e['id'] = current(array_reverse(explode('/', $e['url'])));
  549. // Suppress this warning because we're manually appending UTC timezone marker.
  550. $start_timestamp = @strtotime($v->getElementsByTagName('div')->item(1)->nodeValue . ' UTC');
  551. $e['dtstart'] = ($start_timestamp) ?
  552. gmstrftime('%Y-%m-%d %H:%MZ', $start_timestamp) : $v->getElementsByTagName('div')->item(1)->nodeValue;
  553. $e['venue_name'] = $v->getElementsByTagName('div')->item(2)->nodeValue;
  554. $e['usr'] = $this;
  555. $ret[] = new FetLifeEvent($e);
  556. }
  557. return $ret;
  558. }
  559. /**
  560. * Iterates through a multi-page listing of items that match an XPath query.
  561. */
  562. private function getItemsInListing ($xpath, $url_base, $pages) {
  563. // Retrieve the first page.
  564. $cur_page = 1;
  565. $x = $this->loadPage($url_base, $cur_page);
  566. $doc = new DOMDocument();
  567. @$doc->loadHTML($x['body']);
  568. $num_pages = $this->countPaginatedPages($doc);
  569. // If retrieving all pages, set the page retrieval limit to the last existing page.
  570. if (0 === $pages) {
  571. $pages = $num_pages;
  572. }
  573. // Find and store items on this page.
  574. $items = array();
  575. $entries = $this->doXPathQuery($xpath, $doc);
  576. foreach ($entries as $entry) {
  577. $items[] = $entry;
  578. }
  579. // Find and store items on remainder of pages.
  580. while ( ($cur_page < $num_pages) && ($cur_page < $pages) ) {
  581. $cur_page++; // increment to get to next page
  582. $x = $this->loadPage($url_base, $cur_page);
  583. @$doc->loadHTML($x['body']);
  584. // Find and store friends on this page.
  585. $entries = $this->doXPathQuery($xpath, $doc);
  586. foreach ($entries as $entry) {
  587. $items[] = $entry;
  588. }
  589. }
  590. return $items;
  591. }
  592. }
  593. /**
  594. * Base class for various content items within FetLife.
  595. */
  596. class FetLifeContent extends FetLife {
  597. var $usr; // Associated FetLifeUser object.
  598. var $id;
  599. var $content; // DOMElement object. Use `getContentHtml()` to get as string.
  600. var $dt_published;
  601. var $creator;
  602. // Return the full URL, with fragment identifier.
  603. // Child classes should define their own getUrl() method!
  604. // TODO: Should this become an abstract class to enforce his contract?
  605. // If so, what should be done with the class variables? They'll
  606. // get changed to be class constants, which may not be acceptable.
  607. function getPermalink () {
  608. return self::base_url . $this->getUrl();
  609. }
  610. // Fetches and fills in the remainder of the object's data.
  611. // For this to work, child classes must define their own parseHtml() method.
  612. function populate () {
  613. $resp = $this->usr->connection->doHttpGet($this->getUrl());
  614. $data = $this->parseHtml($resp['body']);
  615. foreach ($data as $k => $v) {
  616. $this->$k = $v;
  617. }
  618. }
  619. function getContentHtml () {
  620. $html = '';
  621. $doc = new DOMDocument();
  622. foreach ($this->content->childNodes as $node) {
  623. $el = $doc->importNode($node, true);
  624. $html .= $doc->saveHTML($el);
  625. }
  626. return $html;
  627. }
  628. }
  629. /**
  630. * A FetLife Writing published by a user.
  631. */
  632. class FetLifeWriting extends FetLifeContent {
  633. var $title;
  634. var $category;
  635. var $privacy;
  636. var $comments; // An array of FetLifeComment objects.
  637. // TODO: Implement "love" fetching?
  638. var $loves;
  639. function FetLifeWriting ($arr_param) {
  640. // TODO: Rewrite this a bit more defensively.
  641. foreach ($arr_param as $k => $v) {
  642. $this->$k = $v;
  643. }
  644. }
  645. // Returns the server-relative URL of the profile.
  646. function getUrl () {
  647. return '/users/' . $this->creator->id . '/posts/' . $this->id;
  648. }
  649. // Given some HTML of a FetLife writing page, returns an array of its data.
  650. function parseHtml ($html) {
  651. $doc = new DOMDocument();
  652. @$doc->loadHTML($html);
  653. $ret = array();
  654. $ret['creator'] = $this->usr->parseProfileHeader($doc);
  655. $ret['title'] = $doc->getElementsByTagName('h2')->item(0)->nodeValue;
  656. $ret['content'] = $this->usr->doXPathQuery('//*[@id="post_content"]//div', $doc)->item(1);
  657. $ret['category'] = trim($this->usr->doXPathQuery('//*[@id="post_content"]//header//strong', $doc)->item(0)->nodeValue);
  658. $ret['dt_published'] = $this->usr->doXPathQuery('//*[@id="post_content"]//time/@datetime', $doc)->item(0)->value;
  659. $ret['privacy'] = $this->usr->doXPathQuery('//*[@id="privacy_section"]//*[@class="display"]', $doc)->item(0)->nodeValue;
  660. $ret['comments'] = $this->usr->parseComments($doc);
  661. return $ret;
  662. }
  663. // Override parent's implementation to strip out final paragraph from
  664. // contents that were scraped from a Writing listing page.
  665. function getContentHtml () {
  666. $html = '';
  667. $doc = new DOMDocument();
  668. foreach ($this->content->childNodes as $node) {
  669. $el = $doc->importNode($node, true);
  670. // Strip out FetLife's own "Read NUMBER comments" paragraph
  671. if ($el->hasAttributes() && (false !== stripos($el->attributes->getNamedItem('class')->value, 'no_underline')) ) {
  672. continue;
  673. }
  674. $html .= $doc->saveHTML($el);
  675. }
  676. return $html;
  677. }
  678. }
  679. /**
  680. * A FetLife Picture page. (Not the <img/> itself.)
  681. */
  682. class FetLifePicture extends FetLifeContent {
  683. var $src; // The fully-qualified URL of the image itself.
  684. var $thumb_src; // The fully-qualified URL of the thumbnail.
  685. var $comments;
  686. function FetLifePicture ($arr_param) {
  687. // TODO: Rewrite this a bit more defensively.
  688. foreach ($arr_param as $k => $v) {
  689. $this->$k = $v;
  690. }
  691. }
  692. function getUrl () {
  693. return "/users/{$this->creator->id}/pictures/{$this->id}";
  694. }
  695. // Parses a FetLife Picture page's HTML.
  696. function parseHtml ($html) {
  697. $doc = new DOMDocument();
  698. @$doc->loadHTML($html);
  699. $ret = array();
  700. $ret['creator'] = $this->usr->parseProfileHeader($doc);
  701. // TODO: I guess I could look at the actual page instea of guessing?
  702. //$ret['src'];
  703. $ret['content'] = $this->usr->doXPathQuery('//span[contains(@class, "caption")]', $doc)->item(0);
  704. $ret['dt_published'] = $doc->getElementById('picture')->getElementsByTagName('time')->item(0)->attributes->getNamedItem('datetime')->value;
  705. $ret['comments'] = $this->usr->parseComments($doc);
  706. return $ret;
  707. }
  708. }
  709. /**
  710. * Generic class for comments on FetLife contents.
  711. */
  712. class FetLifeComment extends FetLifeContent {
  713. var $id;
  714. var $creator;
  715. function FetLifeComment ($arr_param) {
  716. // TODO: Rewrite this a bit more defensively.
  717. foreach ($arr_param as $k => $v) {
  718. $this->$k = $v;
  719. }
  720. }
  721. function getUrl () {
  722. return parent::getUrl() . '#' . $this->getContentType() . "_comment_{$this->id}";
  723. }
  724. // Helper function to reflect on what this comment is attached to.
  725. private function getContentType () {
  726. switch ($x = get_parent_class($this)) {
  727. case 'FetLifeWriting':
  728. return 'post';
  729. case 'FetLifeStatus':
  730. return 'status';
  731. default:
  732. return $x;
  733. }
  734. }
  735. }
  /**
   * Profile information for a FetLife User.
   */
  class FetLifeProfile extends FetLifeContent {
      var $age;
      var $avatar_url;
      var $gender;
      var $id;
      var $location; // TODO: Split this up?
      var $nickname;
      var $role;
      var $paying_account;
      var $num_friends; // Number of friends displayed on their profile.
      // TODO: etc...

      /**
       * Creates a new FetLifeProfile object.
       *
       * Declared as __construct() instead of a method named after the class:
       * PHP 7 deprecates class-named constructors and PHP 8 no longer treats
       * them as constructors, so such a method would not run on `new`.
       *
       * @param array $arr_param Associative array of member => value pairs.
       */
      function __construct ($arr_param) {
          unset($this->creator); // Profile can't have a creator; it IS a creator.
          // TODO: Rewrite this a bit more defensively.
          foreach ($arr_param as $k => $v) {
              $this->$k = $v;
          }
      }

      // Returns the server-relative URL of the profile.
      function getUrl () {
          return '/users/' . $this->id;
      }

      /**
       * Returns the fully-qualified URL of the profile.
       *
       * @param bool $named If true, returns the canonical URL by nickname.
       * @return string
       */
      function getPermalink ($named = false) {
          if ($named) {
              return self::base_url . "/{$this->nickname}";
          }
          return self::base_url . $this->getUrl();
      }

      /**
       * Given some HTML of a FetLife Profile page, returns an array of its data.
       *
       * Every element lookup is checked before it is dereferenced, so a page
       * lacking an optional element (for example the donation badge) yields a
       * null value instead of raising a notice or warning.
       *
       * @param string $html Source of the profile page.
       * @return array|false Scraped member => value pairs, or false when the
       *                     page is the home page or an HTTP 500 error page.
       */
      function parseHtml ($html) {
          // Don't try parsing if we got bounced off the Profile for any reason.
          if ($this->usr->isHomePage($html) || $this->usr->isHttp500ErrorPage($html)) {
              // TODO: THROW an actual error, please?
              return false;
          }
          $doc = new DOMDocument();
          @$doc->loadHTML($html);
          $ret = array();

          // The first <span> inside the first <h2> is handed to
          // parseAgeGenderRole(); without that <span> there is nothing to parse.
          if ($el = $doc->getElementsByTagName('h2')->item(0)) {
              if ($el_span = $el->getElementsByTagName('span')->item(0)) {
                  list(, $ret['age'], $ret['gender'], $ret['role']) = $this->usr->parseAgeGenderRole($el_span->nodeValue);
              }
          }

          if ($el = $this->usr->doXPathQuery('//*[@class="pan"]', $doc)->item(0)) {
              $attr = $el->attributes->getNamedItem('src');
              $ret['avatar_url'] = ($attr) ? $attr->value : null;
          }

          $el = $doc->getElementsByTagName('em')->item(0);
          $ret['location'] = ($el) ? $el->nodeValue : null;

          if ($el = $doc->getElementsByTagName('img')->item(0)) {
              $attr = $el->attributes->getNamedItem('alt');
              $ret['nickname'] = ($attr) ? $attr->value : null;
          }

          // No matching badge element leaves this null, which
          // isPayingAccount() reports as false.
          $el = $this->usr->doXPathQuery('//*[contains(@class, "donation_badge")]', $doc)->item(0);
          $ret['paying_account'] = ($el) ? $el->nodeValue : null;

          if ($el = $doc->getElementsByTagName('h4')->item(0)) {
              if ($el_x = $el->getElementsByTagName('span')->item(0)) {
                  // Strip enclosing parenthesis and commas for results like "(1,057)"
                  $ret['num_friends'] = (int) str_replace(',', '', substr($el_x->nodeValue, 1, -1));
              } else {
                  $ret['num_friends'] = 0;
              }
          }
          return $ret;
      }

      /**
       * Whether or not this user profile has a paid subscription to FetLife.
       *
       * @return bool
       */
      function isPayingAccount () {
          return (bool) $this->paying_account;
      }
  }
  /**
   * A Status object.
   *
   * Holds the text of a status update; the constructor stores the given
   * string as-is and does not populate the URL.
   */
  class FetLifeStatus extends FetLifeContent {
      const MAX_STATUS_LENGTH = 200; // Character count.

      public $text; // Set by the constructor.
      public $url;  // Not set by this class.

      /**
       * Creates a new FetLifeStatus object.
       *
       * @param string $str Text of the status.
       */
      public function __construct ($str) {
          $this->text = $str;
      }
  }
  /**
   * An Event object.
   */
  class FetLifeEvent extends FetLifeContent {
      // See event creation form at https://fetlife.com/events/new
      var $usr; // Associated FetLifeUser object.
      var $id;
      var $title;
      var $tagline;
      var $dtstart;
      var $dtend;
      var $venue_name; // Text of the venue name, if provided.
      var $venue_address; // Text of the venue address, if provided.
      var $adr = array(); // Array of elements matching adr microformat.
      var $cost;
      var $dress_code;
      var $description;
      var $created_by; // A FetLifeProfile who created the event.
      var $going; // An array of FetLifeProfile objects who are RSVP'ed "Yes."
      var $maybegoing; // An array of FetLifeProfile objects who are RSVP'ed "Maybe."

      /**
       * Creates a new FetLifeEvent object.
       *
       * Declared as __construct() instead of a method named after the class:
       * PHP 7 deprecates class-named constructors and PHP 8 no longer treats
       * them as constructors, so such a method would not run on `new`.
       *
       * @param array $arr_param Associative array of member => value pairs.
       */
      function __construct ($arr_param) {
          // TODO: Rewrite this a bit more defensively.
          foreach ($arr_param as $k => $v) {
              $this->$k = $v;
          }
      }

      // Returns the server-relative URL of the event.
      function getUrl () {
          return '/events/' . $this->id;
      }

      // Returns the fully-qualified URL of the event.
      function getPermalink () {
          return self::base_url . $this->getUrl();
      }

      /**
       * Fetches and fills the remainder of the Event's data.
       *
       * This is public because it'll take a long time and so it is recommended to
       * do so only when you need specific data.
       *
       * @param mixed $rsvp_pages Number of RSVP pages to get, if any. Default is false, which means attendee lists won't be fetched. Passing true means "all".
       */
      public function populate ($rsvp_pages = false) {
          $resp = $this->usr->connection->doHttpGet($this->getUrl());
          $data = $this->parseEventHtml($resp['body']);
          foreach ($data as $k => $v) {
              $this->$k = $v;
          }
          if ($rsvp_pages) {
              $rsvp_pages = (true === $rsvp_pages) ? 0 : $rsvp_pages; // Privately, 0 means "all".
              $this->going = $this->usr->getKinkstersGoingToEvent($this->id, $rsvp_pages);
              $this->maybegoing = $this->usr->getKinkstersMaybeGoingToEvent($this->id, $rsvp_pages);
          }
      }

      /**
       * Given some HTML of a FetLife event page, returns an array of its data.
       *
       * Fields whose element is not found on the page are returned as null;
       * lookups are null-checked so a missing element neither raises a notice
       * nor causes a "member function on null" fatal error.
       *
       * @param string $html Source of the event page.
       * @return array Scraped member => value pairs.
       */
      private function parseEventHtml ($html) {
          $doc = new DOMDocument();
          @$doc->loadHTML($html);
          $ret = array();
          $ret['tagline'] = $this->firstNodeProperty('//h1[contains(@itemprop, "name")]/following-sibling::p', $doc);
          $ret['dtstart'] = $this->firstNodeProperty('//*[contains(@itemprop, "startDate")]/@content', $doc);
          $ret['dtend'] = $this->firstNodeProperty('//*[contains(@itemprop, "endDate")]/@content', $doc);
          $ret['venue_address'] = $this->firstNodeProperty('//th/*[text()="Location:"]/../../td/*[contains(@class, "s")]/text()[1]', $doc);
          if ($location = $this->usr->doXPathQuery('//*[contains(@itemprop, "location")]', $doc)->item(0)) {
              // The <meta> children are read positionally: country, region, then an optional locality.
              $metas = $location->getElementsByTagName('meta');
              $ret['adr']['country-name'] = $this->attributeValue($metas->item(0), 'content');
              $ret['adr']['region'] = $this->attributeValue($metas->item(1), 'content');
              if ($locality = $metas->item(2)) {
                  $ret['adr']['locality'] = $this->attributeValue($locality, 'content');
              }
          }
          $ret['cost'] = $this->firstNodeProperty('//th[text()="Cost:"]/../td', $doc);
          $ret['dress_code'] = $this->firstNodeProperty('//th[text()="Dress code:"]/../td', $doc, 'textContent');
          // TODO: Save an HTML representation of the description, then make a getter that returns a text-only version.
          // See also http://www.php.net/manual/en/class.domelement.php#101243
          $ret['description'] = $this->firstNodeProperty('//*[contains(@class, "description")]', $doc);
          if ($creator_link = $this->usr->doXPathQuery('//h3[text()="Created by"]/following-sibling::ul//a', $doc)->item(0)) {
              $href = $this->attributeValue($creator_link, 'href');
              $avatar = $creator_link->getElementsByTagName('img')->item(0);
              $ret['created_by'] = new FetLifeProfile(array(
                  'url' => $href,
                  // The ID is the last path segment of the link.
                  'id' => current(array_reverse(explode('/', (string) $href))),
                  'avatar_url' => $this->attributeValue($avatar, 'src'),
                  'nickname' => $this->attributeValue($avatar, 'alt')
              ));
          }
          return $ret;
      }

      /**
       * Runs an XPath query and reads one property of the first matching node.
       *
       * @param string $query XPath expression.
       * @param DOMDocument $doc Document to query.
       * @param string $property Node property to read; defaults to "nodeValue".
       * @return mixed The property value, or null when nothing matched.
       */
      private function firstNodeProperty ($query, $doc, $property = 'nodeValue') {
          $node = $this->usr->doXPathQuery($query, $doc)->item(0);
          return ($node) ? $node->$property : null;
      }

      /**
       * Reads the value of a named attribute from a node.
       *
       * @param DOMNode|null $node Node to inspect; may be null.
       * @param string $name Attribute name.
       * @return string|null The attribute value, or null when the node or attribute is missing.
       */
      private function attributeValue ($node, $name) {
          if (!$node || !$node->attributes) {
              return null;
          }
          $attr = $node->attributes->getNamedItem($name);
          return ($attr) ? $attr->value : null;
      }
  }