PageRenderTime 53ms CodeModel.GetById 17ms RepoModel.GetById 1ms app.codeStats 0ms

/Recommender.class.php

https://bitbucket.org/kljungkvist/test-fork
PHP | 985 lines | 492 code | 200 blank | 293 comment | 60 complexity | d61cb305b0fd017966217475bfd194ab MD5 | raw file
  1. <?php
  2. /***********************************************************
  3. * Recommender Class -- Bayes/memcache version
  4. *
  5. * 06/16/2009 -- KEL created Bayes/Memcache fork for 2009 version
  6. * Created 08/07/2007 by Kristian Ljungkvist Based on Recommender test bed work.
  7. *
  8. * Copyright 2007 Science Buddies. All Rights Reserved
  9. ************************************************************/
  10. require_once 'sciencebuddies/Error.class.php';
  11. require_once 'sciencebuddies/Database.class.php';
  12. require_once 'sciencebuddies/Modifier.class.php';
  13. class Recommender
  14. {
  15. var $sk_summary = array();
  16. var $configuration = array();
  17. var $memcache_handle = null;
  18. var $DB_Connection = null;
  19. var $dataset_filename = "";
  20. var $MEMCACHE_SOCKET_FILE = 'unix:///home/science/memcached.sock';
  /**
   * Constructor
   *
   * Stores the database handle, obtains a memcache handle (creating and
   * connecting one on the default socket when none is supplied), loads the
   * recommender configuration row for the given origin and fetches the
   * dataset summary from memcache.
   *
   * @param object      $db        Database handle; must expose query($sql).
   * @param int         $originID  Origin whose recommender_config row is loaded.
   * @param string      $dataset   Dataset name, used as the key prefix into memcache.
   * @param object|null $memcache  Optional memcache handle; created here when empty.
   *
   * @author Kristian Ljungkvist
   */
  function __construct($db,$originID,$dataset,$memcache = null) {
      // Bayes/memcache version uses dataset as a prefix into memcache for the various datasets.
      if(!$db) {
          // No database handle is considered a critical error.
          trigger_error("No Database handle passed to Recommender constructor",E_USER_ERROR);
          exit;
      }

      if(!$memcache) {
          // Don't force the client to pass in a memcache handle; create one on demand.
          $memcache = new Memcache;
          // A freshly constructed object is always truthy, so the connect()
          // result is the only meaningful failure signal here.
          if(!$memcache->connect($this->MEMCACHE_SOCKET_FILE,0)) {
              trigger_error("Could not connect to memcache", E_USER_ERROR);
              exit;
          }
      }

      $this->DB_Connection = $db;
      $this->memcache_handle = $memcache;
      $this->dataset_filename = $dataset;

      // Get configuration for the recommender with the current originID.
      // The ID is forced to an integer so it cannot alter the SQL statement.
      $sql = "SELECT * FROM recommender_config where originID=" . (int)$originID;
      $this->configuration = $this->DB_Connection->query($sql);

      // NOTE(review): get() presumably yields false on a cache miss, in which
      // case sk_summary is not an array -- confirm how callers handle that.
      $this->sk_summary = $memcache->get("$dataset:sk_summary");
  }
  /**
   * individual_ranking
   *
   * Returns the ranking of SKs for the current dataset and the individual passed in.
   * The ranking is tagged with underscore-prefixed header entries (_Source,
   * _SourceProfile, _DatasetModTime, _DatasetFilename) so later phases can
   * strip them from displayed result sets.
   *
   * @param array $ind_summary Individual profile; reads 'Questions' (via algorithm_bayes) and 'ProfileID'.
   * @return array Ranking (SK => score) plus the header entries.
   * @author Kristian Ljungkvist
   */
  function individual_ranking($ind_summary) {
      // algorithm_bayes does not use its $mof argument; pass null explicitly
      // instead of an undefined variable.
      $ranking_array = $this->algorithm_bayes($ind_summary,4,null,0,0, $this->dataset_filename, $this->memcache_handle);
      if(!is_array($ranking_array)) {
          // No matching projects: start from an empty ranking so the header tags can still be attached.
          $ranking_array = array();
      }

      // KEL 10/18/07 -- Tag the resulting ranking with the ProfileID of the profile that created it.
      $ranking_array['_Source'] = 'Individual';
      $ranking_array['_SourceProfile'] = isset($ind_summary['ProfileID']) ? $ind_summary['ProfileID'] : null;

      // KEL 09/18/2008 -- Tag the ranking with the time stamp of the dataset that was used,
      // so that updated datasets can be detected and the cache invalidated.
      $stat_array = stat("./project_ideas/development/".$this->dataset_filename);
      // stat() returns false when the file is missing; record null in that case.
      $ranking_array['_DatasetModTime'] = $stat_array ? $stat_array['mtime'] : null;

      // KEL 06/24/09 -- Store the originating dataset filename for future reference
      // since we'll have several "neighborhood" datasets.
      $ranking_array['_DatasetFilename'] = $this->dataset_filename;

      return $ranking_array;
  }
  /**
   * show_others_like_this
   *
   * Returns the ranking of SKs against the passed-in SK. The source project's
   * profile is looked up in sk_summary under the subarea, which is the part of
   * the SK name before the first underscore.
   *
   * @param string $solt_sk Filename/key of the source project.
   * @return array Ranking (SK => score) plus _Source, _SourceProfile and _DatasetModTime header entries.
   * @author Kristian Ljungkvist
   */
  function show_others_like_this($solt_sk) {
      // Get the profile for the source project; fall back to a profile with no
      // questions when the SK cannot be resolved, which yields an empty ranking.
      $source_project_profile = array('Questions' => array());
      if(preg_match('/^([^_]+)/', $solt_sk, $match)) {
          $subarea = $match[0];
          if(isset($this->sk_summary[$subarea]['SKs'][$solt_sk])) {
              $source_project_profile = $this->sk_summary[$subarea]['SKs'][$solt_sk];
          }
      }

      // Generate the ranking of that project against the others.
      // NOTE: We need to generate the project profile somehow in the new Bayes context. We'll most likely
      // need to store one record per project in memcache with the project profile. This will need to
      // happen in the add_to_memcache utility.
      // algorithm_bayes does not use its $mof argument; pass null explicitly.
      $ranking_array = $this->algorithm_bayes($source_project_profile,4,null,0,0, $this->dataset_filename, $this->memcache_handle);
      if(!is_array($ranking_array)) {
          $ranking_array = array();
      }

      // KEL 10/18/07 -- Tag the ranking with its source. Leading underscores make it
      // easy for other phases to strip these out of displayed result sets.
      $ranking_array['_Source'] = 'Project';
      $ranking_array['_SourceProfile'] = $solt_sk; // For show others like this, the project filename is the source ID.

      // KEL 09/18/2008 -- Tag the ranking with the time stamp of the dataset that was used,
      // so that updated datasets can be detected and the cache invalidated.
      $stat_array = stat("./project_ideas/development/".$this->dataset_filename);
      // stat() returns false when the file is missing; record null in that case.
      $ranking_array['_DatasetModTime'] = $stat_array ? $stat_array['mtime'] : null;

      return $ranking_array;
  }
  /**
   * static_ranking
   *
   * Essentially converts the current dataset into a "ranking" of the form the rendering methods expect.
   * Every SK found in sk_summary receives the same score of 1.0.
   * Used for the dynamic interest area pages.
   *
   * @return array SK => 1.0 for every SK, plus _Source, _SourceProfile and _DatasetModTime header entries.
   * @author Kristian Ljungkvist 05/21/08
   */
  function static_ranking() {
      $ranking_array = array();

      // Convert the sk_summary member to the SK->score form. sk_summary may not
      // be an array (e.g. when the cache lookup failed), so guard the iteration.
      if(is_array($this->sk_summary)) {
          foreach($this->sk_summary as $subarea => $subarea_array) {
              if(!isset($subarea_array['SKs']) || !is_array($subarea_array['SKs'])) {
                  continue;
              }
              foreach($subarea_array['SKs'] as $SK => $SK_array) {
                  $ranking_array[$SK] = 1.0;
              }
          }
      }

      $ranking_array['_Source'] = 'Individual';
      // There is no individual behind a static ranking, so the profile ID is null.
      $ranking_array['_SourceProfile'] = null;

      // KEL 09/18/2008 -- Tag the ranking with the time stamp of the dataset that was used,
      // so that updated datasets can be detected and the cache invalidated.
      $stat_array = stat("./project_ideas/development/".$this->dataset_filename);
      // stat() returns false when the file is missing; record null in that case.
      $ranking_array['_DatasetModTime'] = $stat_array ? $stat_array['mtime'] : null;

      return $ranking_array;
  }
  /**
   * algorithm_bayes
   *
   * Scores projects against one profile using per-answer probabilities stored in memcache.
   * For every question/answer pair of the profile, the list "$dataset:$question:$user_answer"
   * (project => answer probability) is multiplied into a running product per project, but only
   * when the project's maturity for that question (list "$dataset:$question:3") exceeds 100 and
   * the probability is non-zero. Each product is then multiplied by the project's prior
   * probability ("$dataset:prior_prob") and the (n+1)-th root is taken, n being the number of
   * questions that contributed. Projects are kept when the score is below 1 and their highest
   * contributing maturity exceeds $threshold.
   *
   * @param array  $individual_array Profile; only the 'Questions' entry (question => answer) is read.
   * @param mixed  $threshold        Minimum (exclusive) highest maturity for a project to be kept.
   * @param mixed  $mof              Unused; kept for signature compatibility.
   * @param mixed  $recenter         Unused; kept for signature compatibility.
   * @param mixed  $gcv              Unused; kept for signature compatibility.
   * @param string $dataset          Dataset name, used as the memcache key prefix.
   * @param object $memcache         Handle exposing get($key).
   * @return array project => score, sorted by descending score; empty when nothing qualifies.
   */
  function algorithm_bayes($individual_array,$threshold,$mof,$recenter,$gcv, $dataset, $memcache) {
      // Get list of prior probabilities; treat a cache miss as "no priors".
      $prior_probabilities_list = $memcache->get("$dataset:prior_prob");
      if(!is_array($prior_probabilities_list)) {
          $prior_probabilities_list = array();
      }

      $question_maturity_threshold = 100;
      $unordered_ranking = array();       // project => running product of answer probabilities
      $matching_questions_ref = array();  // project => number of questions that contributed
      $highest_maturity_ref = array();    // project => highest maturity among contributing questions

      $questions = array();
      if(isset($individual_array['Questions']) && is_array($individual_array['Questions'])) {
          $questions = $individual_array['Questions'];
      }

      foreach($questions as $question => $user_answer) {
          // Grab the corresponding project data lists from memcached.
          $project_list = $memcache->get("$dataset:$question:$user_answer");
          $project_maturity_list = $memcache->get("$dataset:$question:3");
          if(!is_array($project_list) || !is_array($project_maturity_list)) {
              // Without both lists no project can pass the maturity test for this question.
              continue;
          }

          foreach($project_list as $project => $answer_prob) {
              if($answer_prob == 0) {
                  continue;
              }
              $maturity = isset($project_maturity_list[$project]) ? $project_maturity_list[$project] : null;
              if(!($maturity > $question_maturity_threshold)) {
                  continue;
              }

              if(empty($unordered_ranking[$project])) {
                  $unordered_ranking[$project] = 1.0;
              }
              $unordered_ranking[$project] = $unordered_ranking[$project] * $answer_prob;

              if(!isset($matching_questions_ref[$project])) {
                  $matching_questions_ref[$project] = 0;
                  $highest_maturity_ref[$project] = $maturity;
              }
              $matching_questions_ref[$project]++;
              if($maturity > $highest_maturity_ref[$project]) {
                  $highest_maturity_ref[$project] = $maturity;
              }
          }
      }

      // Now loop through the projects and do the nth root / prior probability calculations.
      $filtered_unordered_ranking = array();
      foreach($matching_questions_ref as $project => $num_matching_questions) {
          // A project without a stored prior is scored with a prior of 0.
          $prior = isset($prior_probabilities_list[$project]) ? $prior_probabilities_list[$project] : 0;
          // Multiply by the prior, then take the root over the number of factors
          // (matching questions plus the prior itself).
          $score = pow($unordered_ranking[$project] * $prior,(1 / ($num_matching_questions + 1)));
          if(($score < 1) && ($highest_maturity_ref[$project] > $threshold)) {
              $filtered_unordered_ranking[$project] = $score;
          }
      }

      // Sort by descending score -- best match first.
      arsort($filtered_unordered_ranking);
      return $filtered_unordered_ranking;
  }
  /**
   * function algorithm070220h
   *
   * Distance-based ranking: compares one individual's answers with the stored
   * answers of every SK and returns one of three per-SK measures of fit,
   * sorted ascending (best match first):
   *   MSD -- mean squared difference, rounded to 5 decimal places
   *   MAV -- mean absolute difference
   *   SAV -- sum of absolute differences
   * Only questions whose key starts with an underscore and that the individual
   * answered are compared.
   *
   * @param array  $big_array        subarea => array('SKs' => array(SK => array('Questions' => ...))).
   * @param array  $individual_array Individual profile; reads 'Questions' (question => answer).
   * @param mixed  $threshold        Unused; kept for signature compatibility.
   * @param string $mof              'MSD', 'MAV' or 'SAV'; anything else selects MSD.
   * @param bool   $recenter         Recenter the individual's answers before comparing.
   * @param mixed  $gcv              Global center value used for recentering; defaults to 1 when unset.
   * @return array SK => measure of fit, ascending.
   **/
  function algorithm070220h($big_array,$individual_array,$threshold,$mof = 'MSD',$recenter = false,$gcv = null) {
      // big_array holds an entire subject area. Individual_array is one individual's responses.
      $squared_diff = array();
      $absolute_diff = array();
      $MSD = array();
      $MAV = array();
      $SAV = array();

      // A positive GlobalRecenterValue in the configuration switches recentering
      // on when the caller did not request it.
      if(!$recenter && isset($this->configuration['GlobalRecenterValue']) && $this->configuration['GlobalRecenterValue'] > 0) {
          $recenter = true;
          $gcv = $this->configuration['GlobalRecenterValue'];
      }

      if($recenter) {
          /** Recenter the values in individual_array:
          *
          * 1) Calculate the overall average of all the questions that the user answered = UserAvg
          * 2) Calculate a re-centering value for the user:
          *    UserOffset = UserAvg - GlobalCenterValue
          * 3) Hand UserOffset to recenter_callback for every entry of the 'Questions' array
          *    (presumably NewUserValue = OldUserValue - UserOffset; the callback is defined elsewhere).
          **/
          if(!isset($gcv)) {
              $gcv = 1; // Default Global Center Value to 1
          }
          $usercount = 0;
          $usertotal = 0;
          foreach($individual_array['Questions'] as $question => $answer) {
              if(!preg_match("/^_/", $question)) {
                  continue; // Only keys starting with an underscore are survey questions.
              }
              $usertotal += $answer;
              $usercount++;
          }
          // Without any answered survey question there is no average to recenter
          // around (and nothing would be compared below), so skip the step
          // rather than divide by zero.
          if($usercount > 0) {
              $useroffset = ($usertotal / $usercount) - $gcv;
              array_walk($individual_array['Questions'],'recenter_callback',$useroffset);
          }
      }

      // For each SK, for each survey question, calculate the squared and the
      // absolute difference between the user's score and the SK's stored score.
      foreach($big_array as $subarea => $subarea_array) {
          foreach($subarea_array['SKs'] as $SK => $SK_array) {
              foreach($SK_array['Questions'] as $question => $answer) {
                  if(!preg_match("/^_/", $question)) {
                      continue; // Not a survey question.
                  }
                  if(!isset($individual_array['Questions'][$question])) {
                      continue; // Skip questions the user didn't answer.
                  }
                  $difference = $individual_array['Questions'][$question] - $answer;
                  $squared_diff[$SK][$question] = pow($difference,2);
                  $absolute_diff[$SK][$question] = abs($difference);
              }
          }
      }

      // MSD -- sum of the squared differences divided by the number of questions.
      // KEL 02/08/08 -- Rounded to 5 decimal places to save space in the cache.
      foreach($squared_diff as $SD_SK => $SD_array) {
          $MSD[$SD_SK] = round(array_sum($SD_array) / count($SD_array),5);
      }
      asort($MSD); // Sort list by ascending MSD -- Best match first.

      // MAV -- mean of the absolute differences; SAV -- their plain sum.
      foreach($absolute_diff as $SD_SK => $SD_array) {
          $SAV[$SD_SK] = array_sum($SD_array);
          $MAV[$SD_SK] = $SAV[$SD_SK] / count($SD_array);
      }
      asort($MAV); // Sort list by ascending MAV -- Best match first.
      asort($SAV); // Sort list by ascending SAV -- Best match first.

      switch($mof) {
          case 'MAV':
              return $MAV;
          case 'SAV':
              return $SAV;
      }
      // Default: MSD
      return $MSD;
  }
  /**
   * function post_process -- one-time modifier pass applied when a new ranking list is created.
   *
   * Feeds the ranking through the NewProjectRandomizer and then the ProjectRandomizer
   * modifier, so that the randomized list is what ends up being cached, just as the
   * original ranking would be.
   *
   * Kristian Ljungkvist 09/19/07
   *
   * @param array $ranking_array      Ranking to process.
   * @param mixed $individual_profile Passed through to each modifier.
   * @param mixed $state_array        Passed through to each modifier.
   * @return mixed The ranking as returned by the last modifier.
   **/
  function post_process($ranking_array,$individual_profile,$state_array) {
      $pipeline = array('NewProjectRandomizer', 'ProjectRandomizer');
      foreach($pipeline as $modifier_name) {
          $stage = ModifierFactory::createModifier($modifier_name);
          // Each stage receives the output of the previous one.
          $ranking_array = $stage->process(
              $ranking_array,
              $individual_profile,
              $state_array,
              $this->configuration,
              $this->sk_summary
          );
      }
      return $ranking_array;
  }
  /**
   * function render_results
   *
   * Generates a display version of the ranking array: runs the configured
   * modifier pipeline (with the Paginator always last), then builds one display
   * row per remaining project using data from the project_ideas table.
   *
   * @param array $ranking_array      Ranking (SK => score, plus underscore-prefixed header entries).
   * @param mixed $individual_profile Passed through to the modifiers.
   * @param array $state_array        Passed through to the modifiers; 'rid', 'p' and 'sid' are read for diagnostics.
   * @param array $favorites          List of SKs the user marked as favorites.
   * @return array List of display rows, zero-indexed, each carrying 'rank', titles, difficulty, etc.
   **/
  function render_results($ranking_array,$individual_profile,$state_array,$favorites) {
      if(!is_array($favorites)) {
          $favorites = array(); // in_array() below needs an array
      }

      // Cycle through the modifiers from the recommender configuration as a pipeline.
      // explode() replaces split(), which no longer exists as of PHP 7.
      foreach (explode(',',$this->configuration['ModifierList']) as $modifier) {
          if($modifier == 'Paginator') {
              // Skip paginator for now, since we first need to know how many pages long
              // the rendered list is given the current constraints (for page control).
              continue;
          }
          $modifier_object = ModifierFactory::createModifier($modifier);
          // Pass it the current ranking list and state; the result feeds the next modifier.
          $ranking_array = $modifier_object->process($ranking_array,$individual_profile,$state_array,$this->configuration,$this->sk_summary);
      }

      // Get the total number of pages in the rendered ranking.
      $projects_per_page = $this->configuration['MaxProjectsPerPage'];
      $total_pages = ($projects_per_page > 0) ? ceil(count($ranking_array) / $projects_per_page) : 0;

      // Now, go through the paginator:
      $modifier_object = ModifierFactory::createModifier('Paginator');
      $ranking_array = $modifier_object->process($ranking_array,$individual_profile,$state_array,$this->configuration,$this->sk_summary);

      $display_array = array();
      $cur_rank = 1;
      foreach($ranking_array as $SK => $MSD) {
          // Check for empty SK, which would only happen if there was an empty project in the array.
          // KEL 01/30/08
          if(!$SK) {
              // Notify me by email if we get an empty SK -- KEL 01/30/08
              $msg = "cur_rank = $cur_rank\n";
              $msg .= "ranking_array:\n";
              $msg .= print_r($ranking_array,true);
              $msg .= "\nIndividualID={$state_array['rid']}\n";
              $msg .= "\npage={$state_array['p']}\n";
              $msg .= "\nStateID={$state_array['sid']}\n";
              mail("kristian.ljungkvist@gmail.com","Recommender.class.php -- Empty SK",$msg);
              continue;
          }
          // Skip the header fields that are prefixed with an underscore -- KEL 10/18/07
          if(strpos($SK, '_') === 0) {
              continue;
          }

          // The subarea is the part of the SK before the first underscore.
          preg_match('/^([^_]+)/', $SK, $match);
          $subarea = $match[0];

          // KEL 08/29/07 -- Database query to grab fields that are currently not cached in the sk_summary.
          // NOTE(review): $SK is interpolated unescaped; confirm ranking keys can never carry user input.
          $sql = "SELECT * FROM project_ideas WHERE Filename='$SK'";
          $project_idea_data = $this->DB_Connection->query($sql);

          $row = array(
              'rank' => $cur_rank,
              'subarea' => $subarea,
              'filename' => $SK,
              'title' => $project_idea_data['Title'],
              // Total number of pages in the result set, repeated on every row.
              'total_pages' => $total_pages,
              // We'll standardize the capitalization on these. -- KEL
              'Filename' => $SK,
              'Title' => $project_idea_data['Title'],
              'Type' => $project_idea_data['Type'],
              'DifficultyLevel_Low' => $project_idea_data['DifficultyLevel_Low'],
              'DifficultyLevel_High' => $project_idea_data['DifficultyLevel_High'],
              'Abstract' => $project_idea_data['Abstract'],
              'Video' => $project_idea_data['Video'],
              'ImgFilename_Abstract' => $project_idea_data['ImgFilename_Abstract'],
          );
          // Add a flag if this project has been added to favorites
          if(in_array($SK,$favorites)) {
              $row['Favorite'] = true;
          }
          $display_array[$cur_rank-1] = $row;
          $cur_rank++;
      }
      return $display_array;
  }
  /**
   * render_favorites
   *
   * Builds the display rows for a list of favorite projects, in the order
   * given, using data from the project_ideas table.
   *
   * @param array $favorites List of SKs (project filenames).
   * @return array List of display rows, zero-indexed; every row has 'Favorite' => true.
   **/
  function render_favorites($favorites) {
      $display_array = array();
      if(!is_array($favorites)) {
          return $display_array;
      }

      $cur_rank = 1;
      foreach($favorites as $SK) {
          // The subarea is the part of the SK before the first underscore;
          // null when the SK has no such prefix.
          $subarea = preg_match('/^([^_]+)/', $SK, $match) ? $match[0] : null;

          // KEL 08/29/07 -- Database query to grab fields that are currently not cached in the sk_summary.
          // We'll probably decide to cache these in the sk_summary for performance reasons later.
          // NOTE(review): $SK is interpolated unescaped; confirm favorites can never carry user input.
          $sql = "SELECT * FROM project_ideas WHERE Filename='$SK'";
          $project_idea_data = $this->DB_Connection->query($sql);

          $display_array[$cur_rank-1] = array(
              'rank' => $cur_rank,
              'subarea' => $subarea,
              'filename' => $SK,
              'title' => $project_idea_data['Title'],
              'Type' => $project_idea_data['Type'],
              'DifficultyLevel_Low' => $project_idea_data['DifficultyLevel_Low'],
              'DifficultyLevel_High' => $project_idea_data['DifficultyLevel_High'],
              'Abstract' => $project_idea_data['Abstract'],
              'ImgFilename_Abstract' => $project_idea_data['ImgFilename_Abstract'],
              'Video' => $project_idea_data['Video'],
              'Favorite' => true, // It is a favorite, after all...
          );
          $cur_rank++;
      }
      return $display_array;
  }
  /**
   * function render_interest_area
   *
   * Generates a display version of the ranking array, filtered by interest area.
   * Uses a fixed pipeline (DifficultyLevel, InterestArea, Deduper) followed by the
   * Paginator; no randomization is done on interest areas and the configured
   * modifier list is not consulted.
   *
   * @param array $ranking_array      Ranking (SK => score, plus underscore-prefixed header entries).
   * @param mixed $individual_profile Passed through to the modifiers.
   * @param mixed $state_array        Passed through to the modifiers.
   * @param array $favorites          List of SKs the user marked as favorites.
   * @return array List of display rows, zero-indexed.
   **/
  function render_interest_area($ranking_array,$individual_profile,$state_array,$favorites) {
      if(!is_array($favorites)) {
          $favorites = array(); // in_array() below needs an array
      }

      // Don't do any randomization on interest areas. InterestArea takes the
      // place of the AreaAssignment modifier; the Paginator runs separately below
      // because the page count has to be determined first.
      foreach (array('DifficultyLevel','InterestArea','Deduper') as $modifier) {
          $modifier_object = ModifierFactory::createModifier($modifier);
          // Pass it the current ranking list and state; the result feeds the next modifier.
          $ranking_array = $modifier_object->process($ranking_array,$individual_profile,$state_array,$this->configuration,$this->sk_summary);
      }

      // Get the total number of pages in the rendered ranking.
      $projects_per_page = $this->configuration['MaxProjectsPerPage'];
      $total_pages = ($projects_per_page > 0) ? ceil(count($ranking_array) / $projects_per_page) : 0;

      // Now, go through the paginator:
      $modifier_object = ModifierFactory::createModifier('Paginator');
      $ranking_array = $modifier_object->process($ranking_array,$individual_profile,$state_array,$this->configuration,$this->sk_summary);

      $display_array = array();
      $cur_rank = 1;
      foreach($ranking_array as $SK => $MSD) {
          // Skip the header fields that are prefixed with an underscore -- KEL 10/18/07
          if(strpos($SK, '_') === 0) {
              continue;
          }

          // The subarea is the part of the SK before the first underscore.
          $subarea = preg_match('/^([^_]+)/', $SK, $match) ? $match[0] : null;

          // Type and difficulty come from the cached dataset summary.
          $sk_questions = array();
          if(isset($this->sk_summary[$subarea]['SKs'][$SK]['Questions'])) {
              $sk_questions = $this->sk_summary[$subarea]['SKs'][$SK]['Questions'];
          }

          // KEL 08/29/07 -- Database query to grab fields that are currently not cached in the sk_summary.
          // NOTE(review): $SK is interpolated unescaped; confirm ranking keys can never carry user input.
          $sql = "SELECT * FROM project_ideas WHERE Filename='$SK'";
          $project_idea_data = $this->DB_Connection->query($sql);

          $row = array(
              'rank' => $cur_rank,
              'subarea' => $subarea,
              'filename' => $SK,
              'title' => $project_idea_data['Title'],
              // Total number of pages in the result set, repeated on every row.
              'total_pages' => $total_pages,
              // We'll standardize the capitalization on these. -- KEL
              'Filename' => $SK,
              'Title' => $project_idea_data['Title'],
              'Type' => isset($sk_questions['Type']) ? $sk_questions['Type'] : null,
              'DifficultyLevel_Low' => isset($sk_questions['DifficultyLevel_Low']) ? $sk_questions['DifficultyLevel_Low'] : null,
              'DifficultyLevel_High' => isset($sk_questions['DifficultyLevel_High']) ? $sk_questions['DifficultyLevel_High'] : null,
              'Abstract' => $project_idea_data['Abstract'],
              'Video' => $project_idea_data['Video'],
              'ImgFilename_Abstract' => $project_idea_data['ImgFilename_Abstract'],
          );
          // Add a flag if this project has been added to favorites
          if(in_array($SK,$favorites)) {
              $row['Favorite'] = true;
          }
          $display_array[$cur_rank-1] = $row;
          $cur_rank++;
      }
      return $display_array;
  }
  /**
   * function render_interest_area_all_projects
   *
   * Generates a display version of the ranking array, filtered by interest area only.
   * No other filtering and no pagination is applied in this version.
   *
   * @param array $ranking_array      Ranking (SK => score, plus underscore-prefixed header entries).
   * @param mixed $individual_profile Passed through to the modifier.
   * @param mixed $state_array        Passed through to the modifier.
   * @param array $favorites          List of SKs the user marked as favorites.
   * @return array List of display rows, zero-indexed (no abstract, video or image fields).
   **/
  function render_interest_area_all_projects($ranking_array,$individual_profile,$state_array,$favorites) {
      if(!is_array($favorites)) {
          $favorites = array(); // in_array() below needs an array
      }

      // The pipeline consists of the InterestArea modifier alone.
      $modifier_object = ModifierFactory::createModifier('InterestArea');
      $ranking_array = $modifier_object->process($ranking_array,$individual_profile,$state_array,$this->configuration,$this->sk_summary);

      $display_array = array();
      $cur_rank = 1;
      foreach($ranking_array as $SK => $MSD) {
          // Skip the header fields that are prefixed with an underscore -- KEL 10/18/07
          if(strpos($SK, '_') === 0) {
              continue;
          }

          // The subarea is the part of the SK before the first underscore.
          $subarea = preg_match('/^([^_]+)/', $SK, $match) ? $match[0] : null;

          // Type and difficulty come from the cached dataset summary.
          $sk_questions = array();
          if(isset($this->sk_summary[$subarea]['SKs'][$SK]['Questions'])) {
              $sk_questions = $this->sk_summary[$subarea]['SKs'][$SK]['Questions'];
          }

          // KEL 08/29/07 -- Database query to grab fields that are currently not cached in the sk_summary.
          // NOTE(review): $SK is interpolated unescaped; confirm ranking keys can never carry user input.
          $sql = "SELECT * FROM project_ideas WHERE Filename='$SK'";
          $project_idea_data = $this->DB_Connection->query($sql);

          $row = array(
              'rank' => $cur_rank,
              'subarea' => $subarea,
              'filename' => $SK,
              'title' => $project_idea_data['Title'],
              // We'll standardize the capitalization on these. -- KEL
              'Filename' => $SK,
              'Title' => $project_idea_data['Title'],
              'Type' => isset($sk_questions['Type']) ? $sk_questions['Type'] : null,
              'DifficultyLevel_Low' => isset($sk_questions['DifficultyLevel_Low']) ? $sk_questions['DifficultyLevel_Low'] : null,
              'DifficultyLevel_High' => isset($sk_questions['DifficultyLevel_High']) ? $sk_questions['DifficultyLevel_High'] : null,
          );
          // Add a flag if this project has been added to favorites
          if(in_array($SK,$favorites)) {
              $row['Favorite'] = true;
          }
          $display_array[$cur_rank-1] = $row;
          $cur_rank++;
      }
      return $display_array;
  }
  /**
   * function get_first_recommendation -- returns the top project that would be displayed for this user, which in many cases
   * will differ from the top ranked project due to interestarea restrictions, difficulty level, and time required.
   *
   * Basically, this method runs through all the configured filters and modifiers except the pagination modifier, and
   * returns the first project key in the resulting list.
   *
   * Primarily, this is used for logging the FirstRecommendation in the recommender_action table.
   *
   * Kristian Ljungkvist 10/09/07
   *
   * @param array $ranking_array      Ranking (SK => score, plus underscore-prefixed header entries).
   * @param mixed $individual_profile Passed through to the modifiers.
   * @param mixed $state_array        Passed through to the modifiers.
   * @param mixed $favorites          Unused; kept for signature compatibility.
   * @return mixed Key of the first project in the processed ranking, or null when there is none.
   **/
  function get_first_recommendation($ranking_array,$individual_profile,$state_array,$favorites) {
      // Cycle through the configured modifiers as a pipeline.
      // explode() replaces split(), which no longer exists as of PHP 7.
      foreach (explode(',',$this->configuration['ModifierList']) as $modifier) {
          // Variable names are case-sensitive: the test has to use $modifier,
          // otherwise the Paginator is never skipped.
          if($modifier == 'Paginator') {
              continue;
          }
          $modifier_object = ModifierFactory::createModifier($modifier);
          // Pass it the current ranking list and state; the result feeds the next modifier.
          $ranking_array = $modifier_object->process($ranking_array,$individual_profile,$state_array,$this->configuration,$this->sk_summary);
      }

      if(!is_array($ranking_array)) {
          return null;
      }
      // Return the first real project; underscore-prefixed keys are header fields.
      foreach($ranking_array as $SK => $score) {
          if(strpos((string)$SK, '_') === 0) {
              continue;
          }
          return $SK;
      }
      return null;
  }
  /**
   * get_title_of_sk
   *
   * Looks the project up in the project_ideas table by filename and returns
   * the 'Title' column of the result.
   *
   * @param string $SK Project filename.
   * @return mixed The 'Title' entry of the query result.
   **/
  function get_title_of_sk($SK) {
      // get_solt_details_of_sk() runs the very same query; reuse it and pick the title.
      $details = $this->get_solt_details_of_sk($SK);
      return $details['Title'];
  }
  /**
   * get_solt_details_of_sk
   *
   * Elaboration on get_title_of_sk: returns the complete project_ideas query
   * result for the project (subarea code, etc.) rather than just the title.
   *
   * @param string $SK Project filename.
   * @return mixed Whatever the database handle's query() returns for the lookup.
   **/
  function get_solt_details_of_sk($SK) {
      // NOTE(review): $SK goes into the statement unescaped -- confirm callers never pass raw user input.
      return $this->DB_Connection->query("SELECT * FROM project_ideas WHERE Filename='" . $SK . "'");
  }
  /**
   * get_MSD_of_sk
   *
   * Returns the value stored for $SK in the most recent individual-based ranking.
   *
   * @param SK: key to look up in the ranking
   * @param individual_state: cached states, handed to get_most_recent_individual_ranking()
   * @param state_array: not read by this lookup; kept so existing callers keep working
   * @return the value stored under $SK, or null when the ranking has no entry for it
   **/
  function get_MSD_of_sk($SK,$individual_state,$state_array) {
    // First, find the most recent individual-based ranking for this individual.
    $mrir = $this->get_most_recent_individual_ranking($individual_state);
    // Then pick out the record for SK, without tripping over a missing key.
    return isset($mrir[$SK]) ? $mrir[$SK] : null;
  }
  /**
   * get_most_recent_individual_ranking($individual_state)
   *
   * Walks the cached states in order and returns the 'ranking' of the last one
   * whose ranking has _Source == 'Individual'. When none qualifies, the ranking
   * of state 1 is returned (the original default: the first ranking is expected
   * to be based on the individual).
   *
   * @param individual_state: array of cached states, keyed by state ID
   * @return array: most recent individual ranking, or null when the selected
   *         state has no ranking
   * @author Kristian Ljungkvist
   **/
  function get_most_recent_individual_ranking($individual_state) {
    $most_recent_ind_ranking = 1; // default, since we know the first ranking is based on the ind.
    // Track the state's real array key. A separate counter that is not advanced for
    // states without a ranking drifts away from the keys, and the final lookup then
    // indexes the wrong state.
    foreach ($individual_state as $state_id => $cur_state) {
      if (empty($cur_state['ranking'])) {
        continue;
      }
      if (isset($cur_state['ranking']['_Source']) && $cur_state['ranking']['_Source'] == 'Individual') {
        $most_recent_ind_ranking = $state_id;
      }
    }
    return isset($individual_state[$most_recent_ind_ranking]['ranking'])
      ? $individual_state[$most_recent_ind_ranking]['ranking']
      : null;
  }
  /**
   * get_most_recent_individual_ranking_id($individual_state)
   *
   * Walks the cached states in order and returns the array key (state ID) of the
   * last one whose ranking has _Source == 'Individual'. Returns 1 when none
   * qualifies (the original default: the first ranking is expected to be based
   * on the individual).
   *
   * @param individual_state: array of cached states, keyed by state ID
   * @return int: most recent individual ranking ID (stateID in cache)
   * @author Kristian Ljungkvist
   **/
  function get_most_recent_individual_ranking_id($individual_state) {
    $most_recent_ind_ranking = 1; // default, since we know the first ranking is based on the ind.
    // Report the state's real array key. A separate counter that is not advanced for
    // states without a ranking would no longer line up with the keys of
    // $individual_state.
    foreach ($individual_state as $state_id => $cur_state) {
      if (empty($cur_state['ranking'])) {
        continue;
      }
      if (isset($cur_state['ranking']['_Source']) && $cur_state['ranking']['_Source'] == 'Individual') {
        $most_recent_ind_ranking = $state_id;
      }
    }
    return $most_recent_ind_ranking;
  }
  /**
   * get_cached_solt_ranking_id($solt_sk, $individual_state)
   *
   * Searches the cached states for a ranking with _Source == 'Project' and
   * _SourceProfile == $solt_sk. When several states match, the last one in
   * iteration order wins. A ranking without _Source or _SourceProfile never matches.
   *
   * @param solt_sk: project identifier to look for
   * @param individual_state: array of cached states, keyed by state ID
   * @return int: stateID of cached ranking for this project, or -1 if it is not in the cache
   * @author Kristian Ljungkvist
   **/
  function get_cached_solt_ranking_id($solt_sk,$individual_state) {
    $cached_solt_ranking_id = -1; // -1 means no match, in this context.
    // Report the state's real array key. A separate counter that is not advanced for
    // states without a ranking would no longer line up with the keys of
    // $individual_state.
    foreach ($individual_state as $state_id => $cur_state) {
      if (empty($cur_state['ranking'])) {
        continue;
      }
      $ranking = $cur_state['ranking'];
      if (isset($ranking['_Source'], $ranking['_SourceProfile'])
          && $ranking['_Source'] == 'Project'
          && $ranking['_SourceProfile'] == $solt_sk) {
        $cached_solt_ranking_id = $state_id;
      }
    }
    return $cached_solt_ranking_id;
  }
  /**
   * get_page_indicator
   *
   * Builds the data for the pagination control.
   *
   * @param display_array: the page count is read from $display_array[1]['total_pages']
   * @param state_array: the current page is read from $state_array['p']
   * @return array with three entries:
   *         'Prev'  => 0 on page 1, otherwise 1 (whether to show the '<' button)
   *         'Next'  => 0 when there is no following page, otherwise 1 (the '>' button)
   *         'Pages' => the page numbers to list: 1..11 (capped at the page count, but
   *                    never less than page 1) while the current page is below 7,
   *                    otherwise current-5 .. current+5, capped at the page count.
   **/
  function get_page_indicator($display_array,$state_array) {
    $last_page = $display_array[1]['total_pages'];
    $this_page = $state_array['p'];

    $indicator = array();
    $indicator['Prev'] = ($this_page == 1) ? 0 : 1;
    $indicator['Next'] = ($this_page + 1 > $last_page) ? 0 : 1;

    if ($this_page < 7) {
      // Near the start: always begin at page 1. max() covers a page count of zero.
      $window_start = 1;
      $window_end = max((min(11, $last_page)), 1);
    } else {
      // Further in: centre the window on the current page, stopping at the last page.
      $window_start = $this_page - 5;
      $window_end = (($this_page + 5) < $last_page) ? ($this_page + 5) : $last_page;
    }

    $indicator['Pages'] = range($window_start, $window_end);
    return $indicator;
  }
  /**
   * recommendation_summary
   *
   * Groups the MSD values of a ranking by sub area (the part of each key before the
   * first '_'), takes the median MSD of every sub area, sorts the sub areas by
   * increasing median and looks up their display data in project_ideas.
   *
   * @param ranking_array current ranking (key => MSD). Keys starting with '_' and
   *        the 'HoneyPot' sub area are ignored.
   * @param min_areas_to_include: the msd_limit restriction is only applied when more
   *        than this many areas satisfy it; otherwise the unrestricted list is kept.
   * @param max_areas_to_include: hard limit (if non-zero) of how many areas to include
   * @param msd_limit: limit, max MSD of areas to include. If any area has a median higher than this,
   *        it is not included in the results.
   * @return array containing a sorted, possibly restricted list of median MSDs per interest area.
   *         Each entry has the keys subareacode, subarea, area and subarea_median_msd.
   *         An empty array is returned when the ranking yields no areas.
   * @author Kristian Ljungkvist
   **/
  function recommendation_summary($ranking_array,$min_areas_to_include,$max_areas_to_include,$msd_limit){
    // Build array of subarea,MSD for all subareas in ranking:
    //   Chem => array(0.23,0.24,0.34)
    //   Bio  => array(0.34,0.55,0.67)
    $msd_by_area = array();
    foreach ($ranking_array as $SK => $MSD) {
      if (strpos($SK, '_') === 0) {
        continue;
      }
      // Extract the sub area; skip keys that do not yield one.
      if (!preg_match('/^([^_]+)/', $SK, $match)) {
        continue;
      }
      $subarea = $match[1];
      if ($subarea === 'HoneyPot') {
        continue;
      }
      $msd_by_area[$subarea][] = $MSD;
    }

    // Compute the median msd for each area.
    $median_msd_by_area = array();
    foreach ($msd_by_area as $area => $msd_list) {
      if (!$area) {
        // Kept from the original: an empty/zero area code is not reported.
        continue;
      }
      // Sort the local copy so the middle element is the median regardless of the
      // order in which the ranking listed the projects.
      sort($msd_list);
      $median_msd_by_area[$area] = $msd_list[(int) floor(count($msd_list) / 2)];
    }

    // Now sort the areas in increasing median msd order.
    asort($median_msd_by_area);

    // First, if msd_limit is set, exclude any areas with median msd higher than that number
    if ($msd_limit) {
      $restricted_list = array();
      foreach ($median_msd_by_area as $area => $median_msd) {
        if ($median_msd <= $msd_limit) {
          $restricted_list[$area] = $median_msd;
        }
      }
      if (count($restricted_list) > $min_areas_to_include) {
        $median_msd_by_area = $restricted_list;
      }
    }

    // Secondly, if max_areas_to_include is set, place a hard-limit on the number of items in the list.
    if ($max_areas_to_include) {
      // preserve_keys=true: without it array_slice() renumbers integer-like keys,
      // which would replace such area codes with 0, 1, 2, ...
      $median_msd_by_area = array_slice($median_msd_by_area, 0, $max_areas_to_include, true);
    }

    // Look up interest area titles, etc. from project_ideas table.
    // NOTE(review): $area comes from the ranking keys and is placed into the SQL text
    // unescaped -- confirm those keys are trusted, or escape them with whatever
    // facility the Database class offers.
    $display_array = array();
    foreach ($median_msd_by_area as $area => $median_msd) {
      $sql = "SELECT * FROM project_ideas WHERE SubAreaCode='$area' LIMIT 1";
      $project_idea_area_data = $this->DB_Connection->query($sql);
      // Fields of an area that has no matching record are reported as null.
      $display_array[] = array(
        'subareacode' => isset($project_idea_area_data['SubAreaCode']) ? $project_idea_area_data['SubAreaCode'] : null,
        'subarea' => isset($project_idea_area_data['SubArea']) ? $project_idea_area_data['SubArea'] : null,
        'area' => isset($project_idea_area_data['Area']) ? $project_idea_area_data['Area'] : null,
        'subarea_median_msd' => $median_msd,
      );
    }

    // return the displayable list, limited perhaps by length and max MSD.
    return $display_array;
  }
  782. }
  783. ?>