PageRenderTime 61ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 1ms

/Recommender_Bayes.class.php

https://bitbucket.org/kljungkvist/test-fork
PHP | 976 lines | 489 code | 197 blank | 290 comment | 59 complexity | 30f5749c8800207d7f7c5cf5d1ed1b3f MD5 | raw file
  1. <?php
  2. /***********************************************************
  3. * Recommender Class -- Bayes/memcache version
  4. *
  5. * 06/16/2009 -- KEL created Bayes/Memcache fork for 2009 version
  6. * Created 08/07/2007 by Kristian Ljungkvist Based on Recommender test bed work.
  7. *
  8. * Copyright 2007 Science Buddies. All Rights Reserved
  9. ************************************************************/
  10. require_once 'sciencebuddies/Error.class.php';
  11. require_once 'sciencebuddies/Database.class.php';
  12. require_once 'sciencebuddies/Modifier.class.php';
  13. class Recommender
  14. {
  15. var $sk_summary = array();
  16. var $configuration = array();
  17. var $memcache_handle = null;
  18. var $DB_Connection = null;
  19. var $dataset_filename = "";
  20. /**
  21. * Constructor
  22. *
  23. *
  24. * @author Kristian Ljungkvist
  25. */
  function __construct($db,$originID,$dataset,$memcache) {
      // Builds a recommender bound to one dataset (Bayes/memcache version).
      //   $db       - database handle; this method only calls ->query($sql) on it.
      //   $originID - selects which recommender_config row is loaded.
      //   $dataset  - dataset name, used as the key prefix for memcache lookups.
      //   $memcache - cache handle; this method only calls ->get($key) on it.
      // Both handles are mandatory: a missing one raises E_USER_ERROR and stops.
      if(!$db) {
          trigger_error("No Database handle passed to Recommender constructor",E_USER_ERROR);
          exit;
      }
      if(!$memcache) {
          trigger_error("No Memcache handle passed to Recommender constructor",E_USER_ERROR);
          exit;
      }

      $this->DB_Connection = $db;
      $this->memcache_handle = $memcache;
      $this->dataset_filename = $dataset;

      // The ID is placed into the SQL text unquoted, so force it to an integer
      // first; anything that is not a number can then never reach the query.
      $originID = (int)$originID;
      $sql = "SELECT * FROM recommender_config where originID=$originID";
      $this->configuration = $this->DB_Connection->query($sql);

      // The per-dataset project summary lives in memcache under "<dataset>:sk_summary".
      $this->sk_summary = $memcache->get("$dataset:sk_summary");
  }
  46. /**
  47. * individual_ranking
  48. *
  49. * Returns the ranking of SKs for the current dataset and the individual passed in.
  50. * @author Kristian Ljungkvist
  51. */
  function individual_ranking($ind_summary) {
      // Ranks the projects of the current dataset for one individual.
      //   $ind_summary - the individual's profile; 'ProfileID' is read here and the
      //                  whole array is handed to algorithm_bayes().
      // Returns the ranking (project => score) plus header entries whose keys start
      // with an underscore so that later phases can strip them from displayed results.

      // The third argument used to be an undefined variable; pass an explicit null.
      $ranking_array = $this->algorithm_bayes($ind_summary,4,null,0,0, $this->dataset_filename, $this->memcache_handle);
      if(!is_array($ranking_array)) {
          $ranking_array = array();
      }

      // Tag the ranking with where it came from and the profile that produced it.
      $ranking_array['_Source'] = 'Individual';
      $ranking_array['_SourceProfile'] = $ind_summary['ProfileID'];

      // Record the modification time of the dataset file so that cached rankings can
      // be invalidated when the dataset changes. stat() returns false on failure.
      $stat_array = stat("./project_ideas/development/".$this->dataset_filename);
      $ranking_array['_DatasetModTime'] = ($stat_array !== false) ? $stat_array['mtime'] : null;

      // Remember which dataset produced this ranking (there can be several
      // "neighborhood" datasets).
      $ranking_array['_DatasetFilename'] = $this->dataset_filename;
      return $ranking_array;
  }
  67. /**
  68. * show_others_like_this
  69. *
  70. * Returns the ranking of SKs against the passed-in SK.
  71. * @author Kristian Ljungkvist
  72. */
  function show_others_like_this($solt_sk) {
      // Ranks the projects of the current dataset against one source project.
      //   $solt_sk - filename (SK) of the source project; the text before the first
      //              underscore is used as the sub-area key into sk_summary.
      // Returns the ranking plus underscore-prefixed header entries.

      // Find the stored profile of the source project.
      preg_match('/^([^_]+)/', $solt_sk, $match);
      $subarea = isset($match[0]) ? $match[0] : null;
      $source_project_profile = isset($this->sk_summary[$subarea]['SKs'][$solt_sk])
          ? $this->sk_summary[$subarea]['SKs'][$solt_sk]
          : null;

      // Rank that profile against the other projects.
      // The third argument used to be an undefined variable; pass an explicit null.
      $ranking_array = $this->algorithm_bayes($source_project_profile,4,null,0,0, $this->dataset_filename, $this->memcache_handle);
      if(!is_array($ranking_array)) {
          $ranking_array = array();
      }

      // Tag the ranking with its source; the project filename doubles as the source ID.
      $ranking_array['_Source'] = 'Project';
      $ranking_array['_SourceProfile'] = $solt_sk;

      // Record the dataset file's modification time for cache invalidation.
      // stat() returns false on failure.
      $stat_array = stat("./project_ideas/development/".$this->dataset_filename);
      $ranking_array['_DatasetModTime'] = ($stat_array !== false) ? $stat_array['mtime'] : null;
      return $ranking_array;
  }
  93. /**
  94. * static_ranking
  95. *
  96. * Essentially converts the current dataset into a "ranking" of the form the rendering methods expect.
  97. * Used for the dynamic interest area pages.
  98. * @author Kristian Ljungkvist 05/21/08
  99. */
  function static_ranking() {
      // Presents the whole dataset as a "ranking" in which every project scores 1.0,
      // in the same shape the rendering methods consume.
      // Returns project => 1.0 for every SK in sk_summary, plus header entries.
      $ranking_array = array();
      if(is_array($this->sk_summary)) {
          foreach($this->sk_summary as $subarea_array) {
              if(!isset($subarea_array['SKs']) || !is_array($subarea_array['SKs'])) {
                  continue;
              }
              foreach(array_keys($subarea_array['SKs']) as $SK) {
                  $ranking_array[$SK] = 1.0;
              }
          }
      }

      $ranking_array['_Source'] = 'Individual';
      // No individual is involved in a static ranking. The original read an undefined
      // variable here, which also evaluated to null.
      $ranking_array['_SourceProfile'] = null;

      // Record the dataset file's modification time for cache invalidation.
      // stat() returns false on failure.
      $stat_array = stat("./project_ideas/development/".$this->dataset_filename);
      $ranking_array['_DatasetModTime'] = ($stat_array !== false) ? $stat_array['mtime'] : null;
      return $ranking_array;
  }
  /**
   * algorithm_bayes
   *
   * Scores projects for one profile from per-answer probability lists held in memcache.
   *
   * For every question the profile answered, the list stored under
   * "<dataset>:<question>:<answer>" (project => probability) is multiplied into that
   * project's running product, but only when the project's value in
   * "<dataset>:<question>:3" (the maturity list) is above 100. Each product is then
   * multiplied by the project's entry in "<dataset>:prior_prob" and reduced with the
   * (matches + 1)-th root. Projects are kept when the result is below 1 and their
   * highest maturity is above $threshold.
   *
   * @param array  $individual_array profile; only its 'Questions' map (question => answer) is read
   * @param mixed  $threshold        minimum highest-maturity a project must exceed to be returned
   * @param mixed  $mof              not used by this algorithm
   * @param mixed  $recenter         not used by this algorithm
   * @param mixed  $gcv              not used by this algorithm
   * @param string $dataset          key prefix for all memcache lookups
   * @param object $memcache         cache handle; only ->get($key) is called
   * @return array project => score, highest score first (empty when nothing qualifies)
   */
  function algorithm_bayes($individual_array,$threshold,$mof,$recenter,$gcv, $dataset, $memcache) {
      $prior_probabilities_list = $memcache->get("$dataset:prior_prob");

      // Debug dump of the priors. print_r() needs its second argument to return the
      // text; without it the dump was printed ahead of the markup, followed by "1".
      echo "algorithm_bayes:prior_probabilities_list:<br/>";
      echo "<pre>".print_r($prior_probabilities_list, true)."</pre>";

      $MATURITY = 3;                        // answer slot that holds the maturity list
      $question_maturity_threshold = 100;   // a question counts only above this maturity
      $unordered_ranking = array();         // project => running product of probabilities
      $matching_questions_ref = array();    // project => number of questions that counted
      $highest_maturity_ref = array();      // project => highest maturity seen

      $questions = (isset($individual_array['Questions']) && is_array($individual_array['Questions']))
          ? $individual_array['Questions']
          : array();

      foreach($questions as $question => $user_answer) {
          $project_list = $memcache->get("$dataset:$question:$user_answer");
          if(!is_array($project_list)) {
              continue; // nothing cached for this question/answer pair
          }
          $project_maturity_list = $memcache->get("$dataset:$question:$MATURITY");

          foreach($project_list as $project => $answer_prob) {
              if($answer_prob == 0) {
                  continue;
              }
              $maturity = isset($project_maturity_list[$project]) ? $project_maturity_list[$project] : null;
              if(!($maturity > $question_maturity_threshold)) {
                  continue;
              }

              // Start the product at 1.0 the first time a project is seen. Testing with
              // isset() (not truthiness) keeps a product that underflowed to 0 at 0.
              if(!isset($unordered_ranking[$project])) {
                  $unordered_ranking[$project] = 1.0;
              }
              $unordered_ranking[$project] = $unordered_ranking[$project] * $answer_prob;

              $matching_questions_ref[$project] = isset($matching_questions_ref[$project])
                  ? $matching_questions_ref[$project] + 1
                  : 1;

              if(!isset($highest_maturity_ref[$project]) || $maturity > $highest_maturity_ref[$project]) {
                  $highest_maturity_ref[$project] = $maturity;
              }
          }
      }

      // Apply the prior and take the n-th root, where n = matching questions + 1.
      $filtered_unordered_ranking = array();
      foreach($matching_questions_ref as $project => $num_matching_questions) {
          // A project without a stored prior scores 0, as it did before.
          $prior = isset($prior_probabilities_list[$project]) ? $prior_probabilities_list[$project] : 0;
          $score = pow($unordered_ranking[$project] * $prior, (1 / ($num_matching_questions + 1)));
          if(($score < 1) && ($highest_maturity_ref[$project] > $threshold)) {
              $filtered_unordered_ranking[$project] = $score;
          }
      }

      // Best match first.
      arsort($filtered_unordered_ranking);
      return $filtered_unordered_ranking;
  }
  168. /**
  169. * function algorithm070220h
  170. *
  171. **/
  function algorithm070220h($big_array,$individual_array,$threshold,$mof = 'MSD',$recenter = false,$gcv = null) {
      // Distance-based ranking of every project in $big_array against one individual.
      //   $big_array        - subarea => array('SKs' => array(SK => array('Questions' => ...)))
      //   $individual_array - the individual's responses; only 'Questions' is read
      //   $threshold        - not used by this algorithm
      //   $mof              - measure to return: 'MSD' (default), 'MAV' or 'SAV'
      //   $recenter         - when true, shift the individual's answers before comparing
      //   $gcv              - global center value used for recentering (defaults to 1)
      // Returns SK => distance, sorted ascending (best match first).
      // Only questions whose name starts with "_" take part in the comparison.
      // The last three parameters are optional so that the three-argument call form
      // works; passing all six behaves as before.

      // Fall back to the configured global recenter value when the caller did not ask
      // for recentering explicitly.
      if(!$recenter
              && isset($this->configuration['GlobalRecenterValue'])
              && $this->configuration['GlobalRecenterValue'] > 0) {
          $recenter = true;
          $gcv = $this->configuration['GlobalRecenterValue'];
      }

      if($recenter) {
          // Recentering:
          //   1) UserAvg    = average of all survey questions the user answered
          //   2) UserOffset = UserAvg - GlobalCenterValue
          //   3) the offset is handed to recenter_callback (defined outside this method)
          //      for every entry of the individual's 'Questions' array.
          if(!isset($gcv)) {
              $gcv = 1; // Default Global Center Value
          }
          $usercount = 0;
          $usertotal = 0;
          foreach($individual_array['Questions'] as $question => $answer) {
              if(!preg_match("/^_/", $question)) {
                  continue;
              }
              $usertotal += $answer;
              $usercount++;
          }
          // Without any answered survey question there is no average to recenter
          // around (and dividing by zero must be avoided).
          if($usercount > 0) {
              $useroffset = ($usertotal / $usercount) - $gcv;
              array_walk($individual_array['Questions'],'recenter_callback',$useroffset);
          }
      }

      // Per project and per shared survey question: squared and absolute difference
      // between the individual's answer and the project's value.
      $squared_diff = array();
      $absolute_diff = array();
      foreach($big_array as $subarea_array) {
          foreach($subarea_array['SKs'] as $SK => $SK_array) {
              foreach($SK_array['Questions'] as $question => $answer) {
                  if(!preg_match("/^_/", $question)) {
                      continue; // not a survey question
                  }
                  if(!isset($individual_array['Questions'][$question])) {
                      continue; // the individual did not answer this one
                  }
                  $difference = $individual_array['Questions'][$question] - $answer;
                  $squared_diff[$SK][$question] = pow($difference,2);
                  $absolute_diff[$SK][$question] = abs($difference);
              }
          }
      }

      if($mof === 'MAV') {
          // Mean absolute difference.
          $MAV = array();
          foreach($absolute_diff as $SK => $differences) {
              $MAV[$SK] = array_sum($differences) / count($differences);
          }
          asort($MAV);
          return $MAV;
      }

      if($mof === 'SAV') {
          // Sum of absolute differences.
          $SAV = array();
          foreach($absolute_diff as $SK => $differences) {
              $SAV[$SK] = array_sum($differences);
          }
          asort($SAV);
          return $SAV;
      }

      // Default: mean squared difference, rounded to 5 decimals to save cache space.
      $MSD = array();
      foreach($squared_diff as $SK => $differences) {
          $MSD[$SK] = round(array_sum($differences) / count($differences),5);
      }
      asort($MSD);
      return $MSD;
  }
  272. /**
  273. * function post_process -- Run through a set of modifiers *once* on creation of a new ranking list.
  274. *
  275. * Primarily, this is used for new project randomization, since we want the newly randomized list to be
  276. * cached just like the original would be.
  277. *
  278. * Kristian Ljungkvist 09/19/07
  279. *
  280. **/
  function post_process($ranking_array,$individual_profile,$state_array) {
      // One-time pipeline applied to a freshly created ranking: each modifier named
      // below is created through ModifierFactory and its process() result becomes the
      // input of the next one. The final ranking is returned.
      $one_time_modifiers = array('NewProjectRandomizer', 'ProjectRandomizer');
      foreach($one_time_modifiers as $modifier_name) {
          $ranking_array = ModifierFactory::createModifier($modifier_name)->process(
              $ranking_array,
              $individual_profile,
              $state_array,
              $this->configuration,
              $this->sk_summary
          );
      }
      return $ranking_array;
  }
  288. /**
  289. * function render_results
  290. *
  291. **/
  function render_results($ranking_array,$individual_profile,$state_array,$favorites) {
      // Builds the display rows for one page of a ranking.
      //   $ranking_array      - SK => score, plus header entries whose keys start with "_"
      //   $individual_profile - passed through to every modifier
      //   $state_array        - passed through to every modifier ('rid', 'p' and 'sid'
      //                         are also quoted in the empty-SK notification mail)
      //   $favorites          - list of SKs the user marked as favorites
      // Returns a 0-based list of rows; each row also carries 'total_pages'.

      // Run the configured modifiers as a pipeline. explode() replaces split(), which
      // no longer exists in PHP 7+. The Paginator is held back until the total page
      // count has been taken from the unpaginated list.
      foreach(explode(',',$this->configuration['ModifierList']) as $modifier) {
          if($modifier == 'Paginator') {
              continue;
          }
          $modifier_object = ModifierFactory::createModifier($modifier);
          $ranking_array = $modifier_object->process($ranking_array,$individual_profile,$state_array,$this->configuration,$this->sk_summary);
      }

      // Total number of pages in the rendered ranking (0 when no page size is configured).
      $projects_per_page = $this->configuration['MaxProjectsPerPage'];
      $total_pages = ($projects_per_page > 0) ? ceil(count($ranking_array) / $projects_per_page) : 0;

      $modifier_object = ModifierFactory::createModifier('Paginator');
      $ranking_array = $modifier_object->process($ranking_array,$individual_profile,$state_array,$this->configuration,$this->sk_summary);

      $display_array = array();
      $cur_rank = 1;
      foreach($ranking_array as $SK => $MSD) {
          if(!$SK) {
              // An empty SK means an empty project slipped into the ranking: report it
              // by mail and leave it out of the display.
              $msg = "cur_rank = $cur_rank\n";
              $msg .= "ranking_array:\n";
              $msg .= print_r($ranking_array,true);
              $msg .= "\nIndividualID={$state_array['rid']}\n";
              $msg .= "\npage={$state_array['p']}\n";
              $msg .= "\nStateID={$state_array['sid']}\n";
              mail("kristian.ljungkvist@gmail.com","Recommender.class.php -- Empty SK",$msg);
              continue;
          }
          // Header entries (leading underscore) are not projects.
          if(strpos($SK, '_') === 0) {
              continue;
          }

          // The sub-area is the part of the SK before the first underscore.
          preg_match('/^([^_]+)/', $SK, $match);
          $subarea = $match[0];

          // Fields that are not cached in sk_summary come from the database.
          // NOTE(review): $SK is interpolated into the SQL text; confirm SKs can never
          // carry user-controlled characters, or escape/parameterise via the DB layer.
          $sql = "SELECT * FROM project_ideas WHERE Filename='$SK'";
          $project_idea_data = $this->DB_Connection->query($sql);

          // Both lower-case and capitalised keys are emitted, as before.
          $row = array(
              'rank'                 => $cur_rank,
              'subarea'              => $subarea,
              'filename'             => $SK,
              'title'                => $project_idea_data['Title'],
              'total_pages'          => $total_pages,
              'Filename'             => $SK,
              'Title'                => $project_idea_data['Title'],
              'Type'                 => $project_idea_data['Type'],
              'DifficultyLevel_Low'  => $project_idea_data['DifficultyLevel_Low'],
              'DifficultyLevel_High' => $project_idea_data['DifficultyLevel_High'],
              'Abstract'             => $project_idea_data['Abstract'],
              'Video'                => $project_idea_data['Video'],
              'ImgFilename_Abstract' => $project_idea_data['ImgFilename_Abstract'],
          );
          // Flag projects the user has added to favorites.
          if(is_array($favorites) && in_array($SK,$favorites)) {
              $row['Favorite'] = true;
          }

          $display_array[] = $row;
          $cur_rank++;
      }
      return $display_array;
  }
  372. /**
  373. * render_favorites
  374. *
  375. **/
  function render_favorites($favorites) {
      // Builds display rows for the user's favorite projects.
      //   $favorites - list of SKs (project filenames)
      // Returns a 0-based list of rows in the order of $favorites; every row is
      // flagged as a favorite.
      $display_array = array();
      if(!is_array($favorites)) {
          return $display_array;
      }

      $cur_rank = 1;
      foreach($favorites as $SK) {
          // The sub-area is the part of the SK before the first underscore.
          preg_match('/^([^_]+)/', $SK, $match);
          $subarea = isset($match[0]) ? $match[0] : null;

          // All displayed fields are fetched from the database for now; the title
          // lookup in sk_summary that used to sit here was never used.
          // NOTE(review): $SK is interpolated into the SQL text; confirm it can never
          // carry user-controlled characters, or escape/parameterise via the DB layer.
          $sql = "SELECT * FROM project_ideas WHERE Filename='$SK'";
          $project_idea_data = $this->DB_Connection->query($sql);

          $display_array[] = array(
              'rank'                 => $cur_rank,
              'subarea'              => $subarea,
              'filename'             => $SK,
              'title'                => $project_idea_data['Title'],
              'Type'                 => $project_idea_data['Type'],
              'DifficultyLevel_Low'  => $project_idea_data['DifficultyLevel_Low'],
              'DifficultyLevel_High' => $project_idea_data['DifficultyLevel_High'],
              'Abstract'             => $project_idea_data['Abstract'],
              'ImgFilename_Abstract' => $project_idea_data['ImgFilename_Abstract'],
              'Video'                => $project_idea_data['Video'],
              'Favorite'             => true,
          );
          $cur_rank++;
      }
      return $display_array;
  }
  421. /**
  422. * function render_interest_area
  423. *
  424. **/
  function render_interest_area($ranking_array,$individual_profile,$state_array,$favorites) {
      // Builds the display rows for one page of a ranking filtered by interest area.
      //   $ranking_array      - SK => score, plus header entries whose keys start with "_"
      //   $individual_profile - passed through to every modifier
      //   $state_array        - passed through to every modifier
      //   $favorites          - list of SKs the user marked as favorites
      // Returns a 0-based list of rows; each row also carries 'total_pages'.

      // Interest-area pages use a fixed pipeline (no randomization) instead of the
      // configured ModifierList.
      $modifier_list = 'DifficultyLevel,InterestArea,Deduper';

      // explode() replaces split(), which no longer exists in PHP 7+.
      foreach(explode(',',$modifier_list) as $modifier) {
          // The Paginator runs later, once the page count is known; AreaAssignment is
          // superseded by the InterestArea modifier.
          if(($modifier == 'Paginator') || ($modifier == 'AreaAssignment')) {
              continue;
          }
          $modifier_object = ModifierFactory::createModifier($modifier);
          $ranking_array = $modifier_object->process($ranking_array,$individual_profile,$state_array,$this->configuration,$this->sk_summary);
      }

      // Total number of pages in the rendered ranking (0 when no page size is configured).
      $projects_per_page = $this->configuration['MaxProjectsPerPage'];
      $total_pages = ($projects_per_page > 0) ? ceil(count($ranking_array) / $projects_per_page) : 0;

      $modifier_object = ModifierFactory::createModifier('Paginator');
      $ranking_array = $modifier_object->process($ranking_array,$individual_profile,$state_array,$this->configuration,$this->sk_summary);

      $display_array = array();
      $cur_rank = 1;
      foreach($ranking_array as $SK => $MSD) {
          // Header entries (leading underscore) are not projects.
          if(strpos($SK, '_') === 0) {
              continue;
          }

          // The sub-area is the part of the SK before the first underscore.
          preg_match('/^([^_]+)/', $SK, $match);
          $subarea = isset($match[0]) ? $match[0] : null;

          // Type and difficulty come from the cached summary, the rest from the database.
          $sk_questions = isset($this->sk_summary[$subarea]['SKs'][$SK]['Questions'])
              ? $this->sk_summary[$subarea]['SKs'][$SK]['Questions']
              : array();

          // NOTE(review): $SK is interpolated into the SQL text; confirm SKs can never
          // carry user-controlled characters, or escape/parameterise via the DB layer.
          $sql = "SELECT * FROM project_ideas WHERE Filename='$SK'";
          $project_idea_data = $this->DB_Connection->query($sql);

          // Both lower-case and capitalised keys are emitted, as before.
          $row = array(
              'rank'                 => $cur_rank,
              'subarea'              => $subarea,
              'filename'             => $SK,
              'title'                => $project_idea_data['Title'],
              'total_pages'          => $total_pages,
              'Filename'             => $SK,
              'Title'                => $project_idea_data['Title'],
              'Type'                 => isset($sk_questions['Type']) ? $sk_questions['Type'] : null,
              'DifficultyLevel_Low'  => isset($sk_questions['DifficultyLevel_Low']) ? $sk_questions['DifficultyLevel_Low'] : null,
              'DifficultyLevel_High' => isset($sk_questions['DifficultyLevel_High']) ? $sk_questions['DifficultyLevel_High'] : null,
              'Abstract'             => $project_idea_data['Abstract'],
              'Video'                => $project_idea_data['Video'],
              'ImgFilename_Abstract' => $project_idea_data['ImgFilename_Abstract'],
          );
          // Flag projects the user has added to favorites.
          if(is_array($favorites) && in_array($SK,$favorites)) {
              $row['Favorite'] = true;
          }

          $display_array[] = $row;
          $cur_rank++;
      }
      return $display_array;
  }
  495. /**
  496. * function render_interest_area_all_projects
  497. *
  498. **/
  function render_interest_area_all_projects($ranking_array,$individual_profile,$state_array,$favorites) {
      // Builds display rows for every project of an interest area: only the
      // InterestArea modifier is applied, with no other filtering and no pagination.
      //   $ranking_array      - SK => score, plus header entries whose keys start with "_"
      //   $individual_profile - passed through to the modifier
      //   $state_array        - passed through to the modifier
      //   $favorites          - list of SKs the user marked as favorites
      // Returns a 0-based list of rows (without abstract, video or image fields).

      // explode() replaces split(), which no longer exists in PHP 7+.
      $modifier_list = 'InterestArea';
      foreach(explode(',',$modifier_list) as $modifier) {
          $modifier_object = ModifierFactory::createModifier($modifier);
          $ranking_array = $modifier_object->process($ranking_array,$individual_profile,$state_array,$this->configuration,$this->sk_summary);
      }

      $display_array = array();
      $cur_rank = 1;
      foreach($ranking_array as $SK => $MSD) {
          // Header entries (leading underscore) are not projects.
          if(strpos($SK, '_') === 0) {
              continue;
          }

          // The sub-area is the part of the SK before the first underscore.
          preg_match('/^([^_]+)/', $SK, $match);
          $subarea = isset($match[0]) ? $match[0] : null;

          // Type and difficulty come from the cached summary, the title from the database.
          $sk_questions = isset($this->sk_summary[$subarea]['SKs'][$SK]['Questions'])
              ? $this->sk_summary[$subarea]['SKs'][$SK]['Questions']
              : array();

          // NOTE(review): $SK is interpolated into the SQL text; confirm SKs can never
          // carry user-controlled characters, or escape/parameterise via the DB layer.
          $sql = "SELECT * FROM project_ideas WHERE Filename='$SK'";
          $project_idea_data = $this->DB_Connection->query($sql);

          // Both lower-case and capitalised keys are emitted, as before.
          $row = array(
              'rank'                 => $cur_rank,
              'subarea'              => $subarea,
              'filename'             => $SK,
              'title'                => $project_idea_data['Title'],
              'Filename'             => $SK,
              'Title'                => $project_idea_data['Title'],
              'Type'                 => isset($sk_questions['Type']) ? $sk_questions['Type'] : null,
              'DifficultyLevel_Low'  => isset($sk_questions['DifficultyLevel_Low']) ? $sk_questions['DifficultyLevel_Low'] : null,
              'DifficultyLevel_High' => isset($sk_questions['DifficultyLevel_High']) ? $sk_questions['DifficultyLevel_High'] : null,
          );
          // Flag projects the user has added to favorites.
          if(is_array($favorites) && in_array($SK,$favorites)) {
              $row['Favorite'] = true;
          }

          $display_array[] = $row;
          $cur_rank++;
      }
      return $display_array;
  }
  551. /**
  552. * function get_first_recommendation -- returns the top project that would be displayed for this user, which in many cases
  553. * will differ from the top ranked project due to interestarea restrictions, difficulty level, and time required.
  554. *
  555. * Basically, this method runs through all the standard filters and modifiers except the pagination modifier, and
  556. * Returns the first entry in the resulting list.
  557. *
  558. * Primarily, this is used for logging the FirstRecommendation in the recommender_action table.
  559. *
  560. * Kristian Ljungkvist 10/09/07
  561. *
  562. **/
  function get_first_recommendation($ranking_array,$individual_profile,$state_array,$favorites) {
      // Returns the key (SK) of the first project that would be displayed for this
      // user: the ranking is passed through every configured modifier except the
      // Paginator, and the first key of the result is returned.
      //   $ranking_array      - SK => score, plus header entries
      //   $individual_profile - passed through to every modifier
      //   $state_array        - passed through to every modifier
      //   $favorites          - not used here

      // explode() replaces split(), which no longer exists in PHP 7+.
      foreach(explode(',',$this->configuration['ModifierList']) as $modifier) {
          // Variable names are case-sensitive: the old test read the undefined
          // $Modifier, so the Paginator was never actually skipped.
          if($modifier == 'Paginator') {
              continue;
          }
          $modifier_object = ModifierFactory::createModifier($modifier);
          $ranking_array = $modifier_object->process($ranking_array,$individual_profile,$state_array,$this->configuration,$this->sk_summary);
      }
      return key($ranking_array);
  }
  581. /**
  582. * get_title_of_sk
  583. *
  584. **/
  function get_title_of_sk($SK) {
      // Returns the 'Title' field of the project_ideas row whose Filename is $SK.
      // The row is fetched through get_solt_details_of_sk(), which runs the same query.
      $project_idea_data = $this->get_solt_details_of_sk($SK);
      return $project_idea_data['Title'];
  }
  590. /**
  591. * get_solt_details_of_sk
  592. *
  593. * Elaboration on get_title_of_sk to get subareacode, etc.
  594. **/
  function get_solt_details_of_sk($SK) {
      // Returns whatever the database layer yields for the project_ideas row whose
      // Filename is $SK (all columns are selected).
      return $this->DB_Connection->query("SELECT * FROM project_ideas WHERE Filename='$SK'");
  }
  600. /**
  601. * get_MSD_of_sk
  602. *
  603. **/
  function get_MSD_of_sk($SK,$individual_state,$state_array) {
      // Looks up the score stored for $SK in the individual's most recent
      // individual-based ranking.
      //   $SK               - project filename to look up
      //   $individual_state - list of states handed to get_most_recent_individual_ranking()
      //   $state_array      - not used here
      // Returns the stored value, or null when that ranking has no entry for $SK.
      $mrir = $this->get_most_recent_individual_ranking($individual_state);
      return isset($mrir[$SK]) ? $mrir[$SK] : null;
  }
  615. /**
  616. * get_most_recent_individual_ranking($individual_state)
  617. *
  618. * @return array: most recent individual ranking
  619. * @author Kristian Ljungkvist
  620. **/
  function get_most_recent_individual_ranking($individual_state) {
      // Returns the 'ranking' of the state picked as the latest one whose
      // ranking has '_Source' equal to 'Individual'.
      //
      // States with an empty ranking are skipped and do not advance the
      // counter, so the number chosen is the 1-based position among the
      // states that carry a ranking. Falls back to 1 when none matches.
      // NOTE(review): that position is then used as a key into
      // $individual_state; confirm keys and positions always line up.
      $latest_individual = 1;
      $position = 0;

      foreach ($individual_state as $state) {
          if ($state['ranking']) {
              $position++;
              if ($state['ranking']['_Source'] == 'Individual') {
                  $latest_individual = $position;
              }
          }
      }

      return $individual_state[$latest_individual]['ranking'];
  }
  638. /**
  639. * get_most_recent_individual_ranking_id($individual_state)
  640. *
  641. * @return int: most recent individual ranking ID (stateID in cache)
  642. * @author Kristian Ljungkvist
  643. **/
  function get_most_recent_individual_ranking_id($individual_state) {
      // Returns the number of the latest state whose ranking has '_Source'
      // equal to 'Individual', or 1 when no state matches.
      //
      // Numbering is 1-based and only counts states that carry a non-empty
      // ranking; states without one are skipped and do not use up a number.
      $latest_individual = 1;
      $position = 0;

      foreach ($individual_state as $state) {
          if (!$state['ranking']) {
              continue;
          }

          $position += 1;
          $latest_individual = ($state['ranking']['_Source'] == 'Individual')
              ? $position
              : $latest_individual;
      }

      return $latest_individual;
  }
  661. /**
  662. * get_cached_solt_ranking_id($solt_sk, $individual_state)
  663. *
  664. * @return int: stateID of cached ranking for this project (if it's in the cache)
  665. * @author Kristian Ljungkvist
  666. **/
  function get_cached_solt_ranking_id($solt_sk,$individual_state) {
      // Returns the number of the latest state whose ranking has '_Source'
      // equal to 'Project' and '_SourceProfile' equal to $solt_sk, or -1
      // when no state matches.
      //
      // Numbering is 1-based and only counts states that carry a non-empty
      // ranking; states without one are skipped and do not use up a number.
      $match_id = -1;
      $position = 0;

      foreach ($individual_state as $state) {
          if (!$state['ranking']) {
              continue;
          }

          $position++;
          $ranking = $state['ranking'];
          $from_project = ($ranking['_Source'] == 'Project');

          // '_SourceProfile' is only inspected for project-sourced rankings.
          if ($from_project && $ranking['_SourceProfile'] == $solt_sk) {
              $match_id = $position;
          }
      }

      return $match_id;
  }
  679. /**
  680. * get_page_indicator
  681. *
  682. **/
  function get_page_indicator($display_array,$state_array) {
      // Builds the data needed to draw the pager.
      //
      // @param array $display_array element [1]['total_pages'] holds the page count
      // @param array $state_array   element ['p'] holds the current page number
      // @return array 'Prev' and 'Next' (1 = show the button, 0 = hide it)
      //               and 'Pages' (the list of page numbers to display)
      $total_pages  = $display_array[1]['total_pages'];
      $current_page = $state_array['p'];

      $indicator = array();

      // No "previous" button on the first page, no "next" button once the
      // following page would be past the end.
      $indicator['Prev'] = ($current_page == 1) ? 0 : 1;
      $indicator['Next'] = ($current_page + 1 > $total_pages) ? 0 : 1;

      if ($current_page >= 7) {
          // Window of five pages either side of the current one, cut off at
          // the last page when fewer than five pages follow.
          $first_shown = $current_page - 5;
          $last_shown  = (($current_page + 5) < $total_pages) ? ($current_page + 5) : $total_pages;
      } else {
          // Near the start: pages 1 through 11, or fewer if there are not
          // that many. The max() keeps the range valid when total_pages is 0.
          $first_shown = 1;
          $last_shown  = max(min(11, $total_pages), 1);
      }

      $indicator['Pages'] = range($first_shown, $last_shown);

      return $indicator;
  }
  713. /**
  714. * recommendation_summary
  715. *
  716. * @param ranking_array current ranking
  717. * @param max_areas_to_include: hard limit (if non-zero) of how many areas to include
  718. * @param msd_limit: limit, max MSD of areas to include. If any area has a median higher than this,
  719. * it is not included in the results.
  720. * @return array containing a sorted, possibly restricted list of median MSDs per interest area.
  721. * @author Kristian Ljungkvist
  722. **/
  function recommendation_summary($ranking_array,$min_areas_to_include,$max_areas_to_include,$msd_limit){
      // Summarises a ranking per interest area and returns a display list.
      //
      // @param array $ranking_array        map of project key => MSD; keys starting
      //                                    with '_' and keys in the 'HoneyPot' area are ignored
      // @param int   $min_areas_to_include the msd_limit filter is only applied when it
      //                                    leaves MORE than this many areas
      // @param int   $max_areas_to_include hard cap on the number of areas (0 = no cap)
      // @param float $msd_limit            areas whose middle MSD exceeds this are dropped
      //                                    (0 = no limit)
      // @return array list of rows with 'subareacode', 'subarea', 'area' and
      //               'subarea_median_msd', ordered by increasing MSD; an empty
      //               array when the ranking yields no areas
      //
      // Every accumulator is initialised up front: the original left them
      // undefined for empty or fully filtered input, which ends in asort() and
      // count() being called on null (a TypeError on PHP 8).
      $msd_by_area = array();
      $median_msd_by_area = array();
      $display_array = array();

      // Group the MSD values by area, e.g. Chem => array(0.23, 0.24, 0.34).
      foreach ($ranking_array as $SK => $MSD) {
          if (strpos($SK, '_') === 0) {
              continue;
          }

          // The area code is everything before the first underscore.
          if (!preg_match('/^([^_]+)/', $SK, $match)) {
              continue;
          }
          $subarea = $match[0];

          if ($subarea == 'HoneyPot') {
              continue;
          }

          $msd_by_area[$subarea][] = $MSD;
      }

      // Take the middle element of each area's list as its median.
      // NOTE(review): the lists are in ranking order and are not sorted here,
      // so this is a true median only if $ranking_array arrives ordered by MSD.
      foreach ($msd_by_area as $area => $msd_list) {
          if ($area) {
              $median_msd_by_area[$area] = $msd_list[(int) floor(count($msd_list) / 2)];
          }
      }

      // Order the areas by increasing median MSD.
      asort($median_msd_by_area);

      // Drop areas above msd_limit, but only if enough areas survive.
      if ($msd_limit) {
          $restricted_list = array();
          foreach ($median_msd_by_area as $area => $median_msd) {
              if ($median_msd <= $msd_limit) {
                  $restricted_list[$area] = $median_msd;
              }
          }
          if (count($restricted_list) > $min_areas_to_include) {
              $median_msd_by_area = $restricted_list;
          }
      }

      // Apply the hard cap on the number of areas.
      if ($max_areas_to_include) {
          $median_msd_by_area = array_slice($median_msd_by_area, 0, $max_areas_to_include);
      }

      // Look up the display names for each remaining area, one query per area.
      foreach ($median_msd_by_area as $area => $median_msd) {
          $sql = "SELECT * FROM project_ideas WHERE SubAreaCode='$area' LIMIT 1";
          $project_idea_area_data = $this->DB_Connection->query($sql);

          $display_array[] = array(
              'subareacode'        => $project_idea_area_data['SubAreaCode'],
              'subarea'            => $project_idea_area_data['SubArea'],
              'area'               => $project_idea_area_data['Area'],
              'subarea_median_msd' => $median_msd,
          );
      }

      return $display_array;
  }
  776. }
  777. ?>