/Recommender.class.php
PHP | 985 lines | 492 code | 200 blank | 293 comment | 60 complexity | d61cb305b0fd017966217475bfd194ab MD5 | raw file
- <?php
- /***********************************************************
- * Recommender Class -- Bayes/memcache version
- *
- * 06/16/2009 -- KEL created Bayes/Memcache fork for 2009 version
- * Created 08/07/2007 by Kristian Ljungkvist Based on Recommender test bed work.
- *
- * Copyright 2007 Science Buddies. All Rights Reserved
- ************************************************************/
- require_once 'sciencebuddies/Error.class.php';
- require_once 'sciencebuddies/Database.class.php';
- require_once 'sciencebuddies/Modifier.class.php';
- class Recommender
- {
/** @var array Dataset summary; the constructor replaces it with the memcache entry "<dataset>:sk_summary". */
public $sk_summary = array();

/** @var mixed Result of the recommender_config query run by the constructor. */
public $configuration = array();

/** @var object|null Memcache client used for every cache lookup. */
public $memcache_handle = null;

/** @var object|null Database handle handed to the constructor. */
public $DB_Connection = null;

/** @var string Dataset name: memcache key prefix and file name under ./project_ideas/development/. */
public $dataset_filename = "";

/** @var string Address used when the constructor has to open its own Memcache connection. */
public $MEMCACHE_SOCKET_FILE = 'unix:///home/science/memcached.sock';
/**
 * Constructor
 *
 * Stores the database and memcache handles, loads the recommender
 * configuration row for $originID and pulls the dataset summary
 * ("<dataset>:sk_summary") out of memcache.
 *
 * @param object $db        Database handle exposing query($sql). Required.
 * @param mixed  $originID  Origin whose recommender_config row is loaded; coerced to an integer.
 * @param string $dataset   Dataset name, used as the memcache key prefix.
 * @param object $memcache  Optional memcache handle; when falsy a connection to
 *                          $this->MEMCACHE_SOCKET_FILE is opened here.
 * @author Kristian Ljungkvist
 */
function __construct($db,$originID,$dataset,$memcache) {
    if (!$db) {
        // No database handle is considered a critical error.
        trigger_error("No Database handle passed to Recommender constructor", E_USER_ERROR);
        exit;
    }

    if (!$memcache) {
        // The client is not forced to pass a memcache handle; create one on demand.
        $memcache = new Memcache;
        if (!$memcache->connect($this->MEMCACHE_SOCKET_FILE, 0)) {
            // `new Memcache` always yields an object, so the connect() result is the
            // only reliable failure signal. Stop even if a custom error handler returns.
            trigger_error("Could not connect to memcache", E_USER_ERROR);
            exit;
        }
    }

    $this->DB_Connection    = $db;
    $this->memcache_handle  = $memcache;
    $this->dataset_filename = $dataset;

    // Force the id to an integer so it cannot alter the SQL statement.
    $originID = (int) $originID;
    $sql = "SELECT * FROM recommender_config where originID=$originID";
    $this->configuration = $this->DB_Connection->query($sql);

    $this->sk_summary = $memcache->get("$dataset:sk_summary");
}
/**
 * individual_ranking
 *
 * Ranks the projects of the current dataset against one individual's answers
 * and tags the result with bookkeeping entries. All bookkeeping keys start with
 * an underscore so later phases can strip them from displayed result sets.
 *
 * @param array $ind_summary Individual profile; 'Questions' is consumed by
 *                           algorithm_bayes() and 'ProfileID' is copied into the result.
 * @return array project => score, plus '_Source', '_SourceProfile',
 *               '_DatasetModTime' and '_DatasetFilename'.
 * @author Kristian Ljungkvist
 */
function individual_ranking($ind_summary) {
    // The mof argument used to be an undefined variable; pass an explicit null instead.
    $ranking_array = $this->algorithm_bayes($ind_summary, 4, null, 0, 0, $this->dataset_filename, $this->memcache_handle);
    if (!is_array($ranking_array)) {
        $ranking_array = array();
    }

    // KEL 10/18/07 -- tag the ranking with its source and the profile that created it.
    $ranking_array['_Source'] = 'Individual';
    $ranking_array['_SourceProfile'] = isset($ind_summary['ProfileID']) ? $ind_summary['ProfileID'] : null;

    // KEL 09/18/2008 -- record the dataset's modification time so that cached
    // rankings can be invalidated when the dataset is updated.
    $stat_array = stat("./project_ideas/development/".$this->dataset_filename);
    $ranking_array['_DatasetModTime'] = ($stat_array !== false) ? $stat_array['mtime'] : null;

    // KEL 06/24/09 -- remember the originating dataset, since there are several
    // "neighborhood" datasets.
    $ranking_array['_DatasetFilename'] = $this->dataset_filename;

    return $ranking_array;
}
/**
 * show_others_like_this
 *
 * Ranks the projects of the current dataset against the stored profile of the
 * project $solt_sk and tags the result with underscore-prefixed bookkeeping keys.
 *
 * @param string $solt_sk Project identifier; the text before the first underscore
 *                        is used as the subarea key into $this->sk_summary.
 * @return array project => score, plus '_Source', '_SourceProfile' and '_DatasetModTime'.
 * @author Kristian Ljungkvist
 */
function show_others_like_this($solt_sk) {
    // Locate the source project's profile inside the dataset summary.
    $subarea = preg_match('/^([^_]+)/', $solt_sk, $match) ? $match[0] : '';
    $source_project_profile = isset($this->sk_summary[$subarea]['SKs'][$solt_sk])
        ? $this->sk_summary[$subarea]['SKs'][$solt_sk]
        : null;

    // NOTE: the project profile still has to be produced somehow in the Bayes
    // context, most likely as one memcache record per project written by the
    // add_to_memcache utility.
    $ranking_array = array();
    if ($source_project_profile !== null) {
        // The mof argument used to be an undefined variable; pass an explicit null instead.
        $ranking_array = $this->algorithm_bayes($source_project_profile, 4, null, 0, 0, $this->dataset_filename, $this->memcache_handle);
        if (!is_array($ranking_array)) {
            $ranking_array = array();
        }
    }

    // KEL 10/18/07 -- tag the ranking with its source; the project filename is the source ID.
    $ranking_array['_Source'] = 'Project';
    $ranking_array['_SourceProfile'] = $solt_sk;

    // KEL 09/18/2008 -- record the dataset's modification time so that cached
    // rankings can be invalidated when the dataset is updated.
    $stat_array = stat("./project_ideas/development/".$this->dataset_filename);
    $ranking_array['_DatasetModTime'] = ($stat_array !== false) ? $stat_array['mtime'] : null;

    return $ranking_array;
}
/**
 * static_ranking
 *
 * Converts the current dataset into a "ranking" of the form the rendering
 * methods expect: every project in $this->sk_summary gets the score 1.0.
 * Used for the dynamic interest area pages.
 *
 * @return array project => 1.0, plus '_Source', '_SourceProfile' (always null,
 *               no individual is involved) and '_DatasetModTime'.
 * @author Kristian Ljungkvist 05/21/08
 */
function static_ranking() {
    $ranking_array = array();

    // sk_summary comes from memcache and may not be an array if the lookup failed.
    if (is_array($this->sk_summary)) {
        foreach ($this->sk_summary as $subarea_array) {
            if (!isset($subarea_array['SKs']) || !is_array($subarea_array['SKs'])) {
                continue;
            }
            foreach (array_keys($subarea_array['SKs']) as $SK) {
                $ranking_array[$SK] = 1.0;
            }
        }
    }

    $ranking_array['_Source'] = 'Individual';
    // This used to read an undefined $ind_summary, which always evaluated to null.
    $ranking_array['_SourceProfile'] = null;

    // KEL 09/18/2008 -- record the dataset's modification time so that cached
    // rankings can be invalidated when the dataset is updated.
    $stat_array = stat("./project_ideas/development/".$this->dataset_filename);
    $ranking_array['_DatasetModTime'] = ($stat_array !== false) ? $stat_array['mtime'] : null;

    return $ranking_array;
}
/**
 * algorithm_bayes
 *
 * Scores projects against one set of answers using per-answer probability
 * lists stored in memcache:
 *   "<dataset>:prior_prob"           project => prior probability
 *   "<dataset>:<question>:<answer>"  project => probability of that answer
 *   "<dataset>:<question>:3"         project => maturity of that question
 *
 * For every (question, answer) pair of the individual, each project whose
 * answer probability is non-zero and whose question maturity exceeds 100
 * has that probability multiplied into its running product. The product is
 * then multiplied by the project's prior and reduced with the (n+1)-th root,
 * n being the number of contributing questions. Projects are kept when the
 * score is below 1 and their highest contributing maturity exceeds $threshold.
 *
 * @param array  $individual_array Profile with a 'Questions' map (question => answer).
 * @param mixed  $threshold        Exclusive lower bound on a project's highest maturity.
 * @param mixed  $mof              Unused; kept for signature compatibility.
 * @param mixed  $recenter         Unused; kept for signature compatibility.
 * @param mixed  $gcv              Unused; kept for signature compatibility.
 * @param string $dataset          Memcache key prefix.
 * @param object $memcache         Object exposing get($key).
 * @return array project => score, sorted by descending score; empty when nothing qualifies.
 */
function algorithm_bayes($individual_array,$threshold,$mof,$recenter,$gcv, $dataset, $memcache) {
    $MATURITY = 3;                       // answer slot that holds the per-project maturity
    $question_maturity_threshold = 100;  // a question must be more mature than this to count

    $prior_probabilities_list = $memcache->get("$dataset:prior_prob");
    if (!is_array($prior_probabilities_list)) {
        $prior_probabilities_list = array();
    }

    $questions = (isset($individual_array['Questions']) && is_array($individual_array['Questions']))
        ? $individual_array['Questions']
        : array();

    $unordered_ranking      = array(); // project => running product of answer probabilities
    $matching_questions_ref = array(); // project => number of contributing questions
    $highest_maturity_ref   = array(); // project => highest maturity among contributing questions

    foreach ($questions as $question => $user_answer) {
        $project_list = $memcache->get("$dataset:$question:$user_answer");
        if (!is_array($project_list)) {
            continue; // nothing cached for this question/answer pair
        }
        $project_maturity_list = $memcache->get("$dataset:$question:$MATURITY");
        if (!is_array($project_maturity_list)) {
            $project_maturity_list = array();
        }

        foreach ($project_list as $project => $answer_prob) {
            if ($answer_prob == 0) {
                continue;
            }
            $maturity = isset($project_maturity_list[$project]) ? $project_maturity_list[$project] : null;
            if (!($maturity > $question_maturity_threshold)) {
                continue;
            }

            // Start (or restart, if the product has collapsed to zero) at 1.0.
            if (empty($unordered_ranking[$project])) {
                $unordered_ranking[$project] = 1.0;
            }
            $unordered_ranking[$project] *= $answer_prob;

            $matching_questions_ref[$project] = isset($matching_questions_ref[$project])
                ? $matching_questions_ref[$project] + 1
                : 1;

            if (!isset($highest_maturity_ref[$project]) || $maturity > $highest_maturity_ref[$project]) {
                $highest_maturity_ref[$project] = $maturity;
            }
        }
    }

    // Apply the prior and take the (n+1)-th root for every project that matched.
    $filtered_unordered_ranking = array();
    foreach ($matching_questions_ref as $project => $num_matching_questions) {
        // A project without a prior contributes 0, as a missing entry did before.
        $prior = isset($prior_probabilities_list[$project]) ? $prior_probabilities_list[$project] : 0;
        $score = pow($unordered_ranking[$project] * $prior, (1 / ($num_matching_questions + 1)));

        if (($score < 1) && ($highest_maturity_ref[$project] > $threshold)) {
            $filtered_unordered_ranking[$project] = $score;
        }
    }

    arsort($filtered_unordered_ranking); // best match first
    return $filtered_unordered_ranking;
}
/**
 * function algorithm070220h
 *
 * Distance-based ranking: compares one individual's answers with the stored
 * answers of every project in $big_array and returns a per-project measure
 * of fit, sorted ascending (best match first).
 *
 * Only questions whose name starts with an underscore take part, and only
 * those the individual actually answered.
 *
 * @param array  $big_array        subarea => array('SKs' => array(project => array('Questions' => ...))).
 * @param array  $individual_array Profile with a 'Questions' map (question => answer).
 * @param mixed  $threshold        Unused; kept for signature compatibility.
 * @param string $mof              Measure of fit: 'MSD' (mean squared difference, rounded to
 *                                 5 decimals), 'MAV' (mean absolute difference) or 'SAV'
 *                                 (summed absolute difference). Anything else selects MSD.
 * @param mixed  $recenter         When truthy, the individual's answers are recentered first.
 *                                 When falsy, recentering is still enabled if the configuration
 *                                 holds a positive 'GlobalRecenterValue'.
 * @param mixed  $gcv              Global center value used for recentering; defaults to 1.
 * @return array project => measure of fit, ascending.
 **/
function algorithm070220h($big_array,$individual_array,$threshold,$mof = 'MSD',$recenter = false,$gcv = null) {
    // Fall back to the configured global recenter value when the caller did not ask for recentering.
    if (!$recenter
        && isset($this->configuration['GlobalRecenterValue'])
        && $this->configuration['GlobalRecenterValue'] > 0) {
        $recenter = true;
        $gcv = $this->configuration['GlobalRecenterValue'];
    }

    if (!isset($individual_array['Questions']) || !is_array($individual_array['Questions'])) {
        $individual_array['Questions'] = array();
    }

    if ($recenter) {
        /**
         * Recentering:
         *  1) UserAvg    = average of all survey answers the user gave
         *  2) UserOffset = UserAvg - GlobalCenterValue
         *  3) the offset is handed to recenter_callback() for every answer
         **/
        if (!isset($gcv)) {
            $gcv = 1; // Default Global Center Value to 1
        }
        $usertotal = 0;
        $usercount = 0;
        foreach ($individual_array['Questions'] as $question => $answer) {
            if (!preg_match("/^_/", $question)) {
                continue; // not a survey question
            }
            $usertotal += $answer;
            $usercount++;
        }
        // Without survey answers there is no average; skip instead of dividing by zero.
        if ($usercount > 0) {
            $useroffset = ($usertotal / $usercount) - $gcv;
            array_walk($individual_array['Questions'], 'recenter_callback', $useroffset);
        }
    }

    // Per project and survey question: squared and absolute difference between
    // the individual's answer and the project's stored answer.
    $squared_diff  = array();
    $absolute_diff = array();
    if (is_array($big_array)) {
        foreach ($big_array as $subarea_array) {
            if (!isset($subarea_array['SKs']) || !is_array($subarea_array['SKs'])) {
                continue;
            }
            foreach ($subarea_array['SKs'] as $SK => $SK_array) {
                if (!isset($SK_array['Questions']) || !is_array($SK_array['Questions'])) {
                    continue;
                }
                foreach ($SK_array['Questions'] as $question => $answer) {
                    if (!preg_match("/^_/", $question)) {
                        continue; // not a survey question
                    }
                    if (!isset($individual_array['Questions'][$question])) {
                        continue; // the user did not answer this one
                    }
                    $delta = $individual_array['Questions'][$question] - $answer;
                    $squared_diff[$SK][$question]  = pow($delta, 2);
                    $absolute_diff[$SK][$question] = abs($delta);
                }
            }
        }
    }

    switch ($mof) {
        case 'MAV':
            // Mean absolute difference.
            $MAV = array();
            foreach ($absolute_diff as $SK => $diffs) {
                $MAV[$SK] = array_sum($diffs) / count($diffs);
            }
            asort($MAV);
            return $MAV;

        case 'SAV':
            // Summed absolute difference.
            $SAV = array();
            foreach ($absolute_diff as $SK => $diffs) {
                $SAV[$SK] = array_sum($diffs);
            }
            asort($SAV);
            return $SAV;
    }

    // Default: mean squared difference.
    // KEL 02/08/08 -- rounded to 5 decimal places to save space in the cache.
    $MSD = array();
    foreach ($squared_diff as $SK => $diffs) {
        $MSD[$SK] = round(array_sum($diffs) / count($diffs), 5);
    }
    asort($MSD);
    return $MSD;
}
/**
 * function post_process -- one-off modifier pass applied when a new ranking list is created.
 *
 * Feeds the ranking through the 'NewProjectRandomizer' modifier and then the
 * 'ProjectRandomizer' modifier, so that the randomized list is what gets cached,
 * exactly as the original list would be.
 *
 * @param array $ranking_array      Ranking to process.
 * @param mixed $individual_profile Passed through to each modifier.
 * @param mixed $state_array        Passed through to each modifier.
 * @return mixed Whatever the last modifier's process() returns.
 *
 * Kristian Ljungkvist 09/19/07
 **/
function post_process($ranking_array,$individual_profile,$state_array) {
    $pipeline = array('NewProjectRandomizer', 'ProjectRandomizer');

    foreach ($pipeline as $modifier_name) {
        $stage = ModifierFactory::createModifier($modifier_name);
        $ranking_array = $stage->process(
            $ranking_array,
            $individual_profile,
            $state_array,
            $this->configuration,
            $this->sk_summary
        );
    }

    return $ranking_array;
}
/**
 * function render_results
 *
 * Produces the display version of a ranking: runs every configured modifier
 * except 'Paginator', counts the pages of the result, runs 'Paginator', and
 * looks up the display fields of each remaining project in project_ideas.
 *
 * @param array $ranking_array      project => score (may contain '_'-prefixed header entries).
 * @param mixed $individual_profile Passed through to the modifiers.
 * @param array $state_array        Passed through to the modifiers; 'rid', 'p' and 'sid'
 *                                  are quoted in the empty-project notification mail.
 * @param array $favorites          Project identifiers flagged as favorites.
 * @return array Zero-based list of display rows; every row carries 'total_pages'.
 **/
function render_results($ranking_array,$individual_profile,$state_array,$favorites) {
    // Run the configured modifiers as a pipeline. Paginator is held back until the
    // page count of the unpaginated list has been taken.
    // explode() replaces split(), which no longer exists as of PHP 7.
    foreach (explode(',', $this->configuration['ModifierList']) as $modifier) {
        if ($modifier == 'Paginator') {
            continue;
        }
        $modifier_object = ModifierFactory::createModifier($modifier);
        $ranking_array = $modifier_object->process($ranking_array,$individual_profile,$state_array,$this->configuration,$this->sk_summary);
    }

    // Total number of pages in the rendered ranking (0 when no page size is configured).
    $projects_per_page = $this->configuration['MaxProjectsPerPage'];
    $total_pages = ($projects_per_page > 0) ? ceil(count($ranking_array) / $projects_per_page) : 0;

    $modifier_object = ModifierFactory::createModifier('Paginator');
    $ranking_array = $modifier_object->process($ranking_array,$individual_profile,$state_array,$this->configuration,$this->sk_summary);

    $display_array = array();
    $cur_rank = 1;
    foreach ($ranking_array as $SK => $MSD) {
        // An empty SK only happens if there was an empty project in the array. -- KEL 01/30/08
        if (!$SK) {
            // Notify by email if we get an empty SK -- KEL 01/30/08
            $msg = "cur_rank = $cur_rank\n";
            $msg .= "ranking_array:\n";
            $msg .= print_r($ranking_array,true);
            $msg .= "\nIndividualID={$state_array['rid']}\n";
            $msg .= "\npage={$state_array['p']}\n";
            $msg .= "\nStateID={$state_array['sid']}\n";
            mail("kristian.ljungkvist@gmail.com","Recommender.class.php -- Empty SK",$msg);
            continue;
        }
        // Skip the header fields that are prefixed with an underscore -- KEL 10/18/07
        if (strpos($SK, '_') === 0) {
            continue;
        }

        // The subarea is the part of the SK before the first underscore.
        $subarea = preg_match('/^([^_]+)/', $SK, $match) ? $match[0] : null;

        // KEL 08/29/07 -- fields not cached in sk_summary are fetched from the database.
        // NOTE(review): $SK is interpolated into the SQL unescaped; confirm how the
        // Database class expects values to be quoted.
        $sql = "SELECT * FROM project_ideas WHERE Filename='$SK'";
        $project_idea_data = $this->DB_Connection->query($sql);

        $row = array(
            'rank'                 => $cur_rank,
            'subarea'              => $subarea,
            'filename'             => $SK,
            'title'                => $project_idea_data['Title'],
            'total_pages'          => $total_pages,
            // Standardized capitalization. -- KEL
            'Filename'             => $SK,
            'Title'                => $project_idea_data['Title'],
            'Type'                 => $project_idea_data['Type'],
            'DifficultyLevel_Low'  => $project_idea_data['DifficultyLevel_Low'],
            'DifficultyLevel_High' => $project_idea_data['DifficultyLevel_High'],
            'Abstract'             => $project_idea_data['Abstract'],
            'Video'                => $project_idea_data['Video'],
            'ImgFilename_Abstract' => $project_idea_data['ImgFilename_Abstract'],
        );
        // Flag projects that have been added to favorites.
        if (is_array($favorites) && in_array($SK, $favorites)) {
            $row['Favorite'] = true;
        }

        $display_array[$cur_rank-1] = $row;
        $cur_rank++;
    }
    return $display_array;
}
/**
 * render_favorites
 *
 * Builds display rows for a list of favorite projects, in the order given.
 * All display fields are fetched from the project_ideas table.
 *
 * @param array $favorites List of project identifiers.
 * @return array Zero-based list of display rows, each flagged 'Favorite' => true.
 **/
function render_favorites($favorites) {
    $display_array = array();
    if (!is_array($favorites)) {
        return $display_array; // nothing to render
    }

    $cur_rank = 1;
    foreach ($favorites as $SK) {
        // The subarea is the part of the SK before the first underscore.
        $subarea = preg_match('/^([^_]+)/', $SK, $match) ? $match[0] : null;

        // KEL 08/29/07 -- fields not cached in sk_summary are fetched from the database.
        // NOTE(review): $SK is interpolated into the SQL unescaped; confirm how the
        // Database class expects values to be quoted.
        $sql = "SELECT * FROM project_ideas WHERE Filename='$SK'";
        $project_idea_data = $this->DB_Connection->query($sql);

        $display_array[$cur_rank-1] = array(
            'rank'                 => $cur_rank,
            'subarea'              => $subarea,
            'filename'             => $SK,
            'title'                => $project_idea_data['Title'],
            'Type'                 => $project_idea_data['Type'],
            'DifficultyLevel_Low'  => $project_idea_data['DifficultyLevel_Low'],
            'DifficultyLevel_High' => $project_idea_data['DifficultyLevel_High'],
            'Abstract'             => $project_idea_data['Abstract'],
            'ImgFilename_Abstract' => $project_idea_data['ImgFilename_Abstract'],
            'Video'                => $project_idea_data['Video'],
            'Favorite'             => true, // It is a favorite, after all...
        );
        $cur_rank++;
    }
    return $display_array;
}
/**
 * function render_interest_area
 *
 * Produces the display version of a ranking filtered by interest area. The
 * modifier pipeline is fixed to DifficultyLevel, InterestArea and Deduper
 * (no randomization on interest areas), followed by Paginator.
 *
 * @param array $ranking_array      project => score (may contain '_'-prefixed header entries).
 * @param mixed $individual_profile Passed through to the modifiers.
 * @param array $state_array        Passed through to the modifiers.
 * @param array $favorites          Project identifiers flagged as favorites.
 * @return array Zero-based list of display rows; every row carries 'total_pages'.
 **/
function render_interest_area($ranking_array,$individual_profile,$state_array,$favorites) {
    // Fixed pipeline; the configured ModifierList is deliberately not used here.
    // explode() replaces split(), which no longer exists as of PHP 7.
    $modifier_list = 'DifficultyLevel,InterestArea,Deduper';
    foreach (explode(',', $modifier_list) as $modifier) {
        // Paginator runs separately below; AreaAssignment is overridden by InterestArea.
        if (($modifier == 'Paginator') || ($modifier == 'AreaAssignment')) {
            continue;
        }
        $modifier_object = ModifierFactory::createModifier($modifier);
        $ranking_array = $modifier_object->process($ranking_array,$individual_profile,$state_array,$this->configuration,$this->sk_summary);
    }

    // Total number of pages in the rendered ranking (0 when no page size is configured).
    $projects_per_page = $this->configuration['MaxProjectsPerPage'];
    $total_pages = ($projects_per_page > 0) ? ceil(count($ranking_array) / $projects_per_page) : 0;

    $modifier_object = ModifierFactory::createModifier('Paginator');
    $ranking_array = $modifier_object->process($ranking_array,$individual_profile,$state_array,$this->configuration,$this->sk_summary);

    $display_array = array();
    $cur_rank = 1;
    foreach ($ranking_array as $SK => $MSD) {
        // Skip the header fields that are prefixed with an underscore -- KEL 10/18/07
        if (strpos($SK, '_') === 0) {
            continue;
        }

        // The subarea is the part of the SK before the first underscore.
        $subarea = preg_match('/^([^_]+)/', $SK, $match) ? $match[0] : null;

        // Type and difficulty come from the dataset summary rather than the database.
        $summary_questions = isset($this->sk_summary[$subarea]['SKs'][$SK]['Questions'])
            ? $this->sk_summary[$subarea]['SKs'][$SK]['Questions']
            : array();

        // KEL 08/29/07 -- fields not cached in sk_summary are fetched from the database.
        // NOTE(review): $SK is interpolated into the SQL unescaped; confirm how the
        // Database class expects values to be quoted.
        $sql = "SELECT * FROM project_ideas WHERE Filename='$SK'";
        $project_idea_data = $this->DB_Connection->query($sql);

        $row = array(
            'rank'                 => $cur_rank,
            'subarea'              => $subarea,
            'filename'             => $SK,
            'title'                => $project_idea_data['Title'],
            'total_pages'          => $total_pages,
            // Standardized capitalization. -- KEL
            'Filename'             => $SK,
            'Title'                => $project_idea_data['Title'],
            'Type'                 => isset($summary_questions['Type']) ? $summary_questions['Type'] : null,
            'DifficultyLevel_Low'  => isset($summary_questions['DifficultyLevel_Low']) ? $summary_questions['DifficultyLevel_Low'] : null,
            'DifficultyLevel_High' => isset($summary_questions['DifficultyLevel_High']) ? $summary_questions['DifficultyLevel_High'] : null,
            'Abstract'             => $project_idea_data['Abstract'],
            'Video'                => $project_idea_data['Video'],
            'ImgFilename_Abstract' => $project_idea_data['ImgFilename_Abstract'],
        );
        // Flag projects that have been added to favorites.
        if (is_array($favorites) && in_array($SK, $favorites)) {
            $row['Favorite'] = true;
        }

        $display_array[$cur_rank-1] = $row;
        $cur_rank++;
    }
    return $display_array;
}
-
-
-
/**
 * function render_interest_area_all_projects
 *
 * Produces the display version of a ranking filtered by interest area only:
 * the InterestArea modifier is the sole pipeline stage and no pagination is
 * applied.
 *
 * @param array $ranking_array      project => score (may contain '_'-prefixed header entries).
 * @param mixed $individual_profile Passed through to the modifier.
 * @param array $state_array        Passed through to the modifier.
 * @param array $favorites          Project identifiers flagged as favorites.
 * @return array Zero-based list of display rows (no 'total_pages', no abstract fields).
 **/
function render_interest_area_all_projects($ranking_array,$individual_profile,$state_array,$favorites) {
    // For this version, InterestArea is the only filter.
    // explode() replaces split(), which no longer exists as of PHP 7.
    $modifier_list = 'InterestArea';
    foreach (explode(',', $modifier_list) as $modifier) {
        $modifier_object = ModifierFactory::createModifier($modifier);
        $ranking_array = $modifier_object->process($ranking_array,$individual_profile,$state_array,$this->configuration,$this->sk_summary);
    }

    $display_array = array();
    $cur_rank = 1;
    foreach ($ranking_array as $SK => $MSD) {
        // Skip the header fields that are prefixed with an underscore -- KEL 10/18/07
        if (strpos($SK, '_') === 0) {
            continue;
        }

        // The subarea is the part of the SK before the first underscore.
        $subarea = preg_match('/^([^_]+)/', $SK, $match) ? $match[0] : null;

        // Type and difficulty come from the dataset summary rather than the database.
        $summary_questions = isset($this->sk_summary[$subarea]['SKs'][$SK]['Questions'])
            ? $this->sk_summary[$subarea]['SKs'][$SK]['Questions']
            : array();

        // KEL 08/29/07 -- the title is not cached in sk_summary and is fetched from the database.
        // NOTE(review): $SK is interpolated into the SQL unescaped; confirm how the
        // Database class expects values to be quoted.
        $sql = "SELECT * FROM project_ideas WHERE Filename='$SK'";
        $project_idea_data = $this->DB_Connection->query($sql);

        $row = array(
            'rank'                 => $cur_rank,
            'subarea'              => $subarea,
            'filename'             => $SK,
            'title'                => $project_idea_data['Title'],
            // Standardized capitalization. -- KEL
            'Filename'             => $SK,
            'Title'                => $project_idea_data['Title'],
            'Type'                 => isset($summary_questions['Type']) ? $summary_questions['Type'] : null,
            'DifficultyLevel_Low'  => isset($summary_questions['DifficultyLevel_Low']) ? $summary_questions['DifficultyLevel_Low'] : null,
            'DifficultyLevel_High' => isset($summary_questions['DifficultyLevel_High']) ? $summary_questions['DifficultyLevel_High'] : null,
        );
        // Flag projects that have been added to favorites.
        if (is_array($favorites) && in_array($SK, $favorites)) {
            $row['Favorite'] = true;
        }

        $display_array[$cur_rank-1] = $row;
        $cur_rank++;
    }
    return $display_array;
}
/**
 * function get_first_recommendation -- returns the top project that would be displayed for this user.
 *
 * This can differ from the top ranked project because of interest area
 * restrictions, difficulty level and time required. The method runs every
 * configured modifier except the pagination modifier and returns the first
 * project of the resulting list.
 *
 * Primarily used for logging the FirstRecommendation in the recommender_action table.
 *
 * @param array $ranking_array      project => score (may contain '_'-prefixed header entries).
 * @param mixed $individual_profile Passed through to the modifiers.
 * @param array $state_array        Passed through to the modifiers.
 * @param array $favorites          Unused; kept for signature compatibility.
 * @return mixed Key of the first non-header entry, or null when there is none.
 *
 * Kristian Ljungkvist 10/09/07
 **/
function get_first_recommendation($ranking_array,$individual_profile,$state_array,$favorites) {
    // explode() replaces split(), which no longer exists as of PHP 7.
    foreach (explode(',', $this->configuration['ModifierList']) as $modifier) {
        // Variable names are case-sensitive: the former test against $Modifier was
        // always true, so the Paginator was never actually skipped.
        if ($modifier == 'Paginator') {
            continue;
        }
        $modifier_object = ModifierFactory::createModifier($modifier);
        $ranking_array = $modifier_object->process($ranking_array,$individual_profile,$state_array,$this->configuration,$this->sk_summary);
    }

    // Return the first real project; underscore-prefixed keys are header fields.
    if (is_array($ranking_array)) {
        foreach ($ranking_array as $SK => $score) {
            if (strpos((string) $SK, '_') !== 0) {
                return $SK;
            }
        }
    }
    return null;
}
/**
 * get_title_of_sk
 *
 * Fetches the project_ideas record whose Filename equals $SK and hands back
 * its 'Title' field.
 *
 * @param string $SK Project identifier (Filename column).
 * @return mixed The 'Title' entry of the query result.
 **/
function get_title_of_sk($SK) {
    $record = $this->DB_Connection->query("SELECT * FROM project_ideas WHERE Filename='$SK'");

    return $record['Title'];
}
/**
 * get_solt_details_of_sk
 *
 * Elaboration on get_title_of_sk: hands back the complete project_ideas
 * query result for $SK (subarea code, etc.) instead of the title alone.
 *
 * @param string $SK Project identifier (Filename column).
 * @return mixed Whatever the database handle's query() returns.
 **/
function get_solt_details_of_sk($SK) {
    return $this->DB_Connection->query("SELECT * FROM project_ideas WHERE Filename='$SK'");
}
/**
 * get_msd_of_sk
 *
 * Looks up the score of project $SK in the individual's most recent
 * individual-based ranking.
 *
 * @param string $SK               Project identifier.
 * @param array  $individual_state Cached states, as accepted by
 *                                 get_most_recent_individual_ranking().
 * @param array  $state_array      Unused; kept for signature compatibility.
 * @return mixed The stored score, or null when the project is not in that ranking.
 **/
function get_MSD_of_sk($SK,$individual_state,$state_array) {
    // First, find the most recent individual-based ranking for this individual.
    $mrir = $this->get_most_recent_individual_ranking($individual_state);

    // Then pick the project's entry out of it, if there is one.
    return isset($mrir[$SK]) ? $mrir[$SK] : null;
}
/**
 * get_most_recent_individual_ranking($individual_state)
 *
 * Walks the cached states in order and returns the ranking of the last state
 * whose ranking is tagged '_Source' == 'Individual'. When no state qualifies,
 * the ranking stored under key 1 is returned (the first ranking is known to be
 * based on the individual).
 *
 * @param array $individual_state stateID => array('ranking' => array(...)).
 * @return array|null Most recent individual ranking, or null if none is stored.
 * @author Kristian Ljungkvist
 **/
function get_most_recent_individual_ranking($individual_state) {
    $most_recent_ind_ranking = 1; // default, since we know the first ranking is based on the ind.

    if (is_array($individual_state)) {
        // Track the state's own key. A separate counter that skips states without a
        // ranking drifts away from the real keys and selects the wrong state.
        foreach ($individual_state as $state_id => $cur_state) {
            if (empty($cur_state['ranking'])) {
                continue;
            }
            if (isset($cur_state['ranking']['_Source']) && $cur_state['ranking']['_Source'] == 'Individual') {
                $most_recent_ind_ranking = $state_id;
            }
        }
    }

    return isset($individual_state[$most_recent_ind_ranking]['ranking'])
        ? $individual_state[$most_recent_ind_ranking]['ranking']
        : null;
}
/**
 * get_most_recent_individual_ranking_id($individual_state)
 *
 * Walks the cached states in order and returns the key (stateID in cache) of
 * the last state whose ranking is tagged '_Source' == 'Individual'.
 *
 * @param array $individual_state stateID => array('ranking' => array(...)).
 * @return int Most recent individual ranking ID; 1 when no state qualifies
 *             (the first ranking is known to be based on the individual).
 * @author Kristian Ljungkvist
 **/
function get_most_recent_individual_ranking_id($individual_state) {
    $most_recent_ind_ranking = 1; // default, since we know the first ranking is based on the ind.

    if (is_array($individual_state)) {
        // Report the state's own key. A separate counter that skips states without a
        // ranking drifts away from the real keys and reports the wrong stateID.
        foreach ($individual_state as $state_id => $cur_state) {
            if (empty($cur_state['ranking'])) {
                continue;
            }
            if (isset($cur_state['ranking']['_Source']) && $cur_state['ranking']['_Source'] == 'Individual') {
                $most_recent_ind_ranking = $state_id;
            }
        }
    }

    return $most_recent_ind_ranking;
}
/**
 * get_cached_solt_ranking_id($solt_sk, $individual_state)
 *
 * Searches the cached states for a project-based ranking ('_Source' == 'Project')
 * that was generated from $solt_sk. When several states match, the last one wins.
 *
 * @param string $solt_sk          Project identifier the ranking was generated from.
 * @param array  $individual_state stateID => array('ranking' => array(...)).
 * @return int stateID of the cached ranking for this project, or -1 when there is none.
 * @author Kristian Ljungkvist
 **/
function get_cached_solt_ranking_id($solt_sk,$individual_state) {
    $cached_solt_ranking_id = -1; // -1 means no match, in this context.

    if (is_array($individual_state)) {
        // Report the state's own key. A separate counter that skips states without a
        // ranking drifts away from the real keys and reports the wrong stateID.
        foreach ($individual_state as $state_id => $cur_state) {
            if (empty($cur_state['ranking'])) {
                continue;
            }
            $ranking = $cur_state['ranking'];
            $is_project_ranking = isset($ranking['_Source']) && $ranking['_Source'] == 'Project';
            $same_project = isset($ranking['_SourceProfile']) && $ranking['_SourceProfile'] == $solt_sk;
            if ($is_project_ranking && $same_project) {
                $cached_solt_ranking_id = $state_id;
            }
        }
    }

    return $cached_solt_ranking_id;
}
/**
 * get_page_indicator
 *
 * Works out what the page control should show for a rendered result list.
 *
 * @param array $display_array Rendered rows; the page count is taken from the first
 *                             row that carries 'total_pages'.
 * @param array $state_array   State whose 'p' entry is the current page (1 when absent).
 * @return array 'Prev'  => 1 if a previous-page button should be shown, else 0;
 *               'Next'  => 1 if a next-page button should be shown, else 0;
 *               'Pages' => list of page numbers to display.
 **/
function get_page_indicator($display_array,$state_array) {
    // The rows are zero-indexed, so a one-row result has no entry at index 1.
    // Use the first row that actually carries the page count.
    $total_pages = 0;
    if (is_array($display_array)) {
        foreach ($display_array as $row) {
            if (isset($row['total_pages'])) {
                $total_pages = $row['total_pages'];
                break;
            }
        }
    }
    $current_page = isset($state_array['p']) ? $state_array['p'] : 1;

    $p_i = array();
    // First, figure out if we should display the '<' and '>' buttons.
    $p_i['Prev'] = ($current_page <= 1) ? 0 : 1;
    $p_i['Next'] = ($current_page + 1 > $total_pages) ? 0 : 1;

    // Now the range of numbers to display, at most 11:
    // pages 1-11 while current_page < 7, otherwise current_page - 5 through
    // current_page + 5 (cut off at total_pages).
    if ($current_page < 7) {
        // max() handles the case where total_pages is zero (KEL 07/03/08)
        $pages = range(1, max(min(11, $total_pages), 1));
    } elseif (($current_page + 5) < $total_pages) {
        $pages = range(($current_page - 5), ($current_page + 5));
    } else {
        $pages = range(($current_page - 5), $total_pages);
    }
    $p_i['Pages'] = $pages;

    return $p_i;
}
/**
 * recommendation_summary
 *
 * Groups the scores of a ranking by interest area (the part of each project
 * key before the first underscore), picks the middle score of each group,
 * sorts the areas by that value in increasing order and optionally limits the
 * list. Header entries (keys starting with '_') and the 'HoneyPot' area are
 * ignored.
 *
 * @param array $ranking_array        current ranking (project => score)
 * @param int   $min_areas_to_include the msd_limit restriction is only applied if more
 *                                    than this many areas survive it
 * @param int   $max_areas_to_include hard limit (if non-zero) of how many areas to include
 * @param mixed $msd_limit            limit (if non-zero): areas whose middle score is higher
 *                                    than this are dropped
 * @return array list of rows with 'subareacode', 'subarea', 'area' and
 *               'subarea_median_msd'; empty when the ranking holds no projects
 * @author Kristian Ljungkvist
 **/
function recommendation_summary($ranking_array,$min_areas_to_include,$max_areas_to_include,$msd_limit){
    // Build subarea => list of scores, e.g.
    //   Chem => array(0.23,0.24,0.34)
    //   Bio  => array(0.34,0.55,0.67)
    $msd_by_area = array();
    if (is_array($ranking_array)) {
        foreach ($ranking_array as $SK => $MSD) {
            if (strpos((string) $SK, '_') === 0) {
                continue; // header field
            }
            if (!preg_match('/^([^_]+)/', $SK, $match)) {
                continue; // no subarea can be derived from this key
            }
            $subarea = $match[0];
            if ($subarea == 'HoneyPot') {
                continue;
            }
            $msd_by_area[$subarea][] = $MSD;
        }
    }

    // Take the middle element of each area's list.
    // NOTE(review): the list is in ranking order and is not sorted here, so this is
    // only a true median if the incoming ranking is already ordered by score.
    $median_msd_by_area = array();
    foreach ($msd_by_area as $area => $msd_list) {
        if ($area) {
            $median_msd_by_area[$area] = $msd_list[(int) floor(count($msd_list) / 2)];
        }
    }

    // Now sort the areas in increasing median msd order.
    asort($median_msd_by_area);

    // First, if msd_limit is set, exclude any areas with median msd higher than that number
    // (unless that would leave too few areas).
    if ($msd_limit) {
        $restricted_list = array();
        foreach ($median_msd_by_area as $area => $median_msd) {
            if ($median_msd <= $msd_limit) {
                $restricted_list[$area] = $median_msd;
            }
        }
        if (count($restricted_list) > $min_areas_to_include) {
            $median_msd_by_area = $restricted_list;
        }
    }

    // Secondly, if max_areas_to_include is set, place a hard limit on the number of items.
    if ($max_areas_to_include) {
        $median_msd_by_area = array_slice($median_msd_by_area, 0, $max_areas_to_include);
    }

    // Look up interest area titles, etc. from the project_ideas table.
    $display_array = array();
    foreach ($median_msd_by_area as $area => $median_msd) {
        // NOTE(review): $area is interpolated into the SQL unescaped; confirm how the
        // Database class expects values to be quoted.
        $sql = "SELECT * FROM project_ideas WHERE SubAreaCode='$area' LIMIT 1";
        $project_idea_area_data = $this->DB_Connection->query($sql);

        $display_array[] = array(
            'subareacode'        => $project_idea_area_data['SubAreaCode'],
            'subarea'            => $project_idea_area_data['SubArea'],
            'area'               => $project_idea_area_data['Area'],
            'subarea_median_msd' => $median_msd,
        );
    }

    // return the displayable list, limited perhaps by length and max MSD.
    return $display_array;
}
- }
- ?>