PageRenderTime 57ms CodeModel.GetById 15ms app.highlight 32ms RepoModel.GetById 2ms app.codeStats 0ms

/Recommender.class.php

https://bitbucket.org/kljungkvist/test-fork
PHP | 985 lines | 492 code | 200 blank | 293 comment | 60 complexity | d61cb305b0fd017966217475bfd194ab MD5 | raw file
  1<?php
  2/*********************************************************** 
  3 * Recommender Class -- Bayes/memcache version
  4 *
  5 *	06/16/2009 -- KEL created Bayes/Memcache fork for 2009 version
  6 * Created 08/07/2007 by Kristian Ljungkvist Based on Recommender test bed work.
  7 *
  8 * Copyright 2007 Science Buddies. All Rights Reserved
  9 ************************************************************/ 
 10require_once 'sciencebuddies/Error.class.php';
 11require_once 'sciencebuddies/Database.class.php';
 12require_once 'sciencebuddies/Modifier.class.php';
 13
 14class Recommender
 15{
 16
	/** Dataset summary read from memcache under the key "<dataset>:sk_summary". */
	public $sk_summary = array();

	/** Result of the recommender_config query issued by the constructor. */
	public $configuration = array();

	/** Memcache handle used for every dataset lookup. */
	public $memcache_handle = null;

	/** Database handle; the methods of this class call query($sql) on it. */
	public $DB_Connection = null;

	/** Dataset name: memcache key prefix and file name under ./project_ideas/development/. */
	public $dataset_filename = "";

	/** Address the constructor connects to when no memcache handle is supplied. */
	public $MEMCACHE_SOCKET_FILE = 'unix:///home/science/memcached.sock';
 23
 24
	/**
	 * Constructor
	 *
	 * Stores the database and memcache handles, loads the recommender_config
	 * data for the given originID and reads the "<dataset>:sk_summary" entry
	 * from memcache.
	 *
	 * @param object      $db        Database handle exposing query($sql). Required.
	 * @param int         $originID  Value matched against recommender_config.originID.
	 * @param string      $dataset   Dataset name; prefix of every memcache key used by this class.
	 * @param object|null $memcache  Memcache handle. When empty, a connection to
	 *                               $this->MEMCACHE_SOCKET_FILE is opened here instead.
	 *
	 * @author Kristian Ljungkvist
	 */
	function __construct($db,$originID,$dataset,$memcache) {
		// Bayes/memcache version uses dataset as a prefix into memcache for the various datasets.
		if (!$db) {
			// No database handle is considered a critical error.
			trigger_error("No Database handle passed to Recommender constructor", E_USER_ERROR);
			exit;
		}

		if (!$memcache) {
			// The client is not forced to pass a handle in; create one on demand.
			$memcache = new Memcache;

			// A freshly created object is always truthy, so the only meaningful
			// failure signal is the return value of connect().
			if (!$memcache->connect($this->MEMCACHE_SOCKET_FILE, 0)) {
				trigger_error("Could not connect to memcache", E_USER_ERROR);
				exit;
			}
		}

		$this->DB_Connection = $db;
		$this->memcache_handle = $memcache;
		$this->dataset_filename = $dataset;

		// originID is used unquoted in the statement, so force it to an integer
		// before it is concatenated into the SQL text.
		$sql = "SELECT * FROM recommender_config where originID=" . (int) $originID;
		$this->configuration = $this->DB_Connection->query($sql);

		// Memcache::get() returns false on a miss; keep sk_summary an array so
		// that later foreach loops over it are safe.
		$summary = $memcache->get("$dataset:sk_summary");
		$this->sk_summary = is_array($summary) ? $summary : array();
	}
 60
	/**
	 * individual_ranking
	 *
	 * Returns the ranking of SKs for the current dataset and the individual passed in.
	 *
	 * The ranking produced by algorithm_bayes() is tagged with header entries
	 * whose keys start with an underscore, which makes them easy for other
	 * phases to strip from displayed result sets:
	 *   _Source          always 'Individual'
	 *   _SourceProfile   $ind_summary['ProfileID'] (null when absent)
	 *   _DatasetModTime  mtime of the dataset file (null when stat() fails)
	 *   _DatasetFilename name of the dataset the ranking came from
	 *
	 * @param array $ind_summary Individual profile; its 'Questions' entry is read by
	 *                           algorithm_bayes() and its 'ProfileID' entry is read here.
	 * @return array SK => score, followed by the header entries listed above.
	 * @author Kristian Ljungkvist
	 */
	function individual_ranking($ind_summary) {
		// algorithm_bayes() never reads its $mof argument, so pass an explicit
		// null rather than an undefined variable.
		$ranking_array = $this->algorithm_bayes($ind_summary, 4, null, 0, 0, $this->dataset_filename, $this->memcache_handle);
		if (!is_array($ranking_array)) {
			$ranking_array = array();
		}

		// KEL 10/18/07 -- Tag the ranking with the ProfileID of the profile that created it.
		$ranking_array['_Source'] = 'Individual';
		$ranking_array['_SourceProfile'] = isset($ind_summary['ProfileID']) ? $ind_summary['ProfileID'] : null;

		// KEL 09/18/2008 -- Tag the ranking with the time stamp of the dataset that was used,
		// so that updated datasets can be detected and the cache invalidated.
		$stat_array = stat("./project_ideas/development/".$this->dataset_filename);
		$ranking_array['_DatasetModTime'] = is_array($stat_array) ? $stat_array['mtime'] : null;

		// KEL 06/24/09 -- Store the originating dataset filename for future reference,
		// since there will be several "neighborhood" datasets.
		$ranking_array['_DatasetFilename'] = $this->dataset_filename;

		return $ranking_array;
	}
 89
 90
	/**
	 * show_others_like_this
	 *
	 * Returns the ranking of SKs against the passed-in SK.
	 *
	 * The profile of the source project is looked up in $this->sk_summary using
	 * the part of the SK name before the first underscore as the subarea. When
	 * no profile is found, the returned ranking contains only the header entries.
	 *
	 * @param string $solt_sk Filename (SK) of the source project.
	 * @return array SK => score, followed by the header entries '_Source' ('Project'),
	 *               '_SourceProfile' ($solt_sk) and '_DatasetModTime'.
	 * @author Kristian Ljungkvist
	 */
	function show_others_like_this($solt_sk) {
		// The subarea is everything up to the first underscore of the SK name.
		// An SK that starts with an underscore produces no match at all.
		$match = array();
		preg_match('/^([^_]+)/', $solt_sk, $match);
		$subarea = isset($match[0]) ? $match[0] : '';

		$source_project_profile = null;
		if (isset($this->sk_summary[$subarea]['SKs'][$solt_sk])) {
			$source_project_profile = $this->sk_summary[$subarea]['SKs'][$solt_sk];
		}

		// Generate the ranking of that project against the others.
		// NOTE: We need to generate the project profile somehow in the new Bayes context. We'll most
		// likely need to store one record per project in memcache with the project profile. This will
		// need to happen in the add_to_memcache utility.
		$ranking_array = array();
		if (is_array($source_project_profile)) {
			// algorithm_bayes() never reads its $mof argument; pass an explicit null.
			$ranking_array = $this->algorithm_bayes($source_project_profile, 4, null, 0, 0, $this->dataset_filename, $this->memcache_handle);
			if (!is_array($ranking_array)) {
				$ranking_array = array();
			}
		}

		// KEL 10/18/07 -- Tag the ranking with its source; for show-others-like-this the
		// project filename is used as the source ID.
		$ranking_array['_Source'] = 'Project';
		$ranking_array['_SourceProfile'] = $solt_sk;

		// KEL 09/18/2008 -- Tag the ranking with the time stamp of the dataset that was used,
		// so that updated datasets can be detected and the cache invalidated.
		$stat_array = stat("./project_ideas/development/".$this->dataset_filename);
		$ranking_array['_DatasetModTime'] = is_array($stat_array) ? $stat_array['mtime'] : null;

		return $ranking_array;
	}
126
127
128	/**
129		* static_ranking
130		*
131		* Essentially converts the current dataset into a "ranking" of the form the rendering methods expect.
132		* Used for the dynamic interest area pages.
133		* @author Kristian Ljungkvist 05/21/08
134		*/
135
136	function static_ranking() {
137		
138		// Convert the sk_summary member to the SK->MSD form. We'll set all MSD's to 1.0 for this "ranking"
139		
140		foreach($this->sk_summary as $subarea => $subarea_array) {
141			foreach($subarea_array['SKs'] as $SK => $SK_array) {
142				$ranking_array[$SK] = 1.0;
143			}
144		}
145
146		$ranking_array['_Source'] = 'Individual';
147		$ranking_array['_SourceProfile'] = $ind_summary['ProfileID'];
148
149		// KEL 09/18/2008 -- Also tag the ranking with the time stamp of the dataset that was used.
150		// This is so that we can check for updated datasets and invalidate the cache in those cases.
151		$stat_array = stat("./project_ideas/development/".$this->dataset_filename);
152		$ranking_array['_DatasetModTime'] = $stat_array['mtime'];
153
154		return $ranking_array;
155	}
156
157
158
159	function algorithm_bayes($individual_array,$threshold,$mof,$recenter,$gcv, $dataset, $memcache) {
160		// Get list of prior probabilities
161		$prior_probabilities_list = $memcache->get("$dataset:prior_prob");
162		
163//		echo "algorithm_bayes:prior_probabilities_list:<br/>";
164//		echo "<pre/>".print_r($prior_probabilities_list)."</pre>";
165
166		$MATURITY = 3; // offset in to the answer_list for the maturity
167
168
169		$unordered_ranking = array();
170		$project_prob = 1;
171
172		$num_matching_questions = 0; // keeps track of the total number of matching questions between ind and project.
173		$highest_maturity = 0; // records the maturity of the most mature question
174		$skipped_questions = 0;
175		$matching_questions_ref = array();
176		$highest_maturities_ref = array();
177		$question_maturity_threshold = 100;
178
179		foreach($individual_array['Questions'] as $question => $user_answer) {
180			// 		Grab the corresponding project data list from memcached.
181			$project_list = $memcache->get("$dataset:$question:$user_answer");
182			$project_maturity_list = $memcache->get("$dataset:$question:3");
183			foreach($project_list as $project => $answer_prob) {
184				$proj_ptr = &$unordered_ranking[$project]; // get a pointer to the project in the ranking. Speeds things up quite a bit.
185				$maturity_ptr = &$project_maturity_list[$project];
186				if($answer_prob == 0) {continue;}
187				if($maturity_ptr > $question_maturity_threshold) {
188					if(!$proj_ptr) {
189						$proj_ptr = 1.0;
190					}
191					$proj_ptr = $proj_ptr * $answer_prob;
192
193					$mqr_ptr = &$matching_questions_ref[$project];
194					$mqr_ptr++;  
195					$project_hmr_ptr = &$highest_maturity_ref[$project];
196					if($maturity_ptr > $project_hmr_ptr) {
197						$project_hmr_ptr = $maturity_ptr;
198					}
199				}
200			}
201		}
202
203		// Now need to loop through the projects and do the nth root/ prior probability calculations.
204		$include_prior_probability = 1;
205		foreach($matching_questions_ref as $project => $num_matching_questions) {					
206			$unordered_ranking[$project] = $unordered_ranking[$project] * $prior_probabilities_list[$project]; // Multiply by the prior probability.
207			$unordered_ranking[$project] = pow($unordered_ranking[$project],(1 / ($num_matching_questions +1))); // Take the nth root where n = number of matching questions for this project.
208			if(($unordered_ranking[$project] < 1) && ($highest_maturity_ref[$project] > $threshold)) {
209				$filtered_unordered_ranking[$project] = $unordered_ranking[$project];
210			}
211		}
212		// Now sort the unordered ranking
213		arsort($filtered_unordered_ranking);
214
215		$ranking_array = $filtered_unordered_ranking;
216		foreach($ranking_array as $SK => $prob) {
217			$return_ranking[$SK] = $prob;
218		}
219		return($return_ranking);
220	}
221
222
223
224	/**
225		* function algorithm070220h
226		*
227		**/
228	function algorithm070220h($big_array,$individual_array,$threshold,$mof,$recenter,$gcv) {
229
230		// big_array holds an entire subject area. Individual_array is one individual's responses.
231		$squared_diff = array();
232		$absolute_diff = array();
233		$MSD = array();
234		$MAV = array();
235		$SAV = array();
236
237		// "<hr/>gcv = $gcv, recenter = $recenter<hr/>";
238		if(!$recenter) {
239			if($this->configuration['GlobalRecenterValue'] > 0) {
240				$recenter = true;
241				$gcv = $this->configuration['GlobalRecenterValue'];
242			}
243		}
244		if($recenter) {
245			/** If recenter == True, alter the individual_array by recentering the values:
246			*
247				* 1) Calculate the overall average of all the questions that the user answered = UserAvg 
248				* 2)  Calculate a re-centering value for the user: 
249			*  UserOffset = UserAvg ? GlobalCenterValue  [from the template] 
250				* 3)  Before calculating either the squared or absolute difference, adjust the user values in the $individual_array, for each question: 
251			* NewUserValuei = OldUserValuei ? UserOffset 
252				**/
253
254			if(!isset($gcv)) {
255				$gcv = 1; // Default Global Center Value to 1
256			}
257			$usercount = 0;
258			$usertotal = 0;
259			foreach($individual_array['Questions'] as $question => $answer) {
260				if(!preg_match("/^_/", $question)) { 
261					continue;// Skip questions other than "01c, 1e," etc.
262				} 
263				$usertotal += $answer;
264				$usercount++;
265			}
266			$useravg = $usertotal / $usercount;
267			//echo "<hr/>useravg: $useravg<hr/>";
268			$useroffset = $useravg - $gcv;
269			//echo "<hr/>useroffset: $useroffset<hr/>";
270
271			array_walk($individual_array['Questions'],'recenter_callback',$useroffset);
272		}
273
274		// For each SK, for each survey question, calculate the squared difference
275		// between the score of the user on the question, and the average for all users
276		// on that question.
277
278		foreach($big_array as $subarea => $subarea_array) {
279			foreach($subarea_array['SKs'] as $SK => $SK_array) {
280				foreach($SK_array['Questions'] as $question => $answer) {
281					if(!preg_match("/^_/", $question)) { 
282						//echo "<HR/> skipping question $question because it's not a survey question";
283						continue;
284					} // Skip questions other than "01c, 1e," etc.
285					if(!isset($individual_array['Questions'][$question])) {
286						//echo "<HR/> skipping question $question because it's not in the individual_array";
287						continue;
288					} // Skip questions the user didn't answer
289					$squared_diff[$SK][$question] = pow(($individual_array['Questions'][$question] - $answer),2);
290					$absolute_diff[$SK][$question] = abs($individual_array['Questions'][$question] - $answer);
291				}
292			}
293		}
294
295
296
297		// For each SK, calculate the MSD by summing the squared differences and dividing by the number of questions
298
299		foreach($squared_diff as $SD_SK => $SD_array) {
300			foreach($SD_array as $sd) {
301				//echo "<HR/>MSD[$SD_SK] += $sd";
302				$MSD[$SD_SK] += $sd;
303			}
304			// echo "<HR/>MSD[$SD_SK] = ".$MSD[$SD_SK]." / ".count($SD_array);
305			// KEL 02/08/08 -- Added rounding to 5 decimal places to save space in the cache.
306			$MSD[$SD_SK] = round($MSD[$SD_SK] / count($SD_array),5);
307		}
308		asort($MSD); // Sort list by ascending MSD -- Best match first.
309
310		// MAV -- Mean Absolute Value of Difference (MAV)
311
312		foreach($absolute_diff as $SD_SK => $SD_array) {
313			foreach($SD_array as $sd) {
314				$MAV[$SD_SK] += $sd;
315			}
316			$MAV[$SD_SK] = $MAV[$SD_SK] / count($SD_array);
317		}
318		asort($MAV); // Sort list by ascending MAV -- Best match first.
319
320		// SAV -- Mean Absolute Value of Difference (SAV)
321
322		foreach($absolute_diff as $SD_SK => $SD_array) {
323			foreach($SD_array as $sd) {
324				$SAV[$SD_SK] += $sd;
325			}
326		}
327		asort($SAV); // Sort list by ascending SAV -- Best match first.
328
329		switch($mof) {
330			case 'MSD':
331			return $MSD;
332			break;
333			case 'MAV':
334			return $MAV;
335			break;
336			case 'SAV':
337			return $SAV;
338			break;
339		}
340		// Default: MSD
341		return $MSD;
342	}
343
344
345	/**
346		* function post_process -- Run through a set of modifiers *once* on creation of a new ranking list.
347		*
348		* Primarily, this is used for new project randomization, since we want the newly randomized list to be
349		* cached just like the original would be.
350		*
351		* Kristian Ljungkvist 09/19/07
352		*
353		**/
354	function post_process($ranking_array,$individual_profile,$state_array) {
355		
356		$modifier_object = ModifierFactory::createModifier('NewProjectRandomizer');
357		$ranking_array = $modifier_object->process($ranking_array,$individual_profile,$state_array,$this->configuration,$this->sk_summary);
358		$modifier_object = ModifierFactory::createModifier('ProjectRandomizer');
359		$ranking_array = $modifier_object->process($ranking_array,$individual_profile,$state_array,$this->configuration,$this->sk_summary);
360		
361		return $ranking_array;
362	}
363
364
365
366
367
368	/**
369		* function render_results
370		*
371		**/
372	function render_results($ranking_array,$individual_profile,$state_array,$favorites) {
373
374		// Generate a display version of the ranking array.
375
376		// get the list of modifiers from the recommender_configuration for this originID.
377		$modifier_list = $this->configuration['ModifierList'];
378		$projects_per_page = $this->configuration['MaxProjectsPerPage'];
379		$page = $state_array['p'];
380
381		// Cycle through the modifiers as a pipeline.
382
383		foreach (split(',',$modifier_list) as $modifier) {
384			//echo "instantiating $modifier...<br/>";
385			// instantiate the appropriate modifier
386			if($modifier != 'Paginator') {
387				// Skip poginator for now, since we need to figure out how many pages long the rendered list is given the current constraints. (for page control)
388				$modifier_object = ModifierFactory::createModifier($modifier);
389
390				// Pass it the current ranking list and state
391				$ranking_array = $modifier_object->process($ranking_array,$individual_profile,$state_array,$this->configuration,$this->sk_summary);
392				// take the resulting ranking and pass it through to the next one.   
393			}
394		}
395
396		// Get the total number of pages in the rendered ranking
397
398
399
400
401		$total_pages = ceil(count($ranking_array) / $this->configuration['MaxProjectsPerPage']);
402
403
404
405		// Now, go through the paginator:
406		$modifier_object = ModifierFactory::createModifier('Paginator');
407
408		$ranking_array = $modifier_object->process($ranking_array,$individual_profile,$state_array,$this->configuration,$this->sk_summary);
409
410
411		$display_array = array();
412		$cur_rank = 1;
413		foreach($ranking_array as $SK => $MSD) {
414			// Check for empty SK, which would only happen if there was an empty project in the array.
415			// KEL 01/30/08
416			if(!$SK) {
417				// Notify me by email if we get an empty SK -- KEL 01/30/08
418				$msg = "cur_rank = $cur_rank\n";
419				$msg .= "ranking_array:\n";
420				$msg .= print_r($ranking_array,true);
421				$msg .= "\nIndividualID={$state_array['rid']}\n";
422				$msg .= "\npage={$state_array['p']}\n";
423				$msg .= "\nStateID={$state_array['sid']}\n";
424				mail("kristian.ljungkvist@gmail.com","Recommender.class.php -- Empty SK",$msg);
425				continue;
426			}
427			// escape the header fields that are prefixed with an underline, -- KEL 10/18/07
428			if(strpos($SK, '_') === 0) {
429				continue;
430			}
431
432
433			// Look up title for this SK.
434			preg_match('/^([^_]+)/', $SK, $match);
435			$subarea = $match[0];
436/*
437
438			$cur_title = $this->sk_summary[$subarea]['SKs'][$SK]['Title'];
439			if(!$cur_title) {
440				// Fix for datasets generated with the dataset compiler. The Title is in the Questions Array on those datasets.
441				$cur_title = $this->sk_summary[$subarea]['SKs'][$SK]['Questions']['Title'];
442			}
443*/
444
445			// KEL 08/29/07 -- Inserted database queries here to grab fields that we're currently not cacheing in the sk_summary.
446			// We'll probably decide to cache these in the sk_summary for performance reasons later.
447
448			$sql = "SELECT * FROM project_ideas WHERE Filename='$SK'";
449			$project_idea_data = $this->DB_Connection->query($sql);
450
451
452			$display_array[$cur_rank-1]['rank'] = $cur_rank;
453			$display_array[$cur_rank-1]['subarea'] = $subarea;
454			$display_array[$cur_rank-1]['filename'] = $SK;
455			$display_array[$cur_rank-1]['title'] = $project_idea_data['Title'];
456
457			// Store total number of pages in result set in top entry of display_array.   
458			$display_array[$cur_rank-1]['total_pages'] = $total_pages;
459
460
461			// We'll standardize the capitalization on these. -- KEL 
462			$display_array[$cur_rank-1]['Filename'] = $SK;
463			$display_array[$cur_rank-1]['Title'] = $project_idea_data['Title'];
464
465			$display_array[$cur_rank-1]['Type'] = $project_idea_data['Type'];
466			$display_array[$cur_rank-1]['DifficultyLevel_Low'] = $project_idea_data['DifficultyLevel_Low'];
467			$display_array[$cur_rank-1]['DifficultyLevel_High'] = $project_idea_data['DifficultyLevel_High'];
468			$display_array[$cur_rank-1]['Abstract'] = $project_idea_data['Abstract'];
469			$display_array[$cur_rank-1]['Video'] = $project_idea_data['Video'];
470			$display_array[$cur_rank-1]['ImgFilename_Abstract'] = $project_idea_data['ImgFilename_Abstract'];
471
472			// Add a flag if this project has been added to favorites
473
474			if(in_array($SK,$favorites)) {
475				$display_array[$cur_rank-1]['Favorite'] = true;
476			}
477
478			$cur_rank++;
479		}
480		return $display_array;
481	}
482
483	/**
484		* render_favorites
485		*
486		**/
487
488	function render_favorites($favorites) {
489		$display_array = array();
490		$cur_rank = 1;
491		foreach($favorites as $SK) {
492
493			// Look up title for this SK.
494			preg_match('/^([^_]+)/', $SK, $match);
495			$subarea = $match[0];
496
497			$cur_title = $this->sk_summary[$subarea]['SKs'][$SK]['Title'];
498			if(!$cur_title) {
499				// Fix for datasets generated with the dataset compiler. The Title is in the Questions Array on those datasets.
500				$cur_title = $this->sk_summary[$subarea]['SKs'][$SK]['Questions']['Title'];
501			}
502			/*
503				// Look up title for this SK.
504			preg_match('/^([^_]+)/', $SK, $match);
505			$subarea = $match[0];
506			//echo "[$subarea],";
507			//echo "[$subarea][$SK],";
508
509			$cur_title = $this->sk_summary[$subarea]['SKs'][$SK]['Title'];
510			//echo "[$cur_title]<br/>";
511			if(!$cur_title) {
512				// Fix for datasets generated with the dataset compiler. The Title is in the Questions Array on those datasets.
513				$cur_title = $this->sk_summary[$subarea]['SKs'][$SK]['Questions']['Title'];
514			}
515			*/
516
517			// KEL 08/29/07 -- Inserted database queries here to grab fields that we're currently not cacheing in the sk_summary.
518			// We'll probably decide to cache these in the sk_summary for performance reasons later.
519
520			$sql = "SELECT * FROM project_ideas WHERE Filename='$SK'";
521			$project_idea_data = $this->DB_Connection->query($sql);
522
523			$display_array[$cur_rank-1]['rank'] = $cur_rank;
524			$display_array[$cur_rank-1]['subarea'] = $subarea;
525			$display_array[$cur_rank-1]['filename'] = $SK;
526			$display_array[$cur_rank-1]['title'] = $project_idea_data['Title'];
527
528			// The following are temporarily fetched from the DB. We should probably cache these in the sk_summary data instead. -- KEL
529			$display_array[$cur_rank-1]['Type'] = $project_idea_data['Type'];
530			$display_array[$cur_rank-1]['DifficultyLevel_Low'] = $project_idea_data['DifficultyLevel_Low'];
531			$display_array[$cur_rank-1]['DifficultyLevel_High'] = $project_idea_data['DifficultyLevel_High'];
532			$display_array[$cur_rank-1]['Abstract'] = $project_idea_data['Abstract'];
533			$display_array[$cur_rank-1]['ImgFilename_Abstract'] = $project_idea_data['ImgFilename_Abstract'];
534			$display_array[$cur_rank-1]['Video'] = $project_idea_data['Video'];
535			$display_array[$cur_rank-1]['Favorite'] = true; // It is a favorite, after all...
536			$cur_rank++;
537		}
538		return $display_array;
539	}
540
541	/**
542		* function render_interest_area
543		*
544		**/
545	function render_interest_area($ranking_array,$individual_profile,$state_array,$favorites) {
546
547		// Generate a display version of the ranking array, filtered by interest area.
548
549		// get the list of modifiers from the recommender_configuration for this originID.
550		$modifier_list = $this->configuration['ModifierList'];
551		$projects_per_page = $this->configuration['MaxProjectsPerPage'];
552		$page = $state_array['p'];
553
554		// Add the InterestArea modifer to the pipeline
555		$modifier_list = 'DifficultyLevel,InterestArea,Deduper';  // Don't do any randomization on interest areas.
556	//	$modifier_list .= ',InterestArea';
557		//AreaAssignment,DifficultyLevel,TimeRequired,Deduper,FirstPageNotRandom,HoneyPot,Paginator
558		
559		// Cycle through the modifiers as a pipeline.
560
561		foreach (split(',',$modifier_list) as $modifier) {
562			// instantiate the appropriate modifier
563			if(($modifier != 'Paginator') && ($modifier != 'AreaAssignment')) {
564				// Skip poginator for now, since we need to figure out how many pages long the rendered list is given the current constraints. (for page control)
565				// Also skip the AreaAssignment modifier, since we want to override that with the InterestArea Modifier.
566				$modifier_object = ModifierFactory::createModifier($modifier);
567				
568
569				// Pass it the current ranking list and state
570				$ranking_array = $modifier_object->process($ranking_array,$individual_profile,$state_array,$this->configuration,$this->sk_summary);
571				// take the resulting ranking and pass it through to the next one.   
572			}
573		}
574
575		// Get the total number of pages in the rendered ranking
576
577		$total_pages = ceil(count($ranking_array) / $this->configuration['MaxProjectsPerPage']);
578
579
580
581		// Now, go through the paginator:
582		$modifier_object = ModifierFactory::createModifier('Paginator');
583
584		$ranking_array = $modifier_object->process($ranking_array,$individual_profile,$state_array,$this->configuration,$this->sk_summary);
585
586		$display_array = array();
587		$cur_rank = 1;
588		foreach($ranking_array as $SK => $MSD) {
589			// Check for empty SK, which would only happen if there was an empty project in the array.
590			// escape the header fields that are prefixed with an underline, -- KEL 10/18/07
591			if(strpos($SK, '_') === 0) {
592				continue;
593			}
594
595			// Look up title for this SK.
596			preg_match('/^([^_]+)/', $SK, $match);
597			$subarea = $match[0];
598
599			$cur_title = $this->sk_summary[$subarea]['SKs'][$SK]['Title'];
600			if(!$cur_title) {
601				// Fix for datasets generated with the dataset compiler. The Title is in the Questions Array on those datasets.
602				$cur_title = $this->sk_summary[$subarea]['SKs'][$SK]['Questions']['Title'];
603			}
604
605			// KEL 08/29/07 -- Inserted database queries here to grab fields that we're currently not cacheing in the sk_summary.
606			// We'll probably decide to cache these in the sk_summary for performance reasons later.
607
608			$sql = "SELECT * FROM project_ideas WHERE Filename='$SK'";
609			$project_idea_data = $this->DB_Connection->query($sql);
610
611
612			$display_array[$cur_rank-1]['rank'] = $cur_rank;
613			$display_array[$cur_rank-1]['subarea'] = $subarea;
614			$display_array[$cur_rank-1]['filename'] = $SK;
615			$display_array[$cur_rank-1]['title'] = $project_idea_data['Title'];
616
617
618			// Store total number of pages in result set in top entry of display_array.   
619			$display_array[$cur_rank-1]['total_pages'] = $total_pages;
620
621			// We'll standardize the capitalization on these. -- KEL 
622			$display_array[$cur_rank-1]['Filename'] = $SK;
623			$display_array[$cur_rank-1]['Title'] = $project_idea_data['Title'];
624
625			$display_array[$cur_rank-1]['Type'] = $this->sk_summary[$subarea]['SKs'][$SK]['Questions']['Type'];
626			$display_array[$cur_rank-1]['DifficultyLevel_Low'] = $this->sk_summary[$subarea]['SKs'][$SK]['Questions']['DifficultyLevel_Low'];
627			$display_array[$cur_rank-1]['DifficultyLevel_High'] = $this->sk_summary[$subarea]['SKs'][$SK]['Questions']['DifficultyLevel_High'];
628			$display_array[$cur_rank-1]['Abstract'] = $project_idea_data['Abstract'];
629			$display_array[$cur_rank-1]['Video'] = $project_idea_data['Video'];
630			$display_array[$cur_rank-1]['ImgFilename_Abstract'] = $project_idea_data['ImgFilename_Abstract'];
631
632			// Add a flag if this project has been added to favorites
633
634			if(in_array($SK,$favorites)) {
635				$display_array[$cur_rank-1]['Favorite'] = true;
636			}
637
638			$cur_rank++;
639		}
640		return $display_array;
641	}
642	
643	
644
645	
646	/**
647		* function render_interest_area_all_projects
648		*
649		**/
650	function render_interest_area_all_projects($ranking_array,$individual_profile,$state_array,$favorites) {
651
652		// Generate a display version of the ranking array, filtered by interest area.
653		
654		// For this version, we don't do any other filtering.
655
656		
657		$modifier_list = 'InterestArea';
658		// Cycle through the modifiers as a pipeline.
659
660		foreach (split(',',$modifier_list) as $modifier) {
661				$modifier_object = ModifierFactory::createModifier($modifier);
662				// Pass it the current ranking list and state
663				$ranking_array = $modifier_object->process($ranking_array,$individual_profile,$state_array,$this->configuration,$this->sk_summary);
664				// take the resulting ranking and pass it through to the next one.   
665		}
666
667
668		$display_array = array();
669		$cur_rank = 1;
670		foreach($ranking_array as $SK => $MSD) {
671			// Check for empty SK, which would only happen if there was an empty project in the array.
672			// escape the header fields that are prefixed with an underline, -- KEL 10/18/07
673			if(strpos($SK, '_') === 0) {
674				continue;
675			}
676
677			// Look up title for this SK.
678			preg_match('/^([^_]+)/', $SK, $match);
679			$subarea = $match[0];
680
681			$cur_title = $this->sk_summary[$subarea]['SKs'][$SK]['Title'];
682			if(!$cur_title) {
683				// Fix for datasets generated with the dataset compiler. The Title is in the Questions Array on those datasets.
684				$cur_title = $this->sk_summary[$subarea]['SKs'][$SK]['Questions']['Title'];
685			}
686
687			// KEL 08/29/07 -- Inserted database queries here to grab fields that we're currently not cacheing in the sk_summary.
688			// We'll probably decide to cache these in the sk_summary for performance reasons later.
689
690			$sql = "SELECT * FROM project_ideas WHERE Filename='$SK'";
691			$project_idea_data = $this->DB_Connection->query($sql);
692
693
694			$display_array[$cur_rank-1]['rank'] = $cur_rank;
695			$display_array[$cur_rank-1]['subarea'] = $subarea;
696			$display_array[$cur_rank-1]['filename'] = $SK;
697			$display_array[$cur_rank-1]['title'] = $project_idea_data['Title'];
698
699
700			// We'll standardize the capitalization on these. -- KEL 
701			$display_array[$cur_rank-1]['Filename'] = $SK;
702			$display_array[$cur_rank-1]['Title'] = $project_idea_data['Title'];
703
704			$display_array[$cur_rank-1]['Type'] = $this->sk_summary[$subarea]['SKs'][$SK]['Questions']['Type'];
705			$display_array[$cur_rank-1]['DifficultyLevel_Low'] = $this->sk_summary[$subarea]['SKs'][$SK]['Questions']['DifficultyLevel_Low'];
706			$display_array[$cur_rank-1]['DifficultyLevel_High'] = $this->sk_summary[$subarea]['SKs'][$SK]['Questions']['DifficultyLevel_High'];
707//			$display_array[$cur_rank-1]['Abstract'] = $project_idea_data['Abstract'];
708//			$display_array[$cur_rank-1]['Video'] = $project_idea_data['Video'];
709//			$display_array[$cur_rank-1]['ImgFilename_Abstract'] = $project_idea_data['ImgFilename_Abstract'];
710
711			// Add a flag if this project has been added to favorites
712
713			if(in_array($SK,$favorites)) {
714				$display_array[$cur_rank-1]['Favorite'] = true;
715			}
716
717			$cur_rank++;
718		}
719		return $display_array;
720	}
721
722	/**
723		* function get_first_recommendation -- returns the top project that would be displayed for this user, which in many cases
724		* will differ from the top ranked project due to interestarea restrictions, difficulty level, and time required.
725		*
726		* Basically, this method runs through all the standard filters and modifiers except the pagination modifier, and 
727		* Returns the first entry in the resulting list.
728		*
729		* Primarily, this is used for logging the FirstRecommendation in the recommender_action table.
730		*
731		* Kristian Ljungkvist 10/09/07
732		*
733		**/  
734	function get_first_recommendation($ranking_array,$individual_profile,$state_array,$favorites) {
735		// get the list of modifiers from the recommender_configuration for this originID.
736		$modifier_list = $this->configuration['ModifierList'];
737		$projects_per_page = $this->configuration['MaxProjectsPerPage'];
738		$page = $state_array['p'];
739
740		// Cycle through the modifiers as a pipeline.
741
742		foreach (split(',',$modifier_list) as $modifier) {
743			//echo "instantiating $modifier...<br/>";
744			// instantiate the appropriate modifier
745			if($Modifier != 'Paginator') {
746				$modifier_object = ModifierFactory::createModifier($modifier);
747				// Pass it the current ranking list and state
748				$ranking_array = $modifier_object->process($ranking_array,$individual_profile,$state_array,$this->configuration,$this->sk_summary);
749				// take the resulting ranking and pass it through to the next one.   
750			}
751		}
752		return key($ranking_array);
753
754	}
755	/**
756		* get_title_of_sk
757		*
758		**/
759
760	function get_title_of_sk($SK) {
761		$sql = "SELECT * FROM project_ideas WHERE Filename='$SK'";
762		$project_idea_data = $this->DB_Connection->query($sql);
763		return $project_idea_data['Title'];
764	}
765
766	/**
767		* get_solt_details_of_sk
768		*
769		* Eleboration on get_title_of_sk to get subareacode, etc.
770		**/
771
772	function get_solt_details_of_sk($SK) {
773		$sql = "SELECT * FROM project_ideas WHERE Filename='$SK'";
774		$project_idea_data = $this->DB_Connection->query($sql);
775		return $project_idea_data;
776	}
777
778
779	/**
780		* get_msd_of_sk
781		*
782		**/
783
784	function get_MSD_of_sk($SK,$individual_state,$state_array) {
785		$stateID = $state_array['sid'];
786
787		// First, find the most recent individual-based ranking for this individual.
788
789		$mrir = $this->get_most_recent_individual_ranking($individual_state);
790		//	echo "<hr/>mrir:<br/>";
791		//	print_r($mrir);
792		//	echo "<hr/>";
793
794		// Find the record in the corresponding ranking list for SK.
795		$MSD = $mrir[$SK];
796		return($MSD);
797	}
798
799	/**
800		* get_most_recent_individual_ranking($individual_state)
801		*
802		* @return array: most recent individual ranking
803		* @author Kristian Ljungkvist
804		**/
805	function get_most_recent_individual_ranking($individual_state) {
806		// Loop through the states and identify the individual-sourced
807		// ranking with the highest stateID.
808		$i= 1;
809		$most_recent_ind_ranking = 1; // default, since we know the first ranking is based on the ind.
810		foreach($individual_state as $cur_state) {
811			//echo "$i: cur_state:".print_r($cur_state)."<hr/>";
812			//echo "index: $i:cur_state::ranking::Source = ".$cur_state['ranking']['_Source']."<br/>";
813			if(!$cur_state['ranking']) {continue;}
814			if($cur_state['ranking']['_Source'] =='Individual') {
815				$most_recent_ind_ranking = $i;
816				//echo "most_recent_ind_ranking: $most_recent_ind_ranking<br/>";
817			}
818			$i++;
819		}
820		return $individual_state[$most_recent_ind_ranking]['ranking'];
821	}
822
823	/**
824		* get_most_recent_individual_ranking_id($individual_state)
825		*
826		* @return int: most recent individual ranking ID (stateID in cache)
827		* @author Kristian Ljungkvist
828		**/
829	function get_most_recent_individual_ranking_id($individual_state) {
830		// Loop through the states and identify the individual-sourced
831		// ranking with the highest stateID.
832		$i= 1;
833		$most_recent_ind_ranking = 1; // default, since we know the first ranking is based on the ind.
834		foreach($individual_state as $cur_state) {
835			//echo "$i: cur_state:".print_r($cur_state)."<hr/>";
836			//echo "index: $i:cur_state::ranking::Source = ".$cur_state['ranking']['_Source']."<br/>";
837			if(!$cur_state['ranking']) {continue;}
838			if($cur_state['ranking']['_Source'] =='Individual') {
839				$most_recent_ind_ranking = $i;
840				//echo "most_recent_ind_ranking: $most_recent_ind_ranking<br/>";
841			}
842			$i++;
843		}
844		return $most_recent_ind_ranking;
845	}
846
847	/**
848		* get_cached_solt_ranking_id($solt_sk, $individual_state)
849		*
850		* @return int: stateID of cached ranking for this project (if it's in the cache)
851		* @author Kristian Ljungkvist
852		**/
853	function get_cached_solt_ranking_id($solt_sk,$individual_state) {
854		$cached_solt_ranking_id = -1; // -1 means no match, in this context.
855		$i=1;
856		foreach($individual_state as $cur_state) {
857			if(!$cur_state['ranking']) {continue;}
858			if(($cur_state['ranking']['_Source'] =='Project') && ($cur_state['ranking']['_SourceProfile'] == $solt_sk)) {
859				$cached_solt_ranking_id = $i;
860			}
861			$i++;
862		}
863		return $cached_solt_ranking_id;	
864	}
865
866	/**
867		* get_page_indicator
868		*
869		**/
870	function get_page_indicator($display_array,$state_array) {
871		$total_pages = $display_array[1]['total_pages'];
872		$current_page = $state_array['p'];
873
874		// Figure out which page numbers to display. Return an array 
875
876
877		// First, figure out if we should display the '<' and '>' buttons.
878		if($current_page == 1) {
879			$p_i['Prev'] = 0;
880		} else {
881			$p_i['Prev'] = 1;
882		}
883
884		if($current_page +1 > $total_pages) {
885			$p_i['Next'] = 0;
886		} else {
887			$p_i['Next'] = 1;
888		}
889
890		// Now, figure out the range of numbers to display.
891		// This should be 11 numbers.
892
893		// If current_page < 7, 1-11. >=7, current_page - 5 through current_page + 5 (unless total_pages is less.)
894
895		if($current_page < 7) {
896			$pages = range(1,max((min(11,$total_pages)),1));  // The max function is there to handle the case where total_pages is zero (KEL 07/03/08)
897		} else {
898			if(($current_page + 5) < $total_pages) {
899				$pages = range(($current_page - 5),($current_page + 5));
900			} else {
901				$pages = range(($current_page -5), $total_pages);
902			}
903		}
904
905		$p_i['Pages'] = $pages;
906
907
908		return $p_i;
909	}
910
911	/**
912		* recommendation_summary
913		*
914		* @param ranking_array current ranking
915		* @param max_areas_to_include: hard limit (if non-zero) of how many areas to include
916		* @param msd_limit: limit, max MSD of areas to include. If any area has a median higher than this, 
917		*		  it is not included in the results.
918		* @return array containing a sorted, possibly restricted list of median MSDs per interest area.
919		* @author Kristian Ljungkvist
920		**/
921	function recommendation_summary($ranking_array,$min_areas_to_include,$max_areas_to_include,$msd_limit){
922
923		foreach($ranking_array as $SK => $MSD) {
924			if(strpos($SK, '_') === 0) {
925				continue;
926			}
927
928
929			//	Build array of subarea,MSD for all subareas in ranking:
930			//	Chem => array(0.23,0.24,0.34)
931			//	Bio => array(0.34,0.55,0.67)
932
933			// Extract the sub area
934			preg_match('/^([^_]+)/', $SK, $match);
935			$subarea = $match[0];
936			if($subarea == 'HoneyPot') {
937				continue;
938			}
939			$msd_by_area[$subarea][]=$MSD;
940
941		}
942		// Compute the median msd for each area
943		foreach($msd_by_area as $area => $msd_list) {
944			if($area) {
945				$median_msd_by_area[$area] = $msd_list[floor(count($msd_list)/2)];
946			}
947		}
948
949		// Now sort the areas in increasing median msd order.
950		asort($median_msd_by_area);
951		// First, if msd_limit is set, exclude any areas with median msd higher than that number
952
953		if($msd_limit) {
954			foreach($median_msd_by_area as $area => $median_msd) {
955				if($median_msd <= $msd_limit) {
956					$restricted_list[$area] = $median_msd;
957				}
958			}
959			if(count($restricted_list) > $min_areas_to_include) {
960				$median_msd_by_area = $restricted_list;
961			}
962		}
963
964		// Secondly, if max_areas_to_include is set, place a hard-limit on the number of items in the list.
965		if($max_areas_to_include) {
966			$median_msd_by_area = array_slice($median_msd_by_area, 0, $max_areas_to_include);
967		}
968
969		// Look up interest area titles, etc. from project_ideas table.
970		$cur_rank = 0;
971		foreach($median_msd_by_area as $area => $median_msd) {
972			$sql = "SELECT * FROM project_ideas WHERE SubAreaCode='$area' LIMIT 1";
973			$project_idea_area_data = $this->DB_Connection->query($sql);
974
975			$display_array[$cur_rank]['subareacode'] = $project_idea_area_data['SubAreaCode'];
976			$display_array[$cur_rank]['subarea'] = $project_idea_area_data['SubArea'];
977			$display_array[$cur_rank]['area'] = $project_idea_area_data['Area'];
978			$display_array[$cur_rank]['subarea_median_msd'] = $median_msd;
979			$cur_rank++;
980		}
981		// return the displayable list, limited perhaps by length and max MSD.
982		return($display_array);
983	}
984}
985?>