PageRenderTime 6ms CodeModel.GetById 65ms app.highlight 42ms RepoModel.GetById 2ms app.codeStats 0ms

/Recommender_Bayes.class.php

https://bitbucket.org/kljungkvist/test-fork
PHP | 976 lines | 489 code | 197 blank | 290 comment | 59 complexity | 30f5749c8800207d7f7c5cf5d1ed1b3f MD5 | raw file
  1<?php
  2/*********************************************************** 
  3 * Recommender Class -- Bayes/memcache version
  4 *
  5 *	06/16/2009 -- KEL created Bayes/Memcache fork for 2009 version
  6 * Created 08/07/2007 by Kristian Ljungkvist Based on Recommender test bed work.
  7 *
  8 * Copyright 2007 Science Buddies. All Rights Reserved
  9 ************************************************************/ 
 10require_once 'sciencebuddies/Error.class.php';
 11require_once 'sciencebuddies/Database.class.php';
 12require_once 'sciencebuddies/Modifier.class.php';
 13
 14class Recommender
 15{
 16
  17	var $sk_summary = array(); // Cached SK summary; the constructor loads it from memcache key "$dataset:sk_summary"
  18	var $configuration = array(); // Result of the recommender_config query for the originID given to the constructor
  19	var $memcache_handle = null; // Memcache handle passed to the constructor
  20	var $DB_Connection = null; // Database handle passed to the constructor
  21	var $dataset_filename = ""; // Dataset name passed to the constructor; used as memcache key prefix and as a filename under ./project_ideas/development/
 22
 23
 24	/**
 25		* Constructor
 26		*
 27		*
 28		* @author Kristian Ljungkvist
 29		*/
	function	__construct($db,$originID,$dataset,$memcache)	{
		// Wires up the recommender.
		//   $db        database handle exposing query($sql)
		//   $originID  numeric origin whose recommender_config row is loaded
		//   $dataset   dataset name; used as the memcache key prefix
		//   $memcache  memcache handle exposing get($key)
		// A missing handle of either kind is fatal: an E_USER_ERROR is raised
		// and the script exits.
		if(!$db) {
			trigger_error("No Database handle passed to Recommender constructor",E_USER_ERROR);
			exit;
		}
		if(!$memcache) {
			trigger_error("No Memcache handle passed to Recommender constructor",E_USER_ERROR);
			exit;
		}

		$this->DB_Connection = $db;

		// The origin ID is concatenated into the SQL text, so force it to an
		// integer first; anything non-numeric can no longer alter the query.
		$sql = "SELECT * FROM recommender_config where originID=" . (int) $originID;
		$this->configuration = $this->DB_Connection->query($sql);

		$this->memcache_handle = $memcache;
		$this->dataset_filename = $dataset;

		// NOTE(review): the cache read result is stored as-is; presumably a
		// miss yields a non-array value -- confirm against the memcache client.
		$this->sk_summary = $memcache->get("$dataset:sk_summary");
	}
 53
 54	/**
 55		* individual_ranking
 56		*
 57		* Returns the ranking of SKs for the current dataset and the individual passed in.
 58		* @author Kristian Ljungkvist
 59		*/
 60
	function individual_ranking($ind_summary) {
		// Ranks the SKs of the current dataset for one individual.
		//   $ind_summary  individual profile; its 'Questions' entry is consumed
		//                 by algorithm_bayes() and its 'ProfileID' tags the result.
		// Returns the ranking (SK => score) plus header entries whose keys start
		// with an underscore so later phases can strip them from displayed results.

		// algorithm_bayes() takes a measure-of-fit argument but never reads it.
		// The original passed an undefined variable here; pass an explicit null.
		$mof = null;
		$ranking_array = $this->algorithm_bayes($ind_summary,4,$mof,0,0, $this->dataset_filename, $this->memcache_handle);
		if(!is_array($ranking_array)) {
			$ranking_array = array();
		}

		$ranking_array['_Source'] = 'Individual';
		$ranking_array['_SourceProfile'] = isset($ind_summary['ProfileID']) ? $ind_summary['ProfileID'] : null;

		// Tag the ranking with the dataset file's modification time so callers
		// can detect an updated dataset and invalidate cached rankings.
		// stat() returns false when the file cannot be read; record null then.
		$stat_array = stat("./project_ideas/development/".$this->dataset_filename);
		$ranking_array['_DatasetModTime'] = is_array($stat_array) ? $stat_array['mtime'] : null;

		// Remember which dataset produced this ranking.
		$ranking_array['_DatasetFilename'] = $this->dataset_filename;

		return $ranking_array;
	}
 82
 83
 84	/**
 85		* show_others_like_this
 86		*
 87		* Returns the ranking of SKs against the passed-in SK.
 88		* @author Kristian Ljungkvist
 89		*/
 90
	function show_others_like_this($solt_sk) {
		// Ranks the dataset's SKs against the project identified by $solt_sk.
		//   $solt_sk  project filename; the text before its first underscore
		//             is used as the subarea key into sk_summary.
		// Returns the ranking plus underscore-prefixed header entries.

		// Subarea = leading run of non-underscore characters (null if none).
		$subarea = preg_match('/^([^_]+)/', $solt_sk, $match) ? $match[0] : null;

		// Profile of the source project, or null when sk_summary has no entry.
		$source_project_profile = null;
		if($subarea !== null && isset($this->sk_summary[$subarea]['SKs'][$solt_sk])) {
			$source_project_profile = $this->sk_summary[$subarea]['SKs'][$solt_sk];
		}

		// NOTE (from the original author): project profiles still need to be
		// produced for the Bayes context, most likely as one memcache record
		// per project written by the add_to_memcache utility.

		// algorithm_bayes() never reads its measure-of-fit argument; the
		// original passed an undefined variable, so pass an explicit null.
		$mof = null;
		$ranking_array = $this->algorithm_bayes($source_project_profile,4,$mof,0,0, $this->dataset_filename, $this->memcache_handle);
		if(!is_array($ranking_array)) {
			$ranking_array = array();
		}

		$ranking_array['_Source'] = 'Project';
		$ranking_array['_SourceProfile'] = $solt_sk; // the project filename doubles as the source ID

		// Dataset modification time, used by callers for cache invalidation.
		// stat() returns false when the file cannot be read; record null then.
		$stat_array = stat("./project_ideas/development/".$this->dataset_filename);
		$ranking_array['_DatasetModTime'] = is_array($stat_array) ? $stat_array['mtime'] : null;

		return $ranking_array;
	}
119
120
121	/**
122		* static_ranking
123		*
124		* Essentially converts the current dataset into a "ranking" of the form the rendering methods expect.
125		* Used for the dynamic interest area pages.
126		* @author Kristian Ljungkvist 05/21/08
127		*/
128
129	function static_ranking() {
130		
131		// Convert the sk_summary member to the SK->MSD form. We'll set all MSD's to 1.0 for this "ranking"
132		
133		foreach($this->sk_summary as $subarea => $subarea_array) {
134			foreach($subarea_array['SKs'] as $SK => $SK_array) {
135				$ranking_array[$SK] = 1.0;
136			}
137		}
138
139		$ranking_array['_Source'] = 'Individual';
140		$ranking_array['_SourceProfile'] = $ind_summary['ProfileID'];
141
142		// KEL 09/18/2008 -- Also tag the ranking with the time stamp of the dataset that was used.
143		// This is so that we can check for updated datasets and invalidate the cache in those cases.
144		$stat_array = stat("./project_ideas/development/".$this->dataset_filename);
145		$ranking_array['_DatasetModTime'] = $stat_array['mtime'];
146
147		return $ranking_array;
148	}
149
150
151
152	function algorithm_bayes($individual_array,$threshold,$mof,$recenter,$gcv, $dataset, $memcache) {
153		// Get list of prior probabilities
154		$prior_probabilities_list = $memcache->get("$dataset:prior_prob");
155		
156		echo "algorithm_bayes:prior_probabilities_list:<br/>";
157		echo "<pre/>".print_r($prior_probabilities_list)."</pre>";
158
159		$MATURITY = 3; // offset in to the answer_list for the maturity
160
161
162		$unordered_ranking = array();
163		$project_prob = 1;
164
165		$num_matching_questions = 0; // keeps track of the total number of matching questions between ind and project.
166		$highest_maturity = 0; // records the maturity of the most mature question
167		$skipped_questions = 0;
168		$matching_questions_ref = array();
169		$highest_maturities_ref = array();
170		$question_maturity_threshold = 100;
171
172		foreach($individual_array['Questions'] as $question => $user_answer) {
173			// 		Grab the corresponding project data list from memcached.
174			$project_list = $memcache->get("$dataset:$question:$user_answer");
175			$project_maturity_list = $memcache->get("$dataset:$question:3");
176			foreach($project_list as $project => $answer_prob) {
177				$proj_ptr = &$unordered_ranking[$project]; // get a pointer to the project in the ranking. Speeds things up quite a bit.
178				$maturity_ptr = &$project_maturity_list[$project];
179				if($answer_prob == 0) {continue;}
180				if($maturity_ptr > $question_maturity_threshold) {
181					if(!$proj_ptr) {
182						$proj_ptr = 1.0;
183					}
184					$proj_ptr = $proj_ptr * $answer_prob;
185
186					$mqr_ptr = &$matching_questions_ref[$project];
187					$mqr_ptr++;  
188					$project_hmr_ptr = &$highest_maturity_ref[$project];
189					if($maturity_ptr > $project_hmr_ptr) {
190						$project_hmr_ptr = $maturity_ptr;
191					}
192				}
193			}
194		}
195
196		// Now need to loop through the projects and do the nth root/ prior probability calculations.
197		$include_prior_probability = 1;
198		foreach($matching_questions_ref as $project => $num_matching_questions) {					
199			$unordered_ranking[$project] = $unordered_ranking[$project] * $prior_probabilities_list[$project]; // Multiply by the prior probability.
200			$unordered_ranking[$project] = pow($unordered_ranking[$project],(1 / ($num_matching_questions +1))); // Take the nth root where n = number of matching questions for this project.
201			if(($unordered_ranking[$project] < 1) && ($highest_maturity_ref[$project] > $threshold)) {
202				$filtered_unordered_ranking[$project] = $unordered_ranking[$project];
203			}
204		}
205		// Now sort the unordered ranking
206		arsort($filtered_unordered_ranking);
207
208		$ranking_array = $filtered_unordered_ranking;
209		foreach($ranking_array as $SK => $prob) {
210			$return_ranking[$SK] = $prob;
211		}
212		return($return_ranking);
213	}
214
215
216
217	/**
218		* function algorithm070220h
219		*
220		**/
221	function algorithm070220h($big_array,$individual_array,$threshold,$mof,$recenter,$gcv) {
222
223		// big_array holds an entire subject area. Individual_array is one individual's responses.
224		$squared_diff = array();
225		$absolute_diff = array();
226		$MSD = array();
227		$MAV = array();
228		$SAV = array();
229
230		// "<hr/>gcv = $gcv, recenter = $recenter<hr/>";
231		if(!$recenter) {
232			if($this->configuration['GlobalRecenterValue'] > 0) {
233				$recenter = true;
234				$gcv = $this->configuration['GlobalRecenterValue'];
235			}
236		}
237		if($recenter) {
238			/** If recenter == True, alter the individual_array by recentering the values:
239			*
240				* 1) Calculate the overall average of all the questions that the user answered = UserAvg 
241				* 2)  Calculate a re-centering value for the user: 
242			*  UserOffset = UserAvg ? GlobalCenterValue  [from the template] 
243				* 3)  Before calculating either the squared or absolute difference, adjust the user values in the $individual_array, for each question: 
244			* NewUserValuei = OldUserValuei ? UserOffset 
245				**/
246
247			if(!isset($gcv)) {
248				$gcv = 1; // Default Global Center Value to 1
249			}
250			$usercount = 0;
251			$usertotal = 0;
252			foreach($individual_array['Questions'] as $question => $answer) {
253				if(!preg_match("/^_/", $question)) { 
254					continue;// Skip questions other than "01c, 1e," etc.
255				} 
256				$usertotal += $answer;
257				$usercount++;
258			}
259			$useravg = $usertotal / $usercount;
260			//echo "<hr/>useravg: $useravg<hr/>";
261			$useroffset = $useravg - $gcv;
262			//echo "<hr/>useroffset: $useroffset<hr/>";
263
264			array_walk($individual_array['Questions'],'recenter_callback',$useroffset);
265		}
266
267		// For each SK, for each survey question, calculate the squared difference
268		// between the score of the user on the question, and the average for all users
269		// on that question.
270
271		foreach($big_array as $subarea => $subarea_array) {
272			foreach($subarea_array['SKs'] as $SK => $SK_array) {
273				foreach($SK_array['Questions'] as $question => $answer) {
274					if(!preg_match("/^_/", $question)) { 
275						//echo "<HR/> skipping question $question because it's not a survey question";
276						continue;
277					} // Skip questions other than "01c, 1e," etc.
278					if(!isset($individual_array['Questions'][$question])) {
279						//echo "<HR/> skipping question $question because it's not in the individual_array";
280						continue;
281					} // Skip questions the user didn't answer
282					$squared_diff[$SK][$question] = pow(($individual_array['Questions'][$question] - $answer),2);
283					$absolute_diff[$SK][$question] = abs($individual_array['Questions'][$question] - $answer);
284				}
285			}
286		}
287
288
289
290		// For each SK, calculate the MSD by summing the squared differences and dividing by the number of questions
291
292		foreach($squared_diff as $SD_SK => $SD_array) {
293			foreach($SD_array as $sd) {
294				//echo "<HR/>MSD[$SD_SK] += $sd";
295				$MSD[$SD_SK] += $sd;
296			}
297			// echo "<HR/>MSD[$SD_SK] = ".$MSD[$SD_SK]." / ".count($SD_array);
298			// KEL 02/08/08 -- Added rounding to 5 decimal places to save space in the cache.
299			$MSD[$SD_SK] = round($MSD[$SD_SK] / count($SD_array),5);
300		}
301		asort($MSD); // Sort list by ascending MSD -- Best match first.
302
303		// MAV -- Mean Absolute Value of Difference (MAV)
304
305		foreach($absolute_diff as $SD_SK => $SD_array) {
306			foreach($SD_array as $sd) {
307				$MAV[$SD_SK] += $sd;
308			}
309			$MAV[$SD_SK] = $MAV[$SD_SK] / count($SD_array);
310		}
311		asort($MAV); // Sort list by ascending MAV -- Best match first.
312
313		// SAV -- Mean Absolute Value of Difference (SAV)
314
315		foreach($absolute_diff as $SD_SK => $SD_array) {
316			foreach($SD_array as $sd) {
317				$SAV[$SD_SK] += $sd;
318			}
319		}
320		asort($SAV); // Sort list by ascending SAV -- Best match first.
321
322		switch($mof) {
323			case 'MSD':
324			return $MSD;
325			break;
326			case 'MAV':
327			return $MAV;
328			break;
329			case 'SAV':
330			return $SAV;
331			break;
332		}
333		// Default: MSD
334		return $MSD;
335	}
336
337
338	/**
339		* function post_process -- Run through a set of modifiers *once* on creation of a new ranking list.
340		*
341		* Primarily, this is used for new project randomization, since we want the newly randomized list to be
342		* cached just like the original would be.
343		*
344		* Kristian Ljungkvist 09/19/07
345		*
346		**/
347	function post_process($ranking_array,$individual_profile,$state_array) {
348		
349		$modifier_object = ModifierFactory::createModifier('NewProjectRandomizer');
350		$ranking_array = $modifier_object->process($ranking_array,$individual_profile,$state_array,$this->configuration,$this->sk_summary);
351		$modifier_object = ModifierFactory::createModifier('ProjectRandomizer');
352		$ranking_array = $modifier_object->process($ranking_array,$individual_profile,$state_array,$this->configuration,$this->sk_summary);
353		
354		return $ranking_array;
355	}
356
357
358
359
360
361	/**
362		* function render_results
363		*
364		**/
365	function render_results($ranking_array,$individual_profile,$state_array,$favorites) {
366
367		// Generate a display version of the ranking array.
368
369		// get the list of modifiers from the recommender_configuration for this originID.
370		$modifier_list = $this->configuration['ModifierList'];
371		$projects_per_page = $this->configuration['MaxProjectsPerPage'];
372		$page = $state_array['p'];
373
374		// Cycle through the modifiers as a pipeline.
375
376		foreach (split(',',$modifier_list) as $modifier) {
377			//echo "instantiating $modifier...<br/>";
378			// instantiate the appropriate modifier
379			if($modifier != 'Paginator') {
380				// Skip poginator for now, since we need to figure out how many pages long the rendered list is given the current constraints. (for page control)
381				$modifier_object = ModifierFactory::createModifier($modifier);
382
383				// Pass it the current ranking list and state
384				$ranking_array = $modifier_object->process($ranking_array,$individual_profile,$state_array,$this->configuration,$this->sk_summary);
385				// take the resulting ranking and pass it through to the next one.   
386			}
387		}
388
389		// Get the total number of pages in the rendered ranking
390
391
392
393
394		$total_pages = ceil(count($ranking_array) / $this->configuration['MaxProjectsPerPage']);
395
396
397
398		// Now, go through the paginator:
399		$modifier_object = ModifierFactory::createModifier('Paginator');
400
401		$ranking_array = $modifier_object->process($ranking_array,$individual_profile,$state_array,$this->configuration,$this->sk_summary);
402
403
404		$display_array = array();
405		$cur_rank = 1;
406		foreach($ranking_array as $SK => $MSD) {
407			// Check for empty SK, which would only happen if there was an empty project in the array.
408			// KEL 01/30/08
409			if(!$SK) {
410				// Notify me by email if we get an empty SK -- KEL 01/30/08
411				$msg = "cur_rank = $cur_rank\n";
412				$msg .= "ranking_array:\n";
413				$msg .= print_r($ranking_array,true);
414				$msg .= "\nIndividualID={$state_array['rid']}\n";
415				$msg .= "\npage={$state_array['p']}\n";
416				$msg .= "\nStateID={$state_array['sid']}\n";
417				mail("kristian.ljungkvist@gmail.com","Recommender.class.php -- Empty SK",$msg);
418				continue;
419			}
420			// escape the header fields that are prefixed with an underline, -- KEL 10/18/07
421			if(strpos($SK, '_') === 0) {
422				continue;
423			}
424
425
426			// Look up title for this SK.
427			preg_match('/^([^_]+)/', $SK, $match);
428			$subarea = $match[0];
429/*
430
431			$cur_title = $this->sk_summary[$subarea]['SKs'][$SK]['Title'];
432			if(!$cur_title) {
433				// Fix for datasets generated with the dataset compiler. The Title is in the Questions Array on those datasets.
434				$cur_title = $this->sk_summary[$subarea]['SKs'][$SK]['Questions']['Title'];
435			}
436*/
437
438			// KEL 08/29/07 -- Inserted database queries here to grab fields that we're currently not cacheing in the sk_summary.
439			// We'll probably decide to cache these in the sk_summary for performance reasons later.
440
441			$sql = "SELECT * FROM project_ideas WHERE Filename='$SK'";
442			$project_idea_data = $this->DB_Connection->query($sql);
443
444
445			$display_array[$cur_rank-1]['rank'] = $cur_rank;
446			$display_array[$cur_rank-1]['subarea'] = $subarea;
447			$display_array[$cur_rank-1]['filename'] = $SK;
448			$display_array[$cur_rank-1]['title'] = $project_idea_data['Title'];
449
450			// Store total number of pages in result set in top entry of display_array.   
451			$display_array[$cur_rank-1]['total_pages'] = $total_pages;
452
453
454			// We'll standardize the capitalization on these. -- KEL 
455			$display_array[$cur_rank-1]['Filename'] = $SK;
456			$display_array[$cur_rank-1]['Title'] = $project_idea_data['Title'];
457
458			$display_array[$cur_rank-1]['Type'] = $project_idea_data['Type'];
459			$display_array[$cur_rank-1]['DifficultyLevel_Low'] = $project_idea_data['DifficultyLevel_Low'];
460			$display_array[$cur_rank-1]['DifficultyLevel_High'] = $project_idea_data['DifficultyLevel_High'];
461			$display_array[$cur_rank-1]['Abstract'] = $project_idea_data['Abstract'];
462			$display_array[$cur_rank-1]['Video'] = $project_idea_data['Video'];
463			$display_array[$cur_rank-1]['ImgFilename_Abstract'] = $project_idea_data['ImgFilename_Abstract'];
464
465			// Add a flag if this project has been added to favorites
466
467			if(in_array($SK,$favorites)) {
468				$display_array[$cur_rank-1]['Favorite'] = true;
469			}
470
471			$cur_rank++;
472		}
473		return $display_array;
474	}
475
476	/**
477		* render_favorites
478		*
479		**/
480
481	function render_favorites($favorites) {
482		$display_array = array();
483		$cur_rank = 1;
484		foreach($favorites as $SK) {
485
486			// Look up title for this SK.
487			preg_match('/^([^_]+)/', $SK, $match);
488			$subarea = $match[0];
489
490			$cur_title = $this->sk_summary[$subarea]['SKs'][$SK]['Title'];
491			if(!$cur_title) {
492				// Fix for datasets generated with the dataset compiler. The Title is in the Questions Array on those datasets.
493				$cur_title = $this->sk_summary[$subarea]['SKs'][$SK]['Questions']['Title'];
494			}
495			/*
496				// Look up title for this SK.
497			preg_match('/^([^_]+)/', $SK, $match);
498			$subarea = $match[0];
499			//echo "[$subarea],";
500			//echo "[$subarea][$SK],";
501
502			$cur_title = $this->sk_summary[$subarea]['SKs'][$SK]['Title'];
503			//echo "[$cur_title]<br/>";
504			if(!$cur_title) {
505				// Fix for datasets generated with the dataset compiler. The Title is in the Questions Array on those datasets.
506				$cur_title = $this->sk_summary[$subarea]['SKs'][$SK]['Questions']['Title'];
507			}
508			*/
509
510			// KEL 08/29/07 -- Inserted database queries here to grab fields that we're currently not cacheing in the sk_summary.
511			// We'll probably decide to cache these in the sk_summary for performance reasons later.
512
513			$sql = "SELECT * FROM project_ideas WHERE Filename='$SK'";
514			$project_idea_data = $this->DB_Connection->query($sql);
515
516			$display_array[$cur_rank-1]['rank'] = $cur_rank;
517			$display_array[$cur_rank-1]['subarea'] = $subarea;
518			$display_array[$cur_rank-1]['filename'] = $SK;
519			$display_array[$cur_rank-1]['title'] = $project_idea_data['Title'];
520
521			// The following are temporarily fetched from the DB. We should probably cache these in the sk_summary data instead. -- KEL
522			$display_array[$cur_rank-1]['Type'] = $project_idea_data['Type'];
523			$display_array[$cur_rank-1]['DifficultyLevel_Low'] = $project_idea_data['DifficultyLevel_Low'];
524			$display_array[$cur_rank-1]['DifficultyLevel_High'] = $project_idea_data['DifficultyLevel_High'];
525			$display_array[$cur_rank-1]['Abstract'] = $project_idea_data['Abstract'];
526			$display_array[$cur_rank-1]['ImgFilename_Abstract'] = $project_idea_data['ImgFilename_Abstract'];
527			$display_array[$cur_rank-1]['Video'] = $project_idea_data['Video'];
528			$display_array[$cur_rank-1]['Favorite'] = true; // It is a favorite, after all...
529			$cur_rank++;
530		}
531		return $display_array;
532	}
533
534	/**
535		* function render_interest_area
536		*
537		**/
538	function render_interest_area($ranking_array,$individual_profile,$state_array,$favorites) {
539
540		// Generate a display version of the ranking array, filtered by interest area.
541
542		// get the list of modifiers from the recommender_configuration for this originID.
543		$modifier_list = $this->configuration['ModifierList'];
544		$projects_per_page = $this->configuration['MaxProjectsPerPage'];
545		$page = $state_array['p'];
546
547		// Add the InterestArea modifer to the pipeline
548		$modifier_list = 'DifficultyLevel,InterestArea,Deduper';  // Don't do any randomization on interest areas.
549	//	$modifier_list .= ',InterestArea';
550		//AreaAssignment,DifficultyLevel,TimeRequired,Deduper,FirstPageNotRandom,HoneyPot,Paginator
551		
552		// Cycle through the modifiers as a pipeline.
553
554		foreach (split(',',$modifier_list) as $modifier) {
555			// instantiate the appropriate modifier
556			if(($modifier != 'Paginator') && ($modifier != 'AreaAssignment')) {
557				// Skip poginator for now, since we need to figure out how many pages long the rendered list is given the current constraints. (for page control)
558				// Also skip the AreaAssignment modifier, since we want to override that with the InterestArea Modifier.
559				$modifier_object = ModifierFactory::createModifier($modifier);
560				
561
562				// Pass it the current ranking list and state
563				$ranking_array = $modifier_object->process($ranking_array,$individual_profile,$state_array,$this->configuration,$this->sk_summary);
564				// take the resulting ranking and pass it through to the next one.   
565			}
566		}
567
568		// Get the total number of pages in the rendered ranking
569
570		$total_pages = ceil(count($ranking_array) / $this->configuration['MaxProjectsPerPage']);
571
572
573
574		// Now, go through the paginator:
575		$modifier_object = ModifierFactory::createModifier('Paginator');
576
577		$ranking_array = $modifier_object->process($ranking_array,$individual_profile,$state_array,$this->configuration,$this->sk_summary);
578
579		$display_array = array();
580		$cur_rank = 1;
581		foreach($ranking_array as $SK => $MSD) {
582			// Check for empty SK, which would only happen if there was an empty project in the array.
583			// escape the header fields that are prefixed with an underline, -- KEL 10/18/07
584			if(strpos($SK, '_') === 0) {
585				continue;
586			}
587
588			// Look up title for this SK.
589			preg_match('/^([^_]+)/', $SK, $match);
590			$subarea = $match[0];
591
592			$cur_title = $this->sk_summary[$subarea]['SKs'][$SK]['Title'];
593			if(!$cur_title) {
594				// Fix for datasets generated with the dataset compiler. The Title is in the Questions Array on those datasets.
595				$cur_title = $this->sk_summary[$subarea]['SKs'][$SK]['Questions']['Title'];
596			}
597
598			// KEL 08/29/07 -- Inserted database queries here to grab fields that we're currently not cacheing in the sk_summary.
599			// We'll probably decide to cache these in the sk_summary for performance reasons later.
600
601			$sql = "SELECT * FROM project_ideas WHERE Filename='$SK'";
602			$project_idea_data = $this->DB_Connection->query($sql);
603
604
605			$display_array[$cur_rank-1]['rank'] = $cur_rank;
606			$display_array[$cur_rank-1]['subarea'] = $subarea;
607			$display_array[$cur_rank-1]['filename'] = $SK;
608			$display_array[$cur_rank-1]['title'] = $project_idea_data['Title'];
609
610
611			// Store total number of pages in result set in top entry of display_array.   
612			$display_array[$cur_rank-1]['total_pages'] = $total_pages;
613
614			// We'll standardize the capitalization on these. -- KEL 
615			$display_array[$cur_rank-1]['Filename'] = $SK;
616			$display_array[$cur_rank-1]['Title'] = $project_idea_data['Title'];
617
618			$display_array[$cur_rank-1]['Type'] = $this->sk_summary[$subarea]['SKs'][$SK]['Questions']['Type'];
619			$display_array[$cur_rank-1]['DifficultyLevel_Low'] = $this->sk_summary[$subarea]['SKs'][$SK]['Questions']['DifficultyLevel_Low'];
620			$display_array[$cur_rank-1]['DifficultyLevel_High'] = $this->sk_summary[$subarea]['SKs'][$SK]['Questions']['DifficultyLevel_High'];
621			$display_array[$cur_rank-1]['Abstract'] = $project_idea_data['Abstract'];
622			$display_array[$cur_rank-1]['Video'] = $project_idea_data['Video'];
623			$display_array[$cur_rank-1]['ImgFilename_Abstract'] = $project_idea_data['ImgFilename_Abstract'];
624
625			// Add a flag if this project has been added to favorites
626
627			if(in_array($SK,$favorites)) {
628				$display_array[$cur_rank-1]['Favorite'] = true;
629			}
630
631			$cur_rank++;
632		}
633		return $display_array;
634	}
635	
636	
637	/**
638		* function render_interest_area_all_projects
639		*
640		**/
641	function render_interest_area_all_projects($ranking_array,$individual_profile,$state_array,$favorites) {
642
643		// Generate a display version of the ranking array, filtered by interest area.
644		
645		// For this version, we don't do any other filtering.
646
647		
648		$modifier_list = 'InterestArea';
649		// Cycle through the modifiers as a pipeline.
650
651		foreach (split(',',$modifier_list) as $modifier) {
652				$modifier_object = ModifierFactory::createModifier($modifier);
653				// Pass it the current ranking list and state
654				$ranking_array = $modifier_object->process($ranking_array,$individual_profile,$state_array,$this->configuration,$this->sk_summary);
655				// take the resulting ranking and pass it through to the next one.   
656		}
657
658
659		$display_array = array();
660		$cur_rank = 1;
661		foreach($ranking_array as $SK => $MSD) {
662			// Check for empty SK, which would only happen if there was an empty project in the array.
663			// escape the header fields that are prefixed with an underline, -- KEL 10/18/07
664			if(strpos($SK, '_') === 0) {
665				continue;
666			}
667
668			// Look up title for this SK.
669			preg_match('/^([^_]+)/', $SK, $match);
670			$subarea = $match[0];
671
672			$cur_title = $this->sk_summary[$subarea]['SKs'][$SK]['Title'];
673			if(!$cur_title) {
674				// Fix for datasets generated with the dataset compiler. The Title is in the Questions Array on those datasets.
675				$cur_title = $this->sk_summary[$subarea]['SKs'][$SK]['Questions']['Title'];
676			}
677
678			// KEL 08/29/07 -- Inserted database queries here to grab fields that we're currently not cacheing in the sk_summary.
679			// We'll probably decide to cache these in the sk_summary for performance reasons later.
680
681			$sql = "SELECT * FROM project_ideas WHERE Filename='$SK'";
682			$project_idea_data = $this->DB_Connection->query($sql);
683
684
685			$display_array[$cur_rank-1]['rank'] = $cur_rank;
686			$display_array[$cur_rank-1]['subarea'] = $subarea;
687			$display_array[$cur_rank-1]['filename'] = $SK;
688			$display_array[$cur_rank-1]['title'] = $project_idea_data['Title'];
689
690
691			// We'll standardize the capitalization on these. -- KEL 
692			$display_array[$cur_rank-1]['Filename'] = $SK;
693			$display_array[$cur_rank-1]['Title'] = $project_idea_data['Title'];
694
695			$display_array[$cur_rank-1]['Type'] = $this->sk_summary[$subarea]['SKs'][$SK]['Questions']['Type'];
696			$display_array[$cur_rank-1]['DifficultyLevel_Low'] = $this->sk_summary[$subarea]['SKs'][$SK]['Questions']['DifficultyLevel_Low'];
697			$display_array[$cur_rank-1]['DifficultyLevel_High'] = $this->sk_summary[$subarea]['SKs'][$SK]['Questions']['DifficultyLevel_High'];
698//			$display_array[$cur_rank-1]['Abstract'] = $project_idea_data['Abstract'];
699//			$display_array[$cur_rank-1]['Video'] = $project_idea_data['Video'];
700//			$display_array[$cur_rank-1]['ImgFilename_Abstract'] = $project_idea_data['ImgFilename_Abstract'];
701
702			// Add a flag if this project has been added to favorites
703
704			if(in_array($SK,$favorites)) {
705				$display_array[$cur_rank-1]['Favorite'] = true;
706			}
707
708			$cur_rank++;
709		}
710		return $display_array;
711	}
712
713	/**
714		* function get_first_recommendation -- returns the top project that would be displayed for this user, which in many cases
715		* will differ from the top ranked project due to interestarea restrictions, difficulty level, and time required.
716		*
717		* Basically, this method runs through all the standard filters and modifiers except the pagination modifier, and 
718		* Returns the first entry in the resulting list.
719		*
720		* Primarily, this is used for logging the FirstRecommendation in the recommender_action table.
721		*
722		* Kristian Ljungkvist 10/09/07
723		*
724		**/  
725	function get_first_recommendation($ranking_array,$individual_profile,$state_array,$favorites) {
726		// get the list of modifiers from the recommender_configuration for this originID.
727		$modifier_list = $this->configuration['ModifierList'];
728		$projects_per_page = $this->configuration['MaxProjectsPerPage'];
729		$page = $state_array['p'];
730
731		// Cycle through the modifiers as a pipeline.
732
733		foreach (split(',',$modifier_list) as $modifier) {
734			//echo "instantiating $modifier...<br/>";
735			// instantiate the appropriate modifier
736			if($Modifier != 'Paginator') {
737				$modifier_object = ModifierFactory::createModifier($modifier);
738				// Pass it the current ranking list and state
739				$ranking_array = $modifier_object->process($ranking_array,$individual_profile,$state_array,$this->configuration,$this->sk_summary);
740				// take the resulting ranking and pass it through to the next one.   
741			}
742		}
743		return key($ranking_array);
744
745	}
746	/**
747		* get_title_of_sk
748		*
749		**/
750
751	function get_title_of_sk($SK) {
752		$sql = "SELECT * FROM project_ideas WHERE Filename='$SK'";
753		$project_idea_data = $this->DB_Connection->query($sql);
754		return $project_idea_data['Title'];
755	}
756
757	/**
758		* get_solt_details_of_sk
759		*
760		* Eleboration on get_title_of_sk to get subareacode, etc.
761		**/
762
763	function get_solt_details_of_sk($SK) {
764		$sql = "SELECT * FROM project_ideas WHERE Filename='$SK'";
765		$project_idea_data = $this->DB_Connection->query($sql);
766		return $project_idea_data;
767	}
768
769
770	/**
771		* get_msd_of_sk
772		*
773		**/
774
775	function get_MSD_of_sk($SK,$individual_state,$state_array) {
776		$stateID = $state_array['sid'];
777
778		// First, find the most recent individual-based ranking for this individual.
779
780		$mrir = $this->get_most_recent_individual_ranking($individual_state);
781		//	echo "<hr/>mrir:<br/>";
782		//	print_r($mrir);
783		//	echo "<hr/>";
784
785		// Find the record in the corresponding ranking list for SK.
786		$MSD = $mrir[$SK];
787		return($MSD);
788	}
789
790	/**
791		* get_most_recent_individual_ranking($individual_state)
792		*
793		* @return array: most recent individual ranking
794		* @author Kristian Ljungkvist
795		**/
796	function get_most_recent_individual_ranking($individual_state) {
797		// Loop through the states and identify the individual-sourced
798		// ranking with the highest stateID.
799		$i= 1;
800		$most_recent_ind_ranking = 1; // default, since we know the first ranking is based on the ind.
801		foreach($individual_state as $cur_state) {
802			//echo "$i: cur_state:".print_r($cur_state)."<hr/>";
803			//echo "index: $i:cur_state::ranking::Source = ".$cur_state['ranking']['_Source']."<br/>";
804			if(!$cur_state['ranking']) {continue;}
805			if($cur_state['ranking']['_Source'] =='Individual') {
806				$most_recent_ind_ranking = $i;
807				//echo "most_recent_ind_ranking: $most_recent_ind_ranking<br/>";
808			}
809			$i++;
810		}
811		return $individual_state[$most_recent_ind_ranking]['ranking'];
812	}
813
814	/**
815		* get_most_recent_individual_ranking_id($individual_state)
816		*
817		* @return int: most recent individual ranking ID (stateID in cache)
818		* @author Kristian Ljungkvist
819		**/
820	function get_most_recent_individual_ranking_id($individual_state) {
821		// Loop through the states and identify the individual-sourced
822		// ranking with the highest stateID.
823		$i= 1;
824		$most_recent_ind_ranking = 1; // default, since we know the first ranking is based on the ind.
825		foreach($individual_state as $cur_state) {
826			//echo "$i: cur_state:".print_r($cur_state)."<hr/>";
827			//echo "index: $i:cur_state::ranking::Source = ".$cur_state['ranking']['_Source']."<br/>";
828			if(!$cur_state['ranking']) {continue;}
829			if($cur_state['ranking']['_Source'] =='Individual') {
830				$most_recent_ind_ranking = $i;
831				//echo "most_recent_ind_ranking: $most_recent_ind_ranking<br/>";
832			}
833			$i++;
834		}
835		return $most_recent_ind_ranking;
836	}
837
838	/**
839		* get_cached_solt_ranking_id($solt_sk, $individual_state)
840		*
841		* @return int: stateID of cached ranking for this project (if it's in the cache)
842		* @author Kristian Ljungkvist
843		**/
844	function get_cached_solt_ranking_id($solt_sk,$individual_state) {
845		$cached_solt_ranking_id = -1; // -1 means no match, in this context.
846		$i=1;
847		foreach($individual_state as $cur_state) {
848			if(!$cur_state['ranking']) {continue;}
849			if(($cur_state['ranking']['_Source'] =='Project') && ($cur_state['ranking']['_SourceProfile'] == $solt_sk)) {
850				$cached_solt_ranking_id = $i;
851			}
852			$i++;
853		}
854		return $cached_solt_ranking_id;	
855	}
856
857	/**
858		* get_page_indicator
859		*
860		**/
861	function get_page_indicator($display_array,$state_array) {
862		$total_pages = $display_array[1]['total_pages'];
863		$current_page = $state_array['p'];
864
865		// Figure out which page numbers to display. Return an array 
866
867
868		// First, figure out if we should display the '<' and '>' buttons.
869		if($current_page == 1) {
870			$p_i['Prev'] = 0;
871		} else {
872			$p_i['Prev'] = 1;
873		}
874
875		if($current_page +1 > $total_pages) {
876			$p_i['Next'] = 0;
877		} else {
878			$p_i['Next'] = 1;
879		}
880
881		// Now, figure out the range of numbers to display.
882		// This should be 11 numbers.
883
884		// If current_page < 7, 1-11. >=7, current_page - 5 through current_page + 5 (unless total_pages is less.)
885
886		if($current_page < 7) {
887			$pages = range(1,max((min(11,$total_pages)),1));  // The max function is there to handle the case where total_pages is zero (KEL 07/03/08)
888		} else {
889			if(($current_page + 5) < $total_pages) {
890				$pages = range(($current_page - 5),($current_page + 5));
891			} else {
892				$pages = range(($current_page -5), $total_pages);
893			}
894		}
895
896		$p_i['Pages'] = $pages;
897
898
899		return $p_i;
900	}
901
902	/**
903		* recommendation_summary
904		*
905		* @param ranking_array current ranking
906		* @param max_areas_to_include: hard limit (if non-zero) of how many areas to include
907		* @param msd_limit: limit, max MSD of areas to include. If any area has a median higher than this, 
908		*		  it is not included in the results.
909		* @return array containing a sorted, possibly restricted list of median MSDs per interest area.
910		* @author Kristian Ljungkvist
911		**/
912	function recommendation_summary($ranking_array,$min_areas_to_include,$max_areas_to_include,$msd_limit){
913
914		foreach($ranking_array as $SK => $MSD) {
915			if(strpos($SK, '_') === 0) {
916				continue;
917			}
918
919
920			//	Build array of subarea,MSD for all subareas in ranking:
921			//	Chem => array(0.23,0.24,0.34)
922			//	Bio => array(0.34,0.55,0.67)
923
924			// Extract the sub area
925			preg_match('/^([^_]+)/', $SK, $match);
926			$subarea = $match[0];
927			if($subarea == 'HoneyPot') {
928				continue;
929			}
930			$msd_by_area[$subarea][]=$MSD;
931
932		}
933		// Compute the median msd for each area
934		foreach($msd_by_area as $area => $msd_list) {
935			if($area) {
936				$median_msd_by_area[$area] = $msd_list[floor(count($msd_list)/2)];
937			}
938		}
939
940		// Now sort the areas in increasing median msd order.
941		asort($median_msd_by_area);
942		// First, if msd_limit is set, exclude any areas with median msd higher than that number
943
944		if($msd_limit) {
945			foreach($median_msd_by_area as $area => $median_msd) {
946				if($median_msd <= $msd_limit) {
947					$restricted_list[$area] = $median_msd;
948				}
949			}
950			if(count($restricted_list) > $min_areas_to_include) {
951				$median_msd_by_area = $restricted_list;
952			}
953		}
954
955		// Secondly, if max_areas_to_include is set, place a hard-limit on the number of items in the list.
956		if($max_areas_to_include) {
957			$median_msd_by_area = array_slice($median_msd_by_area, 0, $max_areas_to_include);
958		}
959
960		// Look up interest area titles, etc. from project_ideas table.
961		$cur_rank = 0;
962		foreach($median_msd_by_area as $area => $median_msd) {
963			$sql = "SELECT * FROM project_ideas WHERE SubAreaCode='$area' LIMIT 1";
964			$project_idea_area_data = $this->DB_Connection->query($sql);
965
966			$display_array[$cur_rank]['subareacode'] = $project_idea_area_data['SubAreaCode'];
967			$display_array[$cur_rank]['subarea'] = $project_idea_area_data['SubArea'];
968			$display_array[$cur_rank]['area'] = $project_idea_area_data['Area'];
969			$display_array[$cur_rank]['subarea_median_msd'] = $median_msd;
970			$cur_rank++;
971		}
972		// return the displayable list, limited perhaps by length and max MSD.
973		return($display_array);
974	}
975}
976?>