southsidehealth /bin/SLMerge/SLMerge.php

Language PHP Lines 728
MD5 Hash 7ef27f8f003b5529288b8110734fe69d Estimated Cost $11,873 (why?)
Repository https://bitbucket.org/southsidehealth/southsidehealth View Raw File View Project SPDX
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
<?php
//Note: when this script is run, there should be a table called tmp_locations
//      to store the new locations as they are made.  Afterwards we can rename
//      the old locations table.
// This script should be placed in cake's bin/SLMerge directory in order for the path names
// to work properly
// 
// The log file will record every tmp_site, tmp_location, and services_tmp_sites entry
// that was added to the database.
// The error log file will record the number of orphaned users, the number of orphaned
// services, and the number of tmp_sites without any services, as well as 
// recording the name and id of each such orphaned user, etc.
// It will also record every tmp_site that is created with no location info
// and every failed attempt to update services_tmp_sites.


define ('LOG_ERROR', dirname(dirname(dirname(__FILE__))) . "/tmp/SLMerge_errors.log");
define ('LOG_SUCCESS', dirname(dirname(dirname(__FILE__))) . "/tmp/SLMerge.log");
define('DATE_FORMAT', "Y-m-d H:i:s");

class SLMerge
{
    var $id_translations = array('sites'=>array(), 'locations'=>array());
    #var $max_tmp_loc_id = 0;
    var $max_agency_id = 0;
    var $conflict = false;
    var $limit = null;
    var $debug = false;
    var $agencies_services = array();
    var $dummy_id = null;
    var $Mptt;
    var $lang_regexp;
    var $langTok;

    function __construct($limit = null, $clear = false, $debug = false) {

	require_once("mptt.php");
        require_once("database.php");
	require_once("LangTokenizer.php");

	$this->limit = $limit;
	$this->debug = $debug;


	if ($clear) {
	    mysql_query("DELETE FROM tmp_sites;");
	    mysql_query("DELETE FROM services_tmp_sites;");
	    mysql_query("DELETE FROM languages_tmp_sites;");
	    mysql_query("UPDATE languages SET site_count = 0;");
	    if (file_exists(LOG_SUCCESS))
	        unlink(LOG_SUCCESS);
	    if (file_exists(LOG_ERROR))
	        unlink(LOG_ERROR);
        }

        /* set max agency id */
	$max_agency_id = mysql_query("SELECT MAX(id) FROM tmp_sites;");
	if (!$max_agency_id)
	    die(__CLASS__ ."::" .__FUNCTION__ . " failed on line " . __LINE__. "\n" . mysql_error());
	
	$max = mysql_fetch_array($max_agency_id);
	if ($max['MAX(id)'])
	    $this->max_agency_id = $max['MAX(id)'];
	else
	    $this->max_agency_id = 0;
	
	/* set max tmp_location id */
	#$max_tmp_loc_id = mysql_query("SELECT MAX(id) FROM tmp_locations;");
	#if (!$max_tmp_loc_id)
	    #die(__CLASS__ . "::" . __FUNCTION__ . " failed on line " . __LINE__ . "\n" . mysql_error());

	#$max = mysql_fetch_array($max_tmp_loc_id);
	#if ($max['MAX(id)'])
	    #$this->max_tmp_loc_id = $max['MAX(id)'];
	#else
	    #$this->max_tmp_loc_id = 0;

        /* build array of services_tmp_sites */
	$agencies_services = mysql_query("SELECT * FROM services_tmp_sites;");
	while ($as = mysql_fetch_array($agencies_services))
	    $this->agencies_services[$as['tmp_site_id']] = $as['service_id'];

	/* create a dummy location for any sites that do not have good location info on file */
	#$dummy_id = mysql_query("SELECT id FROM tmp_locations WHERE name LIKE \"Dummy\";");
	#if (!($dummy_id = mysql_fetch_array($dummy_id))) {

	    #mysql_query("INSERT INTO tmp_locations(id, name, lat, lng, location) 
	        #VALUES (". ++$this->max_tmp_loc_id .", \"Dummy\", 0, 0, GeomFromText('POINT (0 0)'));");

	    #$this->dummy_id = $this->max_tmp_loc_id;

	#} else {
	    #$this->dummy_id = $dummy_id['id'];
	#}


	/* initialize an MPTT object for inserting into the tmp_sites table */
	$this->Mptt = new Mptt("tmp_sites");

	$this->langTok = new LangTokenizer;

	require_once("languages.php");
	if (isset($languages)) {
	    $this->lang_regexp = "/(" . strtolower(implode("|", $languages)) . ")/";
	}
    }


    function merge() {
	/* lock tables */
        $lock_query = "LOCK TABLES tmp_sites WRITE, users WRITE, languages_tmp_sites WRITE," 
	    . " languages WRITE, services_tmp_sites WRITE, sites READ, "
	    . "locations_services READ, draft_services WRITE, contacts WRITE, "
	    . "express_carts WRITE, libraries WRITE, location_versions WRITE, "
	    . "locations WRITE, service_versions WRITE, services WRITE;";
	
	if (!mysql_query($lock_query))
	    die("failed to lock tables");

/* Make tmp_locations and tmp_sites from old sites table */
	$query = "SELECT * FROM sites";
	if ($this->limit)
	    $query .= " LIMIT $this->limit";

	$sites = mysql_query($query . ";");
	
	if (!$sites)
	    die(__CLASS__."::".__FUNCTION__." failed on line " . __LINE__ . 
	        " with the following MySQL error:\n" . mysql_error());
	
	while ($site = mysql_fetch_array($sites)) {
	    echo "Migrating site " . $site['id'] . "\n";

	    #$tmp_loc_id = $this->makeLocation($site);

	    $agency_id = $this->makeAgency($site);

	    $this->id_translations['sites'][$site['id']] = $agency_id; #array('tmp_loc_id'=>$tmp_loc_id, 'agency_id'=>$agency_id);

	    $this->makeAgenciesServices('services', $site, $agency_id);

	    if (!mysql_query("UPDATE draft_services SET parent_site_id = $agency_id WHERE site_id = " . $site['id'] .";"))
	        die(__METHOD__." error on line ".__LINE__ ."\n". mysql_error() ."\n");

	    if (!mysql_query("UPDATE express_carts SET tmp_site_id = $agency_id WHERE site_id = " . $site['id'] .";"))
	        die(__METHOD__." error on line ".__LINE__ ."\n". mysql_error() ."\n");

	    if (!mysql_query("UPDATE libraries SET tmp_site_id = $agency_id WHERE site_id = " . $site['id'] .";"))
	        die(__METHOD__." error on line ".__LINE__ ."\n". mysql_error() ."\n");

	    if (!mysql_query("UPDATE location_versions SET tmp_site_id = $agency_id WHERE site_id = " . $site['id'] .";"))
	        die(__METHOD__." error on line ".__LINE__ ."\n". mysql_error() ."\n");

	    if (!mysql_query("UPDATE locations SET tmp_site_id = $agency_id WHERE site_id = " . $site['id'] .";"))
	        die(__METHOD__." error on line ".__LINE__ ."\n". mysql_error() ."\n");

	    if (!mysql_query("UPDATE service_versions SET tmp_site_id = $agency_id WHERE site_id = " . $site['id'] .";"))
	        die(__METHOD__." error on line ".__LINE__ ."\n". mysql_error() ."\n");

	    if (!mysql_query("UPDATE services SET parent_site_id = $agency_id WHERE site_id = " . $site['id'] .";"))
	        die(__METHOD__." error on line ".__LINE__ ."\n". mysql_error() ."\n");
	}

/* Make locations and agencies from old locations table */        
	$old_locations = mysql_query("SELECT * FROM locations" . ($this->limit ? " LIMIT $this->limit;" : ";"));
	if (!$old_locations)
 	    die(__CLASS__."::".__FUNCTION__." failed on line " . __LINE__ . 
	        " with the following MySQL error:\n" . mysql_error());

	while ($old_location = mysql_fetch_array($old_locations)) {
	    echo "Migrating location " . $old_location['id'] . "\n";

	    # $tmp_loc_id = $this->makeLocation($old_location);

            if (isset($this->id_translations['sites'][$old_location['site_id']])) {
	        $parent_id = $this->id_translations['sites'][$old_location['site_id']];
		# $parent_loc = $this->id_translations['sites'][$old_location['site_id']]['tmp_loc_id'];
	    } else {
	        $parent_id = null;
		# $parent_loc = null;
	    }

	    $agency_id = $this->makeAgency($old_location, $parent_id);

	    $this->id_translations['locations'][$old_location['id']] = $agency_id;

/* set users.tmp_site_id to match the newly made agency */
	    if (!mysql_query("UPDATE users SET tmp_site_id=$agency_id WHERE location_id=".$old_location['id'].";"))
	        trigger_error(__CLASS__."::".__FUNCTION__." failed to set users.tmp_site_id on line " .__LINE__. "\n" . mysql_error());
            
	    if ($affected = mysql_affected_rows()) {
	        if (!mysql_query("UPDATE tmp_sites SET user_count = user_count + $affected WHERE id = $agency_id;"))
	            trigger_error(__CLASS__."::".__FUNCTION__." failed to set users.tmp_site_id on line " .__LINE__. "\n" . mysql_error());
	    }

/* if we are just using the parent and did not make a separate agency, no need to update services_tmp_sites.
 * just go to the next iteration of the loop */
            #if ($parent_id == $agency_id)
	        #continue;
            // else
/* populate agencies_services table */
            $this->makeAgenciesServices("locations_services", $old_location, $agency_id);

	    if (!mysql_query("UPDATE contacts SET tmp_site_id = $agency_id WHERE location_id = " . $old_location['id'] .";"))
	        die(__METHOD__." error on line ".__LINE__ ."\n". mysql_error() ."\n");

	}


	/* set agency_id for all users who have not received an agency yet */
	if (! $users = mysql_query("SELECT id, site_id FROM users WHERE tmp_site_id = 0;"))
	    die(__CLASS__."::".__FUNCTION__." received sql error on line ".__LINE__."\n".mysql_error()."\n");
	
	while ($user = mysql_fetch_array($users)) {
            if (isset($this->id_translations['sites'][$user['site_id']]) &&
	        $this->id_translations['sites'][$user['site_id']['agency_id']] != null) {
	        $agency_id = $this->id_translations['sites'][$user['site_id']]['agency_id'];
		if (! mysql_query("UPDATE users SET tmp_site_id = $agency_id WHERE id = " . $user['id'] . ";"))
	            die(__CLASS__."::".__FUNCTION__." received sql error on line ".__LINE__."\n".mysql_error()."\n");

		if (!mysql_query("UPDATE tmp_sites SET user_count = user_count + 1 WHERE id = $agency_id;"))
	            die(__CLASS__."::".__FUNCTION__." received sql error on line ".__LINE__."\n".mysql_error()."\n");
	    }
	}


	    
        /* log info */
	/* tmp_sites without services */
	if (! $no_services = mysql_query("SELECT id, name FROM tmp_sites WHERE id NOT IN (SELECT tmp_site_id FROM services_tmp_sites);"))
	    die(__CLASS__."::".__FUNCTION__." received sql error on line ".__LINE__."\n".mysql_error()."\n");
        /* tmp_sites without a location */
	if (! $no_locs = mysql_query("SELECT id, name FROM tmp_sites WHERE Equals(location, GeomFromText('POINT(0 0)'));"))
	    die(__CLASS__."::".__FUNCTION__." received sql error on line ".__LINE__."\n".mysql_error()."\n");
	/* services without tmp_sites */
	if (! $no_agencies = mysql_query("SELECT id, name FROM services WHERE id NOT IN (SELECT service_id FROM services_tmp_sites);"))
	    die(__CLASS__."::".__FUNCTION__." received sql error on line ".__LINE__."\n".mysql_error()."\n");
	/* users without tmp_sites */
	if (! $users = mysql_query("SELECT id, firstname, lastname FROM users WHERE tmp_site_id NOT IN (SELECT id FROM tmp_sites);"))
	    die(__CLASS__."::".__FUNCTION__." received sql error on line ".__LINE__."\n".mysql_error()."\n");

	if ($nrows = mysql_num_rows($no_services))
	    error_log("$nrows tmp_sites created with no services attached to them.\n", 3, LOG_ERROR);
	if ($nrows = mysql_num_rows($no_locs))
	    error_log("$nrows tmp_sites created with no location.\n", 3, LOG_ERROR);
	if ($nrows = mysql_num_rows($no_agencies))
	    error_log("$nrows services are not associated with any tmp_site.\n", 3, LOG_ERROR);
	if ($nrows = mysql_num_rows($users))
	    error_log("$nrows users are not associated with any tmp_site.\n", 3, LOG_ERROR);

	while ($no_service = mysql_fetch_array($no_services)) {
	    error_log("TmpSite ". $no_service['name'] . " (id " .$no_service['id'] . ") has no services associated with it.\n", 3, LOG_ERROR);
	}

	while ($no_loc = mysql_fetch_array($no_locs)) {
	    error_log("TmpSite ". $no_loc['name'] . " (id " .$no_loc['id'] . ") has no location.\n", 3, LOG_ERROR);
	}

	while ($no_agency = mysql_fetch_array($no_agencies)) {
	    error_log("Service " . $no_agency['name'] . " (id " . $no_agency['id'] . ") is not associated with any tmp_sites.\n", 3, LOG_ERROR);
	}

	while ($user = mysql_fetch_array($users)) {
	    error_log("User " . $user['firstname'] ." ". $user['lastname'] . 
	              " (id " . $user['id'] . ") not associated with any tmp_site.\n", 3, LOG_ERROR);
	}

	if (! mysql_query("UNLOCK TABLES;"))
	    die("Failed to unlock tables");
    }

    /* note: this function can be called on sites and locations */
    private function makeLocation($site) {
        /* if lat and lng are not set use dummy location */
        if (!isset($site['lat'])) {
	    error_log("Site ". $site['name'] . " has no lat set.\n", 3, LOG_ERROR);
	    return $this->dummy_id;
	}
        if (!isset($site['lng'])) {
	    error_log("Site ". $site['name'] . " has no lng set.\n", 3, LOG_ERROR);
	    return $this->dummy_id;
	}

	$lat = $site['lat'];
	$lng = $site['lng'];

        if ($lat == 0 || $lng == 0) {
	    return $this->dummy_id;
	}
/* if lat and lng match a location already in the db, ask user before making a new entry */
        $point = "GeomFromText(\"POINT($lat $lng)\")";
	if (!$duplicates = mysql_query("SELECT * FROM tmp_locations WHERE Equals(location, $point);"))
	    die(__CLASS__."::".__FUNCTION__." failed on line ".__LINE__."\n".mysql_error()."\n");

	if (mysql_num_rows($duplicates)) {
	    if ($old_id = $this->keep($site, $duplicates, 'tmp_locations')) 
	        return $old_id;
        }
        // Else go ahead and build the new location


/* build new tmp_location */
        $fields = array('id');
	$values = array(++$this->max_tmp_loc_id);

	$this->_make_helper('name', $site, &$fields, &$values);
	$this->_make_helper('address1', $site, &$fields, &$values);
	$this->_make_helper('address2', $site, &$fields, &$values);
	$this->_make_helper('city', $site, &$fields, &$values);
	$this->_make_helper('state', $site, &$fields, &$values);
	$this->_make_helper('zipcode', $site, &$fields, &$values);

	$fields[] = 'lat';
	$values[] = $lat;

	$fields[] = 'lng';
	$values[] = $lng;

	$fields[] = 'location';
	$values[] = "GeomFromText(\"POINT($lat $lng)\")";
        
	$query = "INSERT INTO tmp_locations(" . implode(", ", $fields) . ") VALUES(" . implode(", ", $values) . ");";
	
	if (! mysql_query($query))
	    trigger_error("Inset on tmp_locations failed using query $query\n" . mysql_error());
	else
	    error_log(date(DATE_FORMAT) ."  new location \"" . $site['name'] . "\" with id " 
	        . $this->max_tmp_loc_id . " made from " . (isset($site['site_id']) ? 'location ' : 'site ') . $site['id']. "\n", 3, LOG_SUCCESS);
	
	return $this->max_tmp_loc_id;
    }

    /**
    * If parent_id is not set, go ahead and make the agency.  If parent_id
    * is set, then compare all fields against the parent agency.  If they are
    * identical, don't bother to make the new agency.  If only a few fields 
    * differ, ask the user whether or not to make the new agency.
    */
    private function makeAgency($site, $parent_id = null) {
       /* if there is a parent agency, make sure the proposed new agency is not identical */
        if ($parent_id) {

	    if (! $parent = mysql_query("SELECT * FROM tmp_sites where id = $parent_id;"))
	        die(__CLASS__."::".__FUNCTION__." received sql error on line ".__LINE__.":\n" . mysql_error() . "\n");

	    if (! mysql_num_rows($parent))
	        die("TmpSite id $parent_id was stored in the id_translations table, but not found in the database.\n");
	    
	    /* if we are going to keep the site, we may need to update languages */
	    if ($this->keep($site, $parent, 'tmp_sites')) {
	        if (isset($site['languages']) && $site['languages'] != null)
		    $this->addLangs($parent_id, $site['languages']);
	        return $parent_id;
	    }

	}

/* build the fields to insert into the agencies table */
        $fields = array('id');
	$values = array(++$this->max_agency_id);

	$this->_make_helper('name', $site, &$fields, &$values);
	$this->_make_helper('website', $site, &$fields, &$values);
	$this->_make_helper('phone', $site, &$fields, &$values, 'main_phone');
	$this->_make_helper('email', $site, &$fields, &$values, 'main_email');
	$this->_make_helper('fax', $site, &$fields, &$values, 'main_fax');
	$this->_make_helper('file_image', $site, &$fields, &$values);
	$this->_make_helper('main_contact', $site, &$fields, &$values);
	$this->_make_helper('neighborhood_served', $site, &$fields, &$values);
	$this->_make_helper('address1', $site, &$fields, &$values);
	$this->_make_helper('address2', $site, &$fields, &$values);
	$this->_make_helper('city', $site, &$fields, &$values);
	$this->_make_helper('state', $site, &$fields, &$values);
	$this->_make_helper('zipcode', $site, &$fields, &$values);

	$lat = $site['lat'];
	$lng = $site['lng'];

	$fields[] = 'lat';
	$values[] = $lat;

	$fields[] = 'lng';
	$values[] = $lng;

	$fields[] = 'location';
	$values[] = "GeomFromText(\"POINT($lat $lng)\")";

	$fields[] = 'created';
	if (isset($site['created']) && $site['created'] != 0 && $site['created'] != '0')
	    $values[] = '"' . $site['created'] . '"' ;
	else
	    $values[] = '"' .date(DATE_FORMAT) . '"';
	

        $fields[] = 'modified';
	if (isset($site['modified']) && $site['modified'] != 0 && $site['modified'] != '0')
	    $values[] = '"' . $site['modified'] . '"';
	else
	    $values[] = '"' . date(DATE_FORMAT) . '"';


        /* insert the new tmp_site */
	if ($this->Mptt->insert($parent_id, $fields, $values)) {
	    error_log(date(DATE_FORMAT) . "  new tmp_site " . $values[1] . " with id " . $values[0] . 
	        " made from " . (isset($site['site_id']) ? 'location ' : 'site ') . $site['id'] . "\n", 3, LOG_SUCCESS);

	    /* on successful creation of a new TmpSite, add the language associations for it */
            if (isset($site['languages']) && $site['languages'] != '') 
	       $this->addLangs($this->max_agency_id, $site['languages']);

	    return $this->max_agency_id;
	}
	else {
	    error_log(date(DATE_FORMAT) . "failed to insert agency\"" . $values[1] . "\" with id " . $values[0] . "\n", 3, LOG_ERROR);
	    return null;
	}
	
    }

    private function addLangs($tmp_site_id, $languages) {
	$languages = strtolower($languages);
	$matches = array();
	preg_match_all($this->lang_regexp, $languages, &$matches);

	foreach ($matches[0] as $language) {
	    /* if this site has already been associated with this language, continue */
	    if (! $old = mysql_query("SELECT * FROM languages_tmp_sites WHERE language_id = 
	                              (SELECT id FROM languages WHERE language = \"$language\")
				      AND tmp_site_id = $tmp_site_id;"))
	        die(__METHOD__." MySQL error on line ".__LINE__.": " . mysql_error() . "\n");
	    if (mysql_num_rows($old) != 0)
	        continue;

	    /* if not, create the new association */
	    if (! mysql_query("INSERT INTO languages_tmp_sites(language_id, tmp_site_id) 
	                       VALUES((SELECT id FROM languages WHERE language = \"$language\"), $tmp_site_id);"))
	        die(__METHOD__." MySQL error on line ".__LINE__.": " . mysql_error() . "\n");
	    if (! mysql_query("UPDATE languages SET site_count = site_count + 1 WHERE language = \"$language\";"))
	        die(__METHOD__." MySQL error on line ".__LINE__.": " . mysql_error() . "\n");
        }
    }
        

    private function makeAgenciesServices($table, $old, $agency_id) {
        /* get the appropriate service ids out of the database */
        if ($table == "locations_services")
	    $query = "SELECT service_id AS id FROM $table WHERE location_id = " . $old['id'];
	             #" UNION SELECT id FROM services WHERE site_id = " . $old['site_id'] . ";";
	
	else if ($table == "services")
	    $query = "SELECT id FROM $table WHERE site_id = " . $old['id'] . ";";
	else
	    die(__CLASS__."::".__FUNCTION__." received bad table value on line ".__LINE__.": $table\n"); 
	
	if (! $service_ids = mysql_query($query))
	    die(__CLASS__."::".__FUNCTION__." received sql error on line ".__LINE__.":\n" . mysql_error() . "\n");
	

	/* create new services_tmp_locations */
	while ($service_id = mysql_fetch_array($service_ids)) {
	    if (isset($this->agencies_services[$agency_id]) && in_array($service_id, $this->agencies_services[$agency_id])) {
		continue;
	    }
	    
	    if (! mysql_query("INSERT INTO services_tmp_sites(tmp_site_id, service_id) 
	                       VALUES ($agency_id, ".$service_id['id'].");"))
	        die(__CLASS__."::".__FUNCTION__." received sql error on line ".__LINE__.":\n" . mysql_error() . "\n");
	    $this->agencies_services[$agency_id][] = $service_id;

	    if (!mysql_query("UPDATE tmp_sites SET service_count = service_count + 1 WHERE id = $agency_id;"))
	        die(__CLASS__."::".__FUNCTION__." received sql error on line ".__LINE__.":\n" . mysql_error() . "\n");

	    error_log(date(DATE_FORMAT)."  new services_tmp_site: service id=". $service_id['id'] .", tmp_site id = $agency_id\n", 3, LOG_SUCCESS);
	}
    }



/**
* this function returns true if we SHOULD keep the new entry
* and false if we should not
* note: $olds should only contain one entry, if we are checking an agency
*/
    private function keep($new, $olds, $table) {

	$old = null;
	while ($tmp = mysql_fetch_array($olds)) {
	    $diff = array();
	    $add = array();
	    $old = $tmp;

	    #if ($old['lat'] != $new['lat'] || $old['lng'] != $new['lng'])
	        #return null;

	    $this->_keep_helper('name', $new, $old, &$diff, &$add);

	    $this->_keep_helper('address1', $new, $old, &$diff, &$add);
	    $this->_keep_helper('address2', $new, $old, &$diff, &$add);
	    $this->_keep_helper('city', $new, $old, &$diff, &$add);
	    $this->_keep_helper('state', $new, $old, &$diff, &$add);
	    $this->_keep_helper('zipcode', $new, $old, &$diff, &$add);

	     $this->_keep_helper('website', $new, $old, &$diff, &$add);
	     $this->_keep_helper('phone', $new, $old, &$diff, &$add, 'main_phone');
	     $this->_keep_helper('email', $new, $old, &$diff, &$add, 'main_email');
	     $this->_keep_helper('main_contact', $new, $old, &$diff, &$add);
	     $this->_keep_helper('neighborhood_served', $new, $old, &$diff, &$add);

/* if all fields are identical, just use the old location without asking the user */
	    if (empty($diff) && empty($add))
	        return $old['id'];

/* all mutual fields are identical, some to be added */
            if (empty($diff) && !empty($add)) {
	        if (!mysql_query("UPDATE $table set " . implode(", ", $add) . " WHERE id = " . $old['id'] . ";"))
	            die(__CLASS__."::".__FUNCTION__." failed on line ".__LINE__."\n".mysql_error()."\n");
		
		return $old['id'];
	    }
	} // end WHILE loop.  note: this means that if the user is being prompted for input, we are comparing
	  // against the last of the matching entries

/* nothing to add, some differences in existing fields */
	if (!empty($diff) && empty($add)) {
	        
           /* if more than two fields differ and this is an agency,
	    * go ahead and make a new agency */
            if (count($diff) > 2 && $table == 'tmp_sites')
	        return null;
	
            /* if we are looking at a location and the only difference is the name, don't make a duplicate */
            #if ($diff === array('name') && $table == 'tmp_locations')
	        #return $old['id'];
            
	    /* else prompt the user */
        }

        /* at this point we know that !empty($diff) AND !empty($add) */
        if ($diff === array('zipcode')) {
	    if (!mysql_query("UPDATE $table set " . implode(", ", $add) . " WHERE id = " . $old['id'] . ";"))
	        die(__CLASS__."::".__FUNCTION__." failed on line ".__LINE__."\n".mysql_error()."\n");
	    
	    return $old['id'];
	}
	    
	/* if there are too many differences, just go ahead and make a new agency */
	#$keep =  $this->promptUser($new, $old, $diff, $table);
	#if ($keep)
	    return null;
	
	// else update the existing agency, if we have any new information about it
	if (!empty($add)) {
	    if (!mysql_query("UPDATE $table set " . implode(", ", $add) . " WHERE id = " . $old['id'] . ";"))
	        die(__CLASS__."::".__FUNCTION__." failed on line ".__LINE__."\n".mysql_error()."\n");
        }

	return $old['id'];
    }

    


    private function promptUser($site, $duplicate, $diff, $table) {

	print("\n**************************************************************************************************\n");
	print("Working on " . (isset($site['site_id']) ? 'location' : 'site') . " " . $site['id'] . "\n");
	print("Comparing with " . (isset($site['site_id']) ? 'tmp_location' : 'tmp_site') . " " .$duplicate['id'] . "\n\n");
        print("There is a proposed new $table that closely matches an existing $table\n");
	print("Here are the differences:\n\n");

	printf("%40s\n", "EXISTING $table");
	foreach ($diff as $key) {
	    printf("%20s: %s\n", $key, (isset($duplicate[$key]) ? $duplicate[$key] : "NULL" ));
	}
	printf("%20s: %s\n", "lat", $site['lat']);
	printf("%20s: %s\n", "lng", $site['lng']);


	printf("\n%40s\n", "PROPOSED NEW $table");
	foreach ($diff as $key) {
	    printf("%20s: %s\n", $key, (isset($site[$key]) ? $site[$key] : "NULL"));
	}
	printf("%20s: %s\n", "lat", $duplicate['lat']);
	printf("%20s: %s\n", "lng", $duplicate['lng']);



	return $this->getResponse($table);
    }




    private function getResponse($table) {
	print("\nWould you like to add the proposed new $table to the database?\n");
	print("(y)es or (n)o\n");
	print("[y] ");

	$input = strtolower(trim(fgets(STDIN, 200)));
	switch ($input) {
	    case 'n':
	    case 'no':
		return false;
	    
	    case '':
	    case 'y':
	    case 'yes':
		return true;
	    
	    default:
	        echo "\nSorry, I didn't understand your answer.\n\n";
	        return $this->getResponse($table);
	}
    }

    private function _keep_helper ($field, $new, $old, $diff, $add, $alt_field_name = null) {

	if (isset($new[$field]) && trim($new[$field]) != '') {
	    $this->_check_fields($field, $field, $new, $old, &$diff, &$add);
	}

	else if ($alt_field_name != null && isset($new[$alt_field_name]) && trim($new[$alt_field_name]) != '') {
	    $this->_check_fields($alt_field_name, $field, $new, $old, &$diff, &$add);
	}
    }

    private function _check_fields ($newfield, $oldfield, $new, $old, $diff, $add) {
        $new_val = $this->_sanitize($new[$newfield]);
	if (! isset($old[$oldfield]) || $old[$oldfield] == '') {
	    $add[] = "$oldfield = $new_val";
	}
        else if ($oldfield == 'zipcode' && isset($old[$oldfield]) && $old[$oldfield] == '0') {
            $add[] = "$oldfield = $new_val";
	}
	else if ($oldfield == 'phone') {
	    if ($this->_normalize($new_val) != $this->_normalize($old[$oldfield])) {
	         $new_val = $this->_sanitize($old[$oldfield] . ", " . $new_val);
	         $add[] = "phone = $new_val";
            }
	}
	else if ($this->_normalize($new_val) != $this->_normalize($old[$oldfield])) {
	    // check if one is a substring of the other
	    if (strpos(' '.$this->_normalize($new_val), $this->_normalize($old[$oldfield]))) {
	        $add[] = "$oldfield = $new_val";
	    }
	    else if (strpos(' '.$this->_normalize($old[$oldfield]), $this->_normalize($new_val)))
	        ; //do nothing
	    else $diff[] = $newfield;
	}

    }

    private function _make_helper($name, $site, $fields, $values, $altname = null) {
	if ($altname != null) 
	    $site_name = $altname;
	else 
	    $site_name = $name;

        if (isset($site[$site_name]) && trim($site[$site_name]) != '') {
	    $fields[] = $name;
	    $new_val = $this->_sanitize($site[$site_name]);
	
	    $values[] = $new_val;
	}
    }

    private function _sanitize ($val) {
        if (is_string($val)) {
	    $val = trim($val);
	    $val = preg_replace("/\"/", "", $val);
	    $val = '"' . $val . '"';
	 }
	 return $val;
    }

    function _normalize ($val) {
        if (is_string($val)) {
	    /* normalize phone numbers */
	    $val = preg_replace("/(1\-|1 )?\(?(\d\d\d)(\)? ?|-|\.)(\d\d\d)(-?|\.)(\d\d\d\d)/", "$2 $4 $6", $val);

            $val = strtolower($val);
	    $val = preg_replace("/[\.\",\-\/]/", "", $val);

	    $stopWords = array('st', 'street', 'ave', 'avenue',
	                       'dr', 'drive', 'pl', 'place', 'rd',
			       'road', 'usa'
	    );

	    $replacements = array('w'=>'west',
		'n'=>'north',
		'e'=>'east',
		's'=>'south',
		'1st'=>'first',
	        '2nd'=>'second'
            );

	    $val = preg_replace("/[ ]+/", " ", $val);

	    $val = $this->_replace($replacements, $val);
	    $val = $this->_delStopWords($stopWords, $val);

	}

	return trim($val);
    }

    private function _delStopWords($stopWords, $val) {
        foreach ($stopWords as $word) {
            $val =  preg_replace("/ $word( |$)/", " ", $val);
	}
	return $val;
    }

    private function _replace($replacements, $val) {
        foreach ($replacements as $key => $replacement) {
	    $val = preg_replace("/ $key( |$)/", " $replacement$1", $val);
	}

	return $val;
    }

}

 $slMerge = new SLMerge(null, true, true);

 $slMerge->merge();
Back to Top