
/vendor/gc/mark.c

http://github.com/feyeleanor/RubyGoLightly


   1
   2/*
   3 * Copyright 1988, 1989 Hans-J. Boehm, Alan J. Demers
   4 * Copyright (c) 1991-1995 by Xerox Corporation.  All rights reserved.
   5 * Copyright (c) 2000 by Hewlett-Packard Company.  All rights reserved.
   6 *
   7 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
   8 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
   9 *
  10 * Permission is hereby granted to use or copy this program
  11 * for any purpose,  provided the above notices are retained on all copies.
  12 * Permission to modify the code and to distribute modified code is granted,
  13 * provided the above notices are retained, and a notice that the code was
  14 * modified is included with the above copyright notice.
  15 *
  16 */
  17
  18
  19# include <stdio.h>
  20# include "private/gc_pmark.h"
  21
  22#if defined(MSWIN32) && defined(__GNUC__)
  23# include <excpt.h>
  24#endif
  25
  26/* We put this here to minimize the risk of inlining. */
  27/*VARARGS*/
  28#ifdef __WATCOMC__
  29  void GC_noop(void *p, ...) {}
  30#else
  31  void GC_noop() {}
  32#endif
  33
  34/* Single argument version, robust against whole program analysis. */
  35void GC_noop1(word x)
  36{
  37    static volatile word sink;
  38
  39    sink = x;
  40}
  41
  42/* mark_proc GC_mark_procs[MAX_MARK_PROCS] = {0} -- declared in gc_priv.h */
  43
  44unsigned GC_n_mark_procs = GC_RESERVED_MARK_PROCS;
  45
   46/* Initialize GC_obj_kinds and the standard free lists properly.  	*/
  47/* This must be done statically since they may be accessed before 	*/
  48/* GC_init is called.							*/
  49/* It's done here, since we need to deal with mark descriptors.		*/
  50struct obj_kind GC_obj_kinds[MAXOBJKINDS] = {
  51/* PTRFREE */ { &GC_aobjfreelist[0], 0 /* filled in dynamically */,
  52		0 | GC_DS_LENGTH, FALSE, FALSE },
  53/* NORMAL  */ { &GC_objfreelist[0], 0,
  54		0 | GC_DS_LENGTH,  /* Adjusted in GC_init_inner for EXTRA_BYTES */
  55		TRUE /* add length to descr */, TRUE },
  56/* UNCOLLECTABLE */
  57	      { &GC_uobjfreelist[0], 0,
  58		0 | GC_DS_LENGTH, TRUE /* add length to descr */, TRUE },
  59# ifdef ATOMIC_UNCOLLECTABLE
  60   /* AUNCOLLECTABLE */
  61	      { &GC_auobjfreelist[0], 0,
  62		0 | GC_DS_LENGTH, FALSE /* add length to descr */, FALSE },
  63# endif
  64# ifdef STUBBORN_ALLOC
  65/*STUBBORN*/ { &GC_sobjfreelist[0], 0,
  66		0 | GC_DS_LENGTH, TRUE /* add length to descr */, TRUE },
  67# endif
  68};
  69
  70# ifdef ATOMIC_UNCOLLECTABLE
  71#   ifdef STUBBORN_ALLOC
  72      unsigned GC_n_kinds = 5;
  73#   else
  74      unsigned GC_n_kinds = 4;
  75#   endif
  76# else
  77#   ifdef STUBBORN_ALLOC
  78      unsigned GC_n_kinds = 4;
  79#   else
  80      unsigned GC_n_kinds = 3;
  81#   endif
  82# endif
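/*
 * The descriptor values above ("0 | GC_DS_LENGTH") pack a byte count
 * together with a two-bit tag in the low-order bits.  The sketch below
 * shows how a simple length descriptor could be decoded; it assumes the
 * conventional gc_mark.h encoding (GC_DS_TAGS == 0x3, GC_DS_LENGTH == 0)
 * and is an illustration only, not code used by the collector.
 */
#if 0	/* illustrative sketch; not compiled */
  static unsigned long sketch_descr_tag(unsigned long descr)
  {
      return descr & 0x3;			/* low bits: GC_DS_TAGS	*/
  }

  static unsigned long sketch_descr_length(unsigned long descr)
  {
      /* Meaningful only when the tag is GC_DS_LENGTH (0): the		*/
      /* remaining bits give the number of bytes to scan for pointers.	*/
      return descr & ~(unsigned long)0x3;
  }
#endif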
  83
  84
  85# ifndef INITIAL_MARK_STACK_SIZE
  86#   define INITIAL_MARK_STACK_SIZE (1*HBLKSIZE)
  87		/* INITIAL_MARK_STACK_SIZE * sizeof(mse) should be a 	*/
  88		/* multiple of HBLKSIZE.				*/
  89		/* The incremental collector actually likes a larger	*/
   90		/* size, since it wants to push all marked dirty objs	*/
  91		/* before marking anything new.  Currently we let it	*/
  92		/* grow dynamically.					*/
  93# endif
  94
  95/*
  96 * Limits of stack for GC_mark routine.
  97 * All ranges between GC_mark_stack(incl.) and GC_mark_stack_top(incl.) still
  98 * need to be marked from.
  99 */
 100
 101word GC_n_rescuing_pages;	/* Number of dirty pages we marked from */
 102				/* excludes ptrfree pages, etc.		*/
 103
 104mse * GC_mark_stack;
 105
 106mse * GC_mark_stack_limit;
 107
 108size_t GC_mark_stack_size = 0;
 109 
 110#ifdef PARALLEL_MARK
 111# include "atomic_ops.h"
 112
 113  mse * volatile GC_mark_stack_top;
 114  /* Updated only with mark lock held, but read asynchronously.	*/
 115  volatile AO_t GC_first_nonempty;
 116  	/* Lowest entry on mark stack	*/
 117	/* that may be nonempty.	*/
 118	/* Updated only by initiating 	*/
 119	/* thread.			*/
 120#else
 121  mse * GC_mark_stack_top;
 122#endif
 123
 124static struct hblk * scan_ptr;
 125
 126mark_state_t GC_mark_state = MS_NONE;
 127
 128GC_bool GC_mark_stack_too_small = FALSE;
 129
 130GC_bool GC_objects_are_marked = FALSE;	/* Are there collectable marked	*/
 131					/* objects in the heap?		*/
 132
 133/* Is a collection in progress?  Note that this can return true in the	*/
 134/* nonincremental case, if a collection has been abandoned and the	*/
 135/* mark state is now MS_INVALID.					*/
 136GC_bool GC_collection_in_progress(void)
 137{
 138    return(GC_mark_state != MS_NONE);
 139}
 140
 141/* clear all mark bits in the header */
 142void GC_clear_hdr_marks(hdr *hhdr)
 143{
 144    size_t last_bit = FINAL_MARK_BIT(hhdr -> hb_sz);
 145
 146#   ifdef USE_MARK_BYTES
 147      BZERO(hhdr -> hb_marks, MARK_BITS_SZ);
 148      hhdr -> hb_marks[last_bit] = 1;
 149#   else
 150      BZERO(hhdr -> hb_marks, MARK_BITS_SZ*sizeof(word));
 151      set_mark_bit_from_hdr(hhdr, last_bit);
 152#   endif
 153    hhdr -> hb_n_marks = 0;
 154}
 155
 156/* Set all mark bits in the header.  Used for uncollectable blocks. */
 157void GC_set_hdr_marks(hdr *hhdr)
 158{
 159    unsigned i;
 160    size_t sz = hhdr -> hb_sz;
 161    size_t n_marks = FINAL_MARK_BIT(sz);
 162
 163#   ifdef USE_MARK_BYTES
 164      for (i = 0; i <= n_marks; i += MARK_BIT_OFFSET(sz)) {
 165    	hhdr -> hb_marks[i] = 1;
 166      }
 167#   else
 168      for (i = 0; i < divWORDSZ(n_marks + WORDSZ); ++i) {
 169    	hhdr -> hb_marks[i] = ONES;
 170      }
 171#   endif
 172#   ifdef MARK_BIT_PER_OBJ
 173      hhdr -> hb_n_marks = n_marks - 1;
 174#   else
 175      hhdr -> hb_n_marks = HBLK_OBJS(sz);
 176#   endif
 177}
 178
 179/*
 180 * Clear all mark bits associated with block h.
 181 */
 182/*ARGSUSED*/
 183static void clear_marks_for_block(struct hblk *h, word dummy)
 184{
 185    register hdr * hhdr = HDR(h);
 186    
 187    if (IS_UNCOLLECTABLE(hhdr -> hb_obj_kind)) return;
 188        /* Mark bit for these is cleared only once the object is 	*/
 189        /* explicitly deallocated.  This either frees the block, or	*/
 190        /* the bit is cleared once the object is on the free list.	*/
 191    GC_clear_hdr_marks(hhdr);
 192}
 193
 194/* Slow but general routines for setting/clearing/asking about mark bits */
 195void GC_set_mark_bit(ptr_t p)
 196{
 197    struct hblk *h = HBLKPTR(p);
 198    hdr * hhdr = HDR(h);
 199    word bit_no = MARK_BIT_NO(p - (ptr_t)h, hhdr -> hb_sz);
 200    
 201    if (!mark_bit_from_hdr(hhdr, bit_no)) {
 202      set_mark_bit_from_hdr(hhdr, bit_no);
 203      ++hhdr -> hb_n_marks;
 204    }
 205}
 206
 207void GC_clear_mark_bit(ptr_t p)
 208{
 209    struct hblk *h = HBLKPTR(p);
 210    hdr * hhdr = HDR(h);
 211    word bit_no = MARK_BIT_NO(p - (ptr_t)h, hhdr -> hb_sz);
 212    
 213    if (mark_bit_from_hdr(hhdr, bit_no)) {
 214      size_t n_marks;
 215      clear_mark_bit_from_hdr(hhdr, bit_no);
 216      n_marks = hhdr -> hb_n_marks - 1;
 217#     ifdef PARALLEL_MARK
 218        if (n_marks != 0)
 219          hhdr -> hb_n_marks = n_marks; 
 220        /* Don't decrement to zero.  The counts are approximate due to	*/
 221        /* concurrency issues, but we need to ensure that a count of 	*/
 222        /* zero implies an empty block.					*/
 223#     else
 224          hhdr -> hb_n_marks = n_marks; 
 225#     endif
 226    }
 227}
 228
 229GC_bool GC_is_marked(ptr_t p)
 230{
 231    struct hblk *h = HBLKPTR(p);
 232    hdr * hhdr = HDR(h);
 233    word bit_no = MARK_BIT_NO(p - (ptr_t)h, hhdr -> hb_sz);
 234    
 235    return((GC_bool)mark_bit_from_hdr(hhdr, bit_no));
 236}
 237
 238
 239/*
 240 * Clear mark bits in all allocated heap blocks.  This invalidates
 241 * the marker invariant, and sets GC_mark_state to reflect this.
 242 * (This implicitly starts marking to reestablish the invariant.)
 243 */
 244void GC_clear_marks(void)
 245{
 246    GC_apply_to_all_blocks(clear_marks_for_block, (word)0);
 247    GC_objects_are_marked = FALSE;
 248    GC_mark_state = MS_INVALID;
 249    scan_ptr = 0;
 250}
 251
 252/* Initiate a garbage collection.  Initiates a full collection if the	*/
 253/* mark	state is invalid.						*/
 254/*ARGSUSED*/
 255void GC_initiate_gc(void)
 256{
 257    if (GC_dirty_maintained) GC_read_dirty();
 258#   ifdef STUBBORN_ALLOC
 259    	GC_read_changed();
 260#   endif
 261#   ifdef CHECKSUMS
 262	{
 263	    extern void GC_check_dirty();
 264	    
 265	    if (GC_dirty_maintained) GC_check_dirty();
 266	}
 267#   endif
 268    GC_n_rescuing_pages = 0;
 269    if (GC_mark_state == MS_NONE) {
 270        GC_mark_state = MS_PUSH_RESCUERS;
 271    } else if (GC_mark_state != MS_INVALID) {
 272    	ABORT("unexpected state");
 273    } /* else this is really a full collection, and mark	*/
 274      /* bits are invalid.					*/
 275    scan_ptr = 0;
 276}
 277
 278
 279static void alloc_mark_stack(size_t);
 280
 281# if defined(MSWIN32) || defined(USE_PROC_FOR_LIBRARIES) && defined(THREADS)
 282    /* Under rare conditions, we may end up marking from nonexistent memory. */
 283    /* Hence we need to be prepared to recover by running GC_mark_some	     */
 284    /* with a suitable handler in place.				     */
 285#   define WRAP_MARK_SOME
 286# endif
 287
 288/* Perform a small amount of marking.			*/
 289/* We try to touch roughly a page of memory.		*/
 290/* Return TRUE if we just finished a mark phase.	*/
 291/* Cold_gc_frame is an address inside a GC frame that	*/
 292/* remains valid until all marking is complete.		*/
 293/* A zero value indicates that it's OK to miss some	*/
 294/* register values.					*/
 295/* We hold the allocation lock.  In the case of 	*/
 296/* incremental collection, the world may not be stopped.*/
 297#ifdef WRAP_MARK_SOME
 298  /* For win32, this is called after we establish a structured	*/
 299  /* exception handler, in case Windows unmaps one of our root	*/
 300  /* segments.  See below.  In either case, we acquire the 	*/
 301  /* allocator lock long before we get here.			*/
 302  GC_bool GC_mark_some_inner(ptr_t cold_gc_frame)
 303#else
 304  GC_bool GC_mark_some(ptr_t cold_gc_frame)
 305#endif
 306{
 307    switch(GC_mark_state) {
 308    	case MS_NONE:
 309    	    return(FALSE);
 310    	    
 311    	case MS_PUSH_RESCUERS:
 312    	    if (GC_mark_stack_top
 313    	        >= GC_mark_stack_limit - INITIAL_MARK_STACK_SIZE/2) {
 314		/* Go ahead and mark, even though that might cause us to */
 315		/* see more marked dirty objects later on.  Avoid this	 */
 316		/* in the future.					 */
 317		GC_mark_stack_too_small = TRUE;
 318    	        MARK_FROM_MARK_STACK();
 319    	        return(FALSE);
 320    	    } else {
 321    	        scan_ptr = GC_push_next_marked_dirty(scan_ptr);
 322    	        if (scan_ptr == 0) {
 323		    if (GC_print_stats) {
  324			GC_log_printf("Marked from %lu dirty pages\n",
  325				      (unsigned long)GC_n_rescuing_pages);
 326		    }
 327    	    	    GC_push_roots(FALSE, cold_gc_frame);
 328    	    	    GC_objects_are_marked = TRUE;
 329    	    	    if (GC_mark_state != MS_INVALID) {
 330    	    	        GC_mark_state = MS_ROOTS_PUSHED;
 331    	    	    }
 332    	    	}
 333    	    }
 334    	    return(FALSE);
 335    	
 336    	case MS_PUSH_UNCOLLECTABLE:
 337    	    if (GC_mark_stack_top
 338    	        >= GC_mark_stack + GC_mark_stack_size/4) {
 339#		ifdef PARALLEL_MARK
 340		  /* Avoid this, since we don't parallelize the marker	*/
 341		  /* here.						*/
 342		  if (GC_parallel) GC_mark_stack_too_small = TRUE;
 343#		endif
 344    	        MARK_FROM_MARK_STACK();
 345    	        return(FALSE);
 346    	    } else {
 347    	        scan_ptr = GC_push_next_marked_uncollectable(scan_ptr);
 348    	        if (scan_ptr == 0) {
 349    	    	    GC_push_roots(TRUE, cold_gc_frame);
 350    	    	    GC_objects_are_marked = TRUE;
 351    	    	    if (GC_mark_state != MS_INVALID) {
 352    	    	        GC_mark_state = MS_ROOTS_PUSHED;
 353    	    	    }
 354    	    	}
 355    	    }
 356    	    return(FALSE);
 357    	
 358    	case MS_ROOTS_PUSHED:
 359#	    ifdef PARALLEL_MARK
 360	      /* In the incremental GC case, this currently doesn't	*/
 361	      /* quite do the right thing, since it runs to		*/
 362	      /* completion.  On the other hand, starting a		*/
 363	      /* parallel marker is expensive, so perhaps it is		*/
 364	      /* the right thing?					*/
 365	      /* Eventually, incremental marking should run		*/
 366	      /* asynchronously in multiple threads, without grabbing	*/
 367	      /* the allocation lock.					*/
 368	        if (GC_parallel) {
 369		  GC_do_parallel_mark();
 370		  GC_ASSERT(GC_mark_stack_top < (mse *)GC_first_nonempty);
 371		  GC_mark_stack_top = GC_mark_stack - 1;
 372    	          if (GC_mark_stack_too_small) {
 373    	            alloc_mark_stack(2*GC_mark_stack_size);
 374    	          }
 375		  if (GC_mark_state == MS_ROOTS_PUSHED) {
 376    	            GC_mark_state = MS_NONE;
 377    	            return(TRUE);
 378		  } else {
 379		    return(FALSE);
 380	          }
 381		}
 382#	    endif
 383    	    if (GC_mark_stack_top >= GC_mark_stack) {
 384    	        MARK_FROM_MARK_STACK();
 385    	        return(FALSE);
 386    	    } else {
 387    	        GC_mark_state = MS_NONE;
 388    	        if (GC_mark_stack_too_small) {
 389    	            alloc_mark_stack(2*GC_mark_stack_size);
 390    	        }
 391    	        return(TRUE);
 392    	    }
 393    	    
 394    	case MS_INVALID:
 395    	case MS_PARTIALLY_INVALID:
 396	    if (!GC_objects_are_marked) {
 397		GC_mark_state = MS_PUSH_UNCOLLECTABLE;
 398		return(FALSE);
 399	    }
 400    	    if (GC_mark_stack_top >= GC_mark_stack) {
 401    	        MARK_FROM_MARK_STACK();
 402    	        return(FALSE);
 403    	    }
 404    	    if (scan_ptr == 0 && GC_mark_state == MS_INVALID) {
 405		/* About to start a heap scan for marked objects. */
 406		/* Mark stack is empty.  OK to reallocate.	  */
 407		if (GC_mark_stack_too_small) {
 408    	            alloc_mark_stack(2*GC_mark_stack_size);
 409		}
 410		GC_mark_state = MS_PARTIALLY_INVALID;
 411    	    }
 412    	    scan_ptr = GC_push_next_marked(scan_ptr);
 413    	    if (scan_ptr == 0 && GC_mark_state == MS_PARTIALLY_INVALID) {
 414    	    	GC_push_roots(TRUE, cold_gc_frame);
 415    	    	GC_objects_are_marked = TRUE;
 416    	    	if (GC_mark_state != MS_INVALID) {
 417    	    	    GC_mark_state = MS_ROOTS_PUSHED;
 418    	    	}
 419    	    }
 420    	    return(FALSE);
 421    	default:
 422    	    ABORT("GC_mark_some: bad state");
 423    	    return(FALSE);
 424    }
 425}
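/*
 * GC_mark_some is meant to be called repeatedly, doing roughly a page of
 * marking work per call, until it returns TRUE at the end of a mark phase.
 * A minimal sketch of such a driver loop follows; the collector's real
 * driver lives elsewhere in the allocation/collection code, and passing a
 * zero cold_gc_frame means it is acceptable to miss some register values,
 * as described above.
 */
#if 0	/* illustrative sketch; not compiled */
  static void sketch_finish_mark_phase(ptr_t cold_gc_frame)
  {
      /* Each call performs a small increment of marking work.		*/
      while (!GC_mark_some(cold_gc_frame)) {
	  /* An incremental collector could yield to the mutator here	*/
	  /* and resume marking later.					*/
      }
  }
#endif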
 426
 427
 428#if defined(MSWIN32) && defined(__GNUC__)
 429
 430    typedef struct {
 431      EXCEPTION_REGISTRATION ex_reg;
 432      void *alt_path;
 433    } ext_ex_regn;
 434
 435
 436    static EXCEPTION_DISPOSITION mark_ex_handler(
 437        struct _EXCEPTION_RECORD *ex_rec, 
 438        void *est_frame,
 439        struct _CONTEXT *context,
 440        void *disp_ctxt)
 441    {
 442        if (ex_rec->ExceptionCode == STATUS_ACCESS_VIOLATION) {
 443          ext_ex_regn *xer = (ext_ex_regn *)est_frame;
 444
 445          /* Unwind from the inner function assuming the standard */
 446          /* function prologue.                                   */
 447          /* Assumes code has not been compiled with              */
 448          /* -fomit-frame-pointer.                                */
 449          context->Esp = context->Ebp;
 450          context->Ebp = *((DWORD *)context->Esp);
 451          context->Esp = context->Esp - 8;
 452
 453          /* Resume execution at the "real" handler within the    */
 454          /* wrapper function.                                    */
 455          context->Eip = (DWORD )(xer->alt_path);
 456
 457          return ExceptionContinueExecution;
 458
 459        } else {
 460            return ExceptionContinueSearch;
 461        }
 462    }
 463# endif /* __GNUC__  && MSWIN32 */
 464
 465#ifdef GC_WIN32_THREADS
 466  extern GC_bool GC_started_thread_while_stopped(void);
 467  /* In win32_threads.c.  Did we invalidate mark phase with an	*/
 468  /* unexpected thread start?					*/
 469#endif
 470
 471# ifdef WRAP_MARK_SOME
 472  GC_bool GC_mark_some(ptr_t cold_gc_frame)
 473  {
 474      GC_bool ret_val;
 475
 476#   ifdef MSWIN32
 477#    ifndef __GNUC__
 478      /* Windows 98 appears to asynchronously create and remove  */
 479      /* writable memory mappings, for reasons we haven't yet    */
 480      /* understood.  Since we look for writable regions to      */
 481      /* determine the root set, we may try to mark from an      */
 482      /* address range that disappeared since we started the     */
 483      /* collection.  Thus we have to recover from faults here.  */
 484      /* This code does not appear to be necessary for Windows   */
 485      /* 95/NT/2000. Note that this code should never generate   */
 486      /* an incremental GC write fault.                          */
 487      /* It's conceivable that this is the same issue with	 */
 488      /* terminating threads that we see with Linux and		 */
 489      /* USE_PROC_FOR_LIBRARIES.				 */
 490
 491      __try {
 492          ret_val = GC_mark_some_inner(cold_gc_frame);
 493      } __except (GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION ?
 494                EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH) {
 495	  goto handle_ex;
 496      }
 497#     ifdef GC_WIN32_THREADS
 498	/* With DllMain-based thread tracking, a thread may have	*/
 499	/* started while we were marking.  This is logically equivalent	*/
 500	/* to the exception case; our results are invalid and we have	*/
 501	/* to start over.  This cannot be prevented since we can't	*/
 502	/* block in DllMain.						*/
 503	if (GC_started_thread_while_stopped()) goto handle_ex;
 504#     endif
 505     rm_handler:
 506      return ret_val;
 507
 508#    else /* __GNUC__ */
 509
 510      /* Manually install an exception handler since GCC does    */
 511      /* not yet support Structured Exception Handling (SEH) on  */
 512      /* Win32.                                                  */
 513
 514      ext_ex_regn er;
 515
 516      er.alt_path = &&handle_ex;
 517      er.ex_reg.handler = mark_ex_handler;
 518      asm volatile ("movl %%fs:0, %0" : "=r" (er.ex_reg.prev));
 519      asm volatile ("movl %0, %%fs:0" : : "r" (&er));
 520      ret_val = GC_mark_some_inner(cold_gc_frame);
 521      /* Prevent GCC from considering the following code unreachable */
 522      /* and thus eliminating it.                                    */
 523        if (er.alt_path == 0)
 524          goto handle_ex;
 525    rm_handler:
 526      /* Uninstall the exception handler */
 527      asm volatile ("mov %0, %%fs:0" : : "r" (er.ex_reg.prev));
 528      return ret_val;
 529
 530#    endif /* __GNUC__ */
 531#   else /* !MSWIN32 */
 532      /* Here we are handling the case in which /proc is used for root	*/
 533      /* finding, and we have threads.  We may find a stack for a	*/
 534      /* thread that is in the process of exiting, and disappears	*/
 535      /* while we are marking it.  This seems extremely difficult to	*/
 536      /* avoid otherwise.						*/
 537      if (GC_incremental)
 538	      WARN("Incremental GC incompatible with /proc roots\n", 0);
 539      	/* I'm not sure if this could still work ...	*/
 540      GC_setup_temporary_fault_handler();
 541      if(SETJMP(GC_jmp_buf) != 0) goto handle_ex;
 542      ret_val = GC_mark_some_inner(cold_gc_frame);
 543    rm_handler:
 544      GC_reset_fault_handler();
 545      return ret_val;
 546      
 547#   endif /* !MSWIN32 */
 548
 549handle_ex:
 550    /* Exception handler starts here for all cases. */
 551      if (GC_print_stats) {
 552        GC_log_printf("Caught ACCESS_VIOLATION in marker. "
 553		      "Memory mapping disappeared.\n");
 554      }
 555
 556      /* We have bad roots on the stack.  Discard mark stack.	*/
 557      /* Rescan from marked objects.  Redetermine roots.	*/
 558      GC_invalidate_mark_state();	
 559      scan_ptr = 0;
 560
 561      ret_val = FALSE;
  562      goto rm_handler;  /* Back to platform-specific code. */
 563  }
 564#endif /* WRAP_MARK_SOME */
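/*
 * The non-Windows branch above recovers from faults on disappearing root
 * ranges by combining GC_setup_temporary_fault_handler with
 * SETJMP(GC_jmp_buf).  The sketch below shows the generic POSIX form of
 * that pattern (sigaction + sigsetjmp/siglongjmp); the GC's own macros
 * wrap roughly this machinery, with platform-specific differences.
 */
#if 0	/* illustrative sketch; not compiled */
  #include <setjmp.h>
  #include <signal.h>

  static sigjmp_buf sketch_jb;

  static void sketch_segv_handler(int sig)
  {
      (void)sig;
      siglongjmp(sketch_jb, 1);		/* Unwind back to the guarded read. */
  }

  /* Returns 1 and stores *addr into *result on success, 0 if the read	*/
  /* faulted because the mapping vanished.				*/
  static int sketch_read_word_safely(unsigned long *addr, unsigned long *result)
  {
      struct sigaction sa, old_sa;
      int ok;

      sa.sa_handler = sketch_segv_handler;
      sigemptyset(&sa.sa_mask);
      sa.sa_flags = 0;
      sigaction(SIGSEGV, &sa, &old_sa);
      if (sigsetjmp(sketch_jb, 1) == 0) {
	  *result = *addr;
	  ok = 1;
      } else {
	  ok = 0;			/* Caller must rescan, as in handle_ex. */
      }
      sigaction(SIGSEGV, &old_sa, 0);	/* Uninstall the temporary handler. */
      return ok;
  }
#endif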
 565
 566
 567GC_bool GC_mark_stack_empty(void)
 568{
 569    return(GC_mark_stack_top < GC_mark_stack);
 570}	
 571
 572void GC_invalidate_mark_state(void)
 573{
 574    GC_mark_state = MS_INVALID;
 575    GC_mark_stack_top = GC_mark_stack-1;
 576}
 577
 578mse * GC_signal_mark_stack_overflow(mse *msp)
 579{
 580    GC_mark_state = MS_INVALID;
 581    GC_mark_stack_too_small = TRUE;
 582    if (GC_print_stats) {
 583	GC_log_printf("Mark stack overflow; current size = %lu entries\n",
  584	    	      (unsigned long)GC_mark_stack_size);
 585    }
 586    return(msp - GC_MARK_STACK_DISCARDS);
 587}
 588
 589/*
 590 * Mark objects pointed to by the regions described by
 591 * mark stack entries between mark_stack and mark_stack_top,
 592 * inclusive.  Assumes the upper limit of a mark stack entry
 593 * is never 0.  A mark stack entry never has size 0.
 594 * We try to traverse on the order of a hblk of memory before we return.
 595 * Caller is responsible for calling this until the mark stack is empty.
 596 * Note that this is the most performance critical routine in the
 597 * collector.  Hence it contains all sorts of ugly hacks to speed
 598 * things up.  In particular, we avoid procedure calls on the common
 599 * path, we take advantage of peculiarities of the mark descriptor
 600 * encoding, we optionally maintain a cache for the block address to
 601 * header mapping, we prefetch when an object is "grayed", etc. 
 602 */
 603mse * GC_mark_from(mse *mark_stack_top, mse *mark_stack, mse *mark_stack_limit)
 604{
 605  signed_word credit = HBLKSIZE;  /* Remaining credit for marking work	*/
 606  ptr_t current_p;	/* Pointer to current candidate ptr.	*/
 607  word current;	/* Candidate pointer.			*/
 608  ptr_t limit;	/* (Incl) limit of current candidate 	*/
 609  				/* range				*/
 610  word descr;
 611  ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
 612  ptr_t least_ha = GC_least_plausible_heap_addr;
 613  DECLARE_HDR_CACHE;
 614
 615# define SPLIT_RANGE_WORDS 128  /* Must be power of 2.		*/
 616
 617  GC_objects_are_marked = TRUE;
 618  INIT_HDR_CACHE;
 619# ifdef OS2 /* Use untweaked version to circumvent compiler problem */
 620  while (mark_stack_top >= mark_stack && credit >= 0) {
 621# else
 622  while ((((ptr_t)mark_stack_top - (ptr_t)mark_stack) | credit)
 623  	>= 0) {
 624# endif
 625    current_p = mark_stack_top -> mse_start;
 626    descr = mark_stack_top -> mse_descr;
 627  retry:
 628    /* current_p and descr describe the current object.		*/
 629    /* *mark_stack_top is vacant.				*/
 630    /* The following is 0 only for small objects described by a simple	*/
 631    /* length descriptor.  For many applications this is the common	*/
 632    /* case, so we try to detect it quickly.				*/
 633    if (descr & ((~(WORDS_TO_BYTES(SPLIT_RANGE_WORDS) - 1)) | GC_DS_TAGS)) {
 634      word tag = descr & GC_DS_TAGS;
 635      
 636      switch(tag) {
 637        case GC_DS_LENGTH:
 638          /* Large length.					        */
 639          /* Process part of the range to avoid pushing too much on the	*/
 640          /* stack.							*/
 641	  GC_ASSERT(descr < (word)GC_greatest_plausible_heap_addr
 642			    - (word)GC_least_plausible_heap_addr);
 643#	  ifdef ENABLE_TRACE
 644	    if (GC_trace_addr >= current_p
 645		&& GC_trace_addr < current_p + descr) {
 646		GC_log_printf("GC:%d Large section; start %p len %lu\n",
 647			      GC_gc_no, current_p, (unsigned long) descr);
 648	    }
 649#	  endif /* ENABLE_TRACE */
 650#	  ifdef PARALLEL_MARK
 651#	    define SHARE_BYTES 2048
 652	    if (descr > SHARE_BYTES && GC_parallel
 653		&& mark_stack_top < mark_stack_limit - 1) {
 654	      int new_size = (descr/2) & ~(sizeof(word)-1);
 655	      mark_stack_top -> mse_start = current_p;
 656	      mark_stack_top -> mse_descr = new_size + sizeof(word);
 657					/* makes sure we handle 	*/
 658					/* misaligned pointers.		*/
 659	      mark_stack_top++;
 660#	      ifdef ENABLE_TRACE
 661	        if (GC_trace_addr >= current_p
 662		    && GC_trace_addr < current_p + descr) {
 663		    GC_log_printf("GC:%d splitting (parallel) %p at %p\n",
 664				  GC_gc_no, current_p, current_p + new_size);
 665	        }
 666#	      endif /* ENABLE_TRACE */
 667	      current_p += new_size;
 668	      descr -= new_size;
 669	      goto retry;
 670	    }
 671#	  endif /* PARALLEL_MARK */
 672          mark_stack_top -> mse_start =
 673         	limit = current_p + WORDS_TO_BYTES(SPLIT_RANGE_WORDS-1);
 674          mark_stack_top -> mse_descr =
 675          		descr - WORDS_TO_BYTES(SPLIT_RANGE_WORDS-1);
 676#	  ifdef ENABLE_TRACE
 677	    if (GC_trace_addr >= current_p
 678		&& GC_trace_addr < current_p + descr) {
 679		GC_log_printf("GC:%d splitting %p at %p\n",
 680			      GC_gc_no, current_p, limit);
 681	    }
 682#	  endif /* ENABLE_TRACE */
 683          /* Make sure that pointers overlapping the two ranges are	*/
 684          /* considered. 						*/
 685          limit += sizeof(word) - ALIGNMENT;
 686          break;
 687        case GC_DS_BITMAP:
 688          mark_stack_top--;
 689#	  ifdef ENABLE_TRACE
 690	    if (GC_trace_addr >= current_p
 691		&& GC_trace_addr < current_p + WORDS_TO_BYTES(WORDSZ-2)) {
 692		GC_log_printf("GC:%d Tracing from %p bitmap descr %lu\n",
 693			      GC_gc_no, current_p, (unsigned long) descr);
 694	    }
 695#	  endif /* ENABLE_TRACE */
 696          descr &= ~GC_DS_TAGS;
 697          credit -= WORDS_TO_BYTES(WORDSZ/2); /* guess */
 698          while (descr != 0) {
 699            if ((signed_word)descr < 0) {
 700              current = *(word *)current_p;
 701	      FIXUP_POINTER(current);
 702	      if ((ptr_t)current >= least_ha && (ptr_t)current < greatest_ha) {
 703		PREFETCH((ptr_t)current);
 704#               ifdef ENABLE_TRACE
 705	          if (GC_trace_addr == current_p) {
 706	            GC_log_printf("GC:%d Considering(3) %p -> %p\n",
 707			          GC_gc_no, current_p, (ptr_t) current);
 708	          }
 709#               endif /* ENABLE_TRACE */
 710                PUSH_CONTENTS((ptr_t)current, mark_stack_top,
 711			      mark_stack_limit, current_p, exit1);
 712	      }
 713            }
 714	    descr <<= 1;
 715	    current_p += sizeof(word);
 716          }
 717          continue;
 718        case GC_DS_PROC:
 719          mark_stack_top--;
 720#	  ifdef ENABLE_TRACE
 721	    if (GC_trace_addr >= current_p
 722		&& GC_base(current_p) != 0
 723		&& GC_base(current_p) == GC_base(GC_trace_addr)) {
 724		GC_log_printf("GC:%d Tracing from %p proc descr %lu\n",
 725			      GC_gc_no, current_p, (unsigned long) descr);
 726	    }
 727#	  endif /* ENABLE_TRACE */
 728          credit -= GC_PROC_BYTES;
 729          mark_stack_top =
 730              (*PROC(descr))
 731              	    ((word *)current_p, mark_stack_top,
 732              	    mark_stack_limit, ENV(descr));
 733          continue;
 734        case GC_DS_PER_OBJECT:
 735	  if ((signed_word)descr >= 0) {
 736	    /* Descriptor is in the object.	*/
 737            descr = *(word *)(current_p + descr - GC_DS_PER_OBJECT);
 738	  } else {
 739	    /* Descriptor is in type descriptor pointed to by first	*/
 740	    /* word in object.						*/
 741	    ptr_t type_descr = *(ptr_t *)current_p;
 742	    /* type_descr is either a valid pointer to the descriptor	*/
  743	    /* structure, or this object was on a free list.  If it	*/
  744	    /* was anything but the last object on the free list,	*/
 745	    /* we will misinterpret the next object on the free list as */
 746	    /* the type descriptor, and get a 0 GC descriptor, which	*/
 747	    /* is ideal.  Unfortunately, we need to check for the last	*/
 748	    /* object case explicitly.					*/
 749	    if (0 == type_descr) {
 750		/* Rarely executed.	*/
 751		mark_stack_top--;
 752		continue;
 753	    }
 754            descr = *(word *)(type_descr
 755			      - (descr + (GC_INDIR_PER_OBJ_BIAS
 756					  - GC_DS_PER_OBJECT)));
 757	  }
 758	  if (0 == descr) {
 759	      /* Can happen either because we generated a 0 descriptor	*/
 760	      /* or we saw a pointer to a free object.		*/
 761	      mark_stack_top--;
 762	      continue;
 763	  }
 764          goto retry;
 765      }
 766    } else /* Small object with length descriptor */ {
 767      mark_stack_top--;
 768      limit = current_p + (word)descr;
 769    }
 770#   ifdef ENABLE_TRACE
 771	if (GC_trace_addr >= current_p
 772	    && GC_trace_addr < limit) {
 773	    GC_log_printf("GC:%d Tracing from %p len %lu\n",
 774			  GC_gc_no, current_p, (unsigned long) descr);
 775	}
 776#   endif /* ENABLE_TRACE */
 777    /* The simple case in which we're scanning a range.	*/
 778    GC_ASSERT(!((word)current_p & (ALIGNMENT-1)));
 779    credit -= limit - current_p;
 780    limit -= sizeof(word);
 781    {
 782#     define PREF_DIST 4
 783
 784#     ifndef SMALL_CONFIG
 785        word deferred;
 786
 787	/* Try to prefetch the next pointer to be examined asap.	*/
 788	/* Empirically, this also seems to help slightly without	*/
 789	/* prefetches, at least on linux/X86.  Presumably this loop 	*/
 790	/* ends up with less register pressure, and gcc thus ends up 	*/
 791	/* generating slightly better code.  Overall gcc code quality	*/
 792	/* for this loop is still not great.				*/
 793	for(;;) {
 794	  PREFETCH(limit - PREF_DIST*CACHE_LINE_SIZE);
 795	  GC_ASSERT(limit >= current_p);
 796	  deferred = *(word *)limit;
 797	  FIXUP_POINTER(deferred);
 798	  limit -= ALIGNMENT;
 799	  if ((ptr_t)deferred >= least_ha && (ptr_t)deferred <  greatest_ha) {
 800	    PREFETCH((ptr_t)deferred);
 801	    break;
 802	  }
 803	  if (current_p > limit) goto next_object;
 804	  /* Unroll once, so we don't do too many of the prefetches 	*/
 805	  /* based on limit.						*/
 806	  deferred = *(word *)limit;
 807	  FIXUP_POINTER(deferred);
 808	  limit -= ALIGNMENT;
 809	  if ((ptr_t)deferred >= least_ha && (ptr_t)deferred <  greatest_ha) {
 810	    PREFETCH((ptr_t)deferred);
 811	    break;
 812	  }
 813	  if (current_p > limit) goto next_object;
 814	}
 815#     endif
 816
 817      while (current_p <= limit) {
 818	/* Empirically, unrolling this loop doesn't help a lot.	*/
 819	/* Since PUSH_CONTENTS expands to a lot of code,	*/
 820	/* we don't.						*/
 821        current = *(word *)current_p;
 822	FIXUP_POINTER(current);
 823        PREFETCH(current_p + PREF_DIST*CACHE_LINE_SIZE);
 824        if ((ptr_t)current >= least_ha && (ptr_t)current <  greatest_ha) {
 825  	  /* Prefetch the contents of the object we just pushed.  It's	*/
 826  	  /* likely we will need them soon.				*/
 827  	  PREFETCH((ptr_t)current);
 828#         ifdef ENABLE_TRACE
 829	    if (GC_trace_addr == current_p) {
 830	        GC_log_printf("GC:%d Considering(1) %p -> %p\n",
 831			      GC_gc_no, current_p, (ptr_t) current);
 832	    }
 833#         endif /* ENABLE_TRACE */
 834          PUSH_CONTENTS((ptr_t)current, mark_stack_top,
 835  		           mark_stack_limit, current_p, exit2);
 836        }
 837        current_p += ALIGNMENT;
 838      }
 839
 840#     ifndef SMALL_CONFIG
 841	/* We still need to mark the entry we previously prefetched.	*/
 842	/* We already know that it passes the preliminary pointer	*/
 843	/* validity test.						*/
 844#       ifdef ENABLE_TRACE
 845	    if (GC_trace_addr == current_p) {
 846	        GC_log_printf("GC:%d Considering(2) %p -> %p\n",
 847			      GC_gc_no, current_p, (ptr_t) deferred);
 848	    }
 849#       endif /* ENABLE_TRACE */
 850        PUSH_CONTENTS((ptr_t)deferred, mark_stack_top,
 851  		         mark_stack_limit, current_p, exit4);
 852	next_object:;
 853#     endif
 854    }
 855  }
 856  return mark_stack_top;
 857}
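/*
 * The GC_DS_BITMAP case above walks the descriptor most-significant-bit
 * first: the top bit corresponds to the first word of the object, and the
 * descriptor is shifted left once per word.  The standalone sketch below
 * reproduces just that loop shape, with unsigned long standing in for word.
 */
#if 0	/* illustrative sketch; not compiled */
  static void sketch_scan_bitmap(unsigned long *obj, unsigned long bitmap)
  {
      /* bitmap is assumed to have its GC_DS_TAGS bits already cleared.	*/
      while (bitmap != 0) {
	  if ((long)bitmap < 0) {	/* Top bit set: word may be a pointer. */
	      unsigned long candidate = *obj;
	      /* GC_mark_from range-checks candidate against the	*/
	      /* plausible heap bounds and then PUSH_CONTENTS()es it.	*/
	      (void)candidate;
	  }
	  bitmap <<= 1;			/* Advance to the next word/bit. */
	  ++obj;
      }
  }
#endif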
 858
 859#ifdef PARALLEL_MARK
 860
 861/* We assume we have an ANSI C Compiler.	*/
 862GC_bool GC_help_wanted = FALSE;
 863unsigned GC_helper_count = 0;
 864unsigned GC_active_count = 0;
 865word GC_mark_no = 0;
 866
 867#define LOCAL_MARK_STACK_SIZE HBLKSIZE
 868	/* Under normal circumstances, this is big enough to guarantee	*/
  869	/* we don't overflow half of it in a single call to 		*/
 870	/* GC_mark_from.						*/
 871
 872
 873/* Steal mark stack entries starting at mse low into mark stack local	*/
 874/* until we either steal mse high, or we have max entries.		*/
 875/* Return a pointer to the top of the local mark stack.		        */
 876/* *next is replaced by a pointer to the next unscanned mark stack	*/
 877/* entry.								*/
 878mse * GC_steal_mark_stack(mse * low, mse * high, mse * local,
 879			  unsigned max, mse **next)
 880{
 881    mse *p;
 882    mse *top = local - 1;
 883    unsigned i = 0;
 884
 885    GC_ASSERT(high >= low-1 && high - low + 1 <= GC_mark_stack_size);
 886    for (p = low; p <= high && i <= max; ++p) {
 887	word descr = AO_load((volatile AO_t *) &(p -> mse_descr));
 888	if (descr != 0) {
 889	    /* Must be ordered after read of descr: */
 890	    AO_store_release_write((volatile AO_t *) &(p -> mse_descr), 0);
 891	    /* More than one thread may get this entry, but that's only */
 892	    /* a minor performance problem.				*/
 893	    ++top;
 894	    top -> mse_descr = descr;
 895	    top -> mse_start = p -> mse_start;
 896	    GC_ASSERT((top -> mse_descr & GC_DS_TAGS) != GC_DS_LENGTH || 
 897		      top -> mse_descr < (ptr_t)GC_greatest_plausible_heap_addr
 898			                 - (ptr_t)GC_least_plausible_heap_addr);
 899	    /* If this is a big object, count it as			*/
 900	    /* size/256 + 1 objects.					*/
 901	    ++i;
 902	    if ((descr & GC_DS_TAGS) == GC_DS_LENGTH) i += (descr >> 8);
 903	}
 904    }
 905    *next = p;
 906    return top;
 907}
 908
 909/* Copy back a local mark stack.	*/
 910/* low and high are inclusive bounds.	*/
 911void GC_return_mark_stack(mse * low, mse * high)
 912{
 913    mse * my_top;
 914    mse * my_start;
 915    size_t stack_size;
 916
 917    if (high < low) return;
 918    stack_size = high - low + 1;
 919    GC_acquire_mark_lock();
 920    my_top = GC_mark_stack_top;	/* Concurrent modification impossible. */
 921    my_start = my_top + 1;
 922    if (my_start - GC_mark_stack + stack_size > GC_mark_stack_size) {
 923      if (GC_print_stats) {
 924	  GC_log_printf("No room to copy back mark stack.");
 925      }
 926      GC_mark_state = MS_INVALID;
 927      GC_mark_stack_too_small = TRUE;
 928      /* We drop the local mark stack.  We'll fix things later.	*/
 929    } else {
 930      BCOPY(low, my_start, stack_size * sizeof(mse));
 931      GC_ASSERT((mse *)AO_load((volatile AO_t *)(&GC_mark_stack_top))
 932		== my_top);
 933      AO_store_release_write((volatile AO_t *)(&GC_mark_stack_top),
 934		             (AO_t)(my_top + stack_size));
 935      		/* Ensures visibility of previously written stack contents. */
 936    }
 937    GC_release_mark_lock();
 938    GC_notify_all_marker();
 939}
 940
 941/* Mark from the local mark stack.		*/
 942/* On return, the local mark stack is empty.	*/
 943/* But this may be achieved by copying the	*/
 944/* local mark stack back into the global one.	*/
 945void GC_do_local_mark(mse *local_mark_stack, mse *local_top)
 946{
 947    unsigned n;
 948#   define N_LOCAL_ITERS 1
 949
 950#   ifdef GC_ASSERTIONS
 951      /* Make sure we don't hold mark lock. */
 952	GC_acquire_mark_lock();
 953	GC_release_mark_lock();
 954#   endif
 955    for (;;) {
 956        for (n = 0; n < N_LOCAL_ITERS; ++n) {
 957	    local_top = GC_mark_from(local_top, local_mark_stack,
 958				     local_mark_stack + LOCAL_MARK_STACK_SIZE);
 959	    if (local_top < local_mark_stack) return;
 960	    if (local_top - local_mark_stack >= LOCAL_MARK_STACK_SIZE/2) {
 961	 	GC_return_mark_stack(local_mark_stack, local_top);
 962		return;
 963	    }
 964	}
 965	if ((mse *)AO_load((volatile AO_t *)(&GC_mark_stack_top))
 966	    < (mse *)AO_load(&GC_first_nonempty)
 967	    && GC_active_count < GC_helper_count
 968	    && local_top > local_mark_stack + 1) {
 969	    /* Try to share the load, since the main stack is empty,	*/
 970	    /* and helper threads are waiting for a refill.		*/
 971	    /* The entries near the bottom of the stack are likely	*/
  972	    /* to require more work.  Thus we return those, even though	*/
 973	    /* it's harder.						*/
 974 	    mse * new_bottom = local_mark_stack
 975				+ (local_top - local_mark_stack)/2;
 976	    GC_ASSERT(new_bottom > local_mark_stack
 977		      && new_bottom < local_top);
 978	    GC_return_mark_stack(local_mark_stack, new_bottom - 1);
 979	    memmove(local_mark_stack, new_bottom,
 980		    (local_top - new_bottom + 1) * sizeof(mse));
 981	    local_top -= (new_bottom - local_mark_stack);
 982	}
 983    }
 984}
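/*
 * When the global stack looks empty and helpers are idle, GC_do_local_mark
 * donates the bottom half of its local stack (the older, typically larger
 * work units) and slides the rest down.  The sketch below isolates just
 * that pointer arithmetic; donate() stands in for GC_return_mark_stack.
 */
#if 0	/* illustrative sketch; not compiled */
  #include <string.h>

  /* Returns the new local top after donating the bottom half of	*/
  /* [local_bottom, local_top] (both inclusive).			*/
  static mse *sketch_share_bottom_half(mse *local_bottom, mse *local_top,
				       void (*donate)(mse *, mse *))
  {
      mse *new_bottom = local_bottom + (local_top - local_bottom)/2;

      donate(local_bottom, new_bottom - 1);
      memmove(local_bottom, new_bottom,
	      (local_top - new_bottom + 1) * sizeof(mse));
      return local_top - (new_bottom - local_bottom);
  }
#endif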
 985
 986#define ENTRIES_TO_GET 5
 987
 988long GC_markers = 2;		/* Normally changed by thread-library-	*/
 989				/* -specific code.			*/
 990
 991/* Mark using the local mark stack until the global mark stack is empty	*/
 992/* and there are no active workers. Update GC_first_nonempty to reflect	*/
 993/* progress.								*/
 994/* Caller does not hold mark lock.					*/
 995/* Caller has already incremented GC_helper_count.  We decrement it,	*/
 996/* and maintain GC_active_count.					*/
 997void GC_mark_local(mse *local_mark_stack, int id)
 998{
 999    mse * my_first_nonempty;
1000
1001    GC_acquire_mark_lock();
1002    GC_active_count++;
1003    my_first_nonempty = (mse *)AO_load(&GC_first_nonempty);
1004    GC_ASSERT((mse *)AO_load(&GC_first_nonempty) >= GC_mark_stack && 
1005	      (mse *)AO_load(&GC_first_nonempty) <=
1006	      (mse *)AO_load((volatile AO_t *)(&GC_mark_stack_top)) + 1);
1007    if (GC_print_stats == VERBOSE)
1008	GC_log_printf("Starting mark helper %lu\n", (unsigned long)id);
1009    GC_release_mark_lock();
1010    for (;;) {
1011  	size_t n_on_stack;
1012        size_t n_to_get;
1013	mse * my_top;
1014	mse * local_top;
1015        mse * global_first_nonempty = (mse *)AO_load(&GC_first_nonempty);
1016
1017    	GC_ASSERT(my_first_nonempty >= GC_mark_stack && 
1018		  my_first_nonempty <=
1019		  (mse *)AO_load((volatile AO_t *)(&GC_mark_stack_top))  + 1);
1020    	GC_ASSERT(global_first_nonempty >= GC_mark_stack && 
1021		  global_first_nonempty <= 
1022		  (mse *)AO_load((volatile AO_t *)(&GC_mark_stack_top))  + 1);
1023	if (my_first_nonempty < global_first_nonempty) {
1024	    my_first_nonempty = global_first_nonempty;
1025        } else if (global_first_nonempty < my_first_nonempty) {
1026	    AO_compare_and_swap(&GC_first_nonempty, 
1027				(AO_t) global_first_nonempty,
1028				(AO_t) my_first_nonempty);
1029	    /* If this fails, we just go ahead, without updating	*/
1030	    /* GC_first_nonempty.					*/
1031	}
1032	/* Perhaps we should also update GC_first_nonempty, if it */
1033	/* is less.  But that would require using atomic updates. */
1034	my_top = (mse *)AO_load_acquire((volatile AO_t *)(&GC_mark_stack_top));
1035	n_on_stack = my_top - my_first_nonempty + 1;
1036        if (0 == n_on_stack) {
1037	    GC_acquire_mark_lock();
1038            my_top = GC_mark_stack_top;
1039	    	/* Asynchronous modification impossible here, 	*/
1040	        /* since we hold mark lock. 			*/
1041            n_on_stack = my_top - my_first_nonempty + 1;
1042	    if (0 == n_on_stack) {
1043		GC_active_count--;
1044		GC_ASSERT(GC_active_count <= GC_helper_count);
1045		/* Other markers may redeposit objects	*/
1046		/* on the stack.				*/
1047		if (0 == GC_active_count) GC_notify_all_marker();
1048		while (GC_active_count > 0
1049		       && (mse *)AO_load(&GC_first_nonempty)
1050		          > GC_mark_stack_top) {
1051		    /* We will be notified if either GC_active_count	*/
1052		    /* reaches zero, or if more objects are pushed on	*/
1053		    /* the global mark stack.				*/
1054		    GC_wait_marker();
1055		}
1056		if (GC_active_count == 0 &&
1057		    (mse *)AO_load(&GC_first_nonempty) > GC_mark_stack_top) { 
1058		    GC_bool need_to_notify = FALSE;
1059		    /* The above conditions can't be falsified while we	*/
1060		    /* hold the mark lock, since neither 		*/
1061		    /* GC_active_count nor GC_mark_stack_top can	*/
1062		    /* change.  GC_first_nonempty can only be		*/
1063		    /* incremented asynchronously.  Thus we know that	*/
1064		    /* both conditions actually held simultaneously.	*/
1065		    GC_helper_count--;
1066		    if (0 == GC_helper_count) need_to_notify = TRUE;
1067		    if (GC_print_stats == VERBOSE)
1068		      GC_log_printf(
1069		        "Finished mark helper %lu\n", (unsigned long)id);
1070		    GC_release_mark_lock();
1071		    if (need_to_notify) GC_notify_all_marker();
1072		    return;
1073		}
1074		/* else there's something on the stack again, or	*/
1075		/* another helper may push something.			*/
1076		GC_active_count++;
1077	        GC_ASSERT(GC_active_count > 0);
1078		GC_release_mark_lock();
1079		continue;
1080	    } else {
1081		GC_release_mark_lock();
1082	    }
1083	}
1084	n_to_get = ENTRIES_TO_GET;
1085	if (n_on_stack < 2 * ENTRIES_TO_GET) n_to_get = 1;
1086	local_top = GC_steal_mark_stack(my_first_nonempty, my_top,
1087					local_mark_stack, n_to_get,
1088				        &my_first_nonempty);
1089        GC_ASSERT(my_first_nonempty >= GC_mark_stack && 
1090	          my_first_nonempty <=
1091		    (mse *)AO_load((volatile AO_t *)(&GC_mark_stack_top)) + 1);
1092	GC_do_local_mark(local_mark_stack, local_top);
1093    }
1094}
1095
1096/* Perform Parallel mark.			*/
1097/* We hold the GC lock, not the mark lock.	*/
1098/* Currently runs until the mark stack is	*/
1099/* empty.					*/
1100void GC_do_parallel_mark()
1101{
1102    mse local_mark_stack[LOCAL_MARK_STACK_SIZE];
1103
1104    GC_acquire_mark_lock();
1105    GC_ASSERT(I_HOLD_LOCK());
1106    /* This could be a GC_ASSERT, but it seems safer to keep it on	*/
1107    /* all the time, especially since it's cheap.			*/
1108    if (GC_help_wanted || GC_active_count != 0 || GC_helper_count != 0)
1109	ABORT("Tried to start parallel mark in bad state");
1110    if (GC_print_stats == VERBOSE)
1111	GC_log_printf("Starting marking for mark phase number %lu\n",
1112		   (unsigned long)GC_mark_no);
1113    GC_first_nonempty = (AO_t)GC_mark_stack;
1114    GC_active_count = 0;
1115    GC_helper_count = 1;
1116    GC_help_wanted = TRUE;
1117    GC_release_mark_lock();
1118    GC_notify_all_marker();
1119	/* Wake up potential helpers.	*/
1120    GC_mark_local(local_mark_stack, 0);
1121    GC_acquire_mark_lock();
1122    GC_help_wanted = FALSE;
1123    /* Done; clean up.	*/
1124    while (GC_helper_count > 0) GC_wait_marker();
1125    /* GC_helper_count cannot be incremented while GC_help_wanted == FALSE */
1126    if (GC_print_stats == VERBOSE)
1127	GC_log_printf(
1128	    "Finished marking for mark phase number %lu\n",
1129	    (unsigned long)GC_mark_no);
1130    GC_mark_no++;
1131    GC_release_mark_lock();
1132    GC_notify_all_marker();
1133}
1134
1135
1136/* Try to help out the marker, if it's running.	        */
1137/* We do not hold the GC lock, but the requestor does.	*/
1138void GC_help_marker(word my_mark_no)
1139{
1140    mse local_mark_stack[LOCAL_MARK_STACK_SIZE];
1141    unsigned my_id;
1142
1143    if (!GC_parallel) return;
1144    GC_acquire_mark_lock();
1145    while (GC_mark_no < my_mark_no
1146           || (!GC_help_wanted && GC_mark_no == my_mark_no)) {
1147      GC_wait_marker();
1148    }
1149    my_id = GC_helper_count;
1150    if (GC_mark_no != my_mark_no || my_id >= GC_markers) {
1151      /* Second test is useful only if original threads can also	*/
1152      /* act as helpers.  Under Linux they can't.			*/
1153      GC_release_mark_lock();
1154      return;
1155    }
1156    GC_helper_count = my_id + 1;
1157    GC_release_mark_lock();
1158    GC_mark_local(local_mark_stack, my_id);
1159    /* GC_mark_local decrements GC_helper_count. */
1160}
1161
1162#endif /* PARALLEL_MARK */
1163
1164/* Allocate or reallocate space for mark stack of size n entries.  */
1165/* May silently fail.						   */
1166static void alloc_mark_stack(size_t n)
1167{
1168    mse * new_stack = (mse *)GC_scratch_alloc(n * sizeof(struct GC_ms_entry));
1169#   ifdef GWW_VDB
1170      /* Don't recycle a stack segment obtained with the wrong flags. 	*/
1171      /* Win32 GetWriteWatch requires the right kind of memory.		*/
1172      static GC_bool GC_incremental_at_stack_alloc = 0;
1173      GC_bool recycle_old = (!GC_incremental || GC_incremental_at_stack_alloc);
1174
1175      GC_incremental_at_stack_alloc = GC_incremental;
1176#   else
1177#     define recycle_old 1
1178#   endif
1179    
1180    GC_mark_stack_too_small = FALSE;
1181    if (GC_mark_stack_size != 0) {
1182        if (new_stack != 0) {
1183	  if (recycle_old) {
1184            /* Recycle old space */
1185              size_t page_offset = (word)GC_mark_stack & (GC_page_size - 1);
1186              size_t size = GC_mark_stack_size * sizeof(struct GC_ms_entry);
1187	      size_t displ = 0;
1188          
1189	      if (0 != page_offset) displ = GC_page_size - page_offset;
1190	      size = (size - displ) & ~(GC_page_size - 1);
1191	      if (size > 0) {
1192	        GC_add_to_heap((struct hblk *)
1193	      			((word)GC_mark_stack + displ), (word)size);
1194	      }
1195	  }
1196          GC_mark_stack = new_stack;
1197          GC_mark_stack_size = n;
1198	  GC_mark_stack_limit = new_stack + n;
1199	  if (GC_print_stats) {
1200	      GC_log_printf("Grew mark stack to %lu frames\n",
1201		    	    (unsigned long) GC_mark_stack_size);
1202	  }
1203        } else {
1204	  if (GC_print_stats) {
1205	      GC_log_printf("Failed to grow mark stack to %lu frames\n",
1206		    	    (unsigned long) n);
1207	  }
1208        }
1209    } else {
1210        if (new_stack == 0) {
1211            GC_err_printf("No space for mark stack\n");
1212            EXIT();
1213        }
1214        GC_mark_stack = new_stack;
1215        GC_mark_stack_size = n;
1216	GC_mark_stack_limit = new_stack + n;
1217    }
1218    GC_mark_stack_top = GC_mark_stack-1;
1219}
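/*
 * When the mark stack grows, the old stack is recycled into the heap, but
 * only whole, page-aligned blocks can be handed to GC_add_to_heap: skip
 * forward to the next page boundary, then round the remaining size down to
 * a multiple of the page size.  The sketch below isolates that arithmetic,
 * with the page size passed explicitly (it must be a power of two, as
 * GC_page_size is).
 */
#if 0	/* illustrative sketch; not compiled */
  static void sketch_trim_to_pages(unsigned long start, unsigned long size,
				   unsigned long page_size,
				   unsigned long *displ, unsigned long *trimmed)
  {
      unsigned long page_offset = start & (page_size - 1);

      *displ = (page_offset == 0) ? 0 : page_size - page_offset;
      *trimmed = (size <= *displ) ? 0 : ((size - *displ) & ~(page_size - 1));
  }
#endif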
1220
1221void GC_mark_init()
1222{
1223    alloc_mark_stack(INITIAL_MARK_STACK_SIZE);
1224}
1225
1226/*
1227 * Push all locations between b and t onto the mark stack.
1228 * b is the first location to be checked. t is one past the last
1229 * location to be checked.
1230 * Should only be used if there is no possibility of mark stack
1231 * overflow.
1232 */
1233void GC_push_all(ptr_t bottom, ptr_t top)
1234{
1235    register word length;
1236    
1237    bottom = (ptr_t)(((word) bottom + ALIGNMENT-1) & ~(ALIGNMENT-1));
1238    top = (ptr_t)(((word) top) & ~(ALIGNMENT-1));
1239    if (top == 0 || bottom == top) return;
1240    GC_mark_stack_top++;
1241    if (GC_mark_stack_top >= GC_mark_stack_limit) {
1242	ABORT("unexpected mark stack overflow");
1243    }
1244    length = top - bottom;
1245#   if GC_DS_TAGS > ALIGNMENT - 1
1246	length += GC_DS_TAGS;
1247	length &= ~GC_DS_TAGS;
1248#   endif
1249    GC_mark_stack_top -> mse_start = bottom;
1250    GC_mark_stack_top -> mse_descr = length;
1251}
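/*
 * GC_push_all is the primitive used when a region the collector would not
 * otherwise scan must be treated as roots, and mark stack overflow is
 * impossible because the whole range becomes a single entry.  A hedged
 * sketch of such a call follows; sketch_root_table is a hypothetical
 * client-managed array, not something defined by the collector.
 */
#if 0	/* illustrative sketch; not compiled */
  static void *sketch_root_table[64];	/* hypothetical client pointer table */

  static void sketch_push_client_roots(void)
  {
      GC_push_all((ptr_t)sketch_root_table,
		  (ptr_t)(sketch_root_table + 64));
  }
#endif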
1252
1253/*
1254 * Analogous to the above, but push only those pages h with dirty_fn(h) != 0.
1255 * We use push_fn to actually push the block.
1256 * Used both to selectively push dirty pages, or to push a block
1257 * in piecemeal fashion, to allow for more marking concurrency.
1258 * Will not overflow mark stack if push_fn pushes a small fixed number
1259 * of entries.  (This is invoked only if push_fn pushes a single entry,
1260 * or if it marks each object before pushing it, thus ensuring progress
1261 * in the event of a stack overflow.)
1262 */
1263void GC_push_selected(ptr_t bottom, ptr_t top,
1264		      int (*dirty_fn) (struct hblk *),
1265		      void (*push_fn) (ptr_t, ptr_t))
1266{
1267    struct hblk * h;
1268
1269    bottom = (ptr_t)(((word) bottom + ALIGNMENT-1) & ~(ALIGNMENT-1));
1270    top = (ptr_t)(((word) top) & ~(ALIGNMENT-1));
1271
1272    if (top == 0 || bottom == top) return;
1273    h = HBLKPTR(bottom + HBLKSIZE);
1274    if (top <= (ptr_t) h) {
1275  	if ((*dirty_fn)(h-1)) {
1276	    (*push_fn)(bottom, top);
1277	}
1278	return;
1279    }
1280    if ((*dirty_fn)(h-1)) {
1281        (*push_fn)(bottom, (ptr_t)h);
1282    }
1283    while ((ptr_t)(h+1) <= top) {
1284	if ((*dirty_fn)(h)) {
1285	    if ((word)(GC_mark_stack_top - GC_mark_stack)
1286		> 3 * GC_mark_stack_size / 4) {
1287	 	/* Danger of mark stack overflow */
1288		(*push_fn)((ptr_t)h, top);
1289		return;
1290	    } else {
1291		(*push_fn)((ptr_t)h, (ptr_t)(h+1));
1292	    }
1293	}
1294	h++;
1295    }
1296    if ((ptr_t)h != top) {
1297	if ((*dirty_fn)(h)) {
1298            (*push_fn)((ptr_t)h, top);
1299        }
1300    }
1301    if (GC_mark_stack_top >= GC_mark_stack_limit) {
1302        ABORT("unexpected mark stack overflow");
1303    }
1304}
1305
1306# ifndef SMALL_CONFIG
1307
1308#ifdef PARALLEL_MARK
1309    /* Break up root sections into page size chunks to better spread 	*/
1310    /* out work.							*/
1311    GC_bool GC_true_func(struct hblk *h) { return TRUE; }
1312#   define GC_PUSH_ALL(b,t) GC_push_selected(b,t,GC_true_func,GC_push_all);
1313#else
1314#   define GC_PUSH_ALL(b,t) GC_push_all(b,t);
1315#endif
1316
1317
1318void GC_push_conditional(ptr_t bottom, ptr_t top, GC_bool all)
1319{
1320    if (all) {
1321      if (GC_dirty_maintained) {
1322#	ifdef PROC_VDB
1323	    /* Pages that were never dirtied cannot contain pointers	*/
1324	    GC_push_selected(bottom, top, GC_page_was_ever_dirty, GC_push_all);
1325#	else
1326	    GC_push_all(bottom, top);
1327#	endif
1328      } else {
1329      	GC_push_all(bottom, top);
1330      }
1331    } else {
1332	GC_push_selected(bottom, top, GC_page_was_dirty, GC_push_all);
1333    }
1334}
1335#endif
1336
1337# if defined(MSWIN32) || defined(MSWINCE)
1338  void __cdecl GC_push_one(word p)
1339# else
1340  void GC_push_one(word p)
1341# endif
1342{
1343    GC_PUSH_ONE_STACK((ptr_t)p, MARKED_FROM_REGISTER);
1344}
1345
1346struct GC_ms_entry *GC_mark_and_push(void *obj,
1347				     mse *mark_stack_ptr,
1348				     mse *mark_stack_limit,
1349				     void **src)
1350{
1351    hdr * hhdr;
1352
1353    PREFETCH(obj);
1354    GET_HDR(obj, hhdr);
1355    if (EXPECT(IS_FORWARDING_ADDR_OR_NIL(hhdr),FALSE)) {
1356      if (GC_all_interior_pointers) {
1357	hhdr = GC_find_header(GC_base(obj));
1358	if (hhdr == 0) {
1359          GC_ADD_TO_BLACK_LIST_NORMAL(obj, src);
1360	  return mark_stack_ptr;
1361	}
1362      } else {
1363        GC_ADD_TO_BLACK_LIST_NORMAL(obj, src);
1364	return mark_stack_ptr;
1365      }
1366    }
1367    if (EXPECT(HBLK_IS_FREE(hhdr),0)) {
1368	GC_ADD_TO_BLACK_LIST_NORMAL(obj, src);
1369	return mark_stack_ptr;
1370    }
1371
1372    PUSH_CONTENTS_HDR(obj, mark_stack_ptr /* modified */, mark_stack_limit,
1373		      src, was_marked, hhdr, TRUE);
1374 was_marked:
1375    return mark_stack_ptr;
1376}
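/*
 * GC_mark_and_push above is also the routine a client-supplied GC_DS_PROC
 * mark procedure calls to gray the pointers it locates.  The sketch below
 * shows the typical shape of such a procedure for a node with two pointer
 * fields.  The mark-procedure signature is assumed from the public
 * gc_mark.h interface, and the registration step (GC_new_proc /
 * GC_make_descriptor) is assumed rather than shown.
 */
#if 0	/* illustrative sketch; not compiled */
  struct sketch_node {
      void *left;
      void *right;
      /* ... non-pointer payload the marker should not scan ...	*/
  };

  static mse *sketch_node_mark_proc(word *addr, mse *mark_stack_ptr,
				    mse *mark_stack_limit, word env)
  {
      struct sketch_node *n = (struct sketch_node *)addr;

      (void)env;
      /* Gray each pointer field.  GC_mark_and_push validates the	*/
      /* target and returns the (possibly advanced) mark stack pointer.	*/
      mark_stack_ptr = GC_mark_and_push(n -> left, mark_stack_ptr,
					mark_stack_limit, &(n -> left));
      mark_stack_ptr = GC_mark_and_push(n -> right, mark_stack_ptr,
					mark_stack_limit, &(n -> right));
      return mark_stack_ptr;
  }
#endif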
1377
1378/* Mark and push (i.e. gray) a single object p onto the main	*/
1379/* mark stack.  Consider p to be valid if it is an interior	*/
1380/* pointer.							*/
1381/* The object p has passed a preliminary pointer validity	*/
1382/* test, but we do not definitely know whether it is valid.	*/
1383/* Mark bits are NOT atomically updated.  Thus this must be the	*/
1384/* only thread setting them.					*/
1385# if defined(PRINT_BLACK_LIST) || defined(KEEP_BACK_PTRS)
1386    void GC_mark_and_push_stack(ptr_t p, ptr_t source)
1387# else
1388    void GC_mark_and_push_stack(ptr_t p)
1389#   define source 0
1390# endif
1391{
1392    hdr * hhdr; 
1393    ptr_t r = p;
1394  
1395    PREFETCH(p);
1396    GET_HDR(p, hhdr);
1397    if (EXPECT(IS_FORWARDING_ADDR_OR_NIL(hhdr),FALSE)) {
1398        if (hhdr != 0) {
1399          r = GC_base(p);
1400	  hhdr = HDR(r);
1401	}
1402        if (hhdr == 0) {
1403            GC_ADD_TO_BLACK_LIST_STACK(p, source);
1404	    return;
1405	}
1406    }
1407    if (EXPECT(HBLK_IS_FREE(hhdr),0)) {
 1408	GC_ADD_TO_BLACK_LIST_NORMAL(p, source);
1409	return;
1410    }
1411#   if defined(MANUAL_VDB) && defined(THREADS)
1412      /* Pointer is on the stack.  We may have dirtied the object	*/
1413      /* it points to, but not yet have called GC_dirty();	*/
1414      GC_dirty(p);	/* Implicitly affects entire object.	*/
1415#   endif
1416    PUSH_CONTENTS_HDR(r, GC_mark_stack_top, GC_mark_stack_limit,
1417		      source, mark_and_push_exit, hhdr, FALSE);
1418  mark_and_push_exit: ;
1419    /* We silently ignore pointers to near the end of a block,	*/
1420    /* which is very mildly suboptimal.				*/
1421    /* FIXME: We should probably add a header word to address	*/
1422    /* this.							*/
1423}
1424
1425# ifdef TRACE_BUF
1426
1427# define TRACE_ENTRIES 1000
1428
1429struct trace_entry {
1430    char * kind;
1431    word gc_no;
1432    word bytes_allocd;
1433    word arg1;
1434    word arg2;
1435} GC_trace_buf[TRACE_ENTRIES];
1436
1437int GC_trace_buf_ptr = 0;
1438
1439void GC_add_trace_entry(char *kind, word arg1, word arg2)
1440{
1441    GC_trace_buf[GC_trace_buf_ptr].kind = kind;
1442    GC_trace_buf[GC_trace_buf_ptr].gc_no = GC_gc_no;
1443    GC_trace_buf[GC_trace_buf_ptr].bytes_allocd = GC_bytes_allocd;
1444    GC_trace_buf[GC_trace_buf_ptr].arg1 = arg1 ^ 0x80000000;
1445    GC_trace_buf[GC_trace_buf_ptr].arg2 = arg2 ^ 0x80000000;
1446    GC_trace_buf_ptr++;
1447    if (GC_trace_buf_ptr >= TRACE_ENTRIES) GC_trace_buf_ptr = 0;
1448}
1449
1450void GC_print_trace(word gc_no, GC_bool lock)
1451{
1452    int i;
1453    struct trace_entry *p;
1454    
1455    if (lock) LOCK();
1456    for (i = GC_trace_buf_ptr-1; i != GC_trace_buf_ptr; i--) {
1457    	if (i < 0) i = TRACE_ENTRIES-1;
1458    	p = GC_trace_buf + i;
1459    	if (p -> gc_no < gc_no || p -> kind == 0) return;
1460    	printf("Trace:%s (gc:%d,bytes:%d) 0x%X, 0x%X\n",
1461    		p -> kind, p -> gc_no, p -> bytes_allocd,
1462    		(p -> arg1) ^ 0x80000000, (p -> arg2) ^ 0x80000000);
1463    }
1464    printf("Trace incomplete\n");
1465    if (lock) UNLOCK();
1466}
1467
1468# endif /* TRACE_BUF */
1469
1470/*
1471 * A version of GC_push_all that treats all interior pointers as valid
1472 * and scans the entire region immediately, in case the contents
1473 * change.
1474 */
1475void GC_push_all_eager(ptr_t bottom, ptr_t top)
1476{
1477    word * b = (word *)(((word) bottom + ALIGNMENT-1) & ~(ALIGNMENT-1));
1478    word * t = (word *)(((word) top) & ~(ALIGNMENT-1));
1479    register word *p;
1480    register ptr_t q;
1481    register word *lim;
1482    register ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
1483    register ptr_t least_ha = GC_least_plausible_heap_addr;
1484#   define GC_greatest_plausible_heap_addr greatest_ha
1485#   define GC_least_plausible_heap_addr least_ha
1486
1487    if (top == 0) return;
1488    /* check all pointers in range and push if they appear	*/
1489    /* to be valid.						*/
1490      lim = t - 1 /* longword */;
1491      for (p = b; p <= lim; p = (word *)(((ptr_t)p) + ALIGNMENT)) {
1492	q = (ptr_t)(*p);
1493	GC_PUSH_ONE_STACK((ptr_t)q, p);
1494      }
1495#   undef GC_greatest_plausible_heap_addr
1496#   undef GC_least_plausible_heap_addr
1497}
1498
1499#ifndef THREADS
1500/*
1501 * A version of GC_push_all that treats all interior pointers as valid
1502 * and scans part of the area immediately, to make sure that saved
1503 * register values are not lost.
1504 * Cold_gc_frame delimits the stack section that must be scanned
1505 * eagerly.  A zero value indicates that no eager scanning is needed.
1506 * We don't need to worry about the MANUAL_VDB case here, since this
1507 * is only called in the single-threaded case.  We assume that we
1508 * cannot collect between an assignment and the corresponding
1509 * GC_dirty() call.
1510 */
1511void GC_push_all_stack_partially_eager(ptr_t bottom, ptr_t top,
1512				       ptr_t cold_gc_frame)
1513{
1514  if (!NEED_FIXUP_POINTER && GC_all_interior_pointers) {
1515    /* Push the hot end of the stack eagerly, so that register values   */
1516    /* saved inside GC frames are mar…

[File truncated here; the remainder of mark.c is not shown.]