/arch/i386/kernel/smpboot.c

https://bitbucket.org/evzijst/gittest
/*
 *	x86 SMP booting functions
 *
 *	(c) 1995 Alan Cox, Building #3 <alan@redhat.com>
 *	(c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
 *
 *	Much of the core SMP work is based on previous work by Thomas Radke, to
 *	whom a great many thanks are extended.
 *
 *	Thanks to Intel for making available several different Pentium,
 *	Pentium Pro and Pentium-II/Xeon MP machines.
 *	Original development of Linux SMP code supported by Caldera.
 *
 *	This code is released under the GNU General Public License version 2 or
 *	later.
 *
 *	Fixes
 *		Felix Koop	:	NR_CPUS used properly
 *		Jose Renau	:	Handle single CPU case.
 *		Alan Cox	:	By repeated request 8) - Total BogoMIPS report.
 *		Greg Wright	:	Fix for kernel stacks panic.
 *		Erich Boleyn	:	MP v1.4 and additional changes.
 *	Matthias Sattler	:	Changes for 2.1 kernel map.
 *	Michel Lespinasse	:	Changes for 2.1 kernel map.
 *	Michael Chastain	:	Change trampoline.S to gnu as.
 *		Alan Cox	:	Dumb bug: 'B' step PPro's are fine
 *		Ingo Molnar	:	Added APIC timers, based on code
 *					from Jose Renau
 *		Ingo Molnar	:	various cleanups and rewrites
 *		Tigran Aivazian	:	fixed "0.00 in /proc/uptime on SMP" bug.
 *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs
 *		Martin J. Bligh	:	Added support for multi-quad systems
 *		Dave Jones	:	Report invalid combinations of Athlon CPUs.
 *		Rusty Russell	:	Hacked into shape for new "hotplug" boot process.
 */
#include <linux/module.h>
#include <linux/config.h>
#include <linux/init.h>
#include <linux/kernel.h>

#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/kernel_stat.h>
#include <linux/smp_lock.h>
#include <linux/irq.h>
#include <linux/bootmem.h>

#include <linux/delay.h>
#include <linux/mc146818rtc.h>
#include <asm/tlbflush.h>
#include <asm/desc.h>
#include <asm/arch_hooks.h>

#include <mach_apic.h>
#include <mach_wakecpu.h>
#include <smpboot_hooks.h>

/* Set if we find a B stepping CPU */
static int __initdata smp_b_stepping;

/* Number of siblings per CPU package */
int smp_num_siblings = 1;
int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
EXPORT_SYMBOL(phys_proc_id);

/* bitmap of online cpus */
cpumask_t cpu_online_map;

cpumask_t cpu_callin_map;
cpumask_t cpu_callout_map;
static cpumask_t smp_commenced_mask;

/* Per CPU bogomips and other parameters */
struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;

u8 x86_cpu_to_apicid[NR_CPUS] =
			{ [0 ... NR_CPUS-1] = 0xff };
EXPORT_SYMBOL(x86_cpu_to_apicid);

/*
 * Trampoline 80x86 program as an array.
 */

extern unsigned char trampoline_data [];
extern unsigned char trampoline_end  [];
static unsigned char *trampoline_base;
static int trampoline_exec;

static void map_cpu_to_logical_apicid(void);

/*
 * Currently trivial. Write the real->protected mode
 * bootstrap into the page concerned. The caller
 * has made sure it's suitably aligned.
 */

static unsigned long __init setup_trampoline(void)
{
	memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
	return virt_to_phys(trampoline_base);
}

/*
 * We are called very early to get the low memory for the
 * SMP bootup trampoline page.
 */
void __init smp_alloc_memory(void)
{
	trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
	/*
	 * Has to be in very low memory so we can execute
	 * real-mode AP code.
	 */
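	/*
	 * (0x9F000 = 636K: the trampoline page must fit entirely
	 * below the 640K conventional-memory boundary at 0xA0000.)
	 */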
	if (__pa(trampoline_base) >= 0x9F000)
		BUG();
	/*
	 * Make the SMP trampoline executable:
	 */
	trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1);
}

/*
 * The bootstrap kernel entry code has set these up. Save them for
 * a given CPU
 */

static void __init smp_store_cpu_info(int id)
{
	struct cpuinfo_x86 *c = cpu_data + id;

	*c = boot_cpu_data;
	if (id != 0)
		identify_cpu(c);
	/*
	 * Mask B, Pentium, but not Pentium MMX
	 */
	if (c->x86_vendor == X86_VENDOR_INTEL &&
	    c->x86 == 5 &&
	    c->x86_mask >= 1 && c->x86_mask <= 4 &&
	    c->x86_model <= 3)
		/*
		 * Remember we have B step Pentia with bugs
		 */
		smp_b_stepping = 1;

	/*
	 * Certain Athlons might work (for various values of 'work') in SMP
	 * but they are not certified as MP capable.
	 */
	if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {

		/* Athlon 660/661 is valid. */
		if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1)))
			goto valid_k7;

		/* Duron 670 is valid */
		if ((c->x86_model==7) && (c->x86_mask==0))
			goto valid_k7;

		/*
		 * Athlon 662, Duron 671, and Athlon >model 7 have the capability
		 * bit. It's worth noting that the A5 stepping (662) of some
		 * Athlon XPs have the MP bit set.
		 * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for more.
		 */
		if (((c->x86_model==6) && (c->x86_mask>=2)) ||
		    ((c->x86_model==7) && (c->x86_mask>=1)) ||
		     (c->x86_model > 7))
			if (cpu_has_mp)
				goto valid_k7;

		/* If we get here, it's not a certified SMP capable AMD system. */
		tainted |= TAINT_UNSAFE_SMP;
	}

valid_k7:
	;
}

/*
 * TSC synchronization.
 *
 * We first check whether all CPUs have their TSC's synchronized,
 * then we print a warning if not, and always resync.
 */

static atomic_t tsc_start_flag = ATOMIC_INIT(0);
static atomic_t tsc_count_start = ATOMIC_INIT(0);
static atomic_t tsc_count_stop = ATOMIC_INIT(0);
static unsigned long long tsc_values[NR_CPUS];

#define NR_LOOPS 5
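
/*
 * tsc_count_start/tsc_count_stop act as a simple two-phase rendezvous:
 * every CPU increments a counter and then spins until it reaches
 * num_booting_cpus(), so the BP and the APs read (and finally reset)
 * their cycle counters at very nearly the same moment.
 */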

static void __init synchronize_tsc_bp (void)
{
	int i;
	unsigned long long t0;
	unsigned long long sum, avg;
	long long delta;
	unsigned long one_usec;
	int buggy = 0;

	printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());

	/* convert from kcyc/sec to cyc/usec */
	one_usec = cpu_khz / 1000;

	atomic_set(&tsc_start_flag, 1);
	wmb();

	/*
	 * We loop a few times to get a primed instruction cache,
	 * then the last pass is more or less synchronized and
	 * the BP and APs set their cycle counters to zero all at
	 * once. This reduces the chance of having random offsets
	 * between the processors, and guarantees that the maximum
	 * delay between the cycle counters is never bigger than
	 * the latency of information-passing (cachelines) between
	 * two CPUs.
	 */
	for (i = 0; i < NR_LOOPS; i++) {
		/*
		 * all APs synchronize but they loop on '== num_cpus'
		 */
		while (atomic_read(&tsc_count_start) != num_booting_cpus()-1)
			mb();
		atomic_set(&tsc_count_stop, 0);
		wmb();
		/*
		 * this lets the APs save their current TSC:
		 */
		atomic_inc(&tsc_count_start);

		rdtscll(tsc_values[smp_processor_id()]);
		/*
		 * We clear the TSC in the last loop:
		 */
		if (i == NR_LOOPS-1)
			write_tsc(0, 0);

		/*
		 * Wait for all APs to leave the synchronization point:
		 */
		while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1)
			mb();
		atomic_set(&tsc_count_start, 0);
		wmb();
		atomic_inc(&tsc_count_stop);
	}
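
	/*
	 * After the final pass every CPU has just executed
	 * write_tsc(0, 0), so the counters restart together; the
	 * code below only measures and reports the residual skew.
	 */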

	sum = 0;
	for (i = 0; i < NR_CPUS; i++) {
		if (cpu_isset(i, cpu_callout_map)) {
			t0 = tsc_values[i];
			sum += t0;
		}
	}
	avg = sum;
	do_div(avg, num_booting_cpus());

	sum = 0;
	for (i = 0; i < NR_CPUS; i++) {
		if (!cpu_isset(i, cpu_callout_map))
			continue;
		delta = tsc_values[i] - avg;
		if (delta < 0)
			delta = -delta;
		/*
		 * We report clock differences bigger than 2 microseconds.
		 */
		if (delta > 2*one_usec) {
			long realdelta;
			if (!buggy) {
				buggy = 1;
				printk("\n");
			}
			realdelta = delta;
			do_div(realdelta, one_usec);
			if (tsc_values[i] < avg)
				realdelta = -realdelta;

			printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta);
		}

		sum += delta;
	}
	if (!buggy)
		printk("passed.\n");
}

static void __init synchronize_tsc_ap (void)
{
	int i;

	/*
	 * Not every cpu is online at the time
	 * this gets called, so we first wait for the BP to
	 * finish SMP initialization:
	 */
	while (!atomic_read(&tsc_start_flag))
		mb();

	for (i = 0; i < NR_LOOPS; i++) {
		atomic_inc(&tsc_count_start);
		while (atomic_read(&tsc_count_start) != num_booting_cpus())
			mb();

		rdtscll(tsc_values[smp_processor_id()]);
		if (i == NR_LOOPS-1)
			write_tsc(0, 0);

		atomic_inc(&tsc_count_stop);
		while (atomic_read(&tsc_count_stop) != num_booting_cpus())
			mb();
	}
}
#undef NR_LOOPS

extern void calibrate_delay(void);

static atomic_t init_deasserted;

static void __init smp_callin(void)
{
	int cpuid, phys_id;
	unsigned long timeout;

	/*
	 * If woken up by an INIT in an 82489DX configuration
	 * we may get here before an INIT-deassert IPI reaches
	 * our local APIC.  We have to wait for the IPI or we'll
	 * lock up on an APIC access.
	 */
	wait_for_init_deassert(&init_deasserted);

	/*
	 * (This works even if the APIC is not enabled.)
	 */
	phys_id = GET_APIC_ID(apic_read(APIC_ID));
	cpuid = smp_processor_id();
	if (cpu_isset(cpuid, cpu_callin_map)) {
		printk("huh, phys CPU#%d, CPU#%d already present??\n",
					phys_id, cpuid);
		BUG();
	}
	Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);

	/*
	 * STARTUP IPIs are fragile beasts as they might sometimes
	 * trigger some glue motherboard logic. Complete APIC bus
	 * silence for 1 second overestimates, by a factor of two,
	 * the time the boot CPU spends sending the up to 2 STARTUP
	 * IPIs. This should be enough.
	 */

	/*
	 * Waiting 2s total for startup (udelay is not yet working)
	 */
	timeout = jiffies + 2*HZ;
	while (time_before(jiffies, timeout)) {
		/*
		 * Has the boot CPU finished its STARTUP sequence?
		 */
		if (cpu_isset(cpuid, cpu_callout_map))
			break;
		rep_nop();
	}

	if (!time_before(jiffies, timeout)) {
		printk("BUG: CPU%d started up but did not get a callout!\n",
			cpuid);
		BUG();
	}

	/*
	 * The boot CPU has finished the init stage and is spinning
	 * on callin_map until we finish. We are free to set up this
	 * CPU, first the APIC. (This is probably redundant on most
	 * boards.)
	 */

	Dprintk("CALLIN, before setup_local_APIC().\n");
	smp_callin_clear_local_apic();
	setup_local_APIC();
	map_cpu_to_logical_apicid();

	/*
	 * Get our bogomips.
	 */
	calibrate_delay();
	Dprintk("Stack at about %p\n", &cpuid);

	/*
	 * Save our processor parameters
	 */
	smp_store_cpu_info(cpuid);

	disable_APIC_timer();

	/*
	 * Synchronize the TSC with the BP
	 */
	cpu_set(cpuid, cpu_callin_map);

	/*
	 * Synchronize the TSC with the BP
	 */
	if (cpu_has_tsc && cpu_khz)
		synchronize_tsc_ap();
}

static int cpucount;

/*
 * Activate a secondary processor.
 */
static void __init start_secondary(void *unused)
{
	/*
	 * Don't put anything before smp_callin(); SMP booting is so
	 * fragile that we want to limit the things done here to the
	 * most necessary things.
	 */
	cpu_init();
	smp_callin();
	while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
		rep_nop();
	setup_secondary_APIC_clock();
	if (nmi_watchdog == NMI_IO_APIC) {
		disable_8259A_irq(0);
		enable_NMI_through_LVT0(NULL);
		enable_8259A_irq(0);
	}
	enable_APIC_timer();
	/*
	 * low-memory mappings have been cleared, flush them from
	 * the local TLBs too.
	 */
	local_flush_tlb();
	cpu_set(smp_processor_id(), cpu_online_map);

	/* We can take interrupts now: we're officially "up". */
	local_irq_enable();

	wmb();
	cpu_idle();
}

/*
 * Everything has been set up for the secondary
 * CPUs - they just need to reload everything
 * from the task structure.
 * This function must not return.
 */
void __init initialize_secondary(void)
{
	/*
	 * We don't actually need to load the full TSS,
	 * basically just the stack pointer and the eip.
	 */
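	/*
	 * (thread.esp and thread.eip were prepared by fork_idle() and
	 * do_boot_cpu(), so this jump resumes in start_secondary() on
	 * the idle task's own stack.)
	 */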

	asm volatile(
		"movl %0,%%esp\n\t"
		"jmp *%1"
		:
		:"r" (current->thread.esp), "r" (current->thread.eip));
}

extern struct {
	void * esp;
	unsigned short ss;
} stack_start;

#ifdef CONFIG_NUMA

/* which logical CPUs are on which nodes */
cpumask_t node_2_cpu_mask[MAX_NUMNODES] =
				{ [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
/* which node each logical CPU is on */
int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 };
EXPORT_SYMBOL(cpu_2_node);

/* set up a mapping between cpu and node. */
static inline void map_cpu_to_node(int cpu, int node)
{
	printk("Mapping cpu %d to node %d\n", cpu, node);
	cpu_set(cpu, node_2_cpu_mask[node]);
	cpu_2_node[cpu] = node;
}

/* undo a mapping between cpu and node. */
static inline void unmap_cpu_to_node(int cpu)
{
	int node;

	printk("Unmapping cpu %d from all nodes\n", cpu);
	for (node = 0; node < MAX_NUMNODES; node++)
		cpu_clear(cpu, node_2_cpu_mask[node]);
	cpu_2_node[cpu] = 0;
}
#else /* !CONFIG_NUMA */

#define map_cpu_to_node(cpu, node)	({})
#define unmap_cpu_to_node(cpu)	({})

#endif /* CONFIG_NUMA */

u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };

static void map_cpu_to_logical_apicid(void)
{
	int cpu = smp_processor_id();
	int apicid = logical_smp_processor_id();

	cpu_2_logical_apicid[cpu] = apicid;
	map_cpu_to_node(cpu, apicid_to_node(apicid));
}

static void unmap_cpu_to_logical_apicid(int cpu)
{
	cpu_2_logical_apicid[cpu] = BAD_APICID;
	unmap_cpu_to_node(cpu);
}

#if APIC_DEBUG
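/*
 * Debug helper built on the APIC "remote read" message (APIC_DM_REMRD):
 * the target APIC is asked for one of its registers, and the reply is
 * picked up from our APIC_RRR once the ICR status reads RR_VALID.
 */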
static inline void __inquire_remote_apic(int apicid)
{
	int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
	char *names[] = { "ID", "VERSION", "SPIV" };
	int timeout, status;

	printk("Inquiring remote APIC #%d...\n", apicid);

	for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
		printk("... APIC #%d %s: ", apicid, names[i]);

		/*
		 * Wait for idle.
		 */
		apic_wait_icr_idle();

		apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
		apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);

		timeout = 0;
		do {
			udelay(100);
			status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
		} while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);

		switch (status) {
		case APIC_ICR_RR_VALID:
			status = apic_read(APIC_RRR);
			printk("%08x\n", status);
			break;
		default:
			printk("failed\n");
		}
	}
}
#endif

#ifdef WAKE_SECONDARY_VIA_NMI
/*
 * Poke the other CPU in the eye via NMI to wake it up. Remember that the
 * normal INIT, INIT, STARTUP sequence will reset the chip hard for us, and
 * this won't ... remember to clear down the APIC, etc later.
 */
static int __init
wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
{
	unsigned long send_status = 0, accept_status = 0;
	int timeout, maxlvt;

	/* Target chip */
	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));

	/* Boot on the stack */
	/* Kick the second */
	apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);

	Dprintk("Waiting for send to finish...\n");
	timeout = 0;
	do {
		Dprintk("+");
		udelay(100);
		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
	} while (send_status && (timeout++ < 1000));

	/*
	 * Give the other CPU some time to accept the IPI.
	 */
	udelay(200);
	/*
	 * Due to the Pentium erratum 3AP.
	 */
	maxlvt = get_maxlvt();
	if (maxlvt > 3) {
		apic_read_around(APIC_SPIV);
		apic_write(APIC_ESR, 0);
	}
	accept_status = (apic_read(APIC_ESR) & 0xEF);
	Dprintk("NMI sent.\n");

	if (send_status)
		printk("APIC never delivered???\n");
	if (accept_status)
		printk("APIC delivery error (%lx).\n", accept_status);

	return (send_status | accept_status);
}
#endif	/* WAKE_SECONDARY_VIA_NMI */

#ifdef WAKE_SECONDARY_VIA_INIT
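/*
 * Standard MP-spec wakeup sequence: assert INIT (level triggered),
 * wait, deassert INIT, then send up to two STARTUP IPIs whose vector
 * field carries the 4K page number of the real-mode entry point
 * (start_eip >> 12).
 */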
static int __init
wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
{
	unsigned long send_status = 0, accept_status = 0;
	int maxlvt, timeout, num_starts, j;

	/*
	 * Be paranoid about clearing APIC errors.
	 */
	if (APIC_INTEGRATED(apic_version[phys_apicid])) {
		apic_read_around(APIC_SPIV);
		apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
	}

	Dprintk("Asserting INIT.\n");

	/*
	 * Turn INIT on target chip
	 */
	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

	/*
	 * Send IPI
	 */
	apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
				| APIC_DM_INIT);

	Dprintk("Waiting for send to finish...\n");
	timeout = 0;
	do {
		Dprintk("+");
		udelay(100);
		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
	} while (send_status && (timeout++ < 1000));

	mdelay(10);

	Dprintk("Deasserting INIT.\n");

	/* Target chip */
	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

	/* Send IPI */
	apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);

	Dprintk("Waiting for send to finish...\n");
	timeout = 0;
	do {
		Dprintk("+");
		udelay(100);
		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
	} while (send_status && (timeout++ < 1000));

	atomic_set(&init_deasserted, 1);

	/*
	 * Should we send STARTUP IPIs?
	 *
	 * Determine this based on the APIC version.
	 * If we don't have an integrated APIC, don't send the STARTUP IPIs.
	 */
	if (APIC_INTEGRATED(apic_version[phys_apicid]))
		num_starts = 2;
	else
		num_starts = 0;

	/*
	 * Run STARTUP IPI loop.
	 */
	Dprintk("#startup loops: %d.\n", num_starts);

	maxlvt = get_maxlvt();

	for (j = 1; j <= num_starts; j++) {
		Dprintk("Sending STARTUP #%d.\n", j);
		apic_read_around(APIC_SPIV);
		apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
		Dprintk("After apic_write.\n");

		/*
		 * STARTUP IPI
		 */

		/* Target chip */
		apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

		/* Boot on the stack */
		/* Kick the second */
		apic_write_around(APIC_ICR, APIC_DM_STARTUP
					| (start_eip >> 12));

		/*
		 * Give the other CPU some time to accept the IPI.
		 */
		udelay(300);

		Dprintk("Startup point 1.\n");

		Dprintk("Waiting for send to finish...\n");
		timeout = 0;
		do {
			Dprintk("+");
			udelay(100);
			send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
		} while (send_status && (timeout++ < 1000));

		/*
		 * Give the other CPU some time to accept the IPI.
		 */
		udelay(200);
		/*
		 * Due to the Pentium erratum 3AP.
		 */
		if (maxlvt > 3) {
			apic_read_around(APIC_SPIV);
			apic_write(APIC_ESR, 0);
		}
		accept_status = (apic_read(APIC_ESR) & 0xEF);
		if (send_status || accept_status)
			break;
	}
	Dprintk("After Startup.\n");

	if (send_status)
		printk("APIC never delivered???\n");
	if (accept_status)
		printk("APIC delivery error (%lx).\n", accept_status);

	return (send_status | accept_status);
}
#endif	/* WAKE_SECONDARY_VIA_INIT */

extern cpumask_t cpu_initialized;

/*
 * NOTE - on most systems the apicid argument is a PHYSICAL apic ID, but on
 * multiquad (ie clustered apic addressing mode) it is a LOGICAL apic ID.
 * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu.
 */
static int __init do_boot_cpu(int apicid)
{
	struct task_struct *idle;
	unsigned long boot_error;
	int timeout, cpu;
	unsigned long start_eip;
	unsigned short nmi_high = 0, nmi_low = 0;

	cpu = ++cpucount;
	/*
	 * We can't use kernel_thread since we must avoid
	 * rescheduling the child.
	 */
	idle = fork_idle(cpu);
	if (IS_ERR(idle))
		panic("failed fork for CPU %d", cpu);
	idle->thread.eip = (unsigned long) start_secondary;
	/* start_eip had better be page-aligned! */
	start_eip = setup_trampoline();

	/* So we see what's up */
	printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
	/* Stack for startup_32 can be just as for start_secondary onwards */
	stack_start.esp = (void *) idle->thread.esp;

	irq_ctx_init(cpu);

	/*
	 * This grunge runs the startup process for
	 * the targeted processor.
	 */

	atomic_set(&init_deasserted, 0);

	Dprintk("Setting warm reset code and vector.\n");

	store_NMI_vector(&nmi_high, &nmi_low);

	smpboot_setup_warm_reset_vector(start_eip);
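	/*
	 * (The warm-reset path - CMOS shutdown code plus the 40:67
	 * reset vector - now points at the trampoline, so a CPU coming
	 * up through a BIOS warm reset also lands at start_eip.)
	 */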

	/*
	 * Starting actual IPI sequence...
	 */
	boot_error = wakeup_secondary_cpu(apicid, start_eip);

	if (!boot_error) {
		/*
		 * allow APs to start initializing.
		 */
		Dprintk("Before Callout %d.\n", cpu);
		cpu_set(cpu, cpu_callout_map);
		Dprintk("After Callout %d.\n", cpu);

		/*
		 * Wait 5s total for a response
		 */
		for (timeout = 0; timeout < 50000; timeout++) {
			if (cpu_isset(cpu, cpu_callin_map))
				break;	/* It has booted */
			udelay(100);
		}

		if (cpu_isset(cpu, cpu_callin_map)) {
			/* number CPUs logically, starting from 1 (BSP is 0) */
			Dprintk("OK.\n");
			printk("CPU%d: ", cpu);
			print_cpu_info(&cpu_data[cpu]);
			Dprintk("CPU has booted.\n");
		} else {
			boot_error = 1;
			if (*((volatile unsigned char *)trampoline_base)
					== 0xA5)
				/* trampoline started but...? */
				printk("Stuck ??\n");
			else
				/* trampoline code not run */
				printk("Not responding.\n");
			inquire_remote_apic(apicid);
		}
	}
	x86_cpu_to_apicid[cpu] = apicid;
	if (boot_error) {
		/* Try to put things back the way they were before ... */
		unmap_cpu_to_logical_apicid(cpu);
		cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
		cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
		cpucount--;
	}

	/* mark "stuck" area as not stuck */
	*((volatile unsigned long *)trampoline_base) = 0;

	return boot_error;
}

static void smp_tune_scheduling (void)
{
	unsigned long cachesize;       /* kB   */
	unsigned long bandwidth = 350; /* MB/s */
	/*
	 * Rough estimation for SMP scheduling, this is the number of
	 * cycles it takes for a fully memory-limited process to flush
	 * the SMP-local cache.
	 *
	 * (For a P5 this pretty much means we will choose another idle
	 *  CPU almost always at wakeup time (this is due to the small
	 *  L1 cache), on PIIs it's around 50-100 usecs, depending on
	 *  the cache size)
	 */

	if (!cpu_khz) {
		/*
		 * this basically disables processor-affinity
		 * scheduling on SMP without a TSC.
		 */
		return;
	} else {
		cachesize = boot_cpu_data.x86_cache_size;
		if (cachesize == -1) {
			cachesize = 16; /* Pentiums, 2x8kB cache */
			bandwidth = 100;
		}
	}
}

/*
 * Cycle through the processors sending APIC IPIs to boot each.
 */

static int boot_cpu_logical_apicid;
/* Where the IO area was mapped on multiquad, always 0 otherwise */
void *xquad_portio;

cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;

static void __init smp_boot_cpus(unsigned int max_cpus)
{
	int apicid, cpu, bit, kicked;
	unsigned long bogosum = 0;

	/*
	 * Setup boot CPU information
	 */
	smp_store_cpu_info(0); /* Final full version of the data */
	printk("CPU%d: ", 0);
	print_cpu_info(&cpu_data[0]);

	boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
	boot_cpu_logical_apicid = logical_smp_processor_id();
	x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;

	current_thread_info()->cpu = 0;
	smp_tune_scheduling();
	cpus_clear(cpu_sibling_map[0]);
	cpu_set(0, cpu_sibling_map[0]);

	/*
	 * If we couldn't find an SMP configuration at boot time,
	 * get out of here now!
	 */
	if (!smp_found_config && !acpi_lapic) {
		printk(KERN_NOTICE "SMP motherboard not detected.\n");
		smpboot_clear_io_apic_irqs();
		phys_cpu_present_map = physid_mask_of_physid(0);
		if (APIC_init_uniprocessor())
			printk(KERN_NOTICE "Local APIC not detected."
					   " Using dummy APIC emulation.\n");
		map_cpu_to_logical_apicid();
		return;
	}

	/*
	 * Should not be necessary because the MP table should list the boot
	 * CPU too, but we do it for the sake of robustness anyway.
	 * Makes no sense to do this check in clustered apic mode, so skip it.
	 */
	if (!check_phys_apicid_present(boot_cpu_physical_apicid)) {
		printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
				boot_cpu_physical_apicid);
		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
	}

	/*
	 * If we couldn't find a local APIC, then get out of here now!
	 */
	if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) {
		printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
			boot_cpu_physical_apicid);
		printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
		smpboot_clear_io_apic_irqs();
		phys_cpu_present_map = physid_mask_of_physid(0);
		return;
	}

	verify_local_APIC();

	/*
	 * If SMP should be disabled, then really disable it!
	 */
	if (!max_cpus) {
		smp_found_config = 0;
		printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
		smpboot_clear_io_apic_irqs();
		phys_cpu_present_map = physid_mask_of_physid(0);
		return;
	}

	connect_bsp_APIC();
	setup_local_APIC();
	map_cpu_to_logical_apicid();

	setup_portio_remap();

	/*
	 * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
	 *
	 * In clustered apic mode, phys_cpu_present_map is constructed thus:
	 * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the
	 * clustered apic ID.
	 */
	Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));

	kicked = 1;
	for (bit = 0; kicked < NR_CPUS && bit < MAX_APICS; bit++) {
		apicid = cpu_present_to_apicid(bit);
		/*
		 * Don't even attempt to start the boot CPU!
		 */
		if ((apicid == boot_cpu_apicid) || (apicid == BAD_APICID))
			continue;

		if (!check_apicid_present(bit))
			continue;
		if (max_cpus <= cpucount+1)
			continue;

		if (do_boot_cpu(apicid))
			printk("CPU #%d not responding - cannot use it.\n",
								apicid);
		else
			++kicked;
	}

	/*
	 * Cleanup possible dangling ends...
	 */
	smpboot_restore_warm_reset_vector();

	/*
	 * Allow the user to impress friends.
	 */
	Dprintk("Before bogomips.\n");
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		if (cpu_isset(cpu, cpu_callout_map))
			bogosum += cpu_data[cpu].loops_per_jiffy;
	printk(KERN_INFO
		"Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
		cpucount+1,
		bogosum/(500000/HZ),
		(bogosum/(5000/HZ))%100);
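	/*
	 * (BogoMIPS = loops_per_jiffy * HZ / 500000; the two expressions
	 * above yield the integer part and two decimal places.)
	 */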

	Dprintk("Before bogocount - setting activated=1.\n");

	if (smp_b_stepping)
		printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");

	/*
	 * Don't taint if we are running SMP kernel on a single non-MP
	 * approved Athlon
	 */
	if (tainted & TAINT_UNSAFE_SMP) {
		if (cpucount)
			printk(KERN_INFO "WARNING: This combination of AMD processors is not suitable for SMP.\n");
		else
			tainted &= ~TAINT_UNSAFE_SMP;
	}

	Dprintk("Boot done.\n");

	/*
	 * construct cpu_sibling_map[], so that we can tell sibling CPUs
	 * efficiently.
	 */
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		cpus_clear(cpu_sibling_map[cpu]);

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		int siblings = 0;
		int i;
		if (!cpu_isset(cpu, cpu_callout_map))
			continue;

		if (smp_num_siblings > 1) {
			for (i = 0; i < NR_CPUS; i++) {
				if (!cpu_isset(i, cpu_callout_map))
					continue;
				if (phys_proc_id[cpu] == phys_proc_id[i]) {
					siblings++;
					cpu_set(i, cpu_sibling_map[cpu]);
				}
			}
		} else {
			siblings++;
			cpu_set(cpu, cpu_sibling_map[cpu]);
		}

		if (siblings != smp_num_siblings)
			printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
	}

	if (nmi_watchdog == NMI_LOCAL_APIC)
		check_nmi_watchdog();

	smpboot_setup_io_apic();

	setup_boot_APIC_clock();

	/*
	 * Synchronize the TSC with the AP
	 */
	if (cpu_has_tsc && cpucount && cpu_khz)
		synchronize_tsc_bp();
}

/* These are wrappers to interface to the new boot process.  Someone
   who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
void __init smp_prepare_cpus(unsigned int max_cpus)
{
	smp_boot_cpus(max_cpus);
}

void __devinit smp_prepare_boot_cpu(void)
{
	cpu_set(smp_processor_id(), cpu_online_map);
	cpu_set(smp_processor_id(), cpu_callout_map);
}

int __devinit __cpu_up(unsigned int cpu)
{
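	/*
	 * Boot handshake recap: the AP set cpu_callin_map in
	 * smp_callin(); we release it via smp_commenced_mask, and it
	 * signals full readiness through cpu_online_map in
	 * start_secondary().
	 */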
	/* This only works at boot for x86.  See "rewrite" above. */
	if (cpu_isset(cpu, smp_commenced_mask)) {
		local_irq_enable();
		return -ENOSYS;
	}

	/* In case one didn't come up */
	if (!cpu_isset(cpu, cpu_callin_map)) {
		local_irq_enable();
		return -EIO;
	}

	local_irq_enable();
	/* Unleash the CPU! */
	cpu_set(cpu, smp_commenced_mask);
	while (!cpu_isset(cpu, cpu_online_map))
		mb();
	return 0;
}

void __init smp_cpus_done(unsigned int max_cpus)
{
#ifdef CONFIG_X86_IO_APIC
	setup_ioapic_dest();
#endif
	zap_low_mappings();
	/*
	 * Disable executability of the SMP trampoline:
	 */
	set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
}

void __init smp_intr_init(void)
{
	/*
	 * IRQ0 must be given a fixed assignment and initialized,
	 * because it's used before the IO-APIC is set up.
	 */
	set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);

	/*
	 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
	 * IPI, driven by wakeup.
	 */
	set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);

	/* IPI for invalidation */
	set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);

	/* IPI for generic function call */
	set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
}