/arch/x86_64/kernel/smp.c

https://bitbucket.org/evzijst/gittest

/*
 *	Intel SMP support routines.
 *
 *	(c) 1995 Alan Cox, Building #3 <alan@redhat.com>
 *	(c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
 *	(c) 2002,2003 Andi Kleen, SuSE Labs.
 *
 *	This code is released under the GNU General Public License version 2 or
 *	later.
 */

#include <linux/init.h>

#include <linux/mm.h>
#include <linux/irq.h>
#include <linux/delay.h>
#include <linux/spinlock.h>
#include <linux/smp_lock.h>
#include <linux/smp.h>
#include <linux/kernel_stat.h>
#include <linux/mc146818rtc.h>
#include <linux/interrupt.h>

#include <asm/mtrr.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/mach_apic.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
/*
 * Smarter SMP flushing macros.
 * c/o Linus Torvalds.
 *
 * These mean you can really definitely utterly forget about
 * writing to user space from interrupts. (It's not allowed anyway.)
 *
 * Optimizations Manfred Spraul <manfred@colorfullife.com>
 */

static cpumask_t flush_cpumask;
static struct mm_struct *flush_mm;
static unsigned long flush_va;
static DEFINE_SPINLOCK(tlbstate_lock);
#define FLUSH_ALL	-1ULL
/*
 * We cannot call mmdrop() because we are in interrupt context,
 * instead update mm->cpu_vm_mask.
 */
static inline void leave_mm(unsigned long cpu)
{
        if (read_pda(mmu_state) == TLBSTATE_OK)
                BUG();
        clear_bit(cpu, &read_pda(active_mm)->cpu_vm_mask);
        load_cr3(swapper_pg_dir);
}
/*
 * The flush IPI assumes that a thread switch happens in this order:
 * [cpu0: the cpu that switches]
 * 1) switch_mm() either 1a) or 1b)
 * 1a) thread switch to a different mm
 *  1a1) clear_bit(cpu, &old_mm->cpu_vm_mask);
 *       Stop IPI delivery for the old mm. This is not synchronized with
 *       the other cpus, but smp_invalidate_interrupt ignores flush IPIs
 *       for the wrong mm, so in the worst case we perform a superfluous
 *       tlb flush.
 *  1a2) set cpu mmu_state to TLBSTATE_OK
 *       Now the smp_invalidate_interrupt won't call leave_mm if cpu0
 *       was in lazy tlb mode.
 *  1a3) update cpu active_mm
 *       Now cpu0 accepts tlb flushes for the new mm.
 *  1a4) set_bit(cpu, &new_mm->cpu_vm_mask);
 *       Now the other cpus will send tlb flush IPIs.
 *  1a5) change cr3.
 * 1b) thread switch without mm change
 *      cpu active_mm is correct, cpu0 already handles
 *      flush IPIs.
 *  1b1) set cpu mmu_state to TLBSTATE_OK
 *  1b2) test_and_set the cpu bit in cpu_vm_mask.
 *       Atomically set the bit [other cpus will start sending flush IPIs],
 *       and test the bit.
 *  1b3) if the bit was 0: leave_mm was called, flush the tlb.
 * 2) switch %esp, i.e. current
 *
 * The interrupt must handle 2 special cases:
 * - cr3 is changed before %esp, i.e. it cannot use current->{active_,}mm.
 * - the cpu performs speculative tlb reads, i.e. even if the cpu only
 *   runs in kernel space, the cpu could load tlb entries for user space
 *   pages.
 *
 * The good news is that cpu mmu_state is local to each cpu, no
 * write/read ordering problems.
 *
 * A sketch of the switch_mm() side of this ordering is shown below.
 */
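
/*
 * Illustrative sketch only, not part of this file: roughly how a
 * switch_mm() implementation would follow the 1a)/1b) ordering described
 * above.  The real implementation lives in the mmu_context header; the
 * function name and the details below are simplified assumptions.
 */
static inline void example_switch_mm_ordering(struct mm_struct *prev,
                                              struct mm_struct *next,
                                              unsigned long cpu)
{
        if (prev != next) {
                /* 1a1) stop flush IPI delivery for the old mm */
                clear_bit(cpu, &prev->cpu_vm_mask);
                /* 1a2) + 1a3) leave lazy tlb mode, accept flushes for next */
                write_pda(mmu_state, TLBSTATE_OK);
                write_pda(active_mm, next);
                /* 1a4) other cpus now start sending flush IPIs for next */
                set_bit(cpu, &next->cpu_vm_mask);
                /* 1a5) switch the page tables */
                load_cr3(next->pgd);
        } else {
                /* 1b1) */
                write_pda(mmu_state, TLBSTATE_OK);
                /*
                 * 1b2) + 1b3): if leave_mm() cleared our bit while we were
                 * lazy, flush IPIs were not delivered, so flush locally.
                 */
                if (!test_and_set_bit(cpu, &next->cpu_vm_mask))
                        local_flush_tlb();
        }
}
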
/*
 * TLB flush IPI:
 *
 * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
 * 2) Leave the mm if we are in the lazy tlb mode.
 */
asmlinkage void smp_invalidate_interrupt(void)
{
        unsigned long cpu;

        cpu = get_cpu();

        if (!cpu_isset(cpu, flush_cpumask))
                goto out;
        /*
         * This was a BUG() but until someone can quote me the
         * line from the Intel manual that guarantees an IPI to
         * multiple CPUs is retried _only_ on the erroring CPUs
         * it's staying as a return
         *
         * BUG();
         */

        if (flush_mm == read_pda(active_mm)) {
                if (read_pda(mmu_state) == TLBSTATE_OK) {
                        if (flush_va == FLUSH_ALL)
                                local_flush_tlb();
                        else
                                __flush_tlb_one(flush_va);
                } else
                        leave_mm(cpu);
        }
        ack_APIC_irq();
        cpu_clear(cpu, flush_cpumask);

out:
        put_cpu_no_resched();
}
static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
                             unsigned long va)
{
        cpumask_t tmp;
        /*
         * A couple of (to be removed) sanity checks:
         *
         * - we do not send IPIs to not-yet booted CPUs.
         * - current CPU must not be in mask
         * - mask must exist :)
         */
        BUG_ON(cpus_empty(cpumask));
        cpus_and(tmp, cpumask, cpu_online_map);
        BUG_ON(!cpus_equal(tmp, cpumask));
        BUG_ON(cpu_isset(smp_processor_id(), cpumask));
        if (!mm)
                BUG();

        /*
         * I'm not happy about this global shared spinlock in the
         * MM hot path, but we'll see how contended it is.
         * Temporarily this turns IRQs off, so that lockups are
         * detected by the NMI watchdog.
         */
        spin_lock(&tlbstate_lock);

        flush_mm = mm;
        flush_va = va;
        cpus_or(flush_cpumask, cpumask, flush_cpumask);

        /*
         * We have to send the IPI only to
         * CPUs affected.
         */
        send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);

        while (!cpus_empty(flush_cpumask))
                mb();   /* nothing. lockup detection does not belong here */

        flush_mm = NULL;
        flush_va = 0;
        spin_unlock(&tlbstate_lock);
}
void flush_tlb_current_task(void)
{
        struct mm_struct *mm = current->mm;
        cpumask_t cpu_mask;

        preempt_disable();
        cpu_mask = mm->cpu_vm_mask;
        cpu_clear(smp_processor_id(), cpu_mask);

        local_flush_tlb();
        if (!cpus_empty(cpu_mask))
                flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
        preempt_enable();
}

void flush_tlb_mm(struct mm_struct *mm)
{
        cpumask_t cpu_mask;

        preempt_disable();
        cpu_mask = mm->cpu_vm_mask;
        cpu_clear(smp_processor_id(), cpu_mask);

        if (current->active_mm == mm) {
                if (current->mm)
                        local_flush_tlb();
                else
                        leave_mm(smp_processor_id());
        }
        if (!cpus_empty(cpu_mask))
                flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
        preempt_enable();
}

void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
{
        struct mm_struct *mm = vma->vm_mm;
        cpumask_t cpu_mask;

        preempt_disable();
        cpu_mask = mm->cpu_vm_mask;
        cpu_clear(smp_processor_id(), cpu_mask);

        if (current->active_mm == mm) {
                if (current->mm)
                        __flush_tlb_one(va);
                else
                        leave_mm(smp_processor_id());
        }
        if (!cpus_empty(cpu_mask))
                flush_tlb_others(cpu_mask, mm, va);
        preempt_enable();
}

static void do_flush_tlb_all(void *info)
{
        unsigned long cpu = smp_processor_id();

        __flush_tlb_all();
        if (read_pda(mmu_state) == TLBSTATE_LAZY)
                leave_mm(cpu);
}

void flush_tlb_all(void)
{
        on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
}
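
/*
 * Illustrative sketch only, not part of this file: on_each_cpu(), as used
 * by flush_tlb_all() above, conventionally wraps smp_call_function() and
 * then runs the function on the local CPU with interrupts disabled.  The
 * helper name and body below are a simplified assumption about that
 * <linux/smp.h> wrapper, not its actual definition.
 */
static inline int example_on_each_cpu(void (*func)(void *info), void *info,
                                      int retry, int wait)
{
        int ret;

        preempt_disable();
        ret = smp_call_function(func, info, retry, wait);  /* other CPUs */
        local_irq_disable();
        func(info);                                        /* this CPU */
        local_irq_enable();
        preempt_enable();
        return ret;
}
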
void smp_kdb_stop(void)
{
        send_IPI_allbutself(KDB_VECTOR);
}

/*
 * this function sends a 'reschedule' IPI to another CPU.
 * it goes straight through and wastes no time serializing
 * anything. Worst case is that we lose a reschedule ...
 */
void smp_send_reschedule(int cpu)
{
        send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
}

/*
 * Structure and data for smp_call_function(). This is designed to minimise
 * static memory requirements. It also looks cleaner.
 */
static DEFINE_SPINLOCK(call_lock);

struct call_data_struct {
        void (*func) (void *info);
        void *info;
        atomic_t started;
        atomic_t finished;
        int wait;
};
static struct call_data_struct *call_data;

/*
 * this function sends a 'generic call function' IPI to all other CPUs
 * in the system.
 */
static void __smp_call_function(void (*func)(void *info), void *info,
                                int nonatomic, int wait)
{
        struct call_data_struct data;
        int cpus = num_online_cpus() - 1;

        if (!cpus)
                return;

        data.func = func;
        data.info = info;
        atomic_set(&data.started, 0);
        data.wait = wait;
        if (wait)
                atomic_set(&data.finished, 0);

        call_data = &data;
        wmb();
        /* Send a message to all other CPUs and wait for them to respond */
        send_IPI_allbutself(CALL_FUNCTION_VECTOR);

        /* Wait for response */
        while (atomic_read(&data.started) != cpus)
                cpu_relax();

        if (!wait)
                return;

        while (atomic_read(&data.finished) != cpus)
                cpu_relax();
}
/*
 * smp_call_function - run a function on all other CPUs.
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @nonatomic: currently unused.
 * @wait: If true, wait (atomically) until function has completed on other
 *        CPUs.
 *
 * Returns 0 on success, else a negative status code. Does not return until
 * remote CPUs are either nearly ready to execute func or have already
 * executed it.
 *
 * You must not call this function with disabled interrupts or from a
 * hardware interrupt handler or from a bottom half handler.
 * Actually there are a few legal cases, like panic.
 */
int smp_call_function(void (*func)(void *info), void *info, int nonatomic,
                      int wait)
{
        spin_lock(&call_lock);
        __smp_call_function(func, info, nonatomic, wait);
        spin_unlock(&call_lock);
        return 0;
}
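
/*
 * Illustrative sketch only, not part of this file: a minimal caller of
 * smp_call_function() as documented above.  The callback and caller names
 * below are hypothetical.  With wait=1 it is safe to pass a pointer to
 * on-stack data, because the initiator does not return until every other
 * CPU has finished running the callback.
 */
static void example_count_cpu(void *info)
{
        /* runs on every other CPU, in interrupt context: keep it short */
        atomic_inc((atomic_t *)info);
}

static void example_call_function_usage(void)
{
        atomic_t seen = ATOMIC_INIT(0);

        /* nonatomic is unused; wait=1 blocks until all callbacks finish */
        smp_call_function(example_count_cpu, &seen, 0, 1);

        printk(KERN_INFO "callback ran on %d other cpus\n",
               atomic_read(&seen));
}
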
void smp_stop_cpu(void)
{
        /*
         * Remove this CPU:
         */
        cpu_clear(smp_processor_id(), cpu_online_map);
        local_irq_disable();
        disable_local_APIC();
        local_irq_enable();
}

static void smp_really_stop_cpu(void *dummy)
{
        smp_stop_cpu();
        for (;;)
                asm("hlt");
}
void smp_send_stop(void)
{
        int nolock = 0;

        if (reboot_force)
                return;

        /* Don't deadlock on the call lock in panic */
        if (!spin_trylock(&call_lock)) {
                /* ignore locking because we have panicked anyway */
                nolock = 1;
        }
        __smp_call_function(smp_really_stop_cpu, NULL, 0, 0);
        if (!nolock)
                spin_unlock(&call_lock);

        local_irq_disable();
        disable_local_APIC();
        local_irq_enable();
}
/*
 * Reschedule call back. Nothing to do,
 * all the work is done automatically when
 * we return from the interrupt.
 */
asmlinkage void smp_reschedule_interrupt(void)
{
        ack_APIC_irq();
}

asmlinkage void smp_call_function_interrupt(void)
{
        void (*func)(void *info) = call_data->func;
        void *info = call_data->info;
        int wait = call_data->wait;

        ack_APIC_irq();
        /*
         * Notify initiating CPU that I've grabbed the data and am
         * about to execute the function
         */
        mb();
        atomic_inc(&call_data->started);
        /*
         * At this point the info structure may be out of scope unless wait==1
         */
        irq_enter();
        (*func)(info);
        irq_exit();

        if (wait) {
                mb();
                atomic_inc(&call_data->finished);
        }
}