/drivers/clocksource/hyperv_timer.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Clocksource driver for the synthetic counter and timers
 * provided by the Hyper-V hypervisor to guest VMs, as described
 * in the Hyper-V Top Level Functional Spec (TLFS). This driver
 * is instruction set architecture independent.
 *
 * Copyright (C) 2019, Microsoft, Inc.
 *
 * Author: Michael Kelley <mikelley@microsoft.com>
 */

#include <linux/percpu.h>
#include <linux/cpumask.h>
#include <linux/clockchips.h>
#include <linux/clocksource.h>
#include <linux/sched_clock.h>
#include <linux/mm.h>
#include <linux/cpuhotplug.h>
#include <clocksource/hyperv_timer.h>
#include <asm/hyperv-tlfs.h>
#include <asm/mshyperv.h>

static struct clock_event_device __percpu *hv_clock_event;
static u64 hv_sched_clock_offset __ro_after_init;

/*
 * If false, we're using the old mechanism for stimer0 interrupts
 * where it sends a VMbus message when it expires. The old
 * mechanism is used when running on older versions of Hyper-V
 * that don't support Direct Mode. While Hyper-V provides
 * four stimers per CPU, Linux uses only stimer0.
 *
 * Because Direct Mode does not require processing a VMbus
 * message, stimer interrupts can be enabled earlier in the
 * process of booting a CPU, and consistent with when timer
 * interrupts are enabled for other clocksource drivers.
 * However, for legacy versions of Hyper-V when Direct Mode
 * is not enabled, setting up stimer interrupts must be
 * delayed until VMbus is initialized and can process the
 * interrupt message.
 */
static bool direct_mode_enabled;

static int stimer0_irq;
static int stimer0_vector;
static int stimer0_message_sint;

/*
 * ISR for when stimer0 is operating in Direct Mode. Direct Mode
 * does not use VMbus or any VMbus messages, so process here and not
 * in the VMbus driver code.
 */
void hv_stimer0_isr(void)
{
        struct clock_event_device *ce;

        ce = this_cpu_ptr(hv_clock_event);
        ce->event_handler(ce);
}
EXPORT_SYMBOL_GPL(hv_stimer0_isr);

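/*
 * Program stimer0 to expire "delta" ticks (100 ns units) from now.
 * The absolute expiration time is the current reference counter value
 * plus delta, written to the stimer0 count register.
 */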
static int hv_ce_set_next_event(unsigned long delta,
                                struct clock_event_device *evt)
{
        u64 current_tick;

        current_tick = hv_read_reference_counter();
        current_tick += delta;
        hv_init_timer(0, current_tick);
        return 0;
}

static int hv_ce_shutdown(struct clock_event_device *evt)
{
        hv_init_timer(0, 0);
        hv_init_timer_config(0, 0);
        if (direct_mode_enabled)
                hv_disable_stimer0_percpu_irq(stimer0_irq);

        return 0;
}

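/*
 * Put stimer0 into one-shot mode. Depending on whether Direct Mode is
 * available, the timer is configured either to interrupt directly on a
 * hardware vector/IRQ or to deliver a message on the VMbus SINT.
 */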
static int hv_ce_set_oneshot(struct clock_event_device *evt)
{
        union hv_stimer_config timer_cfg;

        timer_cfg.as_uint64 = 0;
        timer_cfg.enable = 1;
        timer_cfg.auto_enable = 1;
        if (direct_mode_enabled) {
                /*
                 * When it expires, the timer will directly interrupt
                 * on the specified hardware vector/IRQ.
                 */
                timer_cfg.direct_mode = 1;
                timer_cfg.apic_vector = stimer0_vector;
                hv_enable_stimer0_percpu_irq(stimer0_irq);
        } else {
                /*
                 * When it expires, the timer will generate a VMbus message,
                 * to be handled by the normal VMbus interrupt handler.
                 */
                timer_cfg.direct_mode = 0;
                timer_cfg.sintx = stimer0_message_sint;
        }
        hv_init_timer_config(0, timer_cfg.as_uint64);
        return 0;
}

/*
 * hv_stimer_init - Per-cpu initialization of the clockevent
 */
static int hv_stimer_init(unsigned int cpu)
{
        struct clock_event_device *ce;

        if (!hv_clock_event)
                return 0;

        ce = per_cpu_ptr(hv_clock_event, cpu);
        ce->name = "Hyper-V clockevent";
        ce->features = CLOCK_EVT_FEAT_ONESHOT;
        ce->cpumask = cpumask_of(cpu);
        ce->rating = 1000;
        ce->set_state_shutdown = hv_ce_shutdown;
        ce->set_state_oneshot = hv_ce_set_oneshot;
        ce->set_next_event = hv_ce_set_next_event;

        clockevents_config_and_register(ce,
                                        HV_CLOCK_HZ,
                                        HV_MIN_DELTA_TICKS,
                                        HV_MAX_MAX_DELTA_TICKS);
        return 0;
}

/*
 * hv_stimer_cleanup - Per-cpu cleanup of the clockevent
 */
int hv_stimer_cleanup(unsigned int cpu)
{
        struct clock_event_device *ce;

        if (!hv_clock_event)
                return 0;

        /*
         * In the legacy case where Direct Mode is not enabled
         * (which can only be on x86/64), stimer cleanup happens
         * relatively early in the CPU offlining process. We
         * must unbind the stimer-based clockevent device so
         * that the LAPIC timer can take over until clockevents
         * are no longer needed in the offlining process. Note
         * that clockevents_unbind_device() eventually calls
         * hv_ce_shutdown().
         *
         * The unbind should not be done when Direct Mode is
         * enabled because we may be on an architecture where
         * there are no other clockevent devices to fall back to.
         */
        ce = per_cpu_ptr(hv_clock_event, cpu);
        if (direct_mode_enabled)
                hv_ce_shutdown(ce);
        else
                clockevents_unbind_device(ce, cpu);

        return 0;
}
EXPORT_SYMBOL_GPL(hv_stimer_cleanup);

/* hv_stimer_alloc - Global initialization of the clockevent and stimer0 */
int hv_stimer_alloc(void)
{
        int ret = 0;

        /*
         * Synthetic timers are always available except on old versions of
         * Hyper-V on x86. In that case, return an error as Linux will use a
         * clockevent based on emulated LAPIC timer hardware.
         */
        if (!(ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE))
                return -EINVAL;

        hv_clock_event = alloc_percpu(struct clock_event_device);
        if (!hv_clock_event)
                return -ENOMEM;

        direct_mode_enabled = ms_hyperv.misc_features &
                        HV_STIMER_DIRECT_MODE_AVAILABLE;
        if (direct_mode_enabled) {
                ret = hv_setup_stimer0_irq(&stimer0_irq, &stimer0_vector,
                                hv_stimer0_isr);
                if (ret)
                        goto free_percpu;

                /*
                 * Since we are in Direct Mode, stimer initialization
                 * can be done now with a CPUHP value in the same range
                 * as other clockevent devices.
                 */
                ret = cpuhp_setup_state(CPUHP_AP_HYPERV_TIMER_STARTING,
                                "clockevents/hyperv/stimer:starting",
                                hv_stimer_init, hv_stimer_cleanup);
                if (ret < 0)
                        goto free_stimer0_irq;
        }
        return ret;

free_stimer0_irq:
        hv_remove_stimer0_irq(stimer0_irq);
        stimer0_irq = 0;
free_percpu:
        free_percpu(hv_clock_event);
        hv_clock_event = NULL;
        return ret;
}
EXPORT_SYMBOL_GPL(hv_stimer_alloc);

/*
 * hv_stimer_legacy_init -- Called from the VMbus driver to handle
 * the case when Direct Mode is not enabled, and the stimer
 * must be initialized late in the CPU onlining process.
 */
void hv_stimer_legacy_init(unsigned int cpu, int sint)
{
        if (direct_mode_enabled)
                return;

        /*
         * This function gets called by each vCPU, so setting the
         * global stimer0_message_sint value each time is conceptually
         * not ideal, but the value passed in is always the same and
         * it avoids introducing yet another interface into this
         * clocksource driver just to set the sint in the legacy case.
         */
        stimer0_message_sint = sint;
        (void)hv_stimer_init(cpu);
}
EXPORT_SYMBOL_GPL(hv_stimer_legacy_init);

/*
 * hv_stimer_legacy_cleanup -- Called from the VMbus driver to
 * handle the case when Direct Mode is not enabled, and the
 * stimer must be cleaned up early in the CPU offlining
 * process.
 */
void hv_stimer_legacy_cleanup(unsigned int cpu)
{
        if (direct_mode_enabled)
                return;
        (void)hv_stimer_cleanup(cpu);
}
EXPORT_SYMBOL_GPL(hv_stimer_legacy_cleanup);

/* hv_stimer_free - Free global resources allocated by hv_stimer_alloc() */
void hv_stimer_free(void)
{
        if (!hv_clock_event)
                return;

        if (direct_mode_enabled) {
                cpuhp_remove_state(CPUHP_AP_HYPERV_TIMER_STARTING);
                hv_remove_stimer0_irq(stimer0_irq);
                stimer0_irq = 0;
        }
        free_percpu(hv_clock_event);
        hv_clock_event = NULL;
}
EXPORT_SYMBOL_GPL(hv_stimer_free);

/*
 * Do a global cleanup of clockevents for the cases of kexec and
 * vmbus exit
 */
void hv_stimer_global_cleanup(void)
{
        int cpu;

        /*
         * hv_stimer_legacy_cleanup() will stop the stimer if Direct
         * Mode is not enabled, and fall back to the LAPIC timer.
         */
        for_each_present_cpu(cpu) {
                hv_stimer_legacy_cleanup(cpu);
        }

        /*
         * If Direct Mode is enabled, the cpuhp teardown callback
         * (hv_stimer_cleanup) will be run on all CPUs to stop the
         * stimers.
         */
        hv_stimer_free();
}
EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup);

/*
 * Code and definitions for the Hyper-V clocksources. Two
 * clocksources are defined: one that reads the Hyper-V defined MSR, and
 * the other that uses the TSC reference page feature as defined in the
 * TLFS. The MSR version is for compatibility with old versions of
 * Hyper-V and 32-bit x86. The TSC reference page version is preferred.
 *
 * The Hyper-V clocksource ratings of 250 are chosen to be below the
 * TSC clocksource rating of 300. In configurations where Hyper-V offers
 * an InvariantTSC, the TSC is not marked "unstable", so the TSC clocksource
 * is available and preferred. With the higher rating, it will be the
 * default. On older hardware and Hyper-V versions, the TSC is marked
 * "unstable", so no TSC clocksource is created and the selected Hyper-V
 * clocksource will be the default.
 */

u64 (*hv_read_reference_counter)(void);
EXPORT_SYMBOL_GPL(hv_read_reference_counter);

static union {
        struct ms_hyperv_tsc_page page;
        u8 reserved[PAGE_SIZE];
} tsc_pg __aligned(PAGE_SIZE);

struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
{
        return &tsc_pg.page;
}
EXPORT_SYMBOL_GPL(hv_get_tsc_page);

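/*
 * Read the current time via the TSC reference page. hv_read_tsc_page()
 * returns U64_MAX when the page is not currently valid, in which case
 * we fall back to reading the reference count MSR directly.
 */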
static u64 notrace read_hv_clock_tsc(void)
{
        u64 current_tick = hv_read_tsc_page(hv_get_tsc_page());

        if (current_tick == U64_MAX)
                hv_get_time_ref_count(current_tick);

        return current_tick;
}

static u64 notrace read_hv_clock_tsc_cs(struct clocksource *arg)
{
        return read_hv_clock_tsc();
}

static u64 notrace read_hv_sched_clock_tsc(void)
{
        return (read_hv_clock_tsc() - hv_sched_clock_offset) *
                (NSEC_PER_SEC / HV_CLOCK_HZ);
}

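/*
 * Suspend/resume callbacks for the TSC page clocksource: on suspend,
 * clear the "enable" bit (bit 0) of the reference TSC MSR; on resume,
 * re-enable it with the physical address of the TSC page while
 * preserving the reserved low 12 bits.
 */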
static void suspend_hv_clock_tsc(struct clocksource *arg)
{
        u64 tsc_msr;

        /* Disable the TSC page */
        hv_get_reference_tsc(tsc_msr);
        tsc_msr &= ~BIT_ULL(0);
        hv_set_reference_tsc(tsc_msr);
}

static void resume_hv_clock_tsc(struct clocksource *arg)
{
        phys_addr_t phys_addr = virt_to_phys(&tsc_pg);
        u64 tsc_msr;

        /* Re-enable the TSC page */
        hv_get_reference_tsc(tsc_msr);
        tsc_msr &= GENMASK_ULL(11, 0);
        tsc_msr |= BIT_ULL(0) | (u64)phys_addr;
        hv_set_reference_tsc(tsc_msr);
}

static int hv_cs_enable(struct clocksource *cs)
{
        hv_enable_vdso_clocksource();
        return 0;
}

static struct clocksource hyperv_cs_tsc = {
        .name    = "hyperv_clocksource_tsc_page",
        .rating  = 250,
        .read    = read_hv_clock_tsc_cs,
        .mask    = CLOCKSOURCE_MASK(64),
        .flags   = CLOCK_SOURCE_IS_CONTINUOUS,
        .suspend = suspend_hv_clock_tsc,
        .resume  = resume_hv_clock_tsc,
        .enable  = hv_cs_enable,
};

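/*
 * MSR-based time source, used when the TSC reference page is not
 * available. Reads the partition reference count MSR directly.
 */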
static u64 notrace read_hv_clock_msr(void)
{
        u64 current_tick;

        /*
         * Read the partition counter to get the current tick count. This count
         * is set to 0 when the partition is created and is incremented in
         * 100 nanosecond units.
         */
        hv_get_time_ref_count(current_tick);
        return current_tick;
}

static u64 notrace read_hv_clock_msr_cs(struct clocksource *arg)
{
        return read_hv_clock_msr();
}

static u64 notrace read_hv_sched_clock_msr(void)
{
        return (read_hv_clock_msr() - hv_sched_clock_offset) *
                (NSEC_PER_SEC / HV_CLOCK_HZ);
}

static struct clocksource hyperv_cs_msr = {
        .name   = "hyperv_clocksource_msr",
        .rating = 250,
        .read   = read_hv_clock_msr_cs,
        .mask   = CLOCKSOURCE_MASK(64),
        .flags  = CLOCK_SOURCE_IS_CONTINUOUS,
};

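/*
 * Set up the TSC reference page clocksource. Returns false if the
 * hypervisor does not advertise the reference TSC MSR, so the caller
 * can fall back to the MSR-based clocksource.
 */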
static bool __init hv_init_tsc_clocksource(void)
{
        u64 tsc_msr;
        phys_addr_t phys_addr;

        if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE))
                return false;

        hv_read_reference_counter = read_hv_clock_tsc;
        phys_addr = virt_to_phys(hv_get_tsc_page());

        /*
         * The Hyper-V TLFS specifies to preserve the value of reserved
         * bits in registers. So read the existing value, preserve the
         * low order 12 bits, and add in the guest physical address
         * (which already has at least the low 12 bits set to zero since
         * it is page aligned). Also set the "enable" bit, which is bit 0.
         */
        hv_get_reference_tsc(tsc_msr);
        tsc_msr &= GENMASK_ULL(11, 0);
        tsc_msr = tsc_msr | 0x1 | (u64)phys_addr;
        hv_set_reference_tsc(tsc_msr);

        hv_set_clocksource_vdso(hyperv_cs_tsc);
        clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);

        hv_sched_clock_offset = hv_read_reference_counter();
        hv_setup_sched_clock(read_hv_sched_clock_tsc);

        return true;
}

void __init hv_init_clocksource(void)
{
        /*
         * Try to set up the TSC page clocksource. If it succeeds, we're
         * done. Otherwise, set up the MSR clocksource. At least one of
         * these will always be available except on very old versions of
         * Hyper-V on x86. In that case we won't have a Hyper-V
         * clocksource, but Linux will still run with a clocksource based
         * on the emulated PIT or LAPIC timer.
         */
        if (hv_init_tsc_clocksource())
                return;

        if (!(ms_hyperv.features & HV_MSR_TIME_REF_COUNT_AVAILABLE))
                return;

        hv_read_reference_counter = read_hv_clock_msr;
        clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100);

        hv_sched_clock_offset = hv_read_reference_counter();
        hv_setup_sched_clock(read_hv_sched_clock_msr);
}
EXPORT_SYMBOL_GPL(hv_init_clocksource);