
/arch/x86/kvm/svm.c

https://bitbucket.org/thekraven/iscream_thunderc-2.6.35
C | 3428 lines | 2605 code | 613 blank | 210 comment | 283 complexity | MD5: c032176705d9a327e97ba8363317307d
Possible License(s): GPL-2.0, LGPL-2.0, AGPL-1.0


   1/*
   2 * Kernel-based Virtual Machine driver for Linux
   3 *
   4 * AMD SVM support
   5 *
   6 * Copyright (C) 2006 Qumranet, Inc.
   7 *
   8 * Authors:
   9 *   Yaniv Kamay  <yaniv@qumranet.com>
  10 *   Avi Kivity   <avi@qumranet.com>
  11 *
  12 * This work is licensed under the terms of the GNU GPL, version 2.  See
  13 * the COPYING file in the top-level directory.
  14 *
  15 */
  16#include <linux/kvm_host.h>
  17
  18#include "irq.h"
  19#include "mmu.h"
  20#include "kvm_cache_regs.h"
  21#include "x86.h"
  22
  23#include <linux/module.h>
  24#include <linux/kernel.h>
  25#include <linux/vmalloc.h>
  26#include <linux/highmem.h>
  27#include <linux/sched.h>
  28#include <linux/ftrace_event.h>
  29#include <linux/slab.h>
  30
  31#include <asm/tlbflush.h>
  32#include <asm/desc.h>
  33
  34#include <asm/virtext.h>
  35#include "trace.h"
  36
  37#define __ex(x) __kvm_handle_fault_on_reboot(x)
  38
  39MODULE_AUTHOR("Qumranet");
  40MODULE_LICENSE("GPL");
  41
  42#define IOPM_ALLOC_ORDER 2
  43#define MSRPM_ALLOC_ORDER 1
  44
  45#define SEG_TYPE_LDT 2
  46#define SEG_TYPE_BUSY_TSS16 3
  47
  48#define SVM_FEATURE_NPT            (1 <<  0)
  49#define SVM_FEATURE_LBRV           (1 <<  1)
  50#define SVM_FEATURE_SVML           (1 <<  2)
  51#define SVM_FEATURE_NRIP           (1 <<  3)
  52#define SVM_FEATURE_PAUSE_FILTER   (1 << 10)
  53
  54#define NESTED_EXIT_HOST	0	/* Exit handled on host level */
  55#define NESTED_EXIT_DONE	1	/* Exit caused nested vmexit  */
  56#define NESTED_EXIT_CONTINUE	2	/* Further checks needed      */
  57
  58#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
  59
  60static bool erratum_383_found __read_mostly;
  61
  62static const u32 host_save_user_msrs[] = {
  63#ifdef CONFIG_X86_64
  64	MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
  65	MSR_FS_BASE,
  66#endif
  67	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
  68};
  69
  70#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
  71
  72struct kvm_vcpu;
  73
  74struct nested_state {
  75	struct vmcb *hsave;
  76	u64 hsave_msr;
  77	u64 vm_cr_msr;
  78	u64 vmcb;
  79
  80	/* These are the merged vectors */
  81	u32 *msrpm;
  82
  83	/* gpa pointers to the real vectors */
  84	u64 vmcb_msrpm;
  85	u64 vmcb_iopm;
  86
  87	/* A VMEXIT is required but not yet emulated */
  88	bool exit_required;
  89
  90	/* cache for intercepts of the guest */
  91	u16 intercept_cr_read;
  92	u16 intercept_cr_write;
  93	u16 intercept_dr_read;
  94	u16 intercept_dr_write;
  95	u32 intercept_exceptions;
  96	u64 intercept;
  97
  98};
  99
 100#define MSRPM_OFFSETS	16
 101static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
 102
 103struct vcpu_svm {
 104	struct kvm_vcpu vcpu;
 105	struct vmcb *vmcb;
 106	unsigned long vmcb_pa;
 107	struct svm_cpu_data *svm_data;
 108	uint64_t asid_generation;
 109	uint64_t sysenter_esp;
 110	uint64_t sysenter_eip;
 111
 112	u64 next_rip;
 113
 114	u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
 115	u64 host_gs_base;
 116
 117	u32 *msrpm;
 118
 119	struct nested_state nested;
 120
 121	bool nmi_singlestep;
 122
 123	unsigned int3_injected;
 124	unsigned long int3_rip;
 125};
 126
 127#define MSR_INVALID			0xffffffffU
 128
 129static struct svm_direct_access_msrs {
 130	u32 index;   /* Index of the MSR */
 131	bool always; /* True if intercept is always on */
 132} direct_access_msrs[] = {
 133	{ .index = MSR_K6_STAR,				.always = true  },
 134	{ .index = MSR_IA32_SYSENTER_CS,		.always = true  },
 135#ifdef CONFIG_X86_64
 136	{ .index = MSR_GS_BASE,				.always = true  },
 137	{ .index = MSR_FS_BASE,				.always = true  },
 138	{ .index = MSR_KERNEL_GS_BASE,			.always = true  },
 139	{ .index = MSR_LSTAR,				.always = true  },
 140	{ .index = MSR_CSTAR,				.always = true  },
 141	{ .index = MSR_SYSCALL_MASK,			.always = true  },
 142#endif
 143	{ .index = MSR_IA32_LASTBRANCHFROMIP,		.always = false },
 144	{ .index = MSR_IA32_LASTBRANCHTOIP,		.always = false },
 145	{ .index = MSR_IA32_LASTINTFROMIP,		.always = false },
 146	{ .index = MSR_IA32_LASTINTTOIP,		.always = false },
 147	{ .index = MSR_INVALID,				.always = false },
 148};
 149
 150/* enable NPT for AMD64 and X86 with PAE */
 151#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
 152static bool npt_enabled = true;
 153#else
 154static bool npt_enabled;
 155#endif
 156static int npt = 1;
 157
 158module_param(npt, int, S_IRUGO);
 159
 160static int nested = 1;
 161module_param(nested, int, S_IRUGO);
 162
 163static void svm_flush_tlb(struct kvm_vcpu *vcpu);
 164static void svm_complete_interrupts(struct vcpu_svm *svm);
 165
 166static int nested_svm_exit_handled(struct vcpu_svm *svm);
 167static int nested_svm_intercept(struct vcpu_svm *svm);
 168static int nested_svm_vmexit(struct vcpu_svm *svm);
 169static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
 170				      bool has_error_code, u32 error_code);
 171
 172static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
 173{
 174	return container_of(vcpu, struct vcpu_svm, vcpu);
 175}
 176
 177static inline bool is_nested(struct vcpu_svm *svm)
 178{
 179	return svm->nested.vmcb;
 180}
 181
 182static inline void enable_gif(struct vcpu_svm *svm)
 183{
 184	svm->vcpu.arch.hflags |= HF_GIF_MASK;
 185}
 186
 187static inline void disable_gif(struct vcpu_svm *svm)
 188{
 189	svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
 190}
 191
 192static inline bool gif_set(struct vcpu_svm *svm)
 193{
 194	return !!(svm->vcpu.arch.hflags & HF_GIF_MASK);
 195}
 196
 197static unsigned long iopm_base;
 198
 199struct kvm_ldttss_desc {
 200	u16 limit0;
 201	u16 base0;
 202	unsigned base1:8, type:5, dpl:2, p:1;
 203	unsigned limit1:4, zero0:3, g:1, base2:8;
 204	u32 base3;
 205	u32 zero1;
 206} __attribute__((packed));
 207
 208struct svm_cpu_data {
 209	int cpu;
 210
 211	u64 asid_generation;
 212	u32 max_asid;
 213	u32 next_asid;
 214	struct kvm_ldttss_desc *tss_desc;
 215
 216	struct page *save_area;
 217};
 218
 219static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
 220static uint32_t svm_features;
 221
 222struct svm_init_data {
 223	int cpu;
 224	int r;
 225};
 226
 227static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
 228
 229#define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges)
 230#define MSRS_RANGE_SIZE 2048
 231#define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2)
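/* For reference: 2048 bytes * 8 bits / 2 bits per MSR = 8192 MSRs per range. */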
 232
 233static u32 svm_msrpm_offset(u32 msr)
 234{
 235	u32 offset;
 236	int i;
 237
 238	for (i = 0; i < NUM_MSR_MAPS; i++) {
 239		if (msr < msrpm_ranges[i] ||
 240		    msr >= msrpm_ranges[i] + MSRS_IN_RANGE)
 241			continue;
 242
 243		offset  = (msr - msrpm_ranges[i]) / 4; /* 4 msrs per u8 */
 244		offset += (i * MSRS_RANGE_SIZE);       /* add range offset */
 245
 246		/* Now we have the u8 offset - but need the u32 offset */
 247		return offset / 4;
 248	}
 249
 250	/* MSR not in any range */
 251	return MSR_INVALID;
 252}
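/*
 * Worked example (illustrative): MSR_LSTAR (0xc0000082) falls into the
 * second range, so the byte offset is (0x82 / 4) + 1 * 2048 = 2080 and the
 * returned u32 offset is 2080 / 4 = 520 -- i.e. the two permission bits
 * for MSR_LSTAR live in msrpm[520].
 */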
 253
 254#define MAX_INST_SIZE 15
 255
 256static inline u32 svm_has(u32 feat)
 257{
 258	return svm_features & feat;
 259}
 260
 261static inline void clgi(void)
 262{
 263	asm volatile (__ex(SVM_CLGI));
 264}
 265
 266static inline void stgi(void)
 267{
 268	asm volatile (__ex(SVM_STGI));
 269}
 270
 271static inline void invlpga(unsigned long addr, u32 asid)
 272{
 273	asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid));
 274}
 275
 276static inline void force_new_asid(struct kvm_vcpu *vcpu)
 277{
 278	to_svm(vcpu)->asid_generation--;
 279}
 280
 281static inline void flush_guest_tlb(struct kvm_vcpu *vcpu)
 282{
 283	force_new_asid(vcpu);
 284}
 285
 286static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
 287{
 288	if (!npt_enabled && !(efer & EFER_LMA))
 289		efer &= ~EFER_LME;
 290
 291	to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
 292	vcpu->arch.efer = efer;
 293}
 294
 295static int is_external_interrupt(u32 info)
 296{
 297	info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
 298	return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
 299}
 300
 301static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
 302{
 303	struct vcpu_svm *svm = to_svm(vcpu);
 304	u32 ret = 0;
 305
 306	if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
 307		ret |= KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS;
 308	return ret & mask;
 309}
 310
 311static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
 312{
 313	struct vcpu_svm *svm = to_svm(vcpu);
 314
 315	if (mask == 0)
 316		svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
 317	else
 318		svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
 319
 320}
 321
 322static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 323{
 324	struct vcpu_svm *svm = to_svm(vcpu);
 325
 326	if (svm->vmcb->control.next_rip != 0)
 327		svm->next_rip = svm->vmcb->control.next_rip;
 328
 329	if (!svm->next_rip) {
 330		if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) !=
 331				EMULATE_DONE)
 332			printk(KERN_DEBUG "%s: NOP\n", __func__);
 333		return;
 334	}
 335	if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
 336		printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n",
 337		       __func__, kvm_rip_read(vcpu), svm->next_rip);
 338
 339	kvm_rip_write(vcpu, svm->next_rip);
 340	svm_set_interrupt_shadow(vcpu, 0);
 341}
 342
 343static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
 344				bool has_error_code, u32 error_code,
 345				bool reinject)
 346{
 347	struct vcpu_svm *svm = to_svm(vcpu);
 348
 349	/*
 350	 * If we are within a nested VM we'd better #VMEXIT and let the guest
 351	 * handle the exception
 352	 */
 353	if (!reinject &&
 354	    nested_svm_check_exception(svm, nr, has_error_code, error_code))
 355		return;
 356
 357	if (nr == BP_VECTOR && !svm_has(SVM_FEATURE_NRIP)) {
 358		unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);
 359
 360		/*
 361		 * For guest debugging where we have to reinject #BP if some
 362		 * INT3 is guest-owned:
 363		 * Emulate nRIP by moving RIP forward. Will fail if injection
 364		 * raises a fault that is not intercepted. Still better than
 365		 * failing in all cases.
 366		 */
 367		skip_emulated_instruction(&svm->vcpu);
 368		rip = kvm_rip_read(&svm->vcpu);
 369		svm->int3_rip = rip + svm->vmcb->save.cs.base;
 370		svm->int3_injected = rip - old_rip;
 371	}
 372
 373	svm->vmcb->control.event_inj = nr
 374		| SVM_EVTINJ_VALID
 375		| (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
 376		| SVM_EVTINJ_TYPE_EXEPT;
 377	svm->vmcb->control.event_inj_err = error_code;
 378}
 379
 380static void svm_init_erratum_383(void)
 381{
 382	u32 low, high;
 383	int err;
 384	u64 val;
 385
 386	/* Only Fam10h is affected */
 387	if (boot_cpu_data.x86 != 0x10)
 388		return;
 389
 390	/* Use _safe variants to not break nested virtualization */
 391	val = native_read_msr_safe(MSR_AMD64_DC_CFG, &err);
 392	if (err)
 393		return;
 394
 395	val |= (1ULL << 47);
 396
 397	low  = lower_32_bits(val);
 398	high = upper_32_bits(val);
 399
 400	native_write_msr_safe(MSR_AMD64_DC_CFG, low, high);
 401
 402	erratum_383_found = true;
 403}
 404
 405static int has_svm(void)
 406{
 407	const char *msg;
 408
 409	if (!cpu_has_svm(&msg)) {
 410		printk(KERN_INFO "has_svm: %s\n", msg);
 411		return 0;
 412	}
 413
 414	return 1;
 415}
 416
 417static void svm_hardware_disable(void *garbage)
 418{
 419	cpu_svm_disable();
 420}
 421
 422static int svm_hardware_enable(void *garbage)
 423{
 424
 425	struct svm_cpu_data *sd;
 426	uint64_t efer;
 427	struct desc_ptr gdt_descr;
 428	struct desc_struct *gdt;
 429	int me = raw_smp_processor_id();
 430
 431	rdmsrl(MSR_EFER, efer);
 432	if (efer & EFER_SVME)
 433		return -EBUSY;
 434
 435	if (!has_svm()) {
 436		printk(KERN_ERR "svm_hardware_enable: err EOPNOTSUPP on %d\n",
 437		       me);
 438		return -EINVAL;
 439	}
 440	sd = per_cpu(svm_data, me);
 441
 442	if (!sd) {
 443		printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n",
 444		       me);
 445		return -EINVAL;
 446	}
 447
 448	sd->asid_generation = 1;
 449	sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
 450	sd->next_asid = sd->max_asid + 1;
 451
 452	native_store_gdt(&gdt_descr);
 453	gdt = (struct desc_struct *)gdt_descr.address;
 454	sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
 455
 456	wrmsrl(MSR_EFER, efer | EFER_SVME);
 457
 458	wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
 459
 460	svm_init_erratum_383();
 461
 462	return 0;
 463}
 464
 465static void svm_cpu_uninit(int cpu)
 466{
 467	struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id());
 468
 469	if (!sd)
 470		return;
 471
 472	per_cpu(svm_data, raw_smp_processor_id()) = NULL;
 473	__free_page(sd->save_area);
 474	kfree(sd);
 475}
 476
 477static int svm_cpu_init(int cpu)
 478{
 479	struct svm_cpu_data *sd;
 480	int r;
 481
 482	sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
 483	if (!sd)
 484		return -ENOMEM;
 485	sd->cpu = cpu;
 486	sd->save_area = alloc_page(GFP_KERNEL);
 487	r = -ENOMEM;
 488	if (!sd->save_area)
 489		goto err_1;
 490
 491	per_cpu(svm_data, cpu) = sd;
 492
 493	return 0;
 494
 495err_1:
 496	kfree(sd);
 497	return r;
 498
 499}
 500
 501static bool valid_msr_intercept(u32 index)
 502{
 503	int i;
 504
 505	for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++)
 506		if (direct_access_msrs[i].index == index)
 507			return true;
 508
 509	return false;
 510}
 511
 512static void set_msr_interception(u32 *msrpm, unsigned msr,
 513				 int read, int write)
 514{
 515	u8 bit_read, bit_write;
 516	unsigned long tmp;
 517	u32 offset;
 518
 519	/*
 520	 * If this warning triggers extend the direct_access_msrs list at the
 521	 * beginning of the file
 522	 */
 523	WARN_ON(!valid_msr_intercept(msr));
 524
 525	offset    = svm_msrpm_offset(msr);
 526	bit_read  = 2 * (msr & 0x0f);
 527	bit_write = 2 * (msr & 0x0f) + 1;
  528	BUG_ON(offset == MSR_INVALID);
  529
  530	tmp       = msrpm[offset];
 531
 532	read  ? clear_bit(bit_read,  &tmp) : set_bit(bit_read,  &tmp);
 533	write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp);
 534
 535	msrpm[offset] = tmp;
 536}
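/*
 * The MSRPM uses two adjacent bits per MSR: the even bit controls reads,
 * the odd bit writes, and a cleared bit means "do not intercept".  So, for
 * example, set_msr_interception(msrpm, MSR_IA32_SYSENTER_CS, 1, 1) clears
 * both bits and lets the guest access that MSR directly, while passing
 * 0/0 sets both bits and forces a #VMEXIT on every access.
 */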
 537
 538static void svm_vcpu_init_msrpm(u32 *msrpm)
 539{
 540	int i;
 541
 542	memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));
 543
 544	for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
 545		if (!direct_access_msrs[i].always)
 546			continue;
 547
 548		set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1);
 549	}
 550}
 551
 552static void add_msr_offset(u32 offset)
 553{
 554	int i;
 555
 556	for (i = 0; i < MSRPM_OFFSETS; ++i) {
 557
 558		/* Offset already in list? */
 559		if (msrpm_offsets[i] == offset)
 560			return;
 561
 562		/* Slot used by another offset? */
 563		if (msrpm_offsets[i] != MSR_INVALID)
 564			continue;
 565
 566		/* Add offset to list */
 567		msrpm_offsets[i] = offset;
 568
 569		return;
 570	}
 571
 572	/*
 573	 * If this BUG triggers the msrpm_offsets table has an overflow. Just
 574	 * increase MSRPM_OFFSETS in this case.
 575	 */
 576	BUG();
 577}
 578
 579static void init_msrpm_offsets(void)
 580{
 581	int i;
 582
 583	memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets));
 584
 585	for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
 586		u32 offset;
 587
 588		offset = svm_msrpm_offset(direct_access_msrs[i].index);
 589		BUG_ON(offset == MSR_INVALID);
 590
 591		add_msr_offset(offset);
 592	}
 593}
 594
 595static void svm_enable_lbrv(struct vcpu_svm *svm)
 596{
 597	u32 *msrpm = svm->msrpm;
 598
 599	svm->vmcb->control.lbr_ctl = 1;
 600	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
 601	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
 602	set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
 603	set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
 604}
 605
 606static void svm_disable_lbrv(struct vcpu_svm *svm)
 607{
 608	u32 *msrpm = svm->msrpm;
 609
 610	svm->vmcb->control.lbr_ctl = 0;
 611	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
 612	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
 613	set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
 614	set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
 615}
 616
 617static __init int svm_hardware_setup(void)
 618{
 619	int cpu;
 620	struct page *iopm_pages;
 621	void *iopm_va;
 622	int r;
 623
 624	iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER);
 625
 626	if (!iopm_pages)
 627		return -ENOMEM;
 628
 629	iopm_va = page_address(iopm_pages);
 630	memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER));
 631	iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
 632
 633	init_msrpm_offsets();
 634
 635	if (boot_cpu_has(X86_FEATURE_NX))
 636		kvm_enable_efer_bits(EFER_NX);
 637
 638	if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
 639		kvm_enable_efer_bits(EFER_FFXSR);
 640
 641	if (nested) {
 642		printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
 643		kvm_enable_efer_bits(EFER_SVME);
 644	}
 645
 646	for_each_possible_cpu(cpu) {
 647		r = svm_cpu_init(cpu);
 648		if (r)
 649			goto err;
 650	}
 651
 652	svm_features = cpuid_edx(SVM_CPUID_FUNC);
 653
 654	if (!svm_has(SVM_FEATURE_NPT))
 655		npt_enabled = false;
 656
 657	if (npt_enabled && !npt) {
 658		printk(KERN_INFO "kvm: Nested Paging disabled\n");
 659		npt_enabled = false;
 660	}
 661
 662	if (npt_enabled) {
 663		printk(KERN_INFO "kvm: Nested Paging enabled\n");
 664		kvm_enable_tdp();
 665	} else
 666		kvm_disable_tdp();
 667
 668	return 0;
 669
 670err:
 671	__free_pages(iopm_pages, IOPM_ALLOC_ORDER);
 672	iopm_base = 0;
 673	return r;
 674}
 675
 676static __exit void svm_hardware_unsetup(void)
 677{
 678	int cpu;
 679
 680	for_each_possible_cpu(cpu)
 681		svm_cpu_uninit(cpu);
 682
 683	__free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
 684	iopm_base = 0;
 685}
 686
 687static void init_seg(struct vmcb_seg *seg)
 688{
 689	seg->selector = 0;
 690	seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK |
 691		      SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */
 692	seg->limit = 0xffff;
 693	seg->base = 0;
 694}
 695
 696static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
 697{
 698	seg->selector = 0;
 699	seg->attrib = SVM_SELECTOR_P_MASK | type;
 700	seg->limit = 0xffff;
 701	seg->base = 0;
 702}
 703
 704static void init_vmcb(struct vcpu_svm *svm)
 705{
 706	struct vmcb_control_area *control = &svm->vmcb->control;
 707	struct vmcb_save_area *save = &svm->vmcb->save;
 708
 709	svm->vcpu.fpu_active = 1;
 710
 711	control->intercept_cr_read =	INTERCEPT_CR0_MASK |
 712					INTERCEPT_CR3_MASK |
 713					INTERCEPT_CR4_MASK;
 714
 715	control->intercept_cr_write =	INTERCEPT_CR0_MASK |
 716					INTERCEPT_CR3_MASK |
 717					INTERCEPT_CR4_MASK |
 718					INTERCEPT_CR8_MASK;
 719
 720	control->intercept_dr_read =	INTERCEPT_DR0_MASK |
 721					INTERCEPT_DR1_MASK |
 722					INTERCEPT_DR2_MASK |
 723					INTERCEPT_DR3_MASK |
 724					INTERCEPT_DR4_MASK |
 725					INTERCEPT_DR5_MASK |
 726					INTERCEPT_DR6_MASK |
 727					INTERCEPT_DR7_MASK;
 728
 729	control->intercept_dr_write =	INTERCEPT_DR0_MASK |
 730					INTERCEPT_DR1_MASK |
 731					INTERCEPT_DR2_MASK |
 732					INTERCEPT_DR3_MASK |
 733					INTERCEPT_DR4_MASK |
 734					INTERCEPT_DR5_MASK |
 735					INTERCEPT_DR6_MASK |
 736					INTERCEPT_DR7_MASK;
 737
 738	control->intercept_exceptions = (1 << PF_VECTOR) |
 739					(1 << UD_VECTOR) |
 740					(1 << MC_VECTOR);
 741
 742
 743	control->intercept =	(1ULL << INTERCEPT_INTR) |
 744				(1ULL << INTERCEPT_NMI) |
 745				(1ULL << INTERCEPT_SMI) |
 746				(1ULL << INTERCEPT_SELECTIVE_CR0) |
 747				(1ULL << INTERCEPT_CPUID) |
 748				(1ULL << INTERCEPT_INVD) |
 749				(1ULL << INTERCEPT_HLT) |
 750				(1ULL << INTERCEPT_INVLPG) |
 751				(1ULL << INTERCEPT_INVLPGA) |
 752				(1ULL << INTERCEPT_IOIO_PROT) |
 753				(1ULL << INTERCEPT_MSR_PROT) |
 754				(1ULL << INTERCEPT_TASK_SWITCH) |
 755				(1ULL << INTERCEPT_SHUTDOWN) |
 756				(1ULL << INTERCEPT_VMRUN) |
 757				(1ULL << INTERCEPT_VMMCALL) |
 758				(1ULL << INTERCEPT_VMLOAD) |
 759				(1ULL << INTERCEPT_VMSAVE) |
 760				(1ULL << INTERCEPT_STGI) |
 761				(1ULL << INTERCEPT_CLGI) |
 762				(1ULL << INTERCEPT_SKINIT) |
 763				(1ULL << INTERCEPT_WBINVD) |
 764				(1ULL << INTERCEPT_MONITOR) |
 765				(1ULL << INTERCEPT_MWAIT);
 766
 767	control->iopm_base_pa = iopm_base;
 768	control->msrpm_base_pa = __pa(svm->msrpm);
 769	control->tsc_offset = 0;
 770	control->int_ctl = V_INTR_MASKING_MASK;
 771
 772	init_seg(&save->es);
 773	init_seg(&save->ss);
 774	init_seg(&save->ds);
 775	init_seg(&save->fs);
 776	init_seg(&save->gs);
 777
 778	save->cs.selector = 0xf000;
 779	/* Executable/Readable Code Segment */
 780	save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK |
 781		SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK;
 782	save->cs.limit = 0xffff;
 783	/*
 784	 * cs.base should really be 0xffff0000, but vmx can't handle that, so
 785	 * be consistent with it.
 786	 *
 787	 * Replace when we have real mode working for vmx.
 788	 */
 789	save->cs.base = 0xf0000;
 790
 791	save->gdtr.limit = 0xffff;
 792	save->idtr.limit = 0xffff;
 793
 794	init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
 795	init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
 796
 797	save->efer = EFER_SVME;
 798	save->dr6 = 0xffff0ff0;
 799	save->dr7 = 0x400;
 800	save->rflags = 2;
 801	save->rip = 0x0000fff0;
 802	svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
 803
 804	/*
 805	 * This is the guest-visible cr0 value.
 806	 * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
 807	 */
 808	svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
 809	kvm_set_cr0(&svm->vcpu, svm->vcpu.arch.cr0);
 810
 811	save->cr4 = X86_CR4_PAE;
 812	/* rdx = ?? */
 813
 814	if (npt_enabled) {
 815		/* Setup VMCB for Nested Paging */
 816		control->nested_ctl = 1;
 817		control->intercept &= ~((1ULL << INTERCEPT_TASK_SWITCH) |
 818					(1ULL << INTERCEPT_INVLPG));
 819		control->intercept_exceptions &= ~(1 << PF_VECTOR);
 820		control->intercept_cr_read &= ~INTERCEPT_CR3_MASK;
 821		control->intercept_cr_write &= ~INTERCEPT_CR3_MASK;
 822		save->g_pat = 0x0007040600070406ULL;
 823		save->cr3 = 0;
 824		save->cr4 = 0;
 825	}
 826	force_new_asid(&svm->vcpu);
 827
 828	svm->nested.vmcb = 0;
 829	svm->vcpu.arch.hflags = 0;
 830
 831	if (svm_has(SVM_FEATURE_PAUSE_FILTER)) {
 832		control->pause_filter_count = 3000;
 833		control->intercept |= (1ULL << INTERCEPT_PAUSE);
 834	}
 835
 836	enable_gif(svm);
 837}
 838
 839static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
 840{
 841	struct vcpu_svm *svm = to_svm(vcpu);
 842
 843	init_vmcb(svm);
 844
 845	if (!kvm_vcpu_is_bsp(vcpu)) {
 846		kvm_rip_write(vcpu, 0);
 847		svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12;
 848		svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8;
 849	}
 850	vcpu->arch.regs_avail = ~0;
 851	vcpu->arch.regs_dirty = ~0;
 852
 853	return 0;
 854}
 855
 856static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 857{
 858	struct vcpu_svm *svm;
 859	struct page *page;
 860	struct page *msrpm_pages;
 861	struct page *hsave_page;
 862	struct page *nested_msrpm_pages;
 863	int err;
 864
 865	svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
 866	if (!svm) {
 867		err = -ENOMEM;
 868		goto out;
 869	}
 870
 871	err = kvm_vcpu_init(&svm->vcpu, kvm, id);
 872	if (err)
 873		goto free_svm;
 874
 875	err = -ENOMEM;
 876	page = alloc_page(GFP_KERNEL);
 877	if (!page)
 878		goto uninit;
 879
 880	msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
 881	if (!msrpm_pages)
 882		goto free_page1;
 883
 884	nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
 885	if (!nested_msrpm_pages)
 886		goto free_page2;
 887
 888	hsave_page = alloc_page(GFP_KERNEL);
 889	if (!hsave_page)
 890		goto free_page3;
 891
 892	svm->nested.hsave = page_address(hsave_page);
 893
 894	svm->msrpm = page_address(msrpm_pages);
 895	svm_vcpu_init_msrpm(svm->msrpm);
 896
 897	svm->nested.msrpm = page_address(nested_msrpm_pages);
 898	svm_vcpu_init_msrpm(svm->nested.msrpm);
 899
 900	svm->vmcb = page_address(page);
 901	clear_page(svm->vmcb);
 902	svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
 903	svm->asid_generation = 0;
 904	init_vmcb(svm);
 905
 906	fx_init(&svm->vcpu);
 907	svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
 908	if (kvm_vcpu_is_bsp(&svm->vcpu))
 909		svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
 910
 911	return &svm->vcpu;
 912
 913free_page3:
 914	__free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
 915free_page2:
 916	__free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
 917free_page1:
 918	__free_page(page);
 919uninit:
 920	kvm_vcpu_uninit(&svm->vcpu);
 921free_svm:
 922	kmem_cache_free(kvm_vcpu_cache, svm);
 923out:
 924	return ERR_PTR(err);
 925}
 926
 927static void svm_free_vcpu(struct kvm_vcpu *vcpu)
 928{
 929	struct vcpu_svm *svm = to_svm(vcpu);
 930
 931	__free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
 932	__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
 933	__free_page(virt_to_page(svm->nested.hsave));
 934	__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
 935	kvm_vcpu_uninit(vcpu);
 936	kmem_cache_free(kvm_vcpu_cache, svm);
 937}
 938
 939static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 940{
 941	struct vcpu_svm *svm = to_svm(vcpu);
 942	int i;
 943
 944	if (unlikely(cpu != vcpu->cpu)) {
 945		u64 delta;
 946
 947		if (check_tsc_unstable()) {
 948			/*
 949			 * Make sure that the guest sees a monotonically
 950			 * increasing TSC.
 951			 */
 952			delta = vcpu->arch.host_tsc - native_read_tsc();
 953			svm->vmcb->control.tsc_offset += delta;
 954			if (is_nested(svm))
 955				svm->nested.hsave->control.tsc_offset += delta;
 956		}
 957		vcpu->cpu = cpu;
 958		kvm_migrate_timers(vcpu);
 959		svm->asid_generation = 0;
 960	}
 961
 962	for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
 963		rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
 964}
 965
 966static void svm_vcpu_put(struct kvm_vcpu *vcpu)
 967{
 968	struct vcpu_svm *svm = to_svm(vcpu);
 969	int i;
 970
 971	++vcpu->stat.host_state_reload;
 972	for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
 973		wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
 974
 975	vcpu->arch.host_tsc = native_read_tsc();
 976}
 977
 978static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
 979{
 980	return to_svm(vcpu)->vmcb->save.rflags;
 981}
 982
 983static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 984{
 985	to_svm(vcpu)->vmcb->save.rflags = rflags;
 986}
 987
 988static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
 989{
 990	switch (reg) {
 991	case VCPU_EXREG_PDPTR:
 992		BUG_ON(!npt_enabled);
 993		load_pdptrs(vcpu, vcpu->arch.cr3);
 994		break;
 995	default:
 996		BUG();
 997	}
 998}
 999
1000static void svm_set_vintr(struct vcpu_svm *svm)
1001{
1002	svm->vmcb->control.intercept |= 1ULL << INTERCEPT_VINTR;
1003}
1004
1005static void svm_clear_vintr(struct vcpu_svm *svm)
1006{
1007	svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR);
1008}
1009
1010static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
1011{
1012	struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
1013
1014	switch (seg) {
1015	case VCPU_SREG_CS: return &save->cs;
1016	case VCPU_SREG_DS: return &save->ds;
1017	case VCPU_SREG_ES: return &save->es;
1018	case VCPU_SREG_FS: return &save->fs;
1019	case VCPU_SREG_GS: return &save->gs;
1020	case VCPU_SREG_SS: return &save->ss;
1021	case VCPU_SREG_TR: return &save->tr;
1022	case VCPU_SREG_LDTR: return &save->ldtr;
1023	}
1024	BUG();
1025	return NULL;
1026}
1027
1028static u64 svm_get_segment_base(struct kvm_vcpu *vcpu, int seg)
1029{
1030	struct vmcb_seg *s = svm_seg(vcpu, seg);
1031
1032	return s->base;
1033}
1034
1035static void svm_get_segment(struct kvm_vcpu *vcpu,
1036			    struct kvm_segment *var, int seg)
1037{
1038	struct vmcb_seg *s = svm_seg(vcpu, seg);
1039
1040	var->base = s->base;
1041	var->limit = s->limit;
1042	var->selector = s->selector;
1043	var->type = s->attrib & SVM_SELECTOR_TYPE_MASK;
1044	var->s = (s->attrib >> SVM_SELECTOR_S_SHIFT) & 1;
1045	var->dpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3;
1046	var->present = (s->attrib >> SVM_SELECTOR_P_SHIFT) & 1;
1047	var->avl = (s->attrib >> SVM_SELECTOR_AVL_SHIFT) & 1;
1048	var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
1049	var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
1050	var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1;
1051
1052	/*
1053	 * AMD's VMCB does not have an explicit unusable field, so emulate it
1054	 * for cross vendor migration purposes by "not present"
1055	 */
1056	var->unusable = !var->present || (var->type == 0);
1057
1058	switch (seg) {
1059	case VCPU_SREG_CS:
1060		/*
1061		 * SVM always stores 0 for the 'G' bit in the CS selector in
1062		 * the VMCB on a VMEXIT. This hurts cross-vendor migration:
1063		 * Intel's VMENTRY has a check on the 'G' bit.
1064		 */
1065		var->g = s->limit > 0xfffff;
1066		break;
1067	case VCPU_SREG_TR:
1068		/*
1069		 * Work around a bug where the busy flag in the tr selector
1070		 * isn't exposed
1071		 */
1072		var->type |= 0x2;
1073		break;
1074	case VCPU_SREG_DS:
1075	case VCPU_SREG_ES:
1076	case VCPU_SREG_FS:
1077	case VCPU_SREG_GS:
1078		/*
1079		 * The accessed bit must always be set in the segment
1080		 * descriptor cache, although it can be cleared in the
1081		 * descriptor, the cached bit always remains at 1. Since
1082		 * Intel has a check on this, set it here to support
1083		 * cross-vendor migration.
1084		 */
1085		if (!var->unusable)
1086			var->type |= 0x1;
1087		break;
1088	case VCPU_SREG_SS:
1089		/*
1090		 * On AMD CPUs sometimes the DB bit in the segment
1091		 * descriptor is left as 1, although the whole segment has
1092		 * been made unusable. Clear it here to pass an Intel VMX
1093		 * entry check when cross vendor migrating.
1094		 */
1095		if (var->unusable)
1096			var->db = 0;
1097		break;
1098	}
1099}
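/*
 * Reading aid (assumed from the SVM_SELECTOR_* shift macros used above):
 * the 12-bit VMCB attribute layout is
 *   bits 0-3 type, bit 4 S, bits 5-6 DPL, bit 7 P,
 *   bit 8 AVL, bit 9 L, bit 10 DB, bit 11 G.
 */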
1100
1101static int svm_get_cpl(struct kvm_vcpu *vcpu)
1102{
1103	struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
1104
1105	return save->cpl;
1106}
1107
1108static void svm_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1109{
1110	struct vcpu_svm *svm = to_svm(vcpu);
1111
1112	dt->size = svm->vmcb->save.idtr.limit;
1113	dt->address = svm->vmcb->save.idtr.base;
1114}
1115
1116static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1117{
1118	struct vcpu_svm *svm = to_svm(vcpu);
1119
1120	svm->vmcb->save.idtr.limit = dt->size;
 1121	svm->vmcb->save.idtr.base = dt->address;
1122}
1123
1124static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1125{
1126	struct vcpu_svm *svm = to_svm(vcpu);
1127
1128	dt->size = svm->vmcb->save.gdtr.limit;
1129	dt->address = svm->vmcb->save.gdtr.base;
1130}
1131
1132static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1133{
1134	struct vcpu_svm *svm = to_svm(vcpu);
1135
1136	svm->vmcb->save.gdtr.limit = dt->size;
 1137	svm->vmcb->save.gdtr.base = dt->address;
1138}
1139
1140static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
1141{
1142}
1143
1144static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
1145{
1146}
1147
1148static void update_cr0_intercept(struct vcpu_svm *svm)
1149{
1150	struct vmcb *vmcb = svm->vmcb;
1151	ulong gcr0 = svm->vcpu.arch.cr0;
1152	u64 *hcr0 = &svm->vmcb->save.cr0;
1153
1154	if (!svm->vcpu.fpu_active)
1155		*hcr0 |= SVM_CR0_SELECTIVE_MASK;
1156	else
1157		*hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
1158			| (gcr0 & SVM_CR0_SELECTIVE_MASK);
1159
1160
1161	if (gcr0 == *hcr0 && svm->vcpu.fpu_active) {
1162		vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK;
1163		vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK;
1164		if (is_nested(svm)) {
1165			struct vmcb *hsave = svm->nested.hsave;
1166
1167			hsave->control.intercept_cr_read  &= ~INTERCEPT_CR0_MASK;
1168			hsave->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK;
1169			vmcb->control.intercept_cr_read  |= svm->nested.intercept_cr_read;
1170			vmcb->control.intercept_cr_write |= svm->nested.intercept_cr_write;
1171		}
1172	} else {
1173		svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK;
1174		svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR0_MASK;
1175		if (is_nested(svm)) {
1176			struct vmcb *hsave = svm->nested.hsave;
1177
1178			hsave->control.intercept_cr_read |= INTERCEPT_CR0_MASK;
1179			hsave->control.intercept_cr_write |= INTERCEPT_CR0_MASK;
1180		}
1181	}
1182}
1183
1184static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1185{
1186	struct vcpu_svm *svm = to_svm(vcpu);
1187
1188	if (is_nested(svm)) {
1189		/*
1190		 * We are here because we run in nested mode, the host kvm
1191		 * intercepts cr0 writes but the l1 hypervisor does not.
1192		 * But the L1 hypervisor may intercept selective cr0 writes.
1193		 * This needs to be checked here.
1194		 */
1195		unsigned long old, new;
1196
1197		/* Remove bits that would trigger a real cr0 write intercept */
1198		old = vcpu->arch.cr0 & SVM_CR0_SELECTIVE_MASK;
1199		new = cr0 & SVM_CR0_SELECTIVE_MASK;
1200
1201		if (old == new) {
1202			/* cr0 write with ts and mp unchanged */
1203			svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
1204			if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE)
1205				return;
1206		}
1207	}
1208
1209#ifdef CONFIG_X86_64
1210	if (vcpu->arch.efer & EFER_LME) {
1211		if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
1212			vcpu->arch.efer |= EFER_LMA;
1213			svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
1214		}
1215
1216		if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
1217			vcpu->arch.efer &= ~EFER_LMA;
1218			svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
1219		}
1220	}
1221#endif
1222	vcpu->arch.cr0 = cr0;
1223
1224	if (!npt_enabled)
1225		cr0 |= X86_CR0_PG | X86_CR0_WP;
1226
1227	if (!vcpu->fpu_active)
1228		cr0 |= X86_CR0_TS;
1229	/*
1230	 * re-enable caching here because the QEMU bios
1231	 * does not do it - this results in some delay at
1232	 * reboot
1233	 */
1234	cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
1235	svm->vmcb->save.cr0 = cr0;
1236	update_cr0_intercept(svm);
1237}
1238
1239static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1240{
1241	unsigned long host_cr4_mce = read_cr4() & X86_CR4_MCE;
1242	unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4;
1243
1244	if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
1245		force_new_asid(vcpu);
1246
1247	vcpu->arch.cr4 = cr4;
1248	if (!npt_enabled)
1249		cr4 |= X86_CR4_PAE;
1250	cr4 |= host_cr4_mce;
1251	to_svm(vcpu)->vmcb->save.cr4 = cr4;
1252}
1253
1254static void svm_set_segment(struct kvm_vcpu *vcpu,
1255			    struct kvm_segment *var, int seg)
1256{
1257	struct vcpu_svm *svm = to_svm(vcpu);
1258	struct vmcb_seg *s = svm_seg(vcpu, seg);
1259
1260	s->base = var->base;
1261	s->limit = var->limit;
1262	s->selector = var->selector;
1263	if (var->unusable)
1264		s->attrib = 0;
1265	else {
1266		s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK);
1267		s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT;
1268		s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT;
1269		s->attrib |= (var->present & 1) << SVM_SELECTOR_P_SHIFT;
1270		s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT;
1271		s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT;
1272		s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT;
1273		s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
1274	}
1275	if (seg == VCPU_SREG_CS)
1276		svm->vmcb->save.cpl
1277			= (svm->vmcb->save.cs.attrib
1278			   >> SVM_SELECTOR_DPL_SHIFT) & 3;
1279
1280}
1281
1282static void update_db_intercept(struct kvm_vcpu *vcpu)
1283{
1284	struct vcpu_svm *svm = to_svm(vcpu);
1285
1286	svm->vmcb->control.intercept_exceptions &=
1287		~((1 << DB_VECTOR) | (1 << BP_VECTOR));
1288
1289	if (svm->nmi_singlestep)
1290		svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR);
1291
1292	if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
1293		if (vcpu->guest_debug &
1294		    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
1295			svm->vmcb->control.intercept_exceptions |=
1296				1 << DB_VECTOR;
1297		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
1298			svm->vmcb->control.intercept_exceptions |=
1299				1 << BP_VECTOR;
1300	} else
1301		vcpu->guest_debug = 0;
1302}
1303
1304static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
1305{
1306	struct vcpu_svm *svm = to_svm(vcpu);
1307
1308	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1309		svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
1310	else
1311		svm->vmcb->save.dr7 = vcpu->arch.dr7;
1312
1313	update_db_intercept(vcpu);
1314}
1315
1316static void load_host_msrs(struct kvm_vcpu *vcpu)
1317{
1318#ifdef CONFIG_X86_64
1319	wrmsrl(MSR_GS_BASE, to_svm(vcpu)->host_gs_base);
1320#endif
1321}
1322
1323static void save_host_msrs(struct kvm_vcpu *vcpu)
1324{
1325#ifdef CONFIG_X86_64
1326	rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host_gs_base);
1327#endif
1328}
1329
1330static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
1331{
1332	if (sd->next_asid > sd->max_asid) {
1333		++sd->asid_generation;
1334		sd->next_asid = 1;
1335		svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
1336	}
1337
1338	svm->asid_generation = sd->asid_generation;
1339	svm->vmcb->control.asid = sd->next_asid++;
1340}
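/*
 * Sketch of the ASID scheme as implemented above: each vcpu remembers the
 * per-cpu asid_generation it was tagged with.  When a cpu runs out of
 * ASIDs it bumps the generation, restarts at ASID 1 and requests a full
 * TLB flush, so translations left over from recycled ASIDs cannot be
 * reused.  svm_vcpu_load() resets svm->asid_generation to 0 on migration,
 * which forces a fresh ASID on the new cpu.
 */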
1341
1342static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
1343{
1344	struct vcpu_svm *svm = to_svm(vcpu);
1345
1346	svm->vmcb->save.dr7 = value;
1347}
1348
1349static int pf_interception(struct vcpu_svm *svm)
1350{
1351	u64 fault_address;
1352	u32 error_code;
1353
1354	fault_address  = svm->vmcb->control.exit_info_2;
1355	error_code = svm->vmcb->control.exit_info_1;
1356
1357	trace_kvm_page_fault(fault_address, error_code);
1358	if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu))
1359		kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
1360	return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
1361}
1362
1363static int db_interception(struct vcpu_svm *svm)
1364{
1365	struct kvm_run *kvm_run = svm->vcpu.run;
1366
1367	if (!(svm->vcpu.guest_debug &
1368	      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
1369		!svm->nmi_singlestep) {
1370		kvm_queue_exception(&svm->vcpu, DB_VECTOR);
1371		return 1;
1372	}
1373
1374	if (svm->nmi_singlestep) {
1375		svm->nmi_singlestep = false;
1376		if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
1377			svm->vmcb->save.rflags &=
1378				~(X86_EFLAGS_TF | X86_EFLAGS_RF);
1379		update_db_intercept(&svm->vcpu);
1380	}
1381
1382	if (svm->vcpu.guest_debug &
1383	    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) {
1384		kvm_run->exit_reason = KVM_EXIT_DEBUG;
1385		kvm_run->debug.arch.pc =
1386			svm->vmcb->save.cs.base + svm->vmcb->save.rip;
1387		kvm_run->debug.arch.exception = DB_VECTOR;
1388		return 0;
1389	}
1390
1391	return 1;
1392}
1393
1394static int bp_interception(struct vcpu_svm *svm)
1395{
1396	struct kvm_run *kvm_run = svm->vcpu.run;
1397
1398	kvm_run->exit_reason = KVM_EXIT_DEBUG;
1399	kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
1400	kvm_run->debug.arch.exception = BP_VECTOR;
1401	return 0;
1402}
1403
1404static int ud_interception(struct vcpu_svm *svm)
1405{
1406	int er;
1407
1408	er = emulate_instruction(&svm->vcpu, 0, 0, EMULTYPE_TRAP_UD);
1409	if (er != EMULATE_DONE)
1410		kvm_queue_exception(&svm->vcpu, UD_VECTOR);
1411	return 1;
1412}
1413
1414static void svm_fpu_activate(struct kvm_vcpu *vcpu)
1415{
1416	struct vcpu_svm *svm = to_svm(vcpu);
1417	u32 excp;
1418
1419	if (is_nested(svm)) {
1420		u32 h_excp, n_excp;
1421
1422		h_excp  = svm->nested.hsave->control.intercept_exceptions;
1423		n_excp  = svm->nested.intercept_exceptions;
1424		h_excp &= ~(1 << NM_VECTOR);
1425		excp    = h_excp | n_excp;
1426	} else {
1427		excp  = svm->vmcb->control.intercept_exceptions;
1428		excp &= ~(1 << NM_VECTOR);
1429	}
1430
1431	svm->vmcb->control.intercept_exceptions = excp;
1432
1433	svm->vcpu.fpu_active = 1;
1434	update_cr0_intercept(svm);
1435}
1436
1437static int nm_interception(struct vcpu_svm *svm)
1438{
1439	svm_fpu_activate(&svm->vcpu);
1440	return 1;
1441}
1442
1443static bool is_erratum_383(void)
1444{
1445	int err, i;
1446	u64 value;
1447
1448	if (!erratum_383_found)
1449		return false;
1450
1451	value = native_read_msr_safe(MSR_IA32_MC0_STATUS, &err);
1452	if (err)
1453		return false;
1454
1455	/* Bit 62 may or may not be set for this mce */
1456	value &= ~(1ULL << 62);
1457
1458	if (value != 0xb600000000010015ULL)
1459		return false;
1460
1461	/* Clear MCi_STATUS registers */
1462	for (i = 0; i < 6; ++i)
1463		native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0, 0);
1464
1465	value = native_read_msr_safe(MSR_IA32_MCG_STATUS, &err);
1466	if (!err) {
1467		u32 low, high;
1468
1469		value &= ~(1ULL << 2);
1470		low    = lower_32_bits(value);
1471		high   = upper_32_bits(value);
1472
1473		native_write_msr_safe(MSR_IA32_MCG_STATUS, low, high);
1474	}
1475
1476	/* Flush tlb to evict multi-match entries */
1477	__flush_tlb_all();
1478
1479	return true;
1480}
1481
1482static void svm_handle_mce(struct vcpu_svm *svm)
1483{
1484	if (is_erratum_383()) {
1485		/*
1486		 * Erratum 383 triggered. Guest state is corrupt so kill the
1487		 * guest.
1488		 */
1489		pr_err("KVM: Guest triggered AMD Erratum 383\n");
1490
1491		set_bit(KVM_REQ_TRIPLE_FAULT, &svm->vcpu.requests);
1492
1493		return;
1494	}
1495
1496	/*
1497	 * On an #MC intercept the MCE handler is not called automatically in
1498	 * the host. So do it by hand here.
1499	 */
1500	asm volatile (
1501		"int $0x12\n");
1502	/* not sure if we ever come back to this point */
1503
1504	return;
1505}
1506
1507static int mc_interception(struct vcpu_svm *svm)
1508{
1509	return 1;
1510}
1511
1512static int shutdown_interception(struct vcpu_svm *svm)
1513{
1514	struct kvm_run *kvm_run = svm->vcpu.run;
1515
1516	/*
1517	 * VMCB is undefined after a SHUTDOWN intercept
1518	 * so reinitialize it.
1519	 */
1520	clear_page(svm->vmcb);
1521	init_vmcb(svm);
1522
1523	kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
1524	return 0;
1525}
1526
1527static int io_interception(struct vcpu_svm *svm)
1528{
1529	struct kvm_vcpu *vcpu = &svm->vcpu;
1530	u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
1531	int size, in, string;
1532	unsigned port;
1533
1534	++svm->vcpu.stat.io_exits;
1535	string = (io_info & SVM_IOIO_STR_MASK) != 0;
1536	in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
1537	if (string || in)
1538		return !(emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO);
1539
1540	port = io_info >> 16;
1541	size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
1542	svm->next_rip = svm->vmcb->control.exit_info_2;
1543	skip_emulated_instruction(&svm->vcpu);
1544
1545	return kvm_fast_pio_out(vcpu, size, port);
1546}
1547
1548static int nmi_interception(struct vcpu_svm *svm)
1549{
1550	return 1;
1551}
1552
1553static int intr_interception(struct vcpu_svm *svm)
1554{
1555	++svm->vcpu.stat.irq_exits;
1556	return 1;
1557}
1558
1559static int nop_on_interception(struct vcpu_svm *svm)
1560{
1561	return 1;
1562}
1563
1564static int halt_interception(struct vcpu_svm *svm)
1565{
1566	svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
1567	skip_emulated_instruction(&svm->vcpu);
1568	return kvm_emulate_halt(&svm->vcpu);
1569}
1570
1571static int vmmcall_interception(struct vcpu_svm *svm)
1572{
1573	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1574	skip_emulated_instruction(&svm->vcpu);
1575	kvm_emulate_hypercall(&svm->vcpu);
1576	return 1;
1577}
1578
1579static int nested_svm_check_permissions(struct vcpu_svm *svm)
1580{
1581	if (!(svm->vcpu.arch.efer & EFER_SVME)
1582	    || !is_paging(&svm->vcpu)) {
1583		kvm_queue_exception(&svm->vcpu, UD_VECTOR);
1584		return 1;
1585	}
1586
1587	if (svm->vmcb->save.cpl) {
1588		kvm_inject_gp(&svm->vcpu, 0);
1589		return 1;
1590	}
1591
 1592	return 0;
1593}
1594
1595static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
1596				      bool has_error_code, u32 error_code)
1597{
1598	int vmexit;
1599
1600	if (!is_nested(svm))
1601		return 0;
1602
1603	svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
1604	svm->vmcb->control.exit_code_hi = 0;
1605	svm->vmcb->control.exit_info_1 = error_code;
1606	svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
1607
1608	vmexit = nested_svm_intercept(svm);
1609	if (vmexit == NESTED_EXIT_DONE)
1610		svm->nested.exit_required = true;
1611
1612	return vmexit;
1613}
1614
 1615/* This function returns true if it is safe to enable the irq window */
1616static inline bool nested_svm_intr(struct vcpu_svm *svm)
1617{
1618	if (!is_nested(svm))
1619		return true;
1620
1621	if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
1622		return true;
1623
1624	if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
1625		return false;
1626
1627	svm->vmcb->control.exit_code   = SVM_EXIT_INTR;
1628	svm->vmcb->control.exit_info_1 = 0;
1629	svm->vmcb->control.exit_info_2 = 0;
1630
1631	if (svm->nested.intercept & 1ULL) {
1632		/*
1633		 * The #vmexit can't be emulated here directly because this
 1634		 * code path runs with irqs and preemption disabled. A
1635		 * #vmexit emulation might sleep. Only signal request for
1636		 * the #vmexit here.
1637		 */
1638		svm->nested.exit_required = true;
1639		trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
1640		return false;
1641	}
1642
1643	return true;
1644}
1645
 1646/* This function returns true if it is safe to enable the nmi window */
1647static inline bool nested_svm_nmi(struct vcpu_svm *svm)
1648{
1649	if (!is_nested(svm))
1650		return true;
1651
1652	if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI)))
1653		return true;
1654
1655	svm->vmcb->control.exit_code = SVM_EXIT_NMI;
1656	svm->nested.exit_required = true;
1657
1658	return false;
1659}
1660
1661static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page)
1662{
1663	struct page *page;
1664
1665	might_sleep();
1666
1667	page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
1668	if (is_error_page(page))
1669		goto error;
1670
1671	*_page = page;
1672
1673	return kmap(page);
1674
1675error:
1676	kvm_release_page_clean(page);
1677	kvm_inject_gp(&svm->vcpu, 0);
1678
1679	return NULL;
1680}
1681
1682static void nested_svm_unmap(struct page *page)
1683{
1684	kunmap(page);
1685	kvm_release_page_dirty(page);
1686}
1687
1688static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
1689{
1690	unsigned port;
1691	u8 val, bit;
1692	u64 gpa;
1693
1694	if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT)))
1695		return NESTED_EXIT_HOST;
1696
1697	port = svm->vmcb->control.exit_info_1 >> 16;
1698	gpa  = svm->nested.vmcb_iopm + (port / 8);
1699	bit  = port % 8;
1700	val  = 0;
1701
 1702	if (!kvm_read_guest(svm->vcpu.kvm, gpa, &val, 1))
 1703		val &= (1 << bit);
1704
1705	return val ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
1706}
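/*
 * Worked example (illustrative): for port 0x3f8 the permission bit lives
 * at byte 0x3f8 / 8 = 127 of the L1 IOPM and is bit 0x3f8 % 8 = 0 within
 * that byte.  If the bit is set the exit is reflected to the L1 hypervisor
 * (NESTED_EXIT_DONE), otherwise KVM handles it itself (NESTED_EXIT_HOST).
 */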
1707
1708static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
1709{
1710	u32 offset, msr, value;
1711	int write, mask;
1712
1713	if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
1714		return NESTED_EXIT_HOST;
1715
1716	msr    = svm->vcpu.arch.regs[VCPU_REGS_RCX];
1717	offset = svm_msrpm_offset(msr);
1718	write  = svm->vmcb->control.exit_info_1 & 1;
1719	mask   = 1 << ((2 * (msr & 0xf)) + write);
1720
1721	if (offset == MSR_INVALID)
1722		return NESTED_EXIT_DONE;
1723
 1724	/* Offset is in 32-bit units but we need it in 8-bit units */
1725	offset *= 4;
1726
1727	if (kvm_read_guest(svm->vcpu.kvm, svm->nested.vmcb_msrpm + offset, &value, 4))
1728		return NESTED_EXIT_DONE;
1729
1730	return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
1731}
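/*
 * Worked example (illustrative): for a guest write to MSR_LSTAR
 * (0xc0000082) the offset is 520 (see svm_msrpm_offset above), the u32
 * read from the L1 bitmap sits at vmcb_msrpm + 520 * 4, and the bit
 * tested is 1 << (2 * (0x82 & 0xf) + 1) = 1 << 5, i.e. the write
 * permission bit for that MSR.
 */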
1732
1733static int nested_svm_exit_special(struct vcpu_svm *svm)
1734{
1735	u32 exit_code = svm->vmcb->control.exit_code;
1736
1737	switch (exit_code) {
1738	case SVM_EXIT_INTR:
1739	case SVM_EXIT_NMI:
1740	case SVM_EXIT_EXCP_BASE + MC_VECTOR:
1741		return NESTED_EXIT_HOST;
1742	case SVM_EXIT_NPF:
1743		/* For now we are always handling NPFs when using them */
1744		if (npt_enabled)
1745			return NESTED_EXIT_HOST;
1746		break;
1747	case SVM_EXIT_EXCP_BASE + PF_VECTOR:
1748		/* When we're shadowing, trap PFs */
1749		if (!npt_enabled)
1750			return NESTED_EXIT_HOST;
1751		break;
1752	case SVM_EXIT_EXCP_BASE + NM_VECTOR:
1753		nm_interception(svm);
1754		break;
1755	default:
1756		break;
1757	}
1758
1759	return NESTED_EXIT_CONTINUE;
1760}
1761
1762/*
 1763 * If this function returns NESTED_EXIT_DONE, the #vmexit must be reflected to the L1 hypervisor
1764 */
1765static int nested_svm_intercept(struct vcpu_svm *svm)
1766{
1767	u32 exit_code = svm->vmcb->control.exit_code;
1768	int vmexit = NESTED_EXIT_HOST;
1769
1770	switch (exit_code) {
1771	case SVM_EXIT_MSR:
1772		vmexit = nested_svm_exit_handled_msr(svm);
1773		break;
1774	case SVM_EXIT_IOIO:
1775		vmexit = nested_svm_intercept_ioio(svm);
1776		break;
1777	case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: {
1778		u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0);
1779		if (svm->nested.intercept_cr_read & cr_bits)
1780			vmexit = NESTED_EXIT_DONE;
1781		break;
1782	}
1783	case SVM_EXIT_WRITE_CR0 ... SVM_EXIT_WRITE_CR8: {
1784		u32 cr_bits = 1 << (exit_code - SVM_EXIT_WRITE_CR0);
1785		if (svm->nested.intercept_cr_write & cr_bits)
1786			vmexit = NESTED_EXIT_DONE;
1787		break;
1788	}
1789	case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR7: {
1790		u32 dr_bits = 1 << (exit_code - SVM_EXIT_READ_DR0);
1791		if (svm->nested.intercept_dr_read & dr_bits)
1792			vmexit = NESTED_EXIT_DONE;
1793		break;
1794	}
1795	case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR7: {
1796		u32 dr_bits = 1 << (exit_code - SVM_EXIT_WRITE_DR0);
1797		if (svm->nested.intercept_dr_write & dr_bits)
1798			vmexit = NESTED_EXIT_DONE;
1799		break;
1800	}
1801	case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
1802		u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
1803		if (svm->nested.intercept_exceptions & excp_bits)
1804			vmexit = NESTED_EXIT_DONE;
1805		break;
1806	}
1807	case SVM_EXIT_ERR: {
1808		vmexit = NESTED_EXIT_DONE;
1809		break;
1810	}
1811	default: {
1812		u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
1813		if (svm->nested.intercept & exit_bits)
1814			vmexit = NESTED_EXIT_DONE;
1815	}
1816	}
1817
1818	return vmexit;
1819}
1820
1821static int nested_svm_exit_handled(struct vcpu_svm *svm)
1822{
1823	int vmexit;
1824
1825	vmexit = nested_svm_intercept(svm);
1826
1827	if (vmexit == NESTED_EXIT_DONE)
1828		nested_svm_vmexit(svm);
1829
1830	return vmexit;
1831}
1832
1833static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb)
1834{
1835	struct vmcb_control_area *dst  = &dst_vmcb->control;
1836	struct vmcb_control_area *from = &from_vmcb->control;
1837
1838	dst->intercept_cr_read    = from->intercept_cr_read;
1839	dst->intercept_cr_write   = from->intercept_cr_write;
1840	dst->intercept_dr_read    = from->intercept_dr_read;
1841	dst->intercept_dr_write   = from->intercept_dr_write;
1842	dst->intercept_exceptions = from->intercept_exceptions;
1843	dst->intercept            = from->intercept;
1844	dst->iopm_base_pa         = from->iopm_base_pa;
1845	dst->msrpm_base_pa        = from->msrpm_base_pa;
1846	dst->tsc_offset           = from->tsc_offset;
1847	dst->asid                 = from->asid;
1848	dst->tlb_ctl              = from->tlb_ctl;
1849	dst->int_ctl              = from->int_ctl;
1850	dst->int_vector           = from->int_vector;
1851	dst->int_state            = from->int_state;
1852	dst->exit_code            = from->exit_code;
1853	dst->exit_code_hi         = from->exit_code_hi;
1854	dst->exit_info_1          = from->exit_info_1;
1855	dst->exit_info_2          = from->exit_info_2;
1856	dst->exit_int_info        = from->exit_int_info;
1857	dst->exit_int_info_err    = from->exit_int_info_err;
1858	dst->nested_ctl           = from->nested_ctl;
1859	dst->event_inj            = from->event_inj;
1860	dst->event_inj_err        = from->event_inj_err;
1861	dst->nested_cr3           = from->nested_cr3;
1862	dst->lbr_ctl              = from->lbr_ctl;
1863}
1864
1865static int nested_svm_vmexit(struct vcpu_svm *svm)
1866{
1867	struct vmcb *nested_vmcb;
1868	struct vmcb *hsave = svm->nested.hsave;
1869	struct vmcb *vmcb = svm->vmcb;
1870	struct page *page;
1871
1872	trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
1873				       vmcb->control.exit_info_1,
1874				       vmcb->control.exit_info_2,
1875				       vmcb->control.exit_int_info,
1876				       vmcb->control.exit_int_info_err);
1877
1878	nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page);
1879	if (!nested_vmcb)
1880		return 1;
1881
1882	/* Exit nested SVM mode */
1883	svm->nested.vmcb = 0;
1884
1885	/* Give the current vmcb to the guest */
1886	disable_gif(svm);
1887
1888	nested_vmcb->save.es     = vmcb->save.es;
1889	nested_vmcb->save.cs     = vmcb->save.cs;
1890	nested_vmcb->save.ss     = vmcb->save.ss;
1891	nested_vmcb->save.ds     = vmcb->save.ds;
1892	nested_vmcb->save.gdtr   = vmcb->save.gdtr;
1893	nested_vmcb->save.idtr   = vmcb->save.idtr;
1894	nested_vmcb->save.cr0    = kvm_read_cr0(&svm->vcpu);
1895	nested_vmcb->save.cr3    = svm->vcpu.arch.cr3;
1896	nested_vmcb->save.cr2    = vmcb->save.cr2;
1897	nested_vmcb->save.cr4    = svm->vcpu.arch.cr4;
1898	nested_vmcb->save.rflags = vmcb->save.rflags;
1899	nested_vmcb->save.rip    = vmcb->save.rip;
1900	nested_vmcb->save.rsp    = vmcb->save.rsp;
1901	nested_vmcb->save.rax    = vmcb->save.rax;
1902	nested_vmcb->save.dr7    = vmcb->save.dr7;
1903	nested_vmcb->save.dr6    = vmcb->save.dr6;
1904	nested_vmcb->save.cpl    = vmcb->save.cpl;
1905
1906	nested_vmcb->control.int_ctl           = vmcb->control.int_ctl;
1907	nested_vmcb->control.int_vector        = vmcb->control.int_vector;
1908	nested_vmcb->control.int_state         = vmcb->control.int_state;
1909	nested_vmcb->control.exit_code         = vmcb->control.exit_code;
1910	nested_vmcb->control.exit_code_hi      = vmcb->control.exit_code_hi;
1911	nested_vmcb->control.exit_info_1       = vmcb->control.exit_info_1;
1912	nested_vmcb->control.exit_info_2       = vmcb->control.exit_info_2;
1913	nested_vmcb->control.exit_int_info     = vmcb->control.exit_int_info;
1914	nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
1915
1916	/*
1917	 * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
1918	 * to make sure that we do not lose injected events. So check event_inj
1919	 * here and copy it to exit_int_info if it is valid.
1920	 * Exit_int_info and event_inj can't be both valid because the case
1921	 * below only happens on a VMRUN instruction intercept which has
1922	 * no valid exit_int_info set.
1923	 */
1924	if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
1925		struct vmcb_control_area *nc = &nested_vmcb->control;
1926
1927		nc->exit_int_info     = vmcb->control.event_inj;
1928		nc->exit_int_info_err = vmcb->control.event_inj_err;
1929	}
1930
1931	nested_vmcb->control.tlb_ctl           = 0;
1932	nested_vmcb->control.event_inj         = 0;
1933	nested_vmcb->control.event_inj_err     = 0;
1934
1935	/* We always set V_INTR_MASKING and remember the old value in hflags */
1936	if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
1937		nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
1938
1939	/* Restore the original control entries */
1940	copy_vmcb_control_area(vmcb, hsave);
1941
1942	kvm_clear_exception_queue(&svm->vcpu);
1943	kvm_clear_interrupt_queue(&svm->vcpu);
1944
1945	/* Restore selected save entries */
1946	svm->vmcb->save.es = hsave->save.es;
1947	svm->vmcb->save.cs = hsave->save.cs;
1948	svm->vmcb->save.ss = hsave->save.ss;
1949	svm->vmcb->save.ds = hsave->save.ds;
1950	svm->vmcb->save.gdtr = hsave->save.gdtr;
1951	svm->vmcb->save.idtr = hsave->save.idtr;
1952	svm->vmcb->save.rflags = hsave->save.rflags;
1953	svm_set_efer(&svm->vcpu, hsave->save.efer);
1954	svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
1955	svm_set_cr4(&svm->vcpu, hsave->save.cr4);
1956	if (npt_enabled) {
1957		svm->vmcb->save.cr3 = hsave->save.cr3;
1958		svm->vcpu.arch.cr3 = hsave->save.cr3;
1959	} else {
1960		kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
1961	}
1962	kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax);
1963	kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp);
1964	kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip);
1965	svm->vmcb->save.dr7 = 0;
1966	svm->vmcb->save.cpl = 0;
1967	svm->vmcb->control.exit_int_info = 0;
1968
1969	nested_svm_unmap(page);
1970
1971	kvm_mmu_reset_context(&svm->vcpu);
1972	kvm_mmu_load(&svm->vcpu);
1973
1974	return 0;
1975}
1976
1977static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
1978{
1979	/*
1980	 * This function merges the msr permission bitmaps of kvm and the
 1981	 * nested vmcb. It is optimized in that it only merges the parts where
1982	 * the kvm msr permission bitmap may contain zero bits
1983	 */
1984	int i;
1985
1986	if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
1987		return true;
1988
1989	for (i = 0; i < MSRPM_OFFSETS; i++) {
1990		u32 value, p;
1991		u64 offset;
1992
1993		if (msrpm_offsets[i] == 0xffffffff)
1994			break;
1995
1996		p      = msrpm_offsets[i];
1997		offset = svm->nested.vmcb_msrpm + (p * 4);
1998
1999		if (kvm_read_guest(svm->vcpu.kvm, offset, &value, 4))
2000			return false;
2001
2002		svm->nested.msrpm[p] = svm->msrpm[p] | value;
2003	}
2004
2005	svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm);
2006
2007	return true;
2008}
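/*
 * Note that a set bit means "intercept", so OR-ing KVM's own bitmap with
 * the L1 bitmap yields a merged map in which an MSR is passed through to
 * the L2 guest only when both KVM and the L1 hypervisor allow it.
 */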
2009
2010static bool nested_svm_vmrun(struct vcpu_svm *svm)
2011{
2012	struct vmcb *nested_vmcb;
2013	struct vmcb *hsave = svm->nested.hsave;
2014	struct vmcb *vmcb = svm->vmcb;
2015	struct page *page;
2016	u64 vmcb_gpa;
2017
2018	vmcb_gpa = svm->vmcb->save.rax;
2019
2020	nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
2021	if (!nested_vmcb)
2022		return false;
2023
2024	trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, vmcb_gpa,
2025			       nested_vmcb->save.rip,
2026			       nested_vmcb->control.int_ctl,
2027			       nested_vmcb->control.event_inj,
2028			       nested_vmcb->control.nested_ctl);
2029
2030	trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr_read,
2031				    nested_vmcb->control.intercept_cr_write,
2032				    nested_vmcb->control.intercept_exceptions,
2033				   

Large files are truncated; the remainder of this file is not shown here.