From: Boris Ostrovsky Date: Tue, 26 Mar 2019 22:46:02 +0000 (-0400) Subject: Revert "KVM: VMX: make MSR bitmaps per-VCPU" X-Git-Tag: v4.1.12-124.31.3~222 X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=a766796106042e8b4685bf8e8c3fb739295320aa;p=users%2Fjedix%2Flinux-maple.git Revert "KVM: VMX: make MSR bitmaps per-VCPU" This reverts commit b3bd557498bff191933d0481b609fdff0caa9ead. Revert due to performance regression. Orabug: 29542029 Signed-off-by: Boris Ostrovsky Reviewed-by: Mihai Carabas Signed-off-by: Brian Maly --- diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 293ca2e1e98f..4b2c767216d7 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -109,14 +109,6 @@ static u64 __read_mostly host_xss; static bool __read_mostly enable_pml = 1; module_param_named(pml, enable_pml, bool, S_IRUGO); -#define MSR_TYPE_R 1 -#define MSR_TYPE_W 2 -#define MSR_TYPE_RW 3 - -#define MSR_BITMAP_MODE_X2APIC 1 -#define MSR_BITMAP_MODE_X2APIC_APICV 2 -#define MSR_BITMAP_MODE_LM 4 - #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD) #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE) #define KVM_VM_CR0_ALWAYS_ON \ @@ -337,7 +329,6 @@ struct loaded_vmcs { int soft_vnmi_blocked; ktime_t entry_time; s64 vnmi_blocked_time; - unsigned long *msr_bitmap; struct list_head loaded_vmcss_on_cpu_link; }; @@ -572,6 +563,8 @@ struct nested_vmx { u16 posted_intr_nv; u64 msr_ia32_feature_control; + unsigned long *msr_bitmap; + struct hrtimer preemption_timer; bool preemption_timer_expired; @@ -630,7 +623,6 @@ struct vcpu_vmx { struct kvm_vcpu vcpu; unsigned long host_rsp; u8 fail; - u8 msr_bitmap_mode; bool nmi_known_unmasked; u32 exit_intr_info; u32 idt_vectoring_info; @@ -985,8 +977,6 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); static int alloc_identity_pagetable(struct kvm *kvm); -static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); - static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); /* @@ -998,6 +988,10 @@ static DEFINE_PER_CPU(struct desc_ptr, host_gdt); static unsigned long *vmx_io_bitmap_a; static unsigned long *vmx_io_bitmap_b; +static unsigned long *vmx_msr_bitmap_legacy; +static unsigned long *vmx_msr_bitmap_longmode; +static unsigned long *vmx_msr_bitmap_legacy_x2apic; +static unsigned long *vmx_msr_bitmap_longmode_x2apic; static unsigned long *vmx_vmread_bitmap; static unsigned long *vmx_vmwrite_bitmap; @@ -2366,6 +2360,29 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) vmx->guest_msrs[from] = tmp; } +static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu) +{ + unsigned long *msr_bitmap; + + if (is_guest_mode(vcpu)) + msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap; + else if (cpu_has_secondary_exec_ctrls() && + (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { + if (is_long_mode(vcpu)) + msr_bitmap = vmx_msr_bitmap_longmode_x2apic; + else + msr_bitmap = vmx_msr_bitmap_legacy_x2apic; + } else { + if (is_long_mode(vcpu)) + msr_bitmap = vmx_msr_bitmap_longmode; + else + msr_bitmap = vmx_msr_bitmap_legacy; + } + + vmcs_write64(MSR_BITMAP, __pa(msr_bitmap)); +} + /* * Set up the vmcs to automatically save and restore system * msrs. Don't touch the 64-bit msrs if the guest is in legacy @@ -2406,7 +2423,7 @@ static void setup_msrs(struct vcpu_vmx *vmx) vmx->save_nmsrs = save_nmsrs; if (cpu_has_vmx_msr_bitmap()) - vmx_update_msr_bitmap(&vmx->vcpu); + vmx_set_msr_bitmap(&vmx->vcpu); } /* @@ -3389,8 +3406,6 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) loaded_vmcs_clear(loaded_vmcs); free_vmcs(loaded_vmcs->vmcs); loaded_vmcs->vmcs = NULL; - if (loaded_vmcs->msr_bitmap) - free_page((unsigned long)loaded_vmcs->msr_bitmap); /* * The VMCS could be recycled, causing a false negative in vmx_vcpu_load; * block speculative execution. @@ -3411,18 +3426,7 @@ static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) return -ENOMEM; loaded_vmcs_init(loaded_vmcs); - - if (cpu_has_vmx_msr_bitmap()) { - loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); - if (!loaded_vmcs->msr_bitmap) - goto out_vmcs; - memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE); - } return 0; - -out_vmcs: - free_loaded_vmcs(loaded_vmcs); - return -ENOMEM; } static void free_kvm_area(void) @@ -4434,8 +4438,10 @@ static void free_vpid(struct vcpu_vmx *vmx) spin_unlock(&vmx_vpid_lock); } -static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, - u32 msr, int type) +#define MSR_TYPE_R 1 +#define MSR_TYPE_W 2 +static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type) { int f = sizeof(unsigned long); @@ -4469,8 +4475,8 @@ static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bit } } -static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, - u32 msr, int type) +static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type) { int f = sizeof(unsigned long); @@ -4504,15 +4510,6 @@ static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitm } } -static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap, - u32 msr, int type, bool value) -{ - if (value) - vmx_enable_intercept_for_msr(msr_bitmap, msr, type); - else - vmx_disable_intercept_for_msr(msr_bitmap, msr, type); -} - /* * If a msr is allowed by L0, we should check whether it is allowed by L1. * The corresponding bit will be cleared unless both of L0 and L1 allow it. @@ -4559,68 +4556,37 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1, } } -static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu) +static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only) { - u8 mode = 0; - - if (cpu_has_secondary_exec_ctrls() && - (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { - mode |= MSR_BITMAP_MODE_X2APIC; - if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) - mode |= MSR_BITMAP_MODE_X2APIC_APICV; - } - - if (is_long_mode(vcpu)) - mode |= MSR_BITMAP_MODE_LM; - - return mode; + if (!longmode_only) + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, + msr, MSR_TYPE_R | MSR_TYPE_W); + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, + msr, MSR_TYPE_R | MSR_TYPE_W); } -#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4)) - -static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap, - u8 mode) +static void vmx_enable_intercept_msr_read_x2apic(u32 msr) { - int msr; - - for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { - unsigned word = msr / BITS_PER_LONG; - msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0; - msr_bitmap[word + (0x800 / sizeof(long))] = ~0; - } - - if (mode & MSR_BITMAP_MODE_X2APIC) { - /* - * TPR reads and writes can be virtualized even if virtual interrupt - * delivery is not in use. - */ - vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW); - if (mode & MSR_BITMAP_MODE_X2APIC_APICV) { - vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R); - vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W); - vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W); - } - } + __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, + msr, MSR_TYPE_R); + __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, + msr, MSR_TYPE_R); } -static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu) +static void vmx_disable_intercept_msr_read_x2apic(u32 msr) { - struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap; - u8 mode = vmx_msr_bitmap_mode(vcpu); - u8 changed = mode ^ vmx->msr_bitmap_mode; - - if (!changed) - return; - - vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW, - !(mode & MSR_BITMAP_MODE_LM)); - - if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV)) - vmx_update_msr_bitmap_x2apic(msr_bitmap, mode); + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, + msr, MSR_TYPE_R); + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, + msr, MSR_TYPE_R); +} - vmx->msr_bitmap_mode = mode; +static void vmx_disable_intercept_msr_write_x2apic(u32 msr) +{ + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, + msr, MSR_TYPE_W); + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, + msr, MSR_TYPE_W); } static int vmx_vm_has_apicv(struct kvm *kvm) @@ -4827,7 +4793,7 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) } if (cpu_has_vmx_msr_bitmap()) - vmx_update_msr_bitmap(vcpu); + vmx_set_msr_bitmap(vcpu); } static u32 vmx_exec_control(struct vcpu_vmx *vmx) @@ -4915,7 +4881,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap)); } if (cpu_has_vmx_msr_bitmap()) - vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap)); + vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy)); vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ @@ -6318,7 +6284,7 @@ static void update_ple_window_actual_max(void) static __init int hardware_setup(void) { - int r = -ENOMEM, i; + int r = -ENOMEM, i, msr; rdmsrl_safe(MSR_EFER, &host_efer); @@ -6333,13 +6299,31 @@ static __init int hardware_setup(void) if (!vmx_io_bitmap_b) goto out; + vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_msr_bitmap_legacy) + goto out1; + + vmx_msr_bitmap_legacy_x2apic = + (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_msr_bitmap_legacy_x2apic) + goto out2; + + vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_msr_bitmap_longmode) + goto out3; + + vmx_msr_bitmap_longmode_x2apic = + (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_msr_bitmap_longmode_x2apic) + goto out4; + vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); if (!vmx_vmread_bitmap) - goto out1; + goto out6; vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); if (!vmx_vmwrite_bitmap) - goto out2; + goto out7; memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); @@ -6348,9 +6332,12 @@ static __init int hardware_setup(void) memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE); + memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); + memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); + if (setup_vmcs_config(&vmcs_config) < 0) { r = -EIO; - goto out3; + goto out8; } if (boot_cpu_has(X86_FEATURE_NX)) @@ -6404,8 +6391,38 @@ static __init int hardware_setup(void) kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy; } + vmx_disable_intercept_for_msr(MSR_FS_BASE, false); + vmx_disable_intercept_for_msr(MSR_GS_BASE, false); + vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true); + vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); + vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); + vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); + vmx_disable_intercept_for_msr(MSR_IA32_SPEC_CTRL, false); + vmx_disable_intercept_for_msr(MSR_IA32_PRED_CMD, false); + vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true); + + memcpy(vmx_msr_bitmap_legacy_x2apic, + vmx_msr_bitmap_legacy, PAGE_SIZE); + memcpy(vmx_msr_bitmap_longmode_x2apic, + vmx_msr_bitmap_longmode, PAGE_SIZE); + set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ + for (msr = 0x800; msr <= 0x8ff; msr++) + vmx_disable_intercept_msr_read_x2apic(msr); + + /* According SDM, in x2apic mode, the whole id reg is used. But in + * KVM, it only use the highest eight bits. Need to intercept it */ + vmx_enable_intercept_msr_read_x2apic(0x802); + /* TMCCT */ + vmx_enable_intercept_msr_read_x2apic(0x839); + /* TPR */ + vmx_disable_intercept_msr_write_x2apic(0x808); + /* EOI */ + vmx_disable_intercept_msr_write_x2apic(0x80b); + /* SELF-IPI */ + vmx_disable_intercept_msr_write_x2apic(0x83f); + if (enable_ept) { kvm_mmu_set_mask_ptes(0ull, (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull, @@ -6439,10 +6456,18 @@ static __init int hardware_setup(void) return alloc_kvm_area(); -out3: +out8: free_page((unsigned long)vmx_vmwrite_bitmap); -out2: +out7: free_page((unsigned long)vmx_vmread_bitmap); +out6: + free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); +out4: + free_page((unsigned long)vmx_msr_bitmap_longmode); +out3: + free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); +out2: + free_page((unsigned long)vmx_msr_bitmap_legacy); out1: free_page((unsigned long)vmx_io_bitmap_b); out: @@ -6453,6 +6478,10 @@ out: static __exit void hardware_unsetup(void) { + free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); + free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); + free_page((unsigned long)vmx_msr_bitmap_legacy); + free_page((unsigned long)vmx_msr_bitmap_longmode); free_page((unsigned long)vmx_io_bitmap_b); free_page((unsigned long)vmx_io_bitmap_a); free_page((unsigned long)vmx_vmwrite_bitmap); @@ -6768,6 +6797,13 @@ static int handle_vmon(struct kvm_vcpu *vcpu) if (r < 0) goto out_vmcs02; + if (cpu_has_vmx_msr_bitmap()) { + vmx->nested.msr_bitmap = + (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx->nested.msr_bitmap) + goto out_msr_bitmap; + } + if (enable_shadow_vmcs) { shadow_vmcs = alloc_vmcs(); if (!shadow_vmcs) @@ -6790,6 +6826,9 @@ static int handle_vmon(struct kvm_vcpu *vcpu) return 1; out_shadow_vmcs: + free_page((unsigned long)vmx->nested.msr_bitmap); + +out_msr_bitmap: free_loaded_vmcs(&vmx->nested.vmcs02); out_vmcs02: @@ -6864,6 +6903,10 @@ static void free_nested(struct vcpu_vmx *vmx) vmx->nested.vmxon = false; nested_release_vmcs12(vmx); + if (vmx->nested.msr_bitmap) { + free_page((unsigned long)vmx->nested.msr_bitmap); + vmx->nested.msr_bitmap = NULL; + } if (enable_shadow_vmcs) free_vmcs(vmx->nested.current_shadow_vmcs); /* Unpin physical memory we referred to in the vmcs02 */ @@ -8057,7 +8100,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) } vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); - vmx_update_msr_bitmap(vcpu); + vmx_set_msr_bitmap(vcpu); } static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) @@ -8647,7 +8690,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) { int err; struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); - unsigned long *msr_bitmap; int cpu; if (!vmx) @@ -8671,19 +8713,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) err = alloc_loaded_vmcs(&vmx->vmcs01); if (err < 0) goto free_msrs; - - msr_bitmap = vmx->vmcs01.msr_bitmap; - vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW); - vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW); - vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW); - vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW); - vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW); - vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW); - vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SPEC_CTRL, MSR_TYPE_RW); - vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_PRED_CMD, MSR_TYPE_RW); - vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_BNDCFGS, MSR_TYPE_RW); - vmx->msr_bitmap_mode = 0; - vmx->loaded_vmcs = &vmx->vmcs01; cpu = get_cpu(); @@ -9076,7 +9105,7 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, int msr; struct page *page; unsigned long *msr_bitmap_l1; - unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; + unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap; /* This shortcut is ok because we support only x2APIC MSRs so far. */ if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) @@ -9632,9 +9661,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, else vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset); - if (cpu_has_vmx_msr_bitmap()) - vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap)); - if (enable_vpid) { /* * Trivially support vpid by letting L2s share their parent @@ -10324,7 +10350,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, vmcs_write64(GUEST_IA32_DEBUGCTL, 0); if (cpu_has_vmx_msr_bitmap()) - vmx_update_msr_bitmap(vcpu); + vmx_set_msr_bitmap(vcpu); if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr, vmcs12->vm_exit_msr_load_count))