From: Paolo Bonzini
Date: Mon, 2 Jul 2018 11:07:14 +0000 (+0200)
Subject: x86/KVM/VMX: Add L1D flush logic
X-Git-Tag: v4.1.12-124.31.3~636
X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=bd782586b52bd88a3e750e4eec5578e0e49723e5;p=users%2Fjedix%2Flinux-maple.git

x86/KVM/VMX: Add L1D flush logic

Add the logic for flushing L1D on VMENTER. The flush depends on the
static key being enabled and the new l1tf_flush_l1d flag being set.

The flag is set:
 - Always, if the flush module parameter is 'always'

 - Conditionally at:
   - Entry to vcpu_run(), i.e. after executing user space

   - From the sched_in notifier, i.e. when switching to a vCPU thread.

   - From vmexit handlers which are considered unsafe, i.e. where
     sensitive data can be brought into L1D:

     - The emulator, which could be a good target for other speculative
       execution-based threats,

     - The MMU, which can bring host page tables into the L1 cache.

     - External interrupts

     - Nested operations that require the MMU (see above). That is
       vmptrld, vmptrst, vmclear, vmwrite, vmread.

     - When handling invept, invvpid

[ tglx: Split out from combo patch and reduced to a single flag ]

Signed-off-by: Paolo Bonzini
Signed-off-by: Konrad Rzeszutek Wilk
Signed-off-by: Thomas Gleixner

Orabug: 28220674
CVE: CVE-2018-3646

(cherry picked from commit c595ceee45707f00f64f61c54fb64ef0cc0b4e85)
Signed-off-by: Mihai Carabas
Reviewed-by: Darren Kenny
Reviewed-by: Boris Ostrovsky

Conflicts:
	arch/x86/include/asm/kvm_host.h
	arch/x86/kvm/mmu.c
	arch/x86/kvm/vmx.c
	arch/x86/kvm/x86.c

Contextual: different content
	kvm_handle_page_fault() does not exist in this tree, so
	l1tf_flush_l1d is set to true in handle_exception() instead.
	static_branch_unlikely() was replaced with
	unlikely(static_key_enabled()) because 11276d53
	("locking/static_keys: Add a new static_key interface") is
	missing from this kernel version (see the comparison sketch
	after the diff).
---

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 10dd34d36ccd..004c70549d48 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -542,6 +542,9 @@ struct kvm_vcpu_arch {
 	struct {
 		bool pv_unhalted;
 	} pv;
+
+	/* Flush the L1 Data cache for L1TF mitigation on VMENTER */
+	bool l1tf_flush_l1d;
 };
 
 struct kvm_lpage_info {
@@ -666,6 +669,7 @@ struct kvm_vcpu_stat {
 	u32 signal_exits;
 	u32 irq_window_exits;
 	u32 nmi_window_exits;
+	u64 l1d_flush;
 	u32 halt_exits;
 	u32 halt_successful_poll;
 	u32 halt_wakeup;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4f80c38f37b4..6e21980f559e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5238,6 +5238,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 
 		if (kvm_event_needs_reinjection(vcpu))
 			kvm_mmu_unprotect_page_virt(vcpu, cr2);
+		vcpu->arch.l1tf_flush_l1d = true;
 		return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0);
 	}
 
@@ -7923,9 +7924,20 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
 #define L1D_CACHE_ORDER 4
 static void *vmx_l1d_flush_pages;
 
-static void __maybe_unused vmx_l1d_flush(void)
+static void vmx_l1d_flush(struct kvm_vcpu *vcpu)
 {
 	int size = PAGE_SIZE << L1D_CACHE_ORDER;
+	bool always;
+
+	/*
+	 * If the mitigation mode is 'flush always', keep the flush bit
+	 * set, otherwise clear it. It gets set again either from
+	 * vcpu_run() or from one of the unsafe VMEXIT handlers.
+	 */
+	always = vmentry_l1d_flush == VMENTER_L1D_FLUSH_ALWAYS;
+	vcpu->arch.l1tf_flush_l1d = always;
+
+	vcpu->stat.l1d_flush++;
 
 	if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
 		wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
@@ -8161,6 +8173,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
 			[ss]"i"(__KERNEL_DS),
 			[cs]"i"(__KERNEL_CS)
 			);
+		vcpu->arch.l1tf_flush_l1d = true;
 	} else
 		local_irq_enable();
 }
@@ -8371,6 +8384,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 
 	x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
 
+	if (unlikely(static_key_enabled(&vmx_l1d_should_flush))) {
+		if (vcpu->arch.l1tf_flush_l1d)
+			vmx_l1d_flush(vcpu);
+	}
+
 	asm(
 		/* Store host registers */
 		"push %%" _ASM_DX "; push %%" _ASM_BP ";"
@@ -9821,6 +9839,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 	vmcs12->launch_state = 1;
 
+	/* Hide L1D cache contents from the nested guest. */
+	vmx->vcpu.arch.l1tf_flush_l1d = true;
+
 	if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
 		return kvm_vcpu_halt(vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d128875fd642..b05535b2871c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -157,6 +157,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
 	{ "irq_injections", VCPU_STAT(irq_injections) },
 	{ "nmi_injections", VCPU_STAT(nmi_injections) },
+	{ "l1d_flush", VCPU_STAT(l1d_flush) },
 	{ "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
 	{ "mmu_pte_write", VM_STAT(mmu_pte_write) },
 	{ "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
@@ -4355,6 +4356,9 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
 	void *data = val;
 	int r = X86EMUL_CONTINUE;
 
+	/* kvm_write_guest_virt_system can pull in tons of pages. */
+	vcpu->arch.l1tf_flush_l1d = true;
+
 	while (bytes) {
 		gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr,
							    PFERR_WRITE_MASK,
@@ -4378,6 +4382,7 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
 out:
 	return r;
 }
+
 EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
 
 static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
@@ -5395,6 +5400,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 	bool writeback = true;
 	bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
 
+	vcpu->arch.l1tf_flush_l1d = true;
+
 	/*
 	 * Clear write_fault_to_shadow_pgtable here to ensure it is
 	 * never reused.
@@ -6494,6 +6501,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
 	struct kvm *kvm = vcpu->kvm;
 
 	vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+	vcpu->arch.l1tf_flush_l1d = true;
 
 	for (;;) {
 		if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
@@ -7427,6 +7435,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 
 void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
 {
+	vcpu->arch.l1tf_flush_l1d = true;
 	kvm_x86_ops->sched_in(vcpu, cpu);
 }
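
For reference, the backported static key check added to vmx_vcpu_run() above
and its upstream counterpart differ only in how the key is tested. A minimal
side-by-side sketch (illustrative only; it assumes the vmx_l1d_should_flush
key set up earlier in this series, not code from this patch):

Upstream, where 11276d53 provides the static_branch_* interface:

	/* Jump label: a patched no-op while the key is disabled. */
	if (static_branch_unlikely(&vmx_l1d_should_flush)) {
		if (vcpu->arch.l1tf_flush_l1d)
			vmx_l1d_flush(vcpu);
	}

This backport, without that interface:

	/* Plain read of the key's enable count, with only a branch hint. */
	if (unlikely(static_key_enabled(&vmx_l1d_should_flush))) {
		if (vcpu->arch.l1tf_flush_l1d)
			vmx_l1d_flush(vcpu);
	}

Both forms call vmx_l1d_flush() only when the key is enabled and the
per-vCPU l1tf_flush_l1d flag is set; the backported form merely pays a load
and compare in the disabled case instead of a patched jump label.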