From: Radim Krčmář <rkrcmar@redhat.com>
Date: Thu, 1 Feb 2018 14:04:17 +0000 (+0100)
Subject: Merge branch 'x86/hyperv' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
X-Git-Tag: v4.16-rc1~10^2~4
X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=7bf14c28ee776be567855bd39ed8ff795ea19f55;p=linux.git

Merge branch 'x86/hyperv' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Topic branch for stable KVM clocksource under Hyper-V.

Thanks to Christoffer Dall for resolving the ARM conflict.
---

7bf14c28ee776be567855bd39ed8ff795ea19f55
diff --cc Documentation/virtual/kvm/api.txt
index e5f1743e0b3e,fc3ae951bc07..70d3368adba9
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@@ -3403,56 -3403,52 +3403,103 @@@ invalid, if invalid pages are written t
  or if no page table is present for the addresses (e.g. when using
  hugepages).
  
- 4.109 KVM_MEMORY_ENCRYPT_OP
 -4.108 KVM_PPC_GET_CPU_CHAR
++4.109 KVM_PPC_GET_CPU_CHAR
+ 
+ Capability: KVM_CAP_PPC_GET_CPU_CHAR
+ Architectures: powerpc
+ Type: vm ioctl
+ Parameters: struct kvm_ppc_cpu_char (out)
+ Returns: 0 on successful completion
+ 	 -EFAULT if struct kvm_ppc_cpu_char cannot be written
+ 
+ This ioctl gives userspace information about certain characteristics
+ of the CPU relating to speculative execution of instructions and
+ possible information leakage resulting from speculative execution (see
+ CVE-2017-5715, CVE-2017-5753 and CVE-2017-5754).  The information is
+ returned in struct kvm_ppc_cpu_char, which looks like this:
+ 
+ struct kvm_ppc_cpu_char {
+ 	__u64	character;		/* characteristics of the CPU */
+ 	__u64	behaviour;		/* recommended software behaviour */
+ 	__u64	character_mask;		/* valid bits in character */
+ 	__u64	behaviour_mask;		/* valid bits in behaviour */
+ };
+ 
+ For extensibility, the character_mask and behaviour_mask fields
+ indicate which bits of character and behaviour have been filled in by
+ the kernel.  If the set of defined bits is extended in future then
+ userspace will be able to tell whether it is running on a kernel that
+ knows about the new bits.
+ 
+ The character field describes attributes of the CPU which can help
+ with preventing inadvertent information disclosure - specifically,
+ whether there is an instruction to flash-invalidate the L1 data cache
+ (ori 30,30,0 or mtspr SPRN_TRIG2,rN), whether the L1 data cache is set
+ to a mode where entries can only be used by the thread that created
+ them, whether the bcctr[l] instruction prevents speculation, and
+ whether a speculation barrier instruction (ori 31,31,0) is provided.
+ 
+ The behaviour field describes actions that software should take to
+ prevent inadvertent information disclosure, and thus describes which
+ vulnerabilities the hardware is subject to; specifically whether the
+ L1 data cache should be flushed when returning to user mode from the
+ kernel, and whether a speculation barrier should be placed between an
+ array bounds check and the array access.
+ 
+ These fields use the same bit definitions as the new
+ H_GET_CPU_CHARACTERISTICS hypercall.
+ 
++4.110 KVM_MEMORY_ENCRYPT_OP
 +
 +Capability: basic
 +Architectures: x86
 +Type: system
 +Parameters: an opaque platform specific structure (in/out)
 +Returns: 0 on success; -1 on error
 +
 +If the platform supports creating encrypted VMs then this ioctl can be used
 +for issuing platform-specific memory encryption commands to manage those
 +encrypted VMs.
 +
 +Currently, this ioctl is used for issuing Secure Encrypted Virtualization
 +(SEV) commands on AMD Processors. The SEV commands are defined in
 +Documentation/virtual/kvm/amd-memory-encryption.txt.
 +
- 4.110 KVM_MEMORY_ENCRYPT_REG_REGION
++4.111 KVM_MEMORY_ENCRYPT_REG_REGION
 +
 +Capability: basic
 +Architectures: x86
 +Type: system
 +Parameters: struct kvm_enc_region (in)
 +Returns: 0 on success; -1 on error
 +
 +This ioctl can be used to register a guest memory region which may
 +contain encrypted data (e.g. guest RAM, SMRAM etc).
 +
 +It is used in the SEV-enabled guest. When encryption is enabled, a guest
 +memory region may contain encrypted data. The SEV memory encryption
 +engine uses a tweak such that two identical plaintext pages, each at
 +different locations will have differing ciphertexts. So swapping or
 +moving ciphertext of those pages will not result in plaintext being
 +swapped. So relocating (or migrating) physical backing pages for the SEV
 +guest will require some additional steps.
 +
 +Note: The current SEV key management spec does not provide commands to
 +swap or migrate (move) ciphertext pages. Hence, for now we pin the guest
 +memory region registered with the ioctl.
 +
- 4.111 KVM_MEMORY_ENCRYPT_UNREG_REGION
++4.112 KVM_MEMORY_ENCRYPT_UNREG_REGION
 +
 +Capability: basic
 +Architectures: x86
 +Type: system
 +Parameters: struct kvm_enc_region (in)
 +Returns: 0 on success; -1 on error
 +
 +This ioctl can be used to unregister the guest memory region registered
 +with KVM_MEMORY_ENCRYPT_REG_REGION ioctl above.
 +
++
  5. The kvm_run structure
  ------------------------
  
diff --cc arch/s390/kvm/vsie.c
index 6d494ed5907e,751348348477..ec772700ff96
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@@ -336,12 -325,15 +343,15 @@@ static int shadow_scb(struct kvm_vcpu *
  	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_ESOP))
  		scb_s->ecb |= scb_o->ecb & ECB_HOSTPROTINT;
  	/* transactional execution */
 -	if (test_kvm_facility(vcpu->kvm, 73)) {
 +	if (test_kvm_facility(vcpu->kvm, 73) && wants_tx) {
  		/* remap the prefix is tx is toggled on */
 -		if ((scb_o->ecb & ECB_TE) && !had_tx)
 +		if (!had_tx)
  			prefix_unmapped(vsie_page);
 -		scb_s->ecb |= scb_o->ecb & ECB_TE;
 +		scb_s->ecb |= ECB_TE;
  	}
+ 	/* branch prediction */
+ 	if (test_kvm_facility(vcpu->kvm, 82))
+ 		scb_s->fpf |= scb_o->fpf & FPF_BPBC;
  	/* SIMD */
  	if (test_kvm_facility(vcpu->kvm, 129)) {
  		scb_s->eca |= scb_o->eca & ECA_VX;
diff --cc arch/x86/include/asm/cpufeatures.h
index 19f35be95f16,1d9199e1c2ad..0dfe4d3f74e2
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@@ -201,14 -202,16 +202,17 @@@
  #define X86_FEATURE_HW_PSTATE		( 7*32+ 8) /* AMD HW-PState */
  #define X86_FEATURE_PROC_FEEDBACK	( 7*32+ 9) /* AMD ProcFeedbackInterface */
  #define X86_FEATURE_SME			( 7*32+10) /* AMD Secure Memory Encryption */
- #define X86_FEATURE_SEV			( 7*32+11) /* AMD Secure Encrypted Virtualization */
- 
+ #define X86_FEATURE_PTI			( 7*32+11) /* Kernel Page Table Isolation enabled */
+ #define X86_FEATURE_RETPOLINE		( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+ #define X86_FEATURE_RETPOLINE_AMD	( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
  #define X86_FEATURE_INTEL_PPIN		( 7*32+14) /* Intel Processor Inventory Number */
- #define X86_FEATURE_INTEL_PT		( 7*32+15) /* Intel Processor Trace */
- #define X86_FEATURE_AVX512_4VNNIW	( 7*32+16) /* AVX-512 Neural Network Instructions */
- #define X86_FEATURE_AVX512_4FMAPS	( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+ #define X86_FEATURE_CDP_L2		( 7*32+15) /* Code and Data Prioritization L2 */
  
  #define X86_FEATURE_MBA			( 7*32+18) /* Memory Bandwidth Allocation */
+ #define X86_FEATURE_RSB_CTXSW		( 7*32+19) /* "" Fill RSB on context switches */
++#define X86_FEATURE_SEV			( 7*32+20) /* AMD Secure Encrypted Virtualization */
+ 
+ #define X86_FEATURE_USE_IBPB		( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
  
  /* Virtualization flags: Linux defined, word 8 */
  #define X86_FEATURE_TPR_SHADOW		( 8*32+ 0) /* Intel TPR Shadow */
diff --cc arch/x86/kvm/vmx.c
index 438802d0b01d,a8b96dc4cd83..bb5b4888505b
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@@ -9411,9 -9491,12 +9421,12 @@@ static void __noclone vmx_vcpu_run(stru
  #endif
  	      );
  
+ 	/* Eliminate branch target predictions from guest mode */
+ 	vmexit_fill_RSB();
+ 
  	/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
 -	if (debugctlmsr)
 -		update_debugctlmsr(debugctlmsr);
 +	if (vmx->host_debugctlmsr)
 +		update_debugctlmsr(vmx->host_debugctlmsr);
  
  #ifndef CONFIG_X86_64
  	/*
diff --cc arch/x86/kvm/x86.c
index c13cd14c4780,cd3b3bc67c5a..0e27ee573bd5
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@@ -7592,8 -7615,11 +7703,11 @@@ int kvm_arch_vcpu_ioctl_set_sregs(struc
  
  	if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
  			(sregs->cr4 & X86_CR4_OSXSAVE))
 -		return -EINVAL;
 +		goto out;
  
+ 	if (kvm_valid_sregs(vcpu, sregs))
+ 		return -EINVAL;
+ 
  	apic_base_msr.data = sregs->apic_base;
  	apic_base_msr.host_initiated = true;
  	if (kvm_set_apic_base(vcpu, &apic_base_msr))
diff --cc virt/kvm/arm/arch_timer.c
index fb6bd9b9845e,cc29a8148328..70268c0bec79
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@@ -92,18 -92,27 +92,22 @@@ static irqreturn_t kvm_arch_timer_handl
  {
  	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
  	struct arch_timer_context *vtimer;
 -	u32 cnt_ctl;
  
- 	if (!vcpu) {
- 		pr_warn_once("Spurious arch timer IRQ on non-VCPU thread\n");
- 		return IRQ_NONE;
- 	}
+ 	/*
+ 	 * We may see a timer interrupt after vcpu_put() has been called which
+ 	 * sets the CPU's vcpu pointer to NULL, because even though the timer
+ 	 * has been disabled in vtimer_save_state(), the hardware interrupt
+ 	 * signal may not have been retired from the interrupt controller yet.
+ 	 */
+ 	if (!vcpu)
+ 		return IRQ_HANDLED;
  
  	vtimer = vcpu_vtimer(vcpu);
 -	if (!vtimer->irq.level) {
 -		cnt_ctl = read_sysreg_el0(cntv_ctl);
 -		cnt_ctl &= ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT |
 -			   ARCH_TIMER_CTRL_IT_MASK;
 -		if (cnt_ctl == (ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT))
 -			kvm_timer_update_irq(vcpu, true, vtimer);
 -	}
 +	if (kvm_timer_should_fire(vtimer))
 +		kvm_timer_update_irq(vcpu, true, vtimer);
  
 -	if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
 +	if (static_branch_unlikely(&userspace_irqchip_in_use) &&
 +	    unlikely(!irqchip_in_kernel(vcpu->kvm)))
  		kvm_vtimer_update_mask_user(vcpu);
  
  	return IRQ_HANDLED;