RTIT_STATUS_ERROR | RTIT_STATUS_STOPPED | \
        RTIT_STATUS_BYTECNT))
 
+/*
+ * List of MSRs that can be directly passed to the guest.
+ * In addition to these, x2APIC and PT MSRs are handled specially.
+ */
+static u32 vmx_possible_passthrough_msrs[MAX_POSSIBLE_PASSTHROUGH_MSRS] = {
+       MSR_IA32_SPEC_CTRL,
+       MSR_IA32_PRED_CMD,
+       MSR_IA32_TSC,
+       MSR_FS_BASE,
+       MSR_GS_BASE,
+       MSR_KERNEL_GS_BASE,
+       MSR_IA32_SYSENTER_CS,
+       MSR_IA32_SYSENTER_ESP,
+       MSR_IA32_SYSENTER_EIP,
+       MSR_CORE_C1_RES,
+       MSR_CORE_C3_RESIDENCY,
+       MSR_CORE_C6_RESIDENCY,
+       MSR_CORE_C7_RESIDENCY,
+};
+
 /*
  * These 2 parameters are used to config the controls for Pause-Loop Exiting:
  * ple_gap:    upper bound on the amount of time between two successive
        return flexpriority_enabled;
 }
 
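+/*
+ * Return the index of @msr in vmx_possible_passthrough_msrs[], or -ENOENT if
+ * the MSR is not in the list.
+ */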
+static int possible_passthrough_msr_slot(u32 msr)
+{
+       u32 i;
+
+       for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++)
+               if (vmx_possible_passthrough_msrs[i] == msr)
+                       return i;
+
+       return -ENOENT;
+}
+
+static bool is_valid_passthrough_msr(u32 msr)
+{
+       bool r;
+
+       switch (msr) {
+       case 0x800 ... 0x8ff:
+               /* x2APIC MSRs. These are handled in vmx_update_msr_bitmap_x2apic() */
+               return true;
+       case MSR_IA32_RTIT_STATUS:
+       case MSR_IA32_RTIT_OUTPUT_BASE:
+       case MSR_IA32_RTIT_OUTPUT_MASK:
+       case MSR_IA32_RTIT_CR3_MATCH:
+       case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
+               /* PT MSRs. These are handled in pt_update_intercept_for_msr() */
+               return true;
+       }
+
+       r = possible_passthrough_msr_slot(msr) != -ENOENT;
+
+       WARN(!r, "Invalid MSR %x, please adapt vmx_possible_passthrough_msrs[]", msr);
+
+       return r;
+}
+
 static inline int __vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
 {
        int i;
        spin_unlock(&vmx_vpid_lock);
 }
 
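+/*
+ * The VMX MSR bitmap is split into four 1-KiB chunks (see the Intel SDM,
+ * "MSR-Bitmap Address"): reads of MSRs 0x00000000-0x00001fff at offset 0x000,
+ * reads of MSRs 0xc0000000-0xc0001fff at 0x400, writes of the low range at
+ * 0x800 and writes of the high range at 0xc00.  MSRs outside these two ranges
+ * cannot be configured via the bitmap and always cause VM exits.
+ */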
+static void vmx_clear_msr_bitmap_read(ulong *msr_bitmap, u32 msr)
+{
+       int f = sizeof(unsigned long);
+
+       if (msr <= 0x1fff)
+               __clear_bit(msr, msr_bitmap + 0x000 / f);
+       else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
+               __clear_bit(msr & 0x1fff, msr_bitmap + 0x400 / f);
+}
+
+static void vmx_clear_msr_bitmap_write(ulong *msr_bitmap, u32 msr)
+{
+       int f = sizeof(unsigned long);
+
+       if (msr <= 0x1fff)
+               __clear_bit(msr, msr_bitmap + 0x800 / f);
+       else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
+               __clear_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f);
+}
+
+static void vmx_set_msr_bitmap_read(ulong *msr_bitmap, u32 msr)
+{
+       int f = sizeof(unsigned long);
+
+       if (msr <= 0x1fff)
+               __set_bit(msr, msr_bitmap + 0x000 / f);
+       else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
+               __set_bit(msr & 0x1fff, msr_bitmap + 0x400 / f);
+}
+
+static void vmx_set_msr_bitmap_write(ulong *msr_bitmap, u32 msr)
+{
+       int f = sizeof(unsigned long);
+
+       if (msr <= 0x1fff)
+               __set_bit(msr, msr_bitmap + 0x800 / f);
+       else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
+               __set_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f);
+}
+
 static __always_inline void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu,
                                                          u32 msr, int type)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
-       int f = sizeof(unsigned long);
 
        if (!cpu_has_vmx_msr_bitmap())
                return;
                evmcs_touch_msr_bitmap();
 
        /*
-        * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
-        * have the write-low and read-high bitmap offsets the wrong way round.
-        * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
-        */
-       if (msr <= 0x1fff) {
-               if (type & MSR_TYPE_R)
-                       /* read-low */
-                       __clear_bit(msr, msr_bitmap + 0x000 / f);
+        * Mark the desired intercept state in the shadow bitmap; this is
+        * needed for resync when the MSR filter changes.
+        */
+       if (is_valid_passthrough_msr(msr)) {
+               int idx = possible_passthrough_msr_slot(msr);
+
+               if (idx != -ENOENT) {
+                       if (type & MSR_TYPE_R)
+                               clear_bit(idx, vmx->shadow_msr_intercept.read);
+                       if (type & MSR_TYPE_W)
+                               clear_bit(idx, vmx->shadow_msr_intercept.write);
+               }
+       }
 
-               if (type & MSR_TYPE_W)
-                       /* write-low */
-                       __clear_bit(msr, msr_bitmap + 0x800 / f);
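+       /*
+        * Keep the access intercepted if the userspace MSR filter does not
+        * allow it; passing the MSR through would bypass the filter.
+        */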
+       if ((type & MSR_TYPE_R) &&
+           !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_READ)) {
+               vmx_set_msr_bitmap_read(msr_bitmap, msr);
+               type &= ~MSR_TYPE_R;
+       }
 
-       } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
-               msr &= 0x1fff;
-               if (type & MSR_TYPE_R)
-                       /* read-high */
-                       __clear_bit(msr, msr_bitmap + 0x400 / f);
+       if ((type & MSR_TYPE_W) &&
+           !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_WRITE)) {
+               vmx_set_msr_bitmap_write(msr_bitmap, msr);
+               type &= ~MSR_TYPE_W;
+       }
 
-               if (type & MSR_TYPE_W)
-                       /* write-high */
-                       __clear_bit(msr, msr_bitmap + 0xc00 / f);
+       if (type & MSR_TYPE_R)
+               vmx_clear_msr_bitmap_read(msr_bitmap, msr);
 
-       }
+       if (type & MSR_TYPE_W)
+               vmx_clear_msr_bitmap_write(msr_bitmap, msr);
 }
 
 static __always_inline void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu,
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
-       int f = sizeof(unsigned long);
 
        if (!cpu_has_vmx_msr_bitmap())
                return;
                evmcs_touch_msr_bitmap();
 
        /*
-        * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
-        * have the write-low and read-high bitmap offsets the wrong way round.
-        * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
-        */
-       if (msr <= 0x1fff) {
-               if (type & MSR_TYPE_R)
-                       /* read-low */
-                       __set_bit(msr, msr_bitmap + 0x000 / f);
-
-               if (type & MSR_TYPE_W)
-                       /* write-low */
-                       __set_bit(msr, msr_bitmap + 0x800 / f);
-
-       } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
-               msr &= 0x1fff;
-               if (type & MSR_TYPE_R)
-                       /* read-high */
-                       __set_bit(msr, msr_bitmap + 0x400 / f);
+        * Mark the desired intercept state in the shadow bitmap; this is
+        * needed for resync when the MSR filter changes.
+        */
+       if (is_valid_passthrough_msr(msr)) {
+               int idx = possible_passthrough_msr_slot(msr);
+
+               if (idx != -ENOENT) {
+                       if (type & MSR_TYPE_R)
+                               set_bit(idx, vmx->shadow_msr_intercept.read);
+                       if (type & MSR_TYPE_W)
+                               set_bit(idx, vmx->shadow_msr_intercept.write);
+               }
+       }
 
-               if (type & MSR_TYPE_W)
-                       /* write-high */
-                       __set_bit(msr, msr_bitmap + 0xc00 / f);
+       if (type & MSR_TYPE_R)
+               vmx_set_msr_bitmap_read(msr_bitmap, msr);
 
-       }
+       if (type & MSR_TYPE_W)
+               vmx_set_msr_bitmap_write(msr_bitmap, msr);
 }
 
 static __always_inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu,
        return mode;
 }
 
-static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu,
-                                        unsigned long *msr_bitmap, u8 mode)
+static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu, u8 mode)
 {
        int msr;
 
-       for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
-               unsigned word = msr / BITS_PER_LONG;
-               msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0;
-               msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
+       for (msr = 0x800; msr <= 0x8ff; msr++) {
+               bool intercepted = !!(mode & MSR_BITMAP_MODE_X2APIC_APICV);
+
+               vmx_set_intercept_for_msr(vcpu, msr, MSR_TYPE_RW, intercepted);
        }
 
        if (mode & MSR_BITMAP_MODE_X2APIC) {
 void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
-       unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
        u8 mode = vmx_msr_bitmap_mode(vcpu);
        u8 changed = mode ^ vmx->msr_bitmap_mode;
 
                return;
 
        if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV))
-               vmx_update_msr_bitmap_x2apic(vcpu, msr_bitmap, mode);
+               vmx_update_msr_bitmap_x2apic(vcpu, mode);
 
        vmx->msr_bitmap_mode = mode;
 }
        return ((rvi & 0xf0) > (vppr & 0xf0));
 }
 
+static void vmx_msr_filter_changed(struct kvm_vcpu *vcpu)
+{
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+       u32 i;
+
+       /*
+        * Redo intercept permissions for all potentially passed-through MSRs.
+        * They will automatically get filtered through the MSR filter, so we
+        * are back in sync with it after this.
+        */
+       for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++) {
+               u32 msr = vmx_possible_passthrough_msrs[i];
+               bool read = test_bit(i, vmx->shadow_msr_intercept.read);
+               bool write = test_bit(i, vmx->shadow_msr_intercept.write);
+
+               vmx_set_intercept_for_msr(vcpu, msr, MSR_TYPE_R, read);
+               vmx_set_intercept_for_msr(vcpu, msr, MSR_TYPE_W, write);
+       }
+
+       pt_update_intercept_for_msr(vcpu);
+       vmx_update_msr_bitmap_x2apic(vcpu, vmx_msr_bitmap_mode(vcpu));
+}
+
 static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
                                                     bool nested)
 {
        if (err < 0)
                goto free_pml;
 
+       /* The MSR bitmap starts with all ones, i.e. every MSR intercepted */
+       bitmap_fill(vmx->shadow_msr_intercept.read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
+       bitmap_fill(vmx->shadow_msr_intercept.write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
+
        msr_bitmap = vmx->vmcs01.msr_bitmap;
        vmx_disable_intercept_for_msr(vcpu, MSR_IA32_TSC, MSR_TYPE_R);
        vmx_disable_intercept_for_msr(vcpu, MSR_FS_BASE, MSR_TYPE_RW);
        .can_emulate_instruction = vmx_can_emulate_instruction,
        .apic_init_signal_blocked = vmx_apic_init_signal_blocked,
        .migrate_timers = vmx_migrate_timers,
+
+       .msr_filter_changed = vmx_msr_filter_changed,
 };
 
 static __init int hardware_setup(void)