return 0;
 }
 
+/* Bit 63 of XCOMP_BV in the XSAVE header: the area is in compacted format. */
+#define XSTATE_COMPACTION_ENABLED (1ULL << 63)
+
+static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
+{
+       struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave;
+       u64 xstate_bv = xsave->xsave_hdr.xstate_bv;
+       u64 valid;
+
+       /*
+        * Copy the legacy XSAVE area (x87 and SSE state), to avoid
+        * complications with CPUID leaf 0xD sub-leaves 0 and 1 in the
+        * loop below.
+        */
+       memcpy(dest, xsave, XSAVE_HDR_OFFSET);
+
+       /* Set XSTATE_BV */
+       *(u64 *)(dest + XSAVE_HDR_OFFSET) = xstate_bv;
+
+       /*
+        * Copy each region from the possibly compacted offset to the
+        * non-compacted offset.
+        */
+       valid = xstate_bv & ~XSTATE_FPSSE;
+       while (valid) {
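+               /* Process one feature at a time, lowest set bit first. */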
+               u64 feature = valid & -valid;
+               int index = fls64(feature) - 1;
+               void *src = get_xsave_addr(xsave, feature);
+
+               if (src) {
+                       u32 size, offset, ecx, edx;
+                       cpuid_count(XSTATE_CPUID, index,
+                                   &size, &offset, &ecx, &edx);
+                       memcpy(dest + offset, src, size);
+               }
+
+               valid -= feature;
+       }
+}
+
+static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
+{
+       struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave;
+       u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET);
+       u64 valid;
+
+       /*
+        * Copy the legacy XSAVE area (x87 and SSE state), to avoid
+        * complications with CPUID leaf 0xD sub-leaves 0 and 1 in the
+        * loop below.
+        */
+       memcpy(xsave, src, XSAVE_HDR_OFFSET);
+
+       /* Set XSTATE_BV and possibly XCOMP_BV.  */
+       xsave->xsave_hdr.xstate_bv = xstate_bv;
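+       /*
+        * With XSAVES the kernel buffer is in compacted format, so XCOMP_BV
+        * needs the compaction bit plus the host features that determine
+        * the buffer's layout.
+        */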
+       if (cpu_has_xsaves)
+               xsave->xsave_hdr.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED;
+
+       /*
+        * Copy each region from the non-compacted offset to the
+        * possibly compacted offset.
+        */
+       valid = xstate_bv & ~XSTATE_FPSSE;
+       while (valid) {
+               u64 feature = valid & -valid;
+               int index = fls64(feature) - 1;
+               void *dest = get_xsave_addr(xsave, feature);
+
+               if (dest) {
+                       u32 size, offset, ecx, edx;
+                       cpuid_count(XSTATE_CPUID, index,
+                                   &size, &offset, &ecx, &edx);
+                       memcpy(dest, src + offset, size);
+               } else
+                       WARN_ON_ONCE(1);
+
+               valid -= feature;
+       }
+}
+
 static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
                                         struct kvm_xsave *guest_xsave)
 {
        if (cpu_has_xsave) {
-               memcpy(guest_xsave->region,
-                       &vcpu->arch.guest_fpu.state->xsave,
-                       vcpu->arch.guest_xstate_size);
-               *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] &=
-                       vcpu->arch.guest_supported_xcr0 | XSTATE_FPSSE;
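+               /*
+                * Clear the whole area first; fill_xsave only writes the
+                * regions whose feature bits are set in XSTATE_BV.
+                */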
+               memset(guest_xsave, 0, sizeof(struct kvm_xsave));
+               fill_xsave((u8 *) guest_xsave->region, vcpu);
        } else {
                memcpy(guest_xsave->region,
                        &vcpu->arch.guest_fpu.state->fxsave,
                 */
                if (xstate_bv & ~kvm_supported_xcr0())
                        return -EINVAL;
-               memcpy(&vcpu->arch.guest_fpu.state->xsave,
-                       guest_xsave->region, vcpu->arch.guest_xstate_size);
+               load_xsave(vcpu, (u8 *)guest_xsave->region);
        } else {
                if (xstate_bv & ~XSTATE_FPSSE)
                        return -EINVAL;
                return err;
 
        fpu_finit(&vcpu->arch.guest_fpu);
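+       /*
+        * XRSTORS requires XCOMP_BV to have the compaction bit set and to
+        * describe the buffer layout, so reinitialize it here as well.
+        */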
+       if (cpu_has_xsaves)
+               vcpu->arch.guest_fpu.state->xsave.xsave_hdr.xcomp_bv =
+                       host_xcr0 | XSTATE_COMPACTION_ENABLED;
 
        /*
         * Ensure guest xcr0 is valid for loading