  * @gfn_offset: start of the BITS_PER_LONG pages we care about
  * @mask: indicates which pages we should protect
  *
- * Used when we do not need to care about huge page mappings: e.g. during dirty
- * logging we do not have any such mappings.
+ * Used when we do not need to care about huge page mappings.
  */
 static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
                                     struct kvm_memory_slot *slot,
  * It calls kvm_mmu_write_protect_pt_masked to write protect selected pages to
  * enable dirty logging for them.
  *
- * Used when we do not need to care about huge page mappings: e.g. during dirty
- * logging we do not have any such mappings.
+ * We need to care about huge page mappings: e.g. during dirty logging we may
+ * have such mappings.
  */
 void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
                                struct kvm_memory_slot *slot,
                                gfn_t gfn_offset, unsigned long mask)
 {
+       /*
+        * Huge pages are NOT write protected when we start dirty logging in
+        * initially-all-set mode; must write protect them here so that they
+        * are split to 4K on the first write.
+        *
+        * The gfn_offset is guaranteed to be aligned to 64, but the base_gfn
+        * of memslot has no such restriction, so the range can cross two large
+        * pages.
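+        *
+        * For example, with base_gfn 0x1f0 and gfn_offset 0, a full 64-bit
+        * mask covers gfns 0x1f0 through 0x22f, which straddles the 2M
+        * boundary at gfn 0x200, so both huge pages need write protection.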
+        */
+       if (kvm_dirty_log_manual_protect_and_init_set(kvm)) {
+               gfn_t start = slot->base_gfn + gfn_offset + __ffs(mask);
+               gfn_t end = slot->base_gfn + gfn_offset + __fls(mask);
+
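+               /*
+                * With PG_LEVEL_2M as the minimum level, only huge mappings
+                * of this gfn are write protected; 4K PTEs are handled below.
+                */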
+               kvm_mmu_slot_gfn_write_protect(kvm, slot, start, PG_LEVEL_2M);
+
+               /* Cross two large pages? */
+               if (ALIGN(start << PAGE_SHIFT, PMD_SIZE) !=
+                   ALIGN(end << PAGE_SHIFT, PMD_SIZE))
+                       kvm_mmu_slot_gfn_write_protect(kvm, slot, end,
+                                                      PG_LEVEL_2M);
+       }
+
+       /* Now handle 4K PTEs.  */
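+       /*
+        * With PML (cpu_dirty_log_size != 0) the CPU records 4K writes via
+        * spte dirty bits, so clearing those bits is enough; otherwise the
+        * selected pages must be write protected at 4K.
+        */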
        if (kvm_x86_ops.cpu_dirty_log_size)
                kvm_mmu_clear_dirty_pt_masked(kvm, slot, gfn_offset, mask);
        else
                kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
 
                 */
                kvm_mmu_zap_collapsible_sptes(kvm, new);
        } else {
-               /* By default, write-protect everything to log writes. */
-               int level = PG_LEVEL_4K;
+               /*
+                * Initially-all-set does not require write protecting any page,
+                * because they're all assumed to be dirty.
+                */
+               if (kvm_dirty_log_manual_protect_and_init_set(kvm))
+                       return;
 
                if (kvm_x86_ops.cpu_dirty_log_size) {
-                       /*
-                        * Clear all dirty bits, unless pages are treated as
-                        * dirty from the get-go.
-                        */
-                       if (!kvm_dirty_log_manual_protect_and_init_set(kvm))
-                               kvm_mmu_slot_leaf_clear_dirty(kvm, new);
-
-                       /*
-                        * Write-protect large pages on write so that dirty
-                        * logging happens at 4k granularity.  No need to
-                        * write-protect small SPTEs since write accesses are
-                        * logged by the CPU via dirty bits.
-                        */
-                       level = PG_LEVEL_2M;
-               } else if (kvm_dirty_log_manual_protect_and_init_set(kvm)) {
-                       /*
-                        * If we're with initial-all-set, we don't need
-                        * to write protect any small page because
-                        * they're reported as dirty already.  However
-                        * we still need to write-protect huge pages
-                        * so that the page split can happen lazily on
-                        * the first write to the huge page.
-                        */
-                       level = PG_LEVEL_2M;
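+                       /*
+                        * PML logs writes via dirty bits: clear them now
+                        * and write-protect only huge pages so the first
+                        * write splits them to 4K.  Small SPTEs need no
+                        * write protection.
+                        */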
+                       kvm_mmu_slot_leaf_clear_dirty(kvm, new);
+                       kvm_mmu_slot_remove_write_access(kvm, new, PG_LEVEL_2M);
+               } else {
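+                       /*
+                        * No PML: log writes by write protecting everything
+                        * down to 4K pages.
+                        */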
+                       kvm_mmu_slot_remove_write_access(kvm, new, PG_LEVEL_4K);
                }
-               kvm_mmu_slot_remove_write_access(kvm, new, level);
        }
 }