int table_index, u64 *pte_ret_p);
 extern int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
                        struct kvmppc_pte *gpte, bool data, bool iswrite);
+extern void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa,
+                       unsigned int shift, struct kvm_memory_slot *memslot,
+                       unsigned int lpid);
 extern bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable,
                                    bool writing, unsigned long gpa,
                                    unsigned int lpid);
 
        struct kvm_nested_guest *next;
 };
 
+/*
+ * We define a nested rmap entry as a single 64-bit quantity
+ * 0xFFF0000000000000  12-bit lpid field
+ * 0x000FFFFFFFFFF000  40-bit guest 4k page frame number
+ * 0x0000000000000001  1-bit  single entry flag
+ */
+#define RMAP_NESTED_LPID_MASK          0xFFF0000000000000UL
+#define RMAP_NESTED_LPID_SHIFT         (52)
+#define RMAP_NESTED_GPA_MASK           0x000FFFFFFFFFF000UL
+#define RMAP_NESTED_IS_SINGLE_ENTRY    0x0000000000000001UL
+
+/* Structure for a nested guest rmap entry */
+struct rmap_nested {
+       struct llist_node list;
+       u64 rmap;
+};
+
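Each rmap value packs the nested guest's L1 lpid and the guest physical address into one 64-bit word, per the layout above; the patch open-codes the packing in the nested page-fault path and the unpacking in kvmhv_remove_nest_rmap(). As a minimal sketch (illustration only; the helper names are hypothetical and not part of the patch):

/* Illustration only: pack and unpack a nested rmap value. */
static inline u64 nest_rmap_encode(unsigned int l1_lpid, unsigned long n_gpa)
{
        return ((u64) l1_lpid << RMAP_NESTED_LPID_SHIFT) |
               (n_gpa & RMAP_NESTED_GPA_MASK);
}

static inline unsigned int nest_rmap_lpid(u64 rmap)
{
        return (rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT;
}

static inline unsigned long nest_rmap_gpa(u64 rmap)
{
        return rmap & RMAP_NESTED_GPA_MASK;
}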
+/*
+ * for_each_nest_rmap_safe - iterate over the list of nested rmap entries
+ *                          safe against removal of the list entry or NULL list
+ * @pos:       a (struct rmap_nested *) to use as a loop cursor
+ * @node:      pointer to the first entry
+ *             NOTE: this can be NULL
+ * @rmapp:     an (unsigned long *) in which to return the rmap entries on each
+ *             iteration
+ *             NOTE: this must point to already allocated memory
+ *
+ * The nested_rmap is a llist of (struct rmap_nested) entries pointed to by the
+ * rmap entry in the memslot. The list is always terminated by a "single entry"
+ * stored in the list element of the final entry of the llist. If there is ONLY
+ * a single entry then this is itself in the rmap entry of the memslot, not a
+ * llist head pointer.
+ *
+ * Note that the iterator below assumes that a nested rmap entry is always
+ * non-zero.  This is true for our usage because the LPID field is always
+ * non-zero (zero is reserved for the host).
+ *
+ * This should be used to iterate over the list of rmap_nested entries with
+ * processing done on the u64 rmap value given by each iteration. This is safe
+ * against removal of list entries and it is always safe to call kfree() on (pos).
+ *
+ * e.g.
+ * struct rmap_nested *cursor;
+ * struct llist_node *first;
+ * unsigned long rmap;
+ * for_each_nest_rmap_safe(cursor, first, &rmap) {
+ *     do_something(rmap);
+ *     kfree(cursor);
+ * }
+ */
+#define for_each_nest_rmap_safe(pos, node, rmapp)                             \
+       for ((pos) = llist_entry((node), typeof(*(pos)), list);                \
+            (node) &&                                                         \
+            (*(rmapp) = ((RMAP_NESTED_IS_SINGLE_ENTRY & ((u64) (node))) ?     \
+                         ((u64) (node)) : ((pos)->rmap))) &&                  \
+            (((node) = ((RMAP_NESTED_IS_SINGLE_ENTRY & ((u64) (node))) ?      \
+                        ((struct llist_node *) ((pos) = NULL)) :              \
+                        (pos)->list.next)), true);                            \
+            (pos) = llist_entry((node), typeof(*(pos)), list))
+
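The macro copes with both storage forms described above: a tagged single entry (low bit set) cast from the memslot rmap word is returned directly and terminates the walk, while a real llist head is followed via ->list.next. A hedged, read-only walker mirroring how kvmhv_insert_nest_rmap() below reads the word (the function name is hypothetical; the caller is assumed to hold kvm->mmu_lock):

/* Illustration only: walk one memslot rmap word without modifying it. */
static void nest_rmap_walk(unsigned long *rmapp)
{
        /* The word is either a tagged single entry or a llist head */
        struct llist_node *node = ((struct llist_head *) rmapp)->first;
        struct rmap_nested *pos;
        unsigned long rmap;

        for_each_nest_rmap_safe(pos, node, &rmap)
                pr_debug("nested lpid %lu gpa 0x%lx\n",
                         (rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT,
                         rmap & RMAP_NESTED_GPA_MASK);
}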
 struct kvm_nested_guest *kvmhv_get_nested(struct kvm *kvm, int l1_lpid,
                                          bool create);
 void kvmhv_put_nested(struct kvm_nested_guest *gp);
 
 extern int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
                             unsigned long gpa, unsigned int level,
-                            unsigned long mmu_seq, unsigned int lpid);
+                            unsigned long mmu_seq, unsigned int lpid,
+                            unsigned long *rmapp, struct rmap_nested **n_rmap);
+extern void kvmhv_insert_nest_rmap(struct kvm *kvm, unsigned long *rmapp,
+                                  struct rmap_nested **n_rmap);
+extern void kvmhv_remove_nest_rmap_range(struct kvm *kvm,
+                               struct kvm_memory_slot *memslot,
+                               unsigned long gpa, unsigned long hpa,
+                               unsigned long nbytes);
 
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 
 
        kmem_cache_free(kvm_pmd_cache, pmdp);
 }
 
-void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte,
-                     unsigned long gpa, unsigned int shift,
-                     struct kvm_memory_slot *memslot,
+/* Called with kvm->mmu_lock held */
+void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa,
+                     unsigned int shift, struct kvm_memory_slot *memslot,
                      unsigned int lpid)
 
 {
        unsigned long old;
+       unsigned long gfn = gpa >> PAGE_SHIFT;
+       unsigned long page_size = PAGE_SIZE;
+       unsigned long hpa;
 
        old = kvmppc_radix_update_pte(kvm, pte, ~0UL, 0, gpa, shift);
        kvmppc_radix_tlbie_page(kvm, gpa, shift, lpid);
-       if ((old & _PAGE_DIRTY) && (lpid == kvm->arch.lpid)) {
-               unsigned long gfn = gpa >> PAGE_SHIFT;
-               unsigned long page_size = PAGE_SIZE;
 
-               if (shift)
-                       page_size = 1ul << shift;
+       /* The following only applies to L1 entries */
+       if (lpid != kvm->arch.lpid)
+               return;
+
+       if (!memslot) {
+               memslot = gfn_to_memslot(kvm, gfn);
                if (!memslot)
-                       memslot = gfn_to_memslot(kvm, gfn);
-               if (memslot && memslot->dirty_bitmap)
-                       kvmppc_update_dirty_map(memslot, gfn, page_size);
+                       return;
        }
+       if (shift)
+               page_size = 1ul << shift;
+
+       gpa &= ~(page_size - 1);
+       hpa = old & PTE_RPN_MASK;
+       kvmhv_remove_nest_rmap_range(kvm, memslot, gpa, hpa, page_size);
+
+       if ((old & _PAGE_DIRTY) && memslot->dirty_bitmap)
+               kvmppc_update_dirty_map(memslot, gfn, page_size);
 }
 
 /*
 
 int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
                      unsigned long gpa, unsigned int level,
-                     unsigned long mmu_seq, unsigned int lpid)
+                     unsigned long mmu_seq, unsigned int lpid,
+                     unsigned long *rmapp, struct rmap_nested **n_rmap)
 {
        pgd_t *pgd;
        pud_t *pud, *new_pud = NULL;
                        kvmppc_unmap_free_pud_entry_table(kvm, pud, gpa, lpid);
                }
                kvmppc_radix_set_pte_at(kvm, gpa, (pte_t *)pud, pte);
+               if (rmapp && n_rmap)
+                       kvmhv_insert_nest_rmap(kvm, rmapp, n_rmap);
                ret = 0;
                goto out_unlock;
        }
                        kvmppc_unmap_free_pmd_entry_table(kvm, pmd, gpa, lpid);
                }
                kvmppc_radix_set_pte_at(kvm, gpa, pmdp_ptep(pmd), pte);
+               if (rmapp && n_rmap)
+                       kvmhv_insert_nest_rmap(kvm, rmapp, n_rmap);
                ret = 0;
                goto out_unlock;
        }
                goto out_unlock;
        }
        kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte);
+       if (rmapp && n_rmap)
+               kvmhv_insert_nest_rmap(kvm, rmapp, n_rmap);
        ret = 0;
 
  out_unlock:
 
        /* Allocate space in the tree and write the PTE */
        ret = kvmppc_create_pte(kvm, kvm->arch.pgtable, pte, gpa, level,
-                               mmu_seq, kvm->arch.lpid);
+                               mmu_seq, kvm->arch.lpid, NULL, NULL);
        if (inserted_pte)
                *inserted_pte = pte;
        if (levelp)
 
        kvmppc_free_hpt(&kvm->arch.hpt);
        kvmppc_update_lpcr(kvm, LPCR_UPRT | LPCR_GTSE | LPCR_HR,
                           LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
+       kvmppc_rmap_reset(kvm);
        kvm->arch.radix = 1;
        return 0;
 }
 
 
 #include <linux/kernel.h>
 #include <linux/kvm_host.h>
+#include <linux/llist.h>
 
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
 static struct patb_entry *pseries_partition_tb;
 
 static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp);
+static void kvmhv_free_memslot_nest_rmap(struct kvm_memory_slot *free);
 
 void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
 {
        int i;
        struct kvm_nested_guest *gp;
        struct kvm_nested_guest *freelist = NULL;
+       struct kvm_memory_slot *memslot;
+       int srcu_idx;
 
        spin_lock(&kvm->mmu_lock);
        for (i = 0; i <= kvm->arch.max_nested_lpid; i++) {
                freelist = gp->next;
                kvmhv_release_nested(gp);
        }
+
+       srcu_idx = srcu_read_lock(&kvm->srcu);
+       kvm_for_each_memslot(memslot, kvm_memslots(kvm))
+               kvmhv_free_memslot_nest_rmap(memslot);
+       srcu_read_unlock(&kvm->srcu, srcu_idx);
 }
 
 /* caller must hold gp->tlb_lock */
                kvmhv_release_nested(gp);
 }
 
+static struct kvm_nested_guest *kvmhv_find_nested(struct kvm *kvm, int lpid)
+{
+       if (lpid > kvm->arch.max_nested_lpid)
+               return NULL;
+       return kvm->arch.nested_guests[lpid];
+}
+
+static inline bool kvmhv_n_rmap_is_equal(u64 rmap_1, u64 rmap_2)
+{
+       return !((rmap_1 ^ rmap_2) & (RMAP_NESTED_LPID_MASK |
+                                      RMAP_NESTED_GPA_MASK));
+}
+
+void kvmhv_insert_nest_rmap(struct kvm *kvm, unsigned long *rmapp,
+                           struct rmap_nested **n_rmap)
+{
+       struct llist_node *entry = ((struct llist_head *) rmapp)->first;
+       struct rmap_nested *cursor;
+       u64 rmap, new_rmap = (*n_rmap)->rmap;
+
+       /* Are there any existing entries? */
+       if (!(*rmapp)) {
+               /* No -> use the rmap as a single entry */
+               *rmapp = new_rmap | RMAP_NESTED_IS_SINGLE_ENTRY;
+               return;
+       }
+
+       /* Do any entries match what we're trying to insert? */
+       for_each_nest_rmap_safe(cursor, entry, &rmap) {
+               if (kvmhv_n_rmap_is_equal(rmap, new_rmap))
+                       return;
+       }
+
+       /* Do we need to create a list or just add the new entry? */
+       rmap = *rmapp;
+       if (rmap & RMAP_NESTED_IS_SINGLE_ENTRY) /* Not previously a list */
+               *rmapp = 0UL;
+       llist_add(&((*n_rmap)->list), (struct llist_head *) rmapp);
+       if (rmap & RMAP_NESTED_IS_SINGLE_ENTRY) /* Not previously a list */
+               (*n_rmap)->list.next = (struct llist_node *) rmap;
+
+       /* Set NULL so not freed by caller */
+       *n_rmap = NULL;
+}
+
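For reference, the ownership contract here: kvmhv_insert_nest_rmap() takes over *n_rmap when it links the entry into the list and sets the pointer to NULL; if the mapping already exists or fits in the rmap word as a single entry, the pointer is left untouched and the caller must free it. A condensed, hypothetical caller sketch, assuming the same locking as kvmppc_create_pte() (the real caller is the nested page-fault path at the end of this patch):

/* Illustration only; not part of the patch. */
static int nest_rmap_record(struct kvm *kvm, unsigned long *rmapp,
                            unsigned int l1_lpid, unsigned long n_gpa)
{
        struct rmap_nested *n_rmap;

        n_rmap = kzalloc(sizeof(*n_rmap), GFP_KERNEL);
        if (!n_rmap)
                return -ENOMEM;
        n_rmap->rmap = (n_gpa & RMAP_NESTED_GPA_MASK) |
                       ((u64) l1_lpid << RMAP_NESTED_LPID_SHIFT);

        spin_lock(&kvm->mmu_lock);
        kvmhv_insert_nest_rmap(kvm, rmapp, &n_rmap);
        spin_unlock(&kvm->mmu_lock);

        kfree(n_rmap);          /* NULL, and thus a no-op, if consumed */
        return 0;
}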
+static void kvmhv_remove_nest_rmap(struct kvm *kvm, u64 n_rmap,
+                                  unsigned long hpa, unsigned long mask)
+{
+       struct kvm_nested_guest *gp;
+       unsigned long gpa;
+       unsigned int shift, lpid;
+       pte_t *ptep;
+
+       gpa = n_rmap & RMAP_NESTED_GPA_MASK;
+       lpid = (n_rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT;
+       gp = kvmhv_find_nested(kvm, lpid);
+       if (!gp)
+               return;
+
+       /* Find and invalidate the pte */
+       ptep = __find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift);
+       /* Don't spuriously invalidate ptes if the pfn has changed */
+       if (ptep && pte_present(*ptep) && ((pte_val(*ptep) & mask) == hpa))
+               kvmppc_unmap_pte(kvm, ptep, gpa, shift, NULL, gp->shadow_lpid);
+}
+
+static void kvmhv_remove_nest_rmap_list(struct kvm *kvm, unsigned long *rmapp,
+                                       unsigned long hpa, unsigned long mask)
+{
+       struct llist_node *entry = llist_del_all((struct llist_head *) rmapp);
+       struct rmap_nested *cursor;
+       unsigned long rmap;
+
+       for_each_nest_rmap_safe(cursor, entry, &rmap) {
+               kvmhv_remove_nest_rmap(kvm, rmap, hpa, mask);
+               kfree(cursor);
+       }
+}
+
+/* called with kvm->mmu_lock held */
+void kvmhv_remove_nest_rmap_range(struct kvm *kvm,
+                                 struct kvm_memory_slot *memslot,
+                                 unsigned long gpa, unsigned long hpa,
+                                 unsigned long nbytes)
+{
+       unsigned long gfn, end_gfn;
+       unsigned long addr_mask;
+
+       if (!memslot)
+               return;
+       gfn = (gpa >> PAGE_SHIFT) - memslot->base_gfn;
+       end_gfn = gfn + (nbytes >> PAGE_SHIFT);
+
+       addr_mask = PTE_RPN_MASK & ~(nbytes - 1);
+       hpa &= addr_mask;
+
+       for (; gfn < end_gfn; gfn++) {
+               unsigned long *rmap = &memslot->arch.rmap[gfn];
+               kvmhv_remove_nest_rmap_list(kvm, rmap, hpa, addr_mask);
+       }
+}
+
+static void kvmhv_free_memslot_nest_rmap(struct kvm_memory_slot *free)
+{
+       unsigned long page;
+
+       for (page = 0; page < free->npages; page++) {
+               unsigned long rmap, *rmapp = &free->arch.rmap[page];
+               struct rmap_nested *cursor;
+               struct llist_node *entry;
+
+               entry = llist_del_all((struct llist_head *) rmapp);
+               for_each_nest_rmap_safe(cursor, entry, &rmap)
+                       kfree(cursor);
+       }
+}
+
 static bool kvmhv_invalidate_shadow_pte(struct kvm_vcpu *vcpu,
                                        struct kvm_nested_guest *gp,
                                        long gpa, int *shift_ret)
 {
        struct kvm *kvm = vcpu->kvm;
        struct kvm_memory_slot *memslot;
+       struct rmap_nested *n_rmap;
        struct kvmppc_pte gpte;
        pte_t pte, *pte_p;
        unsigned long mmu_seq;
        unsigned long dsisr = vcpu->arch.fault_dsisr;
        unsigned long ea = vcpu->arch.fault_dar;
+       unsigned long *rmapp;
        unsigned long n_gpa, gpa, gfn, perm = 0UL;
        unsigned int shift, l1_shift, level;
        bool writing = !!(dsisr & DSISR_ISSTORE);
 
        /* 4. Insert the pte into our shadow_pgtable */
 
+       n_rmap = kzalloc(sizeof(*n_rmap), GFP_KERNEL);
+       if (!n_rmap)
+               return RESUME_GUEST; /* Let the guest try again */
+       n_rmap->rmap = (n_gpa & RMAP_NESTED_GPA_MASK) |
+               (((unsigned long) gp->l1_lpid) << RMAP_NESTED_LPID_SHIFT);
+       rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
        ret = kvmppc_create_pte(kvm, gp->shadow_pgtable, pte, n_gpa, level,
-                               mmu_seq, gp->shadow_lpid);
+                               mmu_seq, gp->shadow_lpid, rmapp, &n_rmap);
+       kfree(n_rmap);
        if (ret == -EAGAIN)
                ret = RESUME_GUEST;     /* Let the guest try again */