}
 
 static void domain_flush_pages(struct protection_domain *domain,
-                              u64 address, size_t size)
+                              u64 address, size_t size, int pde)
 {
-       __domain_flush_pages(domain, address, size, 0);
+       if (likely(!amd_iommu_np_cache)) {
+               __domain_flush_pages(domain, address, size, pde);
+               return;
+       }
+
+       /*
+        * When NpCache is on, we infer that we run in a VM and use a vIOMMU.
+        * In such setups it is best to avoid flushing ranges which are not
+        * naturally aligned, since that also flushes unmodified PTEs and
+        * requires the hypervisor to do more work than necessary. Therefore,
+        * perform repeated flushes of naturally aligned ranges until the
+        * whole range is covered. Each iteration flushes the smaller of two
+        * sizes: the largest one the natural alignment of the current
+        * address allows, and the largest naturally aligned range that
+        * still fits in the remainder.
+        */
+       while (size != 0) {
+               int addr_alignment = __ffs(address);
+               int size_alignment = __fls(size);
+               int min_alignment;
+               size_t flush_size;
+
+               /*
+                * size is always non-zero, but address might be zero, in
+                * which case __ffs() is undefined. Since the address is
+                * implicitly truncated to unsigned long when passed to
+                * __ffs(), which might trim its high bits on x86-32, do the
+                * zero check on the address cast to unsigned long as well.
+                */
+               if (likely((unsigned long)address != 0))
+                       min_alignment = min(addr_alignment, size_alignment);
+               else
+                       min_alignment = size_alignment;
+
+               flush_size = 1ul << min_alignment;
+
+               __domain_flush_pages(domain, address, flush_size, pde);
+               address += flush_size;
+               size -= flush_size;
+       }
 }
 
 /* Flush the whole IO/TLB for a given protection domain - including PDE */
 void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain)
 {
-       __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
+       domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
 }
 
 void amd_iommu_domain_flush_complete(struct protection_domain *domain)
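To make the decomposition above concrete, here is a minimal userspace sketch (not part of the patch) of what the new domain_flush_pages() does when amd_iommu_np_cache is set. __ffs()/__fls() are modeled with GCC builtins, the flush itself is replaced by a printf(), and flush_naturally_aligned() is a name made up for this sketch only:

/*
 * Sketch of the natural-alignment decomposition in domain_flush_pages().
 */
#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

static void flush_naturally_aligned(uint64_t address, size_t size)
{
	while (size != 0) {
		/* largest naturally aligned chunk that fits in the rest */
		int min_alignment = 63 - __builtin_clzll((unsigned long long)size);

		/* __builtin_ctzll(0) is undefined, like __ffs(0) */
		if (address != 0) {
			/* natural alignment of the current address */
			int addr_alignment = __builtin_ctzll(address);

			if (addr_alignment < min_alignment)
				min_alignment = addr_alignment;
		}

		size_t flush_size = (size_t)1 << min_alignment;

		printf("flush 0x%llx + 0x%zx\n",
		       (unsigned long long)address, flush_size);
		address += flush_size;
		size -= flush_size;
	}
}

int main(void)
{
	/* 0x3000 + 0x5000 splits into 0x3000+0x1000 and 0x4000+0x4000 */
	flush_naturally_aligned(0x3000, 0x5000);
	return 0;
}

For the unaligned range 0x3000 + 0x5000 this emits exactly two flushes (0x3000 + 0x1000, then 0x4000 + 0x4000) which together cover the range precisely, rather than one over-wide naturally aligned flush.
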
@@ ... @@ static void amd_iommu_iotlb_sync_map(struct iommu_domain *dom,
 		unsigned long flags;
 
                spin_lock_irqsave(&domain->lock, flags);
-               domain_flush_pages(domain, iova, size);
+               domain_flush_pages(domain, iova, size, 1);
                amd_iommu_domain_flush_complete(domain);
                spin_unlock_irqrestore(&domain->lock, flags);
        }
@@ ... @@ static void amd_iommu_iotlb_sync(struct iommu_domain *domain,
 	unsigned long flags;
 
        spin_lock_irqsave(&dom->lock, flags);
-       __domain_flush_pages(dom, gather->start, gather->end - gather->start, 1);
+       domain_flush_pages(dom, gather->start, gather->end - gather->start, 1);
        amd_iommu_domain_flush_complete(dom);
        spin_unlock_irqrestore(&dom->lock, flags);
 }
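
One consequence visible in the first hunk: amd_iommu_domain_flush_tlb_pde() now also routes through domain_flush_pages(), so on NpCache systems even the whole-address-space flush is decomposed. Driving the sketch above with the same arguments (CMD_INV_IOMMU_ALL_PAGES_ADDRESS is 0x7fffffffffffffffULL; a 64-bit size_t is assumed, and flush_naturally_aligned() is reused from the sketch) shows the resulting series of 63 naturally aligned flushes of halving size:

int main(void)
{
	/* whole-TLB path: address 0, size CMD_INV_IOMMU_ALL_PAGES_ADDRESS */
	flush_naturally_aligned(0, 0x7fffffffffffffffULL);
	/*
	 * prints: flush 0x0 + 0x4000000000000000
	 *         flush 0x4000000000000000 + 0x2000000000000000
	 *         ...
	 *         flush 0x7ffffffffffffffe + 0x1   (63 flushes in total)
	 */
	return 0;
}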