}
 }
 
-static inline void tlb_flush_mmu(struct mmu_gather *tlb)
+static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
 {
        tlb_flush(tlb);
+}
+
+static inline void tlb_flush_mmu_free(struct mmu_gather *tlb)
+{
        free_pages_and_swap_cache(tlb->pages, tlb->nr);
        tlb->nr = 0;
        if (tlb->pages == tlb->local)
                __tlb_alloc_page(tlb);
 }
 
+static inline void tlb_flush_mmu(struct mmu_gather *tlb)
+{
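+       /*
+        * Keep the combined behaviour: invalidate the TLB first and only
+        * then free the gathered pages, so no CPU can still reach a page
+        * through a stale translation once it has been returned.
+        */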
+       tlb_flush_mmu_tlbonly(tlb);
+       tlb_flush_mmu_free(tlb);
+}
+
 static inline void
 tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
 {
 
 #define RR_RID_MASK    0x00000000ffffff00L
 #define RR_TO_RID(val)         ((val >> 8) & 0xffffff)
 
-/*
- * Flush the TLB for address range START to END and, if not in fast mode, release the
- * freed pages that where gathered up to this point.
- */
 static inline void
-ia64_tlb_flush_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long end)
+ia64_tlb_flush_mmu_tlbonly(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 {
-       unsigned long i;
-       unsigned int nr;
-
-       if (!tlb->need_flush)
-               return;
        tlb->need_flush = 0;
 
        if (tlb->fullmm) {
                flush_tlb_range(&vma, ia64_thash(start), ia64_thash(end));
        }
 
+}
+
+static inline void
+ia64_tlb_flush_mmu_free(struct mmu_gather *tlb)
+{
+       unsigned long i;
+       unsigned int nr;
+
        /* lastly, release the freed pages */
        nr = tlb->nr;
 
                free_page_and_swap_cache(tlb->pages[i]);
 }
 
+/*
+ * Flush the TLB for address range START to END and, if not in fast mode, release the
+ * freed pages that were gathered up to this point.
+ */
+static inline void
+ia64_tlb_flush_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long end)
+{
+       if (!tlb->need_flush)
+               return;
+       ia64_tlb_flush_mmu_tlbonly(tlb, start, end);
+       ia64_tlb_flush_mmu_free(tlb);
+}
+
 static inline void __tlb_alloc_page(struct mmu_gather *tlb)
 {
        unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
        return tlb->max - tlb->nr;
 }
 
+static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
+{
+       ia64_tlb_flush_mmu_tlbonly(tlb, tlb->start_addr, tlb->end_addr);
+}
+
+static inline void tlb_flush_mmu_free(struct mmu_gather *tlb)
+{
+       ia64_tlb_flush_mmu_free(tlb);
+}
+
 static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 {
        ia64_tlb_flush_mmu(tlb, tlb->start_addr, tlb->end_addr);
 
        tlb->batch = NULL;
 }
 
-static inline void tlb_flush_mmu(struct mmu_gather *tlb)
+static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
 {
        __tlb_flush_mm_lazy(tlb->mm);
+}
+
+static inline void tlb_flush_mmu_free(struct mmu_gather *tlb)
+{
        tlb_table_flush(tlb);
 }
 
+static inline void tlb_flush_mmu(struct mmu_gather *tlb)
+{
+       tlb_flush_mmu_tlbonly(tlb);
+       tlb_flush_mmu_free(tlb);
+}
+
 static inline void tlb_finish_mmu(struct mmu_gather *tlb,
                                  unsigned long start, unsigned long end)
 {
 
        }
 }
 
+static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
+{
+}
+
+static inline void tlb_flush_mmu_free(struct mmu_gather *tlb)
+{
+}
+
 static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 {
 }
 
 extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
                               unsigned long end);
 
+static inline void
+tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
+{
+       flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end);
+}
+
+static inline void
+tlb_flush_mmu_free(struct mmu_gather *tlb)
+{
+       init_tlb_gather(tlb);
+}
+
 static inline void
 tlb_flush_mmu(struct mmu_gather *tlb)
 {
        if (!tlb->need_flush)
                return;
 
-       flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end);
-       init_tlb_gather(tlb);
+       tlb_flush_mmu_tlbonly(tlb);
+       tlb_flush_mmu_free(tlb);
 }
 
 /* tlb_finish_mmu
 
 #endif
 }
 
-void tlb_flush_mmu(struct mmu_gather *tlb)
+static void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
 {
-       struct mmu_gather_batch *batch;
-
-       if (!tlb->need_flush)
-               return;
        tlb->need_flush = 0;
        tlb_flush(tlb);
 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
        tlb_table_flush(tlb);
 #endif
+}
+
+static void tlb_flush_mmu_free(struct mmu_gather *tlb)
+{
+       struct mmu_gather_batch *batch;
 
        for (batch = &tlb->local; batch; batch = batch->next) {
                free_pages_and_swap_cache(batch->pages, batch->nr);
        tlb->active = &tlb->local;
 }
 
+void tlb_flush_mmu(struct mmu_gather *tlb)
+{
+       if (!tlb->need_flush)
+               return;
+       tlb_flush_mmu_tlbonly(tlb);
+       tlb_flush_mmu_free(tlb);
+}
+
 /* tlb_finish_mmu
  *     Called at the end of the shootdown operation to free up any resources
  *     that were required.
                        if (PageAnon(page))
                                rss[MM_ANONPAGES]--;
                        else {
-                               if (pte_dirty(ptent))
+                               if (pte_dirty(ptent)) {
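+                                       /*
+                                        * Other CPUs may still hold writable
+                                        * TLB entries for a dirty pte; flush
+                                        * before the ptl is dropped and the
+                                        * page can be cleaned or freed.
+                                        */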
+                                       force_flush = 1;
                                        set_page_dirty(page);
+                               }
                                if (pte_young(ptent) &&
                                    likely(!(vma->vm_flags & VM_SEQ_READ)))
                                        mark_page_accessed(page);
                        page_remove_rmap(page);
                        if (unlikely(page_mapcount(page) < 0))
                                print_bad_pte(vma, addr, ptent, page);
-                       force_flush = !__tlb_remove_page(tlb, page);
-                       if (force_flush)
+                       if (unlikely(!__tlb_remove_page(tlb, page))) {
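+                               /*
+                                * The mmu_gather batch is full: break out to
+                                * flush the TLB and free the gathered pages.
+                                */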
+                               force_flush = 1;
                                break;
+                       }
                        continue;
                }
                /*
 
        add_mm_rss_vec(mm, rss);
        arch_leave_lazy_mmu_mode();
-       pte_unmap_unlock(start_pte, ptl);
 
-       /*
-        * mmu_gather ran out of room to batch pages, we break out of
-        * the PTE lock to avoid doing the potential expensive TLB invalidate
-        * and page-free while holding it.
-        */
+       /* Do the actual TLB flush before dropping ptl */
        if (force_flush) {
                unsigned long old_end;
 
-               force_flush = 0;
-
                /*
                 * Flush the TLB just for the previous segment,
                 * then update the range to be the remaining
                 * TLB range.
                 */
                old_end = tlb->end;
                tlb->end = addr;
-
-               tlb_flush_mmu(tlb);
-
+               tlb_flush_mmu_tlbonly(tlb);
                tlb->start = addr;
                tlb->end = old_end;
+       }
+       pte_unmap_unlock(start_pte, ptl);
+
+       /*
+        * If we forced a TLB flush (either due to running out of
+        * batch buffers or because we needed to flush dirty TLB
+        * entries before releasing the ptl), free the batched
+        * memory too. Restart if we didn't do everything.
+        */
+       if (force_flush) {
+               force_flush = 0;
+               tlb_flush_mmu_free(tlb);
 
                if (addr != end)
                        goto again;
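
Taken together, the zap path above now has the shape sketched below. This is a condensed, hypothetical outline, not code from the patch: example_zap_range and the elided pte walk are illustrative, while tlb_flush_mmu_tlbonly(), tlb_flush_mmu_free(), pte_offset_map_lock() and pte_unmap_unlock() are the real helpers used above. The TLB invalidation stays under the page-table lock, the page freeing moves after the unlock, and the walk restarts if the batch filled up early.

	static unsigned long example_zap_range(struct mmu_gather *tlb, struct mm_struct *mm,
					       pmd_t *pmd, unsigned long addr, unsigned long end)
	{
		int force_flush = 0;
		spinlock_t *ptl;
		pte_t *start_pte;

	again:
		start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
		/*
		 * ... walk ptes from addr, gathering pages and setting
		 * force_flush when a dirty pte is seen or the batch fills ...
		 */

		if (force_flush)
			tlb_flush_mmu_tlbonly(tlb);	/* still holding ptl */
		pte_unmap_unlock(start_pte, ptl);

		if (force_flush) {
			force_flush = 0;
			tlb_flush_mmu_free(tlb);	/* safe without ptl */
			if (addr != end)
				goto again;
		}
		return addr;
	}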