zap_pte_range() loops from @addr to @end.  If it runs out of batching slots
midway, the TLB entries need to be flushed for @start to @interim, NOT
@interim to @end.
Since the ARC port doesn't use page free batching, I can't test it myself,
but this seems like the right thing to do.
Observed this while working on a fix for the issue discussed in this thread:
http://www.spinics.net/lists/linux-arch/msg21736.html
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Hugh Dickins <hughd@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
        spinlock_t *ptl;
        pte_t *start_pte;
        pte_t *pte;
+       unsigned long range_start = addr;
 
 again:
        init_rss_vec(rss);
                force_flush = 0;
 
 #ifdef HAVE_GENERIC_MMU_GATHER
-               tlb->start = addr;
-               tlb->end = end;
+               tlb->start = range_start;
+               tlb->end = addr;
 #endif
                tlb_flush_mmu(tlb);
-               if (addr != end)
+               if (addr != end) {
+                       range_start = addr;
                        goto again;
+               }
        }
 
        return addr;