* on the linear mapping */
        /* SLB related definitions */
        u16 vmalloc_sllp;
-       u16 slb_cache_ptr;
+       u8 slb_cache_ptr;
+       u8 stab_rr;                     /* stab/slb round-robin counter */
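+       /*
+        * slb_used_bitmap tracks which of the first 32 SLB slots are
+        * allocated; slb_kern_bitmap tracks which of those hold kernel
+        * mappings.
+        */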
+       u32 slb_used_bitmap;            /* Bitmaps for first 32 SLB entries. */
+       u32 slb_kern_bitmap;
        u32 slb_cache[SLB_CACHE_ENTRIES];
 #endif /* CONFIG_PPC_BOOK3S_64 */
 
         */
        struct task_struct *__current;  /* Pointer to current */
        u64 kstack;                     /* Saved Kernel stack addr */
-       u64 stab_rr;                    /* stab/slb round-robin counter */
        u64 saved_r1;                   /* r1 save for RTAS calls or PM or EE=0 */
        u64 saved_msr;                  /* MSR saved here by enter_rtas */
        u16 trap_save;                  /* Used when bad stack is encountered */
 
        OFFSET(PACAKSAVE, paca_struct, kstack);
        OFFSET(PACACURRENT, paca_struct, __current);
        OFFSET(PACASAVEDMSR, paca_struct, saved_msr);
-       OFFSET(PACASTABRR, paca_struct, stab_rr);
        OFFSET(PACAR1, paca_struct, saved_r1);
        OFFSET(PACATOC, paca_struct, kernel_toc);
        OFFSET(PACAKBASE, paca_struct, kernelbase);
 #ifdef CONFIG_PPC_BOOK3S_64
        OFFSET(PACASLBCACHE, paca_struct, slb_cache);
        OFFSET(PACASLBCACHEPTR, paca_struct, slb_cache_ptr);
+       OFFSET(PACASTABRR, paca_struct, stab_rr);
        OFFSET(PACAVMALLOCSLLP, paca_struct, vmalloc_sllp);
 #ifdef CONFIG_PPC_MM_SLICES
        OFFSET(MMUPSIZESLLP, mmu_psize_def, sllp);
 
 {
        __slb_restore_bolted_realmode();
        get_paca()->slb_cache_ptr = 0;
+
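+       /* Only the bolted entries were restored; reset the accounting. */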
+       get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
+       get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
 }
 
 /*
  */
 void slb_flush_all_realmode(void)
 {
-       /*
-        * This flushes all SLB entries including 0, so it must be realmode.
-        */
        asm volatile("slbmte %0,%0; slbia" : : "r" (0));
 }
 
                     : "memory");
 
        get_paca()->slb_cache_ptr = 0;
+
+       get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
+       get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
 }
 
 void slb_save_contents(struct slb_entry *slb_ptr)
                return;
 
        pr_err("SLB contents of cpu 0x%x\n", smp_processor_id());
-       pr_err("Last SLB entry inserted at slot %lld\n", get_paca()->stab_rr);
+       pr_err("Last SLB entry inserted at slot %d\n", get_paca()->stab_rr);
 
        for (i = 0; i < mmu_slb_size; i++) {
                e = slb_ptr->esid;
                                     "isync"
                                     :: "r"(ksp_vsid_data),
                                        "r"(ksp_esid_data));
+
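+                       /*
+                        * Only the bolted kernel entries remain after
+                        * the flush and re-bolt above.
+                        */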
+                       get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
                }
 
                get_paca()->slb_cache_ptr = 0;
        }
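+       /* User entries are flushed; only kernel entries remain in use. */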
+       get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
 
        copy_mm_to_paca(mm);
 
        }
 
        get_paca()->stab_rr = SLB_NUM_BOLTED - 1;
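+       /* Start with just the bolted entries marked in both bitmaps. */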
+       get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
+       get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
 
        lflags = SLB_VSID_KERNEL | linear_llp;
 
        }
 }
 
-static enum slb_index alloc_slb_index(void)
+static enum slb_index alloc_slb_index(bool kernel)
 {
        enum slb_index index;
 
-       /* round-robin replacement of slb starting at SLB_NUM_BOLTED. */
-       index = get_paca()->stab_rr;
-       if (index < (mmu_slb_size - 1))
-               index++;
-       else
-               index = SLB_NUM_BOLTED;
-       get_paca()->stab_rr = index;
+       /*
+        * The allocation bitmaps can become out of sync with the SLB
+        * when the _switch code does slbie while bolting a new stack
+        * segment, which must not be present anywhere else in the SLB.
+        * This leaves a slot still accounted as a kernel allocation
+        * even though its SLB entry is gone. On very large systems or
+        * with small segment sizes, the bitmaps could slowly fill with
+        * such entries. They will eventually be cleared out by the
+        * round-robin allocator, so this is probably not worth
+        * accounting for.
+        */
+
+       /*
+        * SLB entries beyond the first 32 are allocated with stab_rr
+        * only. POWER7/8/9 have 32 SLB entries; this could be expanded
+        * if a future CPU has more.
+        */
+       if (local_paca->slb_used_bitmap != U32_MAX) {
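+               /* ffz() finds the lowest clear bit: the first free slot. */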
+               index = ffz(local_paca->slb_used_bitmap);
+               local_paca->slb_used_bitmap |= 1U << index;
+               if (kernel)
+                       local_paca->slb_kern_bitmap |= 1U << index;
+       } else {
+               /* round-robin replacement of slb starting at SLB_NUM_BOLTED. */
+               index = local_paca->stab_rr;
+               if (index < (mmu_slb_size - 1))
+                       index++;
+               else
+                       index = SLB_NUM_BOLTED;
+               local_paca->stab_rr = index;
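+               /*
+                * slb_used_bitmap is already full here, so only the
+                * kernel bitmap needs updating for slots below 32.
+                */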
+               if (index < 32) {
+                       if (kernel)
+                               local_paca->slb_kern_bitmap |= 1U << index;
+                       else
+                               local_paca->slb_kern_bitmap &= ~(1U << index);
+               }
+       }
+       BUG_ON(index < SLB_NUM_BOLTED);
 
        return index;
 }
         */
        barrier();
 
-       index = alloc_slb_index();
+       index = alloc_slb_index(kernel);
 
        vsid_data = __mk_vsid_data(vsid, ssize, flags);
        esid_data = mk_esid_data(ea, ssize, index);