L2_ptep         \pgd,\pte,\index,\va,\fault
        .endm
 
+       /* Acquire pa_dbit_lock lock. */
+       .macro          dbit_lock       spc,tmp,tmp1
+#ifdef CONFIG_SMP
+       cmpib,COND(=),n 0,\spc,2f
+       load32          PA(pa_dbit_lock),\tmp
+1:     LDCW            0(\tmp),\tmp1
+       cmpib,COND(=)   0,\tmp1,1b
+       nop
+2:
+#endif
+       .endm
+
+       /* Release pa_dbit_lock lock without reloading lock address. */
+       .macro          dbit_unlock0    spc,tmp
+#ifdef CONFIG_SMP
+       or,COND(=)      %r0,\spc,%r0
+       stw             \spc,0(\tmp)
+#endif
+       .endm
+
+       /* Release pa_dbit_lock lock. */
+       .macro          dbit_unlock1    spc,tmp
+#ifdef CONFIG_SMP
+       load32          PA(pa_dbit_lock),\tmp
+       dbit_unlock0    \spc,\tmp
+#endif
+       .endm
+
        /* Set the _PAGE_ACCESSED bit of the PTE.  Be clever and
         * don't needlessly dirty the cache line if it was already set */
-       .macro          update_ptep     ptep,pte,tmp,tmp1
+       .macro          update_ptep     spc,ptep,pte,tmp,tmp1
+#ifdef CONFIG_SMP
+       or,COND(=)      %r0,\spc,%r0
+       LDREG           0(\ptep),\pte
+#endif
        ldi             _PAGE_ACCESSED,\tmp1
        or              \tmp1,\pte,\tmp
        and,COND(<>)    \tmp1,\pte,%r0
 
        /* Set the dirty bit (and accessed bit).  No need to be
         * clever, this is only used from the dirty fault */
-       .macro          update_dirty    ptep,pte,tmp
+       .macro          update_dirty    spc,ptep,pte,tmp
+#ifdef CONFIG_SMP
+       or,COND(=)      %r0,\spc,%r0
+       LDREG           0(\ptep),\pte
+#endif
        ldi             _PAGE_ACCESSED|_PAGE_DIRTY,\tmp
        or              \tmp,\pte,\pte
        STREG           \pte,0(\ptep)
 
        L3_ptep         ptp,pte,t0,va,dtlb_check_alias_20w
 
-       update_ptep     ptp,pte,t0,t1
+       dbit_lock       spc,t0,t1
+       update_ptep     spc,ptp,pte,t0,t1
 
        make_insert_tlb spc,pte,prot
        
        idtlbt          pte,prot
+       dbit_unlock1    spc,t0
 
        rfir
        nop
 
        L3_ptep         ptp,pte,t0,va,nadtlb_check_alias_20w
 
-       update_ptep     ptp,pte,t0,t1
+       dbit_lock       spc,t0,t1
+       update_ptep     spc,ptp,pte,t0,t1
 
        make_insert_tlb spc,pte,prot
 
        idtlbt          pte,prot
+       dbit_unlock1    spc,t0
 
        rfir
        nop
 
        L2_ptep         ptp,pte,t0,va,dtlb_check_alias_11
 
-       update_ptep     ptp,pte,t0,t1
+       dbit_lock       spc,t0,t1
+       update_ptep     spc,ptp,pte,t0,t1
 
        make_insert_tlb_11      spc,pte,prot
 
        idtlbp          prot,(%sr1,va)
 
        mtsp            t0, %sr1        /* Restore sr1 */
+       dbit_unlock1    spc,t0
 
        rfir
        nop
 
        L2_ptep         ptp,pte,t0,va,nadtlb_check_alias_11
 
-       update_ptep     ptp,pte,t0,t1
+       dbit_lock       spc,t0,t1
+       update_ptep     spc,ptp,pte,t0,t1
 
        make_insert_tlb_11      spc,pte,prot
 
        idtlbp          prot,(%sr1,va)
 
        mtsp            t0, %sr1        /* Restore sr1 */
+       dbit_unlock1    spc,t0
 
        rfir
        nop
 
        L2_ptep         ptp,pte,t0,va,dtlb_check_alias_20
 
-       update_ptep     ptp,pte,t0,t1
+       dbit_lock       spc,t0,t1
+       update_ptep     spc,ptp,pte,t0,t1
 
        make_insert_tlb spc,pte,prot
 
        f_extend        pte,t0
 
        idtlbt          pte,prot
+       dbit_unlock1    spc,t0
 
        rfir
        nop
 
        L2_ptep         ptp,pte,t0,va,nadtlb_check_alias_20
 
-       update_ptep     ptp,pte,t0,t1
+       dbit_lock       spc,t0,t1
+       update_ptep     spc,ptp,pte,t0,t1
 
        make_insert_tlb spc,pte,prot
 
        f_extend        pte,t0
        
         idtlbt          pte,prot
+       dbit_unlock1    spc,t0
 
        rfir
        nop
 
        L3_ptep         ptp,pte,t0,va,itlb_fault
 
-       update_ptep     ptp,pte,t0,t1
+       dbit_lock       spc,t0,t1
+       update_ptep     spc,ptp,pte,t0,t1
 
        make_insert_tlb spc,pte,prot
        
        iitlbt          pte,prot
+       dbit_unlock1    spc,t0
 
        rfir
        nop
 
        L3_ptep         ptp,pte,t0,va,naitlb_check_alias_20w
 
-       update_ptep     ptp,pte,t0,t1
+       dbit_lock       spc,t0,t1
+       update_ptep     spc,ptp,pte,t0,t1
 
        make_insert_tlb spc,pte,prot
 
        iitlbt          pte,prot
+       dbit_unlock1    spc,t0
 
        rfir
        nop
 
        L2_ptep         ptp,pte,t0,va,itlb_fault
 
-       update_ptep     ptp,pte,t0,t1
+       dbit_lock       spc,t0,t1
+       update_ptep     spc,ptp,pte,t0,t1
 
        make_insert_tlb_11      spc,pte,prot
 
        iitlbp          prot,(%sr1,va)
 
        mtsp            t0, %sr1        /* Restore sr1 */
+       dbit_unlock1    spc,t0
 
        rfir
        nop
 
        L2_ptep         ptp,pte,t0,va,naitlb_check_alias_11
 
-       update_ptep     ptp,pte,t0,t1
+       dbit_lock       spc,t0,t1
+       update_ptep     spc,ptp,pte,t0,t1
 
        make_insert_tlb_11      spc,pte,prot
 
        iitlbp          prot,(%sr1,va)
 
        mtsp            t0, %sr1        /* Restore sr1 */
+       dbit_unlock1    spc,t0
 
        rfir
        nop
 
        L2_ptep         ptp,pte,t0,va,itlb_fault
 
-       update_ptep     ptp,pte,t0,t1
+       dbit_lock       spc,t0,t1
+       update_ptep     spc,ptp,pte,t0,t1
 
        make_insert_tlb spc,pte,prot
 
        f_extend        pte,t0  
 
        iitlbt          pte,prot
+       dbit_unlock1    spc,t0
 
        rfir
        nop
 
        L2_ptep         ptp,pte,t0,va,naitlb_check_alias_20
 
-       update_ptep     ptp,pte,t0,t1
+       dbit_lock       spc,t0,t1
+       update_ptep     spc,ptp,pte,t0,t1
 
        make_insert_tlb spc,pte,prot
 
        f_extend        pte,t0
 
        iitlbt          pte,prot
+       dbit_unlock1    spc,t0
 
        rfir
        nop
 
        L3_ptep         ptp,pte,t0,va,dbit_fault
 
-#ifdef CONFIG_SMP
-       cmpib,COND(=),n        0,spc,dbit_nolock_20w
-       load32          PA(pa_dbit_lock),t0
-
-dbit_spin_20w:
-       LDCW            0(t0),t1
-       cmpib,COND(=)         0,t1,dbit_spin_20w
-       nop
-
-dbit_nolock_20w:
-#endif
-       update_dirty    ptp,pte,t1
+       dbit_lock       spc,t0,t1
+       update_dirty    spc,ptp,pte,t1
 
        make_insert_tlb spc,pte,prot
                
        idtlbt          pte,prot
-#ifdef CONFIG_SMP
-       cmpib,COND(=),n        0,spc,dbit_nounlock_20w
-       ldi             1,t1
-       stw             t1,0(t0)
-
-dbit_nounlock_20w:
-#endif
+       dbit_unlock0    spc,t0
 
        rfir
        nop
 
        L2_ptep         ptp,pte,t0,va,dbit_fault
 
-#ifdef CONFIG_SMP
-       cmpib,COND(=),n        0,spc,dbit_nolock_11
-       load32          PA(pa_dbit_lock),t0
-
-dbit_spin_11:
-       LDCW            0(t0),t1
-       cmpib,=         0,t1,dbit_spin_11
-       nop
-
-dbit_nolock_11:
-#endif
-       update_dirty    ptp,pte,t1
+       dbit_lock       spc,t0,t1
+       update_dirty    spc,ptp,pte,t1
 
        make_insert_tlb_11      spc,pte,prot
 
        idtlbp          prot,(%sr1,va)
 
        mtsp            t1, %sr1     /* Restore sr1 */
-#ifdef CONFIG_SMP
-       cmpib,COND(=),n        0,spc,dbit_nounlock_11
-       ldi             1,t1
-       stw             t1,0(t0)
-
-dbit_nounlock_11:
-#endif
+       dbit_unlock0    spc,t0
 
        rfir
        nop
 
        L2_ptep         ptp,pte,t0,va,dbit_fault
 
-#ifdef CONFIG_SMP
-       cmpib,COND(=),n        0,spc,dbit_nolock_20
-       load32          PA(pa_dbit_lock),t0
-
-dbit_spin_20:
-       LDCW            0(t0),t1
-       cmpib,=         0,t1,dbit_spin_20
-       nop
-
-dbit_nolock_20:
-#endif
-       update_dirty    ptp,pte,t1
+       dbit_lock       spc,t0,t1
+       update_dirty    spc,ptp,pte,t1
 
        make_insert_tlb spc,pte,prot
 
        f_extend        pte,t1
        
         idtlbt          pte,prot
-
-#ifdef CONFIG_SMP
-       cmpib,COND(=),n        0,spc,dbit_nounlock_20
-       ldi             1,t1
-       stw             t1,0(t0)
-
-dbit_nounlock_20:
-#endif
+       dbit_unlock0    spc,t0
 
        rfir
        nop