vldr            \out\()h, \sym + 8
        .endm
 
-       .macro          __adr, reg, lbl
-       adr             \reg, \lbl
-THUMB( orr             \reg, \reg, #1          )
-       .endm
-
        .macro          in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
        veor            \b2, \b2, \b1
        veor            \b5, \b5, \b6
        push            {r4-r6, lr}
        ldr             r5, [sp, #16]           // number of blocks
 
-99:    __adr           ip, 0f
+99:    adr             ip, 0f
        and             lr, r5, #7
        cmp             r5, #8
        sub             ip, ip, lr, lsl #2
-       bxlt            ip                      // computed goto if blocks < 8
+       movlt           pc, ip                  // computed goto if blocks < 8
 
        vld1.8          {q0}, [r1]!
        vld1.8          {q1}, [r1]!
        mov             rounds, r3
        bl              \do8
 
-       __adr           ip, 1f
+       adr             ip, 1f
        and             lr, r5, #7
        cmp             r5, #8
        sub             ip, ip, lr, lsl #2
-       bxlt            ip                      // computed goto if blocks < 8
+       movlt           pc, ip                  // computed goto if blocks < 8
 
        vst1.8          {\o0}, [r0]!
        vst1.8          {\o1}, [r0]!
        push            {r4-r6, lr}
        ldm             ip, {r5-r6}             // load args 4-5
 
-99:    __adr           ip, 0f
+99:    adr             ip, 0f
        and             lr, r5, #7
        cmp             r5, #8
        sub             ip, ip, lr, lsl #2
        mov             lr, r1
-       bxlt            ip                      // computed goto if blocks < 8
+       movlt           pc, ip                  // computed goto if blocks < 8
 
        vld1.8          {q0}, [lr]!
        vld1.8          {q1}, [lr]!
        vmov            q14, q8
        vmov            q15, q8
 
-       __adr           ip, 1f
+       adr             ip, 1f
        and             lr, r5, #7
        cmp             r5, #8
        sub             ip, ip, lr, lsl #2
-       bxlt            ip                      // computed goto if blocks < 8
+       movlt           pc, ip                  // computed goto if blocks < 8
 
        vld1.8          {q9}, [r1]!
        vld1.8          {q10}, [r1]!
        vld1.8          {q15}, [r1]!
        W(nop)
 
-1:     __adr           ip, 2f
+1:     adr             ip, 2f
        sub             ip, ip, lr, lsl #3
-       bxlt            ip                      // computed goto if blocks < 8
+       movlt           pc, ip                  // computed goto if blocks < 8
 
        veor            q0, q0, q8
        vst1.8          {q0}, [r0]!
        vmov            q6, q0
        vmov            q7, q0
 
-       __adr           ip, 0f
+       adr             ip, 0f
        sub             lr, r5, #1
        and             lr, lr, #7
        cmp             r5, #8
        sub             ip, ip, lr, lsl #5
        sub             ip, ip, lr, lsl #2
-       bxlt            ip                      // computed goto if blocks < 8
+       movlt           pc, ip                  // computed goto if blocks < 8
 
        next_ctr        q1
        next_ctr        q2
        mov             rounds, r3
        bl              aesbs_encrypt8
 
-       __adr           ip, 1f
+       adr             ip, 1f
        and             lr, r5, #7
        cmp             r5, #8
        movgt           r4, #0
        ldrle           r4, [sp, #40]           // load final in the last round
        sub             ip, ip, lr, lsl #2
-       bxlt            ip                      // computed goto if blocks < 8
+       movlt           pc, ip                  // computed goto if blocks < 8
 
        vld1.8          {q8}, [r1]!
        vld1.8          {q9}, [r1]!
 1:     bne             2f
        vld1.8          {q15}, [r1]!
 
-2:     __adr           ip, 3f
+2:     adr             ip, 3f
        cmp             r5, #8
        sub             ip, ip, lr, lsl #3
-       bxlt            ip                      // computed goto if blocks < 8
+       movlt           pc, ip                  // computed goto if blocks < 8
 
        veor            q0, q0, q8
        vst1.8          {q0}, [r0]!
        vshr.u64        d30, d31, #7
        vmov            q12, q14
 
-       __adr           ip, 0f
+       adr             ip, 0f
        and             r4, r6, #7
        cmp             r6, #8
        sub             ip, ip, r4, lsl #5
        mov             r4, sp
-       bxlt            ip                      // computed goto if blocks < 8
+       movlt           pc, ip                  // computed goto if blocks < 8
 
        vld1.8          {q0}, [r1]!
        next_tweak      q12, q14, q15, q13
        mov             rounds, r3
        bl              \do8
 
-       __adr           ip, 0f
+       adr             ip, 0f
        and             lr, r6, #7
        cmp             r6, #8
        sub             ip, ip, lr, lsl #2
        mov             r4, sp
-       bxlt            ip                      // computed goto if blocks < 8
+       movlt           pc, ip                  // computed goto if blocks < 8
 
        vld1.8          {q8}, [r4, :128]!
        vld1.8          {q9}, [r4, :128]!
        vld1.8          {q14}, [r4, :128]!
        vld1.8          {q15}, [r4, :128]
 
-0:     __adr           ip, 1f
+0:     adr             ip, 1f
        sub             ip, ip, lr, lsl #3
-       bxlt            ip                      // computed goto if blocks < 8
+       movlt           pc, ip                  // computed goto if blocks < 8
 
        veor            \o0, \o0, q8
        vst1.8          {\o0}, [r0]!