*                           u32 *macp, u8 const rk[], u32 rounds);
         */
 ENTRY(ce_aes_ccm_auth_data)
-       frame_push      7
-
-       mov     x19, x0
-       mov     x20, x1
-       mov     x21, x2
-       mov     x22, x3
-       mov     x23, x4
-       mov     x24, x5
-
-       ldr     w25, [x22]                      /* leftover from prev round? */
+       ldr     w8, [x3]                        /* leftover from prev round? */
        ld1     {v0.16b}, [x0]                  /* load mac */
-       cbz     w25, 1f
-       sub     w25, w25, #16
+       cbz     w8, 1f
+       sub     w8, w8, #16
        eor     v1.16b, v1.16b, v1.16b
-0:     ldrb    w7, [x20], #1                   /* get 1 byte of input */
-       subs    w21, w21, #1
-       add     w25, w25, #1
+0:     ldrb    w7, [x1], #1                    /* get 1 byte of input */
+       subs    w2, w2, #1
+       add     w8, w8, #1
        ins     v1.b[0], w7
        ext     v1.16b, v1.16b, v1.16b, #1      /* rotate in the input bytes */
        beq     8f                              /* out of input? */
-       cbnz    w25, 0b
+       cbnz    w8, 0b
        eor     v0.16b, v0.16b, v1.16b
-1:     ld1     {v3.4s}, [x23]                  /* load first round key */
-       prfm    pldl1strm, [x20]
-       cmp     w24, #12                        /* which key size? */
-       add     x6, x23, #16
-       sub     w7, w24, #2                     /* modified # of rounds */
+1:     ld1     {v3.4s}, [x4]                   /* load first round key */
+       prfm    pldl1strm, [x1]
+       cmp     w5, #12                         /* which key size? */
+       add     x6, x4, #16
+       sub     w7, w5, #2                      /* modified # of rounds */
        bmi     2f
        bne     5f
        mov     v5.16b, v3.16b
        ld1     {v5.4s}, [x6], #16              /* load next round key */
        bpl     3b
        aese    v0.16b, v4.16b
-       subs    w21, w21, #16                   /* last data? */
+       subs    w2, w2, #16                     /* last data? */
        eor     v0.16b, v0.16b, v5.16b          /* final round */
        bmi     6f
-       ld1     {v1.16b}, [x20], #16            /* load next input block */
+       ld1     {v1.16b}, [x1], #16             /* load next input block */
        eor     v0.16b, v0.16b, v1.16b          /* xor with mac */
-       beq     6f
-
-       if_will_cond_yield_neon
-       st1     {v0.16b}, [x19]                 /* store mac */
-       do_cond_yield_neon
-       ld1     {v0.16b}, [x19]                 /* reload mac */
-       endif_yield_neon
-
-       b       1b
-6:     st1     {v0.16b}, [x19]                 /* store mac */
+       bne     1b
+6:     st1     {v0.16b}, [x0]                  /* store mac */
        beq     10f
-       adds    w21, w21, #16
+       adds    w2, w2, #16
        beq     10f
-       mov     w25, w21
-7:     ldrb    w7, [x20], #1
+       mov     w8, w2
+7:     ldrb    w7, [x1], #1
        umov    w6, v0.b[0]
        eor     w6, w6, w7
-       strb    w6, [x19], #1
-       subs    w21, w21, #1
+       strb    w6, [x0], #1
+       subs    w2, w2, #1
        beq     10f
        ext     v0.16b, v0.16b, v0.16b, #1      /* rotate out the mac bytes */
        b       7b
-8:     mov     w7, w25
-       add     w25, w25, #16
+8:     mov     w7, w8
+       add     w8, w8, #16
 9:     ext     v1.16b, v1.16b, v1.16b, #1
        adds    w7, w7, #1
        bne     9b
        eor     v0.16b, v0.16b, v1.16b
-       st1     {v0.16b}, [x19]
-10:    str     w25, [x22]
-
-       frame_pop
+       st1     {v0.16b}, [x0]
+10:    str     w8, [x3]
        ret
 ENDPROC(ce_aes_ccm_auth_data)
 
 ENDPROC(ce_aes_ccm_final)
 
        .macro  aes_ccm_do_crypt,enc
-       frame_push      8
-
-       mov     x19, x0
-       mov     x20, x1
-       mov     x21, x2
-       mov     x22, x3
-       mov     x23, x4
-       mov     x24, x5
-       mov     x25, x6
-
-       ldr     x26, [x25, #8]                  /* load lower ctr */
-       ld1     {v0.16b}, [x24]                 /* load mac */
-CPU_LE(        rev     x26, x26                )       /* keep swabbed ctr in reg */
+       ldr     x8, [x6, #8]                    /* load lower ctr */
+       ld1     {v0.16b}, [x5]                  /* load mac */
+CPU_LE(        rev     x8, x8                  )       /* keep swabbed ctr in reg */
 0:     /* outer loop */
-       ld1     {v1.8b}, [x25]                  /* load upper ctr */
-       prfm    pldl1strm, [x20]
-       add     x26, x26, #1
-       rev     x9, x26
-       cmp     w23, #12                        /* which key size? */
-       sub     w7, w23, #2                     /* get modified # of rounds */
+       ld1     {v1.8b}, [x6]                   /* load upper ctr */
+       prfm    pldl1strm, [x1]
+       add     x8, x8, #1
+       rev     x9, x8
+       cmp     w4, #12                         /* which key size? */
+       sub     w7, w4, #2                      /* get modified # of rounds */
        ins     v1.d[1], x9                     /* no carry in lower ctr */
-       ld1     {v3.4s}, [x22]                  /* load first round key */
-       add     x10, x22, #16
+       ld1     {v3.4s}, [x3]                   /* load first round key */
+       add     x10, x3, #16
        bmi     1f
        bne     4f
        mov     v5.16b, v3.16b
        bpl     2b
        aese    v0.16b, v4.16b
        aese    v1.16b, v4.16b
-       subs    w21, w21, #16
-       bmi     7f                              /* partial block? */
-       ld1     {v2.16b}, [x20], #16            /* load next input block */
+       subs    w2, w2, #16
+       bmi     6f                              /* partial block? */
+       ld1     {v2.16b}, [x1], #16             /* load next input block */
        .if     \enc == 1
        eor     v2.16b, v2.16b, v5.16b          /* final round enc+mac */
        eor     v1.16b, v1.16b, v2.16b          /* xor with crypted ctr */
        eor     v1.16b, v2.16b, v5.16b          /* final round enc */
        .endif
        eor     v0.16b, v0.16b, v2.16b          /* xor mac with pt ^ rk[last] */
-       st1     {v1.16b}, [x19], #16            /* write output block */
-       beq     5f
-
-       if_will_cond_yield_neon
-       st1     {v0.16b}, [x24]                 /* store mac */
-       do_cond_yield_neon
-       ld1     {v0.16b}, [x24]                 /* reload mac */
-       endif_yield_neon
-
-       b       0b
-5:
-CPU_LE(        rev     x26, x26                        )
-       st1     {v0.16b}, [x24]                 /* store mac */
-       str     x26, [x25, #8]                  /* store lsb end of ctr (BE) */
-
-6:     frame_pop
-       ret
-
-7:     eor     v0.16b, v0.16b, v5.16b          /* final round mac */
+       st1     {v1.16b}, [x0], #16             /* write output block */
+       bne     0b
+CPU_LE(        rev     x8, x8                  )
+       st1     {v0.16b}, [x5]                  /* store mac */
+       str     x8, [x6, #8]                    /* store lsb end of ctr (BE) */
+5:     ret
+
+6:     eor     v0.16b, v0.16b, v5.16b          /* final round mac */
        eor     v1.16b, v1.16b, v5.16b          /* final round enc */
-       st1     {v0.16b}, [x24]                 /* store mac */
-       add     w21, w21, #16                   /* process partial tail block */
-8:     ldrb    w9, [x20], #1                   /* get 1 byte of input */
+       st1     {v0.16b}, [x5]                  /* store mac */
+       add     w2, w2, #16                     /* process partial tail block */
+7:     ldrb    w9, [x1], #1                    /* get 1 byte of input */
        umov    w6, v1.b[0]                     /* get top crypted ctr byte */
        umov    w7, v0.b[0]                     /* get top mac byte */
        .if     \enc == 1
        eor     w9, w9, w6
        eor     w7, w7, w9
        .endif
-       strb    w9, [x19], #1                   /* store out byte */
-       strb    w7, [x24], #1                   /* store mac byte */
-       subs    w21, w21, #1
-       beq     6b
+       strb    w9, [x0], #1                    /* store out byte */
+       strb    w7, [x5], #1                    /* store mac byte */
+       subs    w2, w2, #1
+       beq     5b
        ext     v0.16b, v0.16b, v0.16b, #1      /* shift out mac byte */
        ext     v1.16b, v1.16b, v1.16b, #1      /* shift out ctr byte */
-       b       8b
+       b       7b
        .endm
 
        /*
 
        .endm
 
        .macro          pmull_gcm_do_crypt, enc
-       frame_push      10
+       ld1             {SHASH.2d}, [x4]
+       ld1             {XL.2d}, [x1]
+       ldr             x8, [x5, #8]                    // load lower counter
 
-       mov             x19, x0
-       mov             x20, x1
-       mov             x21, x2
-       mov             x22, x3
-       mov             x23, x4
-       mov             x24, x5
-       mov             x25, x6
-       mov             x26, x7
-       .if             \enc == 1
-       ldr             x27, [sp, #96]                  // first stacked arg
-       .endif
-
-       ldr             x28, [x24, #8]                  // load lower counter
-CPU_LE(        rev             x28, x28        )
-
-0:     mov             x0, x25
-       load_round_keys w26, x0
-       ld1             {SHASH.2d}, [x23]
-       ld1             {XL.2d}, [x20]
+       load_round_keys w7, x6
 
        movi            MASK.16b, #0xe1
        ext             SHASH2.16b, SHASH.16b, SHASH.16b, #8
+CPU_LE(        rev             x8, x8          )
        shl             MASK.2d, MASK.2d, #57
        eor             SHASH2.16b, SHASH2.16b, SHASH.16b
 
        .if             \enc == 1
-       ld1             {KS.16b}, [x27]
+       ldr             x10, [sp]
+       ld1             {KS.16b}, [x10]
        .endif
 
-1:     ld1             {CTR.8b}, [x24]                 // load upper counter
-       ld1             {INP.16b}, [x22], #16
-       rev             x9, x28
-       add             x28, x28, #1
-       sub             w19, w19, #1
+0:     ld1             {CTR.8b}, [x5]                  // load upper counter
+       ld1             {INP.16b}, [x3], #16
+       rev             x9, x8
+       add             x8, x8, #1
+       sub             w0, w0, #1
        ins             CTR.d[1], x9                    // set lower counter
 
        .if             \enc == 1
        eor             INP.16b, INP.16b, KS.16b        // encrypt input
-       st1             {INP.16b}, [x21], #16
+       st1             {INP.16b}, [x2], #16
        .endif
 
        rev64           T1.16b, INP.16b
 
-       cmp             w26, #12
-       b.ge            4f                              // AES-192/256?
+       cmp             w7, #12
+       b.ge            2f                              // AES-192/256?
 
-2:     enc_round       CTR, v21
+1:     enc_round       CTR, v21
 
        ext             T2.16b, XL.16b, XL.16b, #8
        ext             IN1.16b, T1.16b, T1.16b, #8
 
        .if             \enc == 0
        eor             INP.16b, INP.16b, KS.16b
-       st1             {INP.16b}, [x21], #16
+       st1             {INP.16b}, [x2], #16
        .endif
 
-       cbz             w19, 3f
+       cbnz            w0, 0b
 
-       if_will_cond_yield_neon
-       st1             {XL.2d}, [x20]
-       .if             \enc == 1
-       st1             {KS.16b}, [x27]
-       .endif
-       do_cond_yield_neon
-       b               0b
-       endif_yield_neon
+CPU_LE(        rev             x8, x8          )
+       st1             {XL.2d}, [x1]
+       str             x8, [x5, #8]                    // store lower counter
 
-       b               1b
-
-3:     st1             {XL.2d}, [x20]
        .if             \enc == 1
-       st1             {KS.16b}, [x27]
+       st1             {KS.16b}, [x10]
        .endif
 
-CPU_LE(        rev             x28, x28        )
-       str             x28, [x24, #8]                  // store lower counter
-
-       frame_pop
        ret
 
-4:     b.eq            5f                              // AES-192?
+2:     b.eq            3f                              // AES-192?
        enc_round       CTR, v17
        enc_round       CTR, v18
-5:     enc_round       CTR, v19
+3:     enc_round       CTR, v19
        enc_round       CTR, v20
-       b               2b
+       b               1b
        .endm
 
        /*