*     than one 32bit instruction in Thumb-2)
  */
 
-
-       UNWIND( .fnstart                        )
-               enter   r4, lr
-       UNWIND( .fnend                          )
-
        UNWIND( .fnstart                        )
-               usave   r4, lr                    @ in first stmdb block
+               enter   r4, UNWIND(fpreg,) lr
+       UNWIND( .setfp  fpreg, sp               )
+       UNWIND( mov     fpreg, sp               )
        /*
         * fpreg is added to the enter register list and then set to sp;
         * .setfp records fpreg as the frame anchor for the unwinder. The
         * later stmfd/ldmfd of scratch registers therefore carry no .save
         * annotations of their own.
         * NOTE(review): assumes UNWIND() emits its argument only when
         * unwinding is configured and that enter/exit push/pop the listed
         * registers - both are defined outside this file; confirm.
         */
 
                subs    r2, r2, #4
                blt     8f
                bne     10f
 
 1:             subs    r2, r2, #(28)
-               stmfd   sp!, {r5 - r8}
-       UNWIND( .fnend                          )
-
-       UNWIND( .fnstart                        )
-               usave   r4, lr
-       UNWIND( .save   {r5 - r8}               ) @ in second stmfd block
        /*
         * r7 is left out of the scratch set here and in the loop below.
         * NOTE(review): presumably because fpreg may be r7 in some
         * configurations - confirm against the definition of fpreg.
         * This list must match the ldmfd at label 7 and at abort label 20.
         */
+               stmfd   sp!, {r5, r6, r8, r9}
                blt     5f
 
        CALGN(  ands    ip, r0, #31             )
        PLD(    pld     [r1, #92]               )
 
 3:     PLD(    pld     [r1, #124]              )
-4:             ldr8w   r1, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
+4:             ldr8w   r1, r3, r4, r5, r6, r8, r9, ip, lr, abort=20f
                subs    r2, r2, #32
-               str8w   r0, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
+               str8w   r0, r3, r4, r5, r6, r8, r9, ip, lr, abort=20f
                bge     3b
        PLD(    cmn     r2, #96                 )
        PLD(    bge     4b                      )
                ldr1w   r1, r4, abort=20f
                ldr1w   r1, r5, abort=20f
                ldr1w   r1, r6, abort=20f
-               ldr1w   r1, r7, abort=20f
                ldr1w   r1, r8, abort=20f
+               ldr1w   r1, r9, abort=20f
                ldr1w   r1, lr, abort=20f
 
 #if LDR1W_SHIFT < STR1W_SHIFT
                str1w   r0, r4, abort=20f
                str1w   r0, r5, abort=20f
                str1w   r0, r6, abort=20f
-               str1w   r0, r7, abort=20f
                str1w   r0, r8, abort=20f
+               str1w   r0, r9, abort=20f
                str1w   r0, lr, abort=20f
 
        CALGN(  bcs     2b                      )
 
-7:             ldmfd   sp!, {r5 - r8}
-       UNWIND( .fnend                          ) @ end of second stmfd block
+7:             ldmfd   sp!, {r5, r6, r8, r9}
 
-       UNWIND( .fnstart                        )
-               usave   r4, lr                    @ still in first stmdb block
 8:             movs    r2, r2, lsl #31
                ldr1b   r1, r3, ne, abort=21f
                ldr1b   r1, r4, cs, abort=21f
                str1b   r0, r4, cs, abort=21f
                str1b   r0, ip, cs, abort=21f
 
        /* Register list mirrors the enter line, with pc in place of lr. */
-               exit    r4, pc
+               exit    r4, UNWIND(fpreg,) pc
 
 9:             rsb     ip, ip, #4
                cmp     ip, #2
                ldr1w   r1, lr, abort=21f
                beq     17f
                bgt     18f
-       UNWIND( .fnend                          )
 
 
                .macro  forward_copy_shift pull push
                /*
                 * Main loop (labels 12/13): two ldr4w take eight words via
                 * r1, each pair of neighbouring words is merged using lspull
                 * by "pull" and lspush by "push", and one str8w writes eight
                 * words via r0. lr holds the last word taken and carries
                 * over into the next iteration as the first merge input.
                 * NOTE(review): ldr4w/str8w/lspull/lspush are defined outside
                 * this file; described here from their usage - confirm.
                 */
 
-       UNWIND( .fnstart                        )
-               usave   r4, lr                    @ still in first stmdb block
                subs    r2, r2, #28
                blt     14f
 
        CALGN(  subcc   r2, r2, ip              )
        CALGN(  bcc     15f                     )
 
                /*
                 * Five scratch registers, r7 skipped. This list must match
                 * the ldmfd after the loop and the one at abort label 19.
                 */
-11:            stmfd   sp!, {r5 - r9}
-       UNWIND( .fnend                          )
+11:            stmfd   sp!, {r5, r6, r8 - r10}
 
-       UNWIND( .fnstart                        )
-               usave   r4, lr
-       UNWIND( .save   {r5 - r9}               ) @ in new second stmfd block
        PLD(    pld     [r1, #0]                )
        PLD(    subs    r2, r2, #96             )
        PLD(    pld     [r1, #28]               )
        PLD(    pld     [r1, #92]               )
 
 12:    PLD(    pld     [r1, #124]              )
-13:            ldr4w   r1, r4, r5, r6, r7, abort=19f
+13:            ldr4w   r1, r4, r5, r6, r8, abort=19f
                mov     r3, lr, lspull #\pull
                subs    r2, r2, #32
-               ldr4w   r1, r8, r9, ip, lr, abort=19f
+               ldr4w   r1, r9, r10, ip, lr, abort=19f
                orr     r3, r3, r4, lspush #\push
                mov     r4, r4, lspull #\pull
                orr     r4, r4, r5, lspush #\push
                mov     r5, r5, lspull #\pull
                orr     r5, r5, r6, lspush #\push
                mov     r6, r6, lspull #\pull
-               orr     r6, r6, r7, lspush #\push
-               mov     r7, r7, lspull #\pull
-               orr     r7, r7, r8, lspush #\push
+               orr     r6, r6, r8, lspush #\push
                mov     r8, r8, lspull #\pull
                orr     r8, r8, r9, lspush #\push
                mov     r9, r9, lspull #\pull
-               orr     r9, r9, ip, lspush #\push
+               orr     r9, r9, r10, lspush #\push
+               mov     r10, r10, lspull #\pull
+               orr     r10, r10, ip, lspush #\push
                mov     ip, ip, lspull #\pull
                orr     ip, ip, lr, lspush #\push
-               str8w   r0, r3, r4, r5, r6, r7, r8, r9, ip, abort=19f
+               str8w   r0, r3, r4, r5, r6, r8, r9, r10, ip, abort=19f
                bge     12b
        PLD(    cmn     r2, #96                 )
        PLD(    bge     13b                     )
 
-               ldmfd   sp!, {r5 - r9}
-       UNWIND( .fnend                          ) @ end of the second stmfd block
+               ldmfd   sp!, {r5, r6, r8 - r10}
 
-       UNWIND( .fnstart                        )
-               usave   r4, lr                    @ still in first stmdb block
 14:            ands    ip, r2, #28
                beq     16f
 
 
                /* Step r1 back by push/8 bytes, then finish in the tail at label 8. */
 16:            sub     r1, r1, #(\push / 8)
                b       8b
-       UNWIND( .fnend                          )
 
                .endm
 
 
 18:            forward_copy_shift      pull=24 push=8
 
        /* Closes the unwind region opened by the .fnstart at function entry. */
+       UNWIND( .fnend                          )
 
 /*
  * Abort preamble and completion macros.
  */
 
        .macro  copy_abort_preamble
        /*
         * Abort landing pads, selected by the abort= argument of the copy
         * macros: 19 undoes the five-register stmfd at label 11, 20 undoes
         * the four-register stmfd after label 1, and 21 is the target for
         * aborts taken while no scratch registers are on the stack.
         * Each register list must stay identical to its matching stmfd.
         */
-19:    ldmfd   sp!, {r5 - r9}
+19:    ldmfd   sp!, {r5, r6, r8 - r10}
        b       21f
-20:    ldmfd   sp!, {r5 - r8}
+20:    ldmfd   sp!, {r5, r6, r8, r9}
 21:
        .endm
 
        .macro  copy_abort_end
        /*
         * Pops the register list given on the enter line at function entry,
         * with pc in place of lr, so the load of pc returns to the caller.
         * NOTE(review): assumes enter pushed exactly this list and that
         * UNWIND(fpreg,) expands the same way in both places - confirm.
         */
-       ldmfd   sp!, {r4, pc}
+       ldmfd   sp!, {r4, UNWIND(fpreg,) pc}
        .endm