.text
                adr     r0, LC0
- ARM(          ldmia   r0, {r1, r2, r3, r4, r5, r6, ip, sp}    )
- THUMB(                ldmia   r0, {r1, r2, r3, r4, r5, r6, ip}        )
+ ARM(          ldmia   r0, {r1, r2, r3, r4, r5, r6, r11, ip, sp})
+ THUMB(                ldmia   r0, {r1, r2, r3, r4, r5, r6, r11, ip}   )
  THUMB(                ldr     sp, [r0, #28]                           )
                subs    r0, r0, r1              @ calculate the delta offset
 
                /*
                 * We're running at a different address.  We need to fix
                 * up various pointers:
-                *   r5 - zImage base address
-                *   r6 - GOT start
+                *   r5 - zImage base address (_start)
+                *   r6 - size of decompressed image
+                *   r11 - GOT start
                 *   ip - GOT end
                 */
                add     r5, r5, r0
-               add     r6, r6, r0
+               add     r11, r11, r0
                add     ip, ip, r0
 
 #ifndef CONFIG_ZBOOT_ROM
                /*
                 * Relocate all entries in the GOT table.
                 */
-1:             ldr     r1, [r6, #0]            @ relocate entries in the GOT
+1:             ldr     r1, [r11, #0]           @ relocate entries in the GOT
                add     r1, r1, r0              @ table.  This fixes up the
-               str     r1, [r6], #4            @ C references.
-               cmp     r6, ip
+               str     r1, [r11], #4           @ C references.
+               cmp     r11, ip
                blo     1b
 #else
 
                 * Relocate entries in the GOT table.  We only relocate
                 * the entries that are outside the (relocated) BSS region.
                 */
-1:             ldr     r1, [r6, #0]            @ relocate entries in the GOT
+1:             ldr     r1, [r11, #0]           @ relocate entries in the GOT
                cmp     r1, r2                  @ entry < bss_start ||
                cmphs   r3, r1                  @ _end < entry
                addlo   r1, r1, r0              @ table.  This fixes up the
-               str     r1, [r6], #4            @ C references.
-               cmp     r6, ip
+               str     r1, [r11], #4           @ C references.
+               cmp     r11, ip
                blo     1b
 #endif
 
  * Check to see if we will overwrite ourselves.
  *   r4 = final kernel address
  *   r5 = start of this image
+ *   r6 = size of decompressed image
  *   r2 = end of malloc space (and therefore this image)
  * We basically want:
  *   r4 >= r2 -> OK
  */
                cmp     r4, r2
                bhs     wont_overwrite
-               sub     r3, sp, r5              @ > compressed kernel size
-               add     r0, r4, r3, lsl #2      @ allow for 4x expansion
+               add     r0, r4, r6
                cmp     r0, r5
                bls     wont_overwrite
 
  * r1-r3  = unused
  * r4     = kernel execution address
  * r5     = decompressed kernel start
- * r6     = processor ID
  * r7     = architecture ID
  * r8     = atags pointer
  * r9-r12,r14 = corrupted
                .word   _end                    @ r3
                .word   zreladdr                @ r4
                .word   _start                  @ r5
-               .word   _got_start              @ r6
+               .word   _image_size             @ r6
+               .word   _got_start              @ r11
                .word   _got_end                @ ip
                .word   user_stack+4096         @ sp
 LC1:           .word   reloc_end - reloc_start
  *
  * On entry,
  *  r4 = kernel execution address
- *  r6 = processor ID
  *  r7 = architecture number
  *  r8 = atags pointer
  *  r9 = run-time address of "start"  (???)
  * r1-r3  = unused
  * r4     = kernel execution address
  * r5     = decompressed kernel start
- * r6     = processor ID
  * r7     = architecture ID
  * r8     = atags pointer
  * r9-r12,r14 = corrupted
  *  r1  = corrupted
  *  r2  = corrupted
  *  r3  = block offset
- *  r6  = corrupted
+ *  r9  = corrupted
  *  r12 = corrupted
  */
 
 call_cache_fn: adr     r12, proc_types
 #ifdef CONFIG_CPU_CP15
-               mrc     p15, 0, r6, c0, c0      @ get processor ID
+               mrc     p15, 0, r9, c0, c0      @ get processor ID
 #else
-               ldr     r6, =CONFIG_PROCESSOR_ID
+               ldr     r9, =CONFIG_PROCESSOR_ID
 #endif
 1:             ldr     r1, [r12, #0]           @ get value
                ldr     r2, [r12, #4]           @ get mask
-               eor     r1, r1, r6              @ (real ^ match)
+               eor     r1, r1, r9              @ (real ^ match)
                tst     r1, r2                  @       & mask
  ARM(          addeq   pc, r12, r3             ) @ call cache function
  THUMB(                addeq   r12, r3                 )
  * Turn off the Cache and MMU.  ARMv3 does not support
  * reading the control register, but ARMv4 does.
  *
- * On entry,  r6 = processor ID
- * On exit,   r0, r1, r2, r3, r12 corrupted
+ * On exit, r0, r1, r2, r3, r9, r12 corrupted
  * This routine must preserve: r4, r6, r7
  */
                .align  5
 /*
  * Clean and flush the cache to maintain consistency.
  *
- * On entry,
- *  r6 = processor ID
  * On exit,
- *  r1, r2, r3, r11, r12 corrupted
+ *  r1, r2, r3, r9, r11, r12 corrupted
  * This routine must preserve:
  *  r0, r4, r5, r6, r7
  */
                mov     r2, #64*1024            @ default: 32K dcache size (*2)
                mov     r11, #32                @ default: 32 byte line size
                mrc     p15, 0, r3, c0, c0, 1   @ read cache type
-               teq     r3, r6                  @ cache ID register present?
+               teq     r3, r9                  @ cache ID register present?
                beq     no_cache_id
                mov     r1, r3, lsr #18
                and     r1, r1, #7