ld      r23,0(r23)
        ld      r23,CPU_SPEC_RESTORE(r23)
        cmpdi   0,r23,0
-       beq     4f
+       beq     3f                      /* no restore hook: skip the call, still do the decrement at 3 */
        ld      r23,0(r23)
        mtctr   r23
        bctrl
 
-3:     HMT_LOW
+3:     LOAD_REG_ADDR(r3, boot_cpu_count) /* Atomically decrement boot_cpu_count */
+       lwarx   r4,0,r3                 /* load the count and take a reservation */
+       subi    r4,r4,1
+       stwcx.  r4,0,r3                 /* store only if the reservation still holds */
+       bne     3b                      /* reservation lost: redo the decrement */
+       isync
+
+4:     HMT_LOW
        lbz     r23,PACAPROCSTART(r13)  /* Test if this processor should */
                                        /* start.                        */
 #ifndef CONFIG_SMP
-       b       3b                      /* Never go on non-SMP           */
+       b       4b                      /* Never go on non-SMP           */
 #else
        cmpwi   0,r23,0
-       beq     3b                      /* Loop until told to go         */
+       beq     4b                      /* Loop until told to go         */
 
        sync                            /* order paca.run and cur_cpu_spec */
+       isync                           /* In case code patching happened: drop already-fetched instructions */
 
-4:     /* Create a temp kernel stack for use before relocation is on.  */
+       /* Create a temp kernel stack for use before relocation is on.  */
        ld      r1,PACAEMERGSP(r13)
        subi    r1,r1,STACK_FRAME_OVERHEAD
 
 
                                          const char *uname, int depth,
                                          void *data)
 {
-       static int logical_cpuid = 0;
        char *type = of_get_flat_dt_prop(node, "device_type", NULL);
        const u32 *prop;
        const u32 *intserv;
        int i, nthreads;
        unsigned long len;
-       int found = 0;
+       int found = -1;
 
        /* We are scanning "cpu" nodes only */
        if (type == NULL || strcmp(type, "cpu") != 0)
                 * booted proc.
                 */
                if (initial_boot_params && initial_boot_params->version >= 2) {
-                       if (intserv[i] ==
-                                       initial_boot_params->boot_cpuid_phys) {
-                               found = 1;
-                               break;
-                       }
+                       if (intserv[i] == initial_boot_params->boot_cpuid_phys)
+                               found = boot_cpu_count;
                } else {
                        /*
                         * Check if it's the boot-cpu, set it's hw index now,
                         * off secondary threads.
                         */
                        if (of_get_flat_dt_prop(node,
-                                       "linux,boot-cpu", NULL) != NULL) {
-                               found = 1;
-                               break;
-                       }
+                                       "linux,boot-cpu", NULL) != NULL)
+                               found = boot_cpu_count;
                }
-
 #ifdef CONFIG_SMP
                /* logical cpu id is always 0 on UP kernels */
-               logical_cpuid++;
+               boot_cpu_count++;
 #endif
        }
 
-       if (found) {
-               DBG("boot cpu: logical %d physical %d\n", logical_cpuid,
+       if (found >= 0) {
+               DBG("boot cpu: logical %d physical %d\n", found,
                        intserv[i]);
-               boot_cpuid = logical_cpuid;
-               set_hard_smp_processor_id(boot_cpuid, intserv[i]);
+               boot_cpuid = found;
+               set_hard_smp_processor_id(found, intserv[i]);
 
                /*
                 * PAPR defines "logical" PVR values for cpus that
 
 #endif
 
 int boot_cpuid = 0;
+int __initdata boot_cpu_count;  /* Incremented once per cpu thread by the
+                                 * device-tree cpu scan (CONFIG_SMP only),
+                                 * decremented by the lwarx/stwcx. sequence
+                                 * in the assembly spin path, and polled by
+                                 * smp_release_cpus().
+                                 * NOTE(review): this is __initdata, yet
+                                 * smp_release_cpus() is not marked __init -
+                                 * confirm nothing touches it after init
+                                 * memory is released. */
 u64 ppc64_pft_size;
 
 /* Pick defaults since we might want to patch instructions
void smp_release_cpus(void)
 {
        unsigned long *ptr;
+       int i;
 
        DBG(" -> smp_release_cpus()\n");
 
        ptr  = (unsigned long *)((unsigned long)&__secondary_hold_spinloop
                        - PHYSICAL_START);
        *ptr = __pa(generic_secondary_smp_init);
-       mb();
+
+       /*
+        * And wait a bit for them to catch up: poll boot_cpu_count up to
+        * 100000 times with a 1us delay between polls, leaving early as
+        * soon as it reads zero. The mb() at the top of each pass orders
+        * the store to the hold location above before the first poll.
+        * On timeout the loop simply falls through; the remaining count is
+        * only reported through DBG().
+        * NOTE(review): HMT_low() is issued on every pass and nothing in
+        * this function raises the thread priority again - confirm that
+        * is intended on the early-break path.
+        */
+       for (i = 0; i < 100000; i++) {
+               mb();
+               HMT_low();
+               if (boot_cpu_count == 0)
+                       break;
+               udelay(1);
+       }
+       DBG("boot_cpu_count = %d\n", boot_cpu_count);
 
        DBG(" <- smp_release_cpus()\n");
 }