list_del(&vcpu->arch.run_list);
 }
 
+/*
+ * Claim the hardware thread described by paca[cpu] for KVM.
+ *
+ * Sets hwthread_req in that thread's kvm_hstate and then polls its
+ * hwthread_state until it is no longer KVM_HWTHREAD_IN_KERNEL.
+ *
+ * Returns 0 once the thread is observed outside KVM_HWTHREAD_IN_KERNEL,
+ * or -EBUSY (after logging an error) if it is still in that state when
+ * the polling budget is exhausted.  hwthread_req is left set on both
+ * paths; this function never clears it.
+ */
+static int kvmppc_grab_hwthread(int cpu)
+{
+       struct paca_struct *tpaca;
+       long timeout = 1000;	/* polling budget; decremented once per poll */
+
+       tpaca = &paca[cpu];
+
+       /* Ensure the thread won't go into the kernel if it wakes */
+       tpaca->kvm_hstate.hwthread_req = 1;
+
+       /*
+        * If the thread is already executing in the kernel (e.g. handling
+        * a stray interrupt), wait for it to get back to nap mode.
+        * The smp_mb() is to ensure that our setting of hwthread_req
+        * is visible before we look at hwthread_state, so if this
+        * races with the code at system_reset_pSeries and the thread
+        * misses our setting of hwthread_req, we are sure to see its
+        * setting of hwthread_state, and vice versa.
+        */
+       smp_mb();
+       while (tpaca->kvm_hstate.hwthread_state == KVM_HWTHREAD_IN_KERNEL) {
+               /* Give up once the budget hits zero rather than spin forever */
+               if (--timeout <= 0) {
+                       pr_err("KVM: couldn't grab cpu %d\n", cpu);
+                       return -EBUSY;
+               }
+               /* Short busy-wait before re-reading hwthread_state */
+               udelay(1);
+       }
+       return 0;
+}
+
+/*
+ * Drop KVM's claim on the hardware thread described by paca[cpu]:
+ * clear hwthread_req and reset the kvm_vcpu pointer in that thread's
+ * kvm_hstate to NULL.  Does not wait for, or inspect, hwthread_state.
+ */
+static void kvmppc_release_hwthread(int cpu)
+{
+       struct paca_struct *tpaca;
+
+       tpaca = &paca[cpu];
+       /* Withdraw the request flag that kvmppc_grab_hwthread() sets to 1 */
+       tpaca->kvm_hstate.hwthread_req = 0;
+       /* Leave no vcpu pointer behind in this thread's kvm_hstate */
+       tpaca->kvm_hstate.kvm_vcpu = NULL;
+}
+
 static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
 {
        int cpu;
        smp_wmb();
 #if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
        if (vcpu->arch.ptid) {
-               tpaca->cpu_start = 0x80;
-               wmb();
+               kvmppc_grab_hwthread(cpu);
                xics_wake_cpu(cpu);
                ++vc->n_woken;
        }
        struct kvm_vcpu *vcpu, *vcpu0, *vnext;
        long ret;
        u64 now;
-       int ptid;
+       int ptid, i;
 
        /* don't start if any threads have a signal pending */
        list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
        vc->napping_threads = 0;
        list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
                kvmppc_start_thread(vcpu);
+       /* Grab any remaining hw threads so they can't go into the kernel */
+       for (i = ptid; i < threads_per_core; ++i)
+               kvmppc_grab_hwthread(vc->pcpu + i);
 
        preempt_disable();
        spin_unlock(&vc->lock);
 
        kvm_guest_enter();
        __kvmppc_vcore_entry(NULL, vcpu0);
+       for (i = 0; i < threads_per_core; ++i)
+               kvmppc_release_hwthread(vc->pcpu + i);
 
        spin_lock(&vc->lock);
        /* disable sending of IPIs on virtual external irqs */
 
 #include <asm/hvcall.h>
 #include <asm/asm-offsets.h>
 #include <asm/exception-64s.h>
+#include <asm/kvm_book3s_asm.h>
 
 /*****************************************************************************
  *                                                                           *
 
 #define XICS_XIRR              4
 #define XICS_QIRR              0xc
+#define XICS_IPI               2       /* interrupt source # for IPIs */
 
 /*
  * We come in here when wakened from nap mode on a secondary hw thread.
        subi    r1,r1,STACK_FRAME_OVERHEAD
        ld      r2,PACATOC(r13)
 
-       /* were we napping due to cede? */
-       lbz     r0,HSTATE_NAPPING(r13)
-       cmpwi   r0,0
-       bne     kvm_end_cede
+       li      r0,KVM_HWTHREAD_IN_KVM
+       stb     r0,HSTATE_HWTHREAD_STATE(r13)
 
-       /* get vcpu pointer */
-       ld      r4, HSTATE_KVM_VCPU(r13)
+       /* NV GPR values from power7_idle() will no longer be valid */
+       li      r0,1
+       stb     r0,PACA_NAPSTATELOST(r13)
 
-       /* We got here with an IPI; clear it */
-       ld      r5, HSTATE_XICS_PHYS(r13)
-       li      r0, 0xff
-       li      r6, XICS_QIRR
-       li      r7, XICS_XIRR
-       lwzcix  r8, r5, r7              /* ack the interrupt */
+       /* get vcpu pointer, NULL if we have no vcpu to run */
+       ld      r4,HSTATE_KVM_VCPU(r13)
+       cmpdi   cr1,r4,0
+
+       /* Check the wake reason in SRR1 to see why we got here */
+       mfspr   r3,SPRN_SRR1
+       rlwinm  r3,r3,44-31,0x7         /* extract wake reason field */
+       cmpwi   r3,4                    /* was it an external interrupt? */
+       bne     27f
+
+       /*
+        * External interrupt - for now assume it is an IPI, since we
+        * should never get any other interrupts sent to offline threads.
+        * Only do this for secondary threads.
+        */
+       beq     cr1,25f
+       lwz     r3,VCPU_PTID(r4)
+       cmpwi   r3,0
+       beq     27f
+25:    ld      r5,HSTATE_XICS_PHYS(r13)
+       li      r0,0xff
+       li      r6,XICS_QIRR
+       li      r7,XICS_XIRR
+       lwzcix  r8,r5,r7                /* get and ack the interrupt */
        sync
-       stbcix  r0, r5, r6              /* clear it */
-       stwcix  r8, r5, r7              /* EOI it */
+       clrldi. r9,r8,40                /* get interrupt source ID. */
+       beq     27f                     /* none there? */
+       cmpwi   r9,XICS_IPI
+       bne     26f
+       stbcix  r0,r5,r6                /* clear IPI */
+26:    stwcix  r8,r5,r7                /* EOI the interrupt */
 
-       /* NV GPR values from power7_idle() will no longer be valid */
-       stb     r0, PACA_NAPSTATELOST(r13)
+27:    /* XXX should handle hypervisor maintenance interrupts etc. here */
+
+       /* if we have no vcpu to run, go back to sleep */
+       beq     cr1,kvm_no_guest
+
+       /* were we napping due to cede? */
+       lbz     r0,HSTATE_NAPPING(r13)
+       cmpwi   r0,0
+       bne     kvm_end_cede
 
 .global kvmppc_hv_entry
 kvmppc_hv_entry:
         * Take a nap until a decrementer or external interrupt occurs,
         * with PECE1 (wake on decr) and PECE0 (wake on external) set in LPCR
         */
-       li      r0,0x80
-       stb     r0,PACAPROCSTART(r13)
+       li      r0,1
+       stb     r0,HSTATE_HWTHREAD_REQ(r13)
        mfspr   r5,SPRN_LPCR
        ori     r5,r5,LPCR_PECE0 | LPCR_PECE1
        mtspr   SPRN_LPCR,r5
 kvm_end_cede:
        /* Woken by external or decrementer interrupt */
        ld      r1, HSTATE_HOST_R1(r13)
-       ld      r2, PACATOC(r13)
 
-       /* If we're a secondary thread and we got here by an IPI, ack it */
-       ld      r4,HSTATE_KVM_VCPU(r13)
-       lwz     r3,VCPU_PTID(r4)
-       cmpwi   r3,0
-       beq     27f
-       mfspr   r3,SPRN_SRR1
-       rlwinm  r3,r3,44-31,0x7         /* extract wake reason field */
-       cmpwi   r3,4                    /* was it an external interrupt? */
-       bne     27f
-       ld      r5, HSTATE_XICS_PHYS(r13)
-       li      r0,0xff
-       li      r6,XICS_QIRR
-       li      r7,XICS_XIRR
-       lwzcix  r8,r5,r7                /* ack the interrupt */
-       sync
-       stbcix  r0,r5,r6                /* clear it */
-       stwcix  r8,r5,r7                /* EOI it */
-27:
        /* load up FP state */
        bl      kvmppc_load_fp
 
        stwcx.  r3, 0, r4
        bne     51b
 
+kvm_no_guest:
+       li      r0, KVM_HWTHREAD_IN_NAP
+       stb     r0, HSTATE_HWTHREAD_STATE(r13)
+       li      r0, 0
+       std     r0, HSTATE_KVM_VCPU(r13)
+
        li      r3, LPCR_PECE0
        mfspr   r4, SPRN_LPCR
        rlwimi  r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
        mtspr   SPRN_LPCR, r4
        isync
-       li      r0, 0
        std     r0, HSTATE_SCRATCH0(r13)
        ptesync
        ld      r0, HSTATE_SCRATCH0(r13)