long tlb_lo1;
 };
 
+#define KVM_MIPS_FPU_FPU       0x1
+
 #define KVM_MIPS_GUEST_TLB_SIZE        64
 struct kvm_vcpu_arch {
        void *host_ebase, *guest_ebase;
 
        /* FPU State */
        struct mips_fpu_struct fpu;
+       /* Which FPU state is loaded (KVM_MIPS_FPU_*) */
+       unsigned int fpu_inuse;
 
        /* COP0 State */
        struct mips_coproc *cop0;
 
        /* WAIT executed */
        int wait;
+
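+       /* FPU allowed for the guest (see kvm_mips_guest_can_have_fpu()) */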
+       u8 fpu_enabled;
 };
 
 
        kvm_set_c0_guest_ebase(cop0, ((val) & (change)));               \
 }
 
+/* Helpers */
+
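+/*
+ * Whether the host allows the guest an FPU at all: the FPU must have been
+ * enabled for this VCPU (fpu_enabled) and, when cpu_has_fpu is a compile-time
+ * constant, the host must actually have an FPU.
+ */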
+static inline bool kvm_mips_guest_can_have_fpu(struct kvm_vcpu_arch *vcpu)
+{
+       return (!__builtin_constant_p(cpu_has_fpu) || cpu_has_fpu) &&
+               vcpu->fpu_enabled;
+}
+
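+/*
+ * Whether the guest currently has an FPU: it must be permitted one and the
+ * guest's Config1.FP bit must be set (FPU present in the guest context).
+ */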
+static inline bool kvm_mips_guest_has_fpu(struct kvm_vcpu_arch *vcpu)
+{
+       return kvm_mips_guest_can_have_fpu(vcpu) &&
+               kvm_read_c0_guest_config1(vcpu->cop0) & MIPS_CONF1_FP;
+}
 
 struct kvm_mips_callbacks {
        int (*handle_cop_unusable)(struct kvm_vcpu *vcpu);
 /* Trampoline ASM routine to start running in "Guest" context */
 extern int __kvm_mips_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu);
 
+/* FPU context management */
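+/*
+ * __kvm_save_fpu()/__kvm_restore_fpu() (implemented in assembly) transfer the
+ * whole FP register file to/from vcpu->arch.fpu, and __kvm_restore_fcsr()
+ * reloads only FCR31. kvm_own_fpu() enables the FPU and restores the guest
+ * context, kvm_drop_fpu() discards it, and kvm_lose_fpu() saves it back and
+ * disables the FPU again.
+ */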
+void __kvm_save_fpu(struct kvm_vcpu_arch *vcpu);
+void __kvm_restore_fpu(struct kvm_vcpu_arch *vcpu);
+void __kvm_restore_fcsr(struct kvm_vcpu_arch *vcpu);
+void kvm_own_fpu(struct kvm_vcpu *vcpu);
+void kvm_drop_fpu(struct kvm_vcpu *vcpu);
+void kvm_lose_fpu(struct kvm_vcpu *vcpu);
+
 /* TLB handling */
 uint32_t kvm_get_kernel_asid(struct kvm_vcpu *vcpu);
 
 
        OFFSET(VCPU_LO, kvm_vcpu_arch, lo);
        OFFSET(VCPU_HI, kvm_vcpu_arch, hi);
        OFFSET(VCPU_PC, kvm_vcpu_arch, pc);
+       BLANK();
+
+       OFFSET(VCPU_FPR0, kvm_vcpu_arch, fpu.fpr[0]);
+       OFFSET(VCPU_FPR1, kvm_vcpu_arch, fpu.fpr[1]);
+       OFFSET(VCPU_FPR2, kvm_vcpu_arch, fpu.fpr[2]);
+       OFFSET(VCPU_FPR3, kvm_vcpu_arch, fpu.fpr[3]);
+       OFFSET(VCPU_FPR4, kvm_vcpu_arch, fpu.fpr[4]);
+       OFFSET(VCPU_FPR5, kvm_vcpu_arch, fpu.fpr[5]);
+       OFFSET(VCPU_FPR6, kvm_vcpu_arch, fpu.fpr[6]);
+       OFFSET(VCPU_FPR7, kvm_vcpu_arch, fpu.fpr[7]);
+       OFFSET(VCPU_FPR8, kvm_vcpu_arch, fpu.fpr[8]);
+       OFFSET(VCPU_FPR9, kvm_vcpu_arch, fpu.fpr[9]);
+       OFFSET(VCPU_FPR10, kvm_vcpu_arch, fpu.fpr[10]);
+       OFFSET(VCPU_FPR11, kvm_vcpu_arch, fpu.fpr[11]);
+       OFFSET(VCPU_FPR12, kvm_vcpu_arch, fpu.fpr[12]);
+       OFFSET(VCPU_FPR13, kvm_vcpu_arch, fpu.fpr[13]);
+       OFFSET(VCPU_FPR14, kvm_vcpu_arch, fpu.fpr[14]);
+       OFFSET(VCPU_FPR15, kvm_vcpu_arch, fpu.fpr[15]);
+       OFFSET(VCPU_FPR16, kvm_vcpu_arch, fpu.fpr[16]);
+       OFFSET(VCPU_FPR17, kvm_vcpu_arch, fpu.fpr[17]);
+       OFFSET(VCPU_FPR18, kvm_vcpu_arch, fpu.fpr[18]);
+       OFFSET(VCPU_FPR19, kvm_vcpu_arch, fpu.fpr[19]);
+       OFFSET(VCPU_FPR20, kvm_vcpu_arch, fpu.fpr[20]);
+       OFFSET(VCPU_FPR21, kvm_vcpu_arch, fpu.fpr[21]);
+       OFFSET(VCPU_FPR22, kvm_vcpu_arch, fpu.fpr[22]);
+       OFFSET(VCPU_FPR23, kvm_vcpu_arch, fpu.fpr[23]);
+       OFFSET(VCPU_FPR24, kvm_vcpu_arch, fpu.fpr[24]);
+       OFFSET(VCPU_FPR25, kvm_vcpu_arch, fpu.fpr[25]);
+       OFFSET(VCPU_FPR26, kvm_vcpu_arch, fpu.fpr[26]);
+       OFFSET(VCPU_FPR27, kvm_vcpu_arch, fpu.fpr[27]);
+       OFFSET(VCPU_FPR28, kvm_vcpu_arch, fpu.fpr[28]);
+       OFFSET(VCPU_FPR29, kvm_vcpu_arch, fpu.fpr[29]);
+       OFFSET(VCPU_FPR30, kvm_vcpu_arch, fpu.fpr[30]);
+       OFFSET(VCPU_FPR31, kvm_vcpu_arch, fpu.fpr[31]);
+
+       OFFSET(VCPU_FCR31, kvm_vcpu_arch, fpu.fcr31);
+       BLANK();
+
        OFFSET(VCPU_COP0, kvm_vcpu_arch, cop0);
        OFFSET(VCPU_GUEST_KERNEL_ASID, kvm_vcpu_arch, guest_kernel_asid);
        OFFSET(VCPU_GUEST_USER_ASID, kvm_vcpu_arch, guest_user_asid);
 
 
 kvm-objs := $(common-objs) mips.o emulate.o locore.o \
            interrupt.o stats.o commpage.o \
-           dyntrans.o trap_emul.o
+           dyntrans.o trap_emul.o fpu.o
 
 obj-$(CONFIG_KVM)      += kvm.o
 obj-y                  += callback.o tlb.o
 
--- /dev/null
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * FPU context handling code for KVM.
+ *
+ * Copyright (C) 2015 Imagination Technologies Ltd.
+ */
+
+#include <asm/asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/fpregdef.h>
+#include <asm/mipsregs.h>
+#include <asm/regdef.h>
+
+       .set    noreorder
+       .set    noat
+
+LEAF(__kvm_save_fpu)
+       .set    push
+       .set    mips64r2
+       SET_HARDFLOAT
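+       /*
+        * Status.FR (bit 26) selects the 32x64-bit register model; the
+        * odd-numbered doubles only exist when FR=1, so the sll/bgez below
+        * tests bit 26 via the sign bit to decide whether to save them.
+        */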
+       mfc0    t0, CP0_STATUS
+       sll     t0, t0, 5                       # is Status.FR set?
+       bgez    t0, 1f                          # no: skip odd doubles
+        nop
+       sdc1    $f1,  VCPU_FPR1(a0)
+       sdc1    $f3,  VCPU_FPR3(a0)
+       sdc1    $f5,  VCPU_FPR5(a0)
+       sdc1    $f7,  VCPU_FPR7(a0)
+       sdc1    $f9,  VCPU_FPR9(a0)
+       sdc1    $f11, VCPU_FPR11(a0)
+       sdc1    $f13, VCPU_FPR13(a0)
+       sdc1    $f15, VCPU_FPR15(a0)
+       sdc1    $f17, VCPU_FPR17(a0)
+       sdc1    $f19, VCPU_FPR19(a0)
+       sdc1    $f21, VCPU_FPR21(a0)
+       sdc1    $f23, VCPU_FPR23(a0)
+       sdc1    $f25, VCPU_FPR25(a0)
+       sdc1    $f27, VCPU_FPR27(a0)
+       sdc1    $f29, VCPU_FPR29(a0)
+       sdc1    $f31, VCPU_FPR31(a0)
+1:     sdc1    $f0,  VCPU_FPR0(a0)
+       sdc1    $f2,  VCPU_FPR2(a0)
+       sdc1    $f4,  VCPU_FPR4(a0)
+       sdc1    $f6,  VCPU_FPR6(a0)
+       sdc1    $f8,  VCPU_FPR8(a0)
+       sdc1    $f10, VCPU_FPR10(a0)
+       sdc1    $f12, VCPU_FPR12(a0)
+       sdc1    $f14, VCPU_FPR14(a0)
+       sdc1    $f16, VCPU_FPR16(a0)
+       sdc1    $f18, VCPU_FPR18(a0)
+       sdc1    $f20, VCPU_FPR20(a0)
+       sdc1    $f22, VCPU_FPR22(a0)
+       sdc1    $f24, VCPU_FPR24(a0)
+       sdc1    $f26, VCPU_FPR26(a0)
+       sdc1    $f28, VCPU_FPR28(a0)
+       jr      ra
+        sdc1   $f30, VCPU_FPR30(a0)
+       .set    pop
+       END(__kvm_save_fpu)
+
+LEAF(__kvm_restore_fpu)
+       .set    push
+       .set    mips64r2
+       SET_HARDFLOAT
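+       /* As in __kvm_save_fpu: odd-numbered doubles only exist when Status.FR=1 */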
+       mfc0    t0, CP0_STATUS
+       sll     t0, t0, 5                       # is Status.FR set?
+       bgez    t0, 1f                          # no: skip odd doubles
+        nop
+       ldc1    $f1,  VCPU_FPR1(a0)
+       ldc1    $f3,  VCPU_FPR3(a0)
+       ldc1    $f5,  VCPU_FPR5(a0)
+       ldc1    $f7,  VCPU_FPR7(a0)
+       ldc1    $f9,  VCPU_FPR9(a0)
+       ldc1    $f11, VCPU_FPR11(a0)
+       ldc1    $f13, VCPU_FPR13(a0)
+       ldc1    $f15, VCPU_FPR15(a0)
+       ldc1    $f17, VCPU_FPR17(a0)
+       ldc1    $f19, VCPU_FPR19(a0)
+       ldc1    $f21, VCPU_FPR21(a0)
+       ldc1    $f23, VCPU_FPR23(a0)
+       ldc1    $f25, VCPU_FPR25(a0)
+       ldc1    $f27, VCPU_FPR27(a0)
+       ldc1    $f29, VCPU_FPR29(a0)
+       ldc1    $f31, VCPU_FPR31(a0)
+1:     ldc1    $f0,  VCPU_FPR0(a0)
+       ldc1    $f2,  VCPU_FPR2(a0)
+       ldc1    $f4,  VCPU_FPR4(a0)
+       ldc1    $f6,  VCPU_FPR6(a0)
+       ldc1    $f8,  VCPU_FPR8(a0)
+       ldc1    $f10, VCPU_FPR10(a0)
+       ldc1    $f12, VCPU_FPR12(a0)
+       ldc1    $f14, VCPU_FPR14(a0)
+       ldc1    $f16, VCPU_FPR16(a0)
+       ldc1    $f18, VCPU_FPR18(a0)
+       ldc1    $f20, VCPU_FPR20(a0)
+       ldc1    $f22, VCPU_FPR22(a0)
+       ldc1    $f24, VCPU_FPR24(a0)
+       ldc1    $f26, VCPU_FPR26(a0)
+       ldc1    $f28, VCPU_FPR28(a0)
+       jr      ra
+        ldc1   $f30, VCPU_FPR30(a0)
+       .set    pop
+       END(__kvm_restore_fpu)
+
+LEAF(__kvm_restore_fcsr)
+       .set    push
+       SET_HARDFLOAT
+       lw      t0, VCPU_FCR31(a0)
+       /*
+        * The ctc1 must stay at this offset in __kvm_restore_fcsr.
+        * See kvm_mips_csr_die_notify() which handles t0 containing a value
+        * which triggers an FP Exception, which must be stepped over and
+        * ignored since the set cause bits must remain there for the guest.
+        */
+       ctc1    t0, fcr31
+       jr      ra
+        nop
+       .set    pop
+       END(__kvm_restore_fcsr)
 
        LONG_L  k0, VCPU_HOST_EBASE(k1)
        mtc0    k0,CP0_EBASE
 
+       /*
+        * If FPU is enabled, save FCR31 and clear it so that later ctc1's don't
+        * trigger FPE for pending exceptions.
+        */
+       .set    at
+       and     v1, v0, ST0_CU1
+       beqz    v1, 1f
+        nop
+       .set    push
+       SET_HARDFLOAT
+       cfc1    t0, fcr31
+       sw      t0, VCPU_FCR31(k1)
+       ctc1    zero,fcr31
+       .set    pop
+       .set    noat
+1:
+
        /* Now that the new EBASE has been loaded, unset BEV and KSU_USER */
        .set    at
        and     v0, v0, ~(ST0_EXL | KSU_USER | ST0_IE)
 
 
 #include <linux/errno.h>
 #include <linux/err.h>
+#include <linux/kdebug.h>
 #include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <linux/fs.h>
                }
        }
 
+       if (ret == RESUME_GUEST) {
+               /*
+                * If FPU is enabled (i.e. the guest's FPU context is live),
+                * restore FCR31.
+                *
+                * This should be before returning to the guest exception
+                * vector, as it may well cause an FP exception if there are
+                * pending exception bits unmasked. (see
+                * kvm_mips_csr_die_notify() for how that is handled).
+                */
+               if (kvm_mips_guest_has_fpu(&vcpu->arch) &&
+                   read_c0_status() & ST0_CU1)
+                       __kvm_restore_fcsr(&vcpu->arch);
+       }
+
        /* Disable HTW before returning to guest or host */
        htw_stop();
 
        return ret;
 }
 
+/* Enable FPU for guest and restore context */
+void kvm_own_fpu(struct kvm_vcpu *vcpu)
+{
+       struct mips_coproc *cop0 = vcpu->arch.cop0;
+       unsigned int sr, cfg5;
+
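+       /*
+        * Disable preemption so that a context switch cannot take the FPU away
+        * between enabling CU1 below and marking the guest context as loaded
+        * in fpu_inuse.
+        */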
+       preempt_disable();
+
+       /*
+        * Enable FPU for guest
+        * We set FR and FRE according to guest context
+        */
+       sr = kvm_read_c0_guest_status(cop0);
+       change_c0_status(ST0_CU1 | ST0_FR, sr);
+       if (cpu_has_fre) {
+               cfg5 = kvm_read_c0_guest_config5(cop0);
+               change_c0_config5(MIPS_CONF5_FRE, cfg5);
+       }
+       enable_fpu_hazard();
+
+       /* If guest FPU state not active, restore it now */
+       if (!(vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU)) {
+               __kvm_restore_fpu(&vcpu->arch);
+               vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_FPU;
+       }
+
+       preempt_enable();
+}
+
+/* Drop FPU without saving it */
+void kvm_drop_fpu(struct kvm_vcpu *vcpu)
+{
+       preempt_disable();
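+       /*
+        * The live register contents are deliberately thrown away: nothing is
+        * written back to vcpu->arch.fpu, and CU1/FR are cleared so the FPU is
+        * disabled again in root context.
+        */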
+       if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) {
+               clear_c0_status(ST0_CU1 | ST0_FR);
+               vcpu->arch.fpu_inuse &= ~KVM_MIPS_FPU_FPU;
+       }
+       preempt_enable();
+}
+
+/* Save and disable FPU */
+void kvm_lose_fpu(struct kvm_vcpu *vcpu)
+{
+       /*
+        * FPU gets disabled in root context (hardware) when it is disabled in
+        * guest context (software), but the register state in the hardware may
+        * still be in use. This is why we explicitly re-enable the hardware
+        * before saving.
+        */
+
+       preempt_disable();
+       if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) {
+               set_c0_status(ST0_CU1);
+               enable_fpu_hazard();
+
+               __kvm_save_fpu(&vcpu->arch);
+               vcpu->arch.fpu_inuse &= ~KVM_MIPS_FPU_FPU;
+
+               /* Disable FPU */
+               clear_c0_status(ST0_CU1 | ST0_FR);
+       }
+       preempt_enable();
+}
+
+/*
+ * Step over a specific ctc1 to FCSR which is used to restore guest FCSR state
+ * and may trigger a "harmless" FP exception if cause bits are set in the value
+ * being written.
+ */
+static int kvm_mips_csr_die_notify(struct notifier_block *self,
+                                  unsigned long cmd, void *ptr)
+{
+       struct die_args *args = (struct die_args *)ptr;
+       struct pt_regs *regs = args->regs;
+       unsigned long pc;
+
+       /* Only interested in FPE */
+       if (cmd != DIE_FP)
+               return NOTIFY_DONE;
+
+       /* Return immediately if guest context isn't active */
+       if (!(current->flags & PF_VCPU))
+               return NOTIFY_DONE;
+
+       /* Should never get here from user mode */
+       BUG_ON(user_mode(regs));
+
+       pc = instruction_pointer(regs);
+       switch (cmd) {
+       case DIE_FP:
+               /* match 2nd instruction in __kvm_restore_fcsr */
+               if (pc != (unsigned long)&__kvm_restore_fcsr + 4)
+                       return NOTIFY_DONE;
+               break;
+       }
+
+       /* Move PC forward a little and continue executing */
+       instruction_pointer(regs) += 4;
+
+       return NOTIFY_STOP;
+}
+
+static struct notifier_block kvm_mips_csr_die_notifier = {
+       .notifier_call = kvm_mips_csr_die_notify,
+};
+
 int __init kvm_mips_init(void)
 {
        int ret;
        if (ret)
                return ret;
 
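+       /* Step over FP exceptions raised by the ctc1 in __kvm_restore_fcsr */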
+       register_die_notifier(&kvm_mips_csr_die_notifier);
+
        /*
         * On MIPS, kernel modules are executed from "mapped space", which
         * requires TLBs. The TLB handling code is statically linked with
        kvm_mips_gfn_to_pfn = NULL;
        kvm_mips_release_pfn_clean = NULL;
        kvm_mips_is_error_pfn = NULL;
+
+       unregister_die_notifier(&kvm_mips_csr_die_notifier);
 }
 
 module_init(kvm_mips_init);
 
 
 static int kvm_trap_emul_vcpu_get_regs(struct kvm_vcpu *vcpu)
 {
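+       /* Make sure any live guest FPU state is written back before it is read */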
+       kvm_lose_fpu(vcpu);
+
        return 0;
 }