extern void iwmmxt_task_release(struct thread_info *);
 extern void iwmmxt_task_switch(struct thread_info *);
 
-extern void vfp_sync_state(struct thread_info *thread);
+extern void vfp_sync_hwstate(struct thread_info *);    /* called by ptrace before it accesses thread->vfpstate; empty on SMP */
+extern void vfp_flush_hwstate(struct thread_info *);   /* called by ptrace after it has written new values into thread->vfpstate */
 
 #endif
 
 
        union vfp_state *vfp = &thread->vfpstate;
        struct user_vfp __user *ufp = data;
 
-       vfp_sync_state(thread);
+       vfp_sync_hwstate(thread);
 
        /* copy the floating point registers */
        if (copy_to_user(&ufp->fpregs, &vfp->hard.fpregs,
        union vfp_state *vfp = &thread->vfpstate;
        struct user_vfp __user *ufp = data;
 
-       vfp_sync_state(thread);
+       vfp_sync_hwstate(thread);
 
        /* copy the floating point registers */
        if (copy_from_user(&vfp->hard.fpregs, &ufp->fpregs,
        if (get_user(vfp->hard.fpscr, &ufp->fpscr))
                return -EFAULT;
 
+       vfp_flush_hwstate(thread);
+
        return 0;
 }
 #endif
 
  * saved one. This function is used by the ptrace mechanism.
  */
 #ifdef CONFIG_SMP
-void vfp_sync_state(struct thread_info *thread)
+void vfp_sync_hwstate(struct thread_info *thread)
+{      /*
+        * Deliberately empty in the CONFIG_SMP build: no work is done
+        * here.  Presumably this is because the VFP state is already
+        * saved automatically on SMP (see the comment in
+        * vfp_flush_hwstate()) -- TODO confirm.
+        */
+}
+
+void vfp_flush_hwstate(struct thread_info *thread)
 {
        /*
         * On SMP systems, the VFP state is automatically saved at every
        thread->vfpstate.hard.cpu = NR_CPUS;
 }
 #else
-void vfp_sync_state(struct thread_info *thread)
+void vfp_sync_hwstate(struct thread_info *thread)
 {
        unsigned int cpu = get_cpu();
 
                 */
                fmxr(FPEXC, fpexc | FPEXC_EN);
                vfp_save_state(&thread->vfpstate, fpexc | FPEXC_EN);
+               fmxr(FPEXC, fpexc);
+       }
+
+       put_cpu();
+}
+
+void vfp_flush_hwstate(struct thread_info *thread)
+{
+       unsigned int cpu = get_cpu();
+
+       /*
+        * If the thread we're interested in is the current owner of the
+        * hardware VFP state, then disable the VFP (FPEXC_EN is cleared).
+        */
+       if (last_VFP_context[cpu] == &thread->vfpstate) {
+               u32 fpexc = fmrx(FPEXC);
+
                fmxr(FPEXC, fpexc & ~FPEXC_EN);
 
                /*