         * Forget coprocessor state..
         */
        preempt_disable();
-       tsk->fpu_counter = 0;
+       tsk->thread.fpu_counter = 0;
        __drop_fpu(tsk);
        clear_used_math();
        preempt_enable();
         * or if the past 5 consecutive context-switches used math.
         */
        fpu.preload = tsk_used_math(new) && (use_eager_fpu() ||
-                                            new->fpu_counter > 5);
+                                            new->thread.fpu_counter > 5);
        if (__thread_has_fpu(old)) {
                if (!__save_init_fpu(old))
                        cpu = ~0;
 
                /* Don't change CR0.TS if we just switch! */
                if (fpu.preload) {
-                       new->fpu_counter++;
+                       new->thread.fpu_counter++;
                        __thread_set_has_fpu(new);
                        prefetch(new->thread.fpu.state);
                } else if (!use_eager_fpu())
                        stts();
        } else {
-               old->fpu_counter = 0;
+               old->thread.fpu_counter = 0;
                old->thread.fpu.last_cpu = ~0;
                if (fpu.preload) {
-                       new->fpu_counter++;
+                       new->thread.fpu_counter++;
                        if (!use_eager_fpu() && fpu_lazy_restore(new, cpu))
                                fpu.preload = 0;
                        else
 
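The switch_fpu_prepare() hunk above is where the counter is consumed:
the FPU is preloaded either when the CPU always restores eagerly
(use_eager_fpu()) or when the incoming task has used math on more than
5 consecutive context switches. A minimal compilable sketch of that
decision, with the kernel predicates reduced to plain parameters
(illustrative only, not kernel code):

#include <stdbool.h>

/* Model of the fpu.preload expression; used_math and eager_fpu stand
 * in for tsk_used_math() and use_eager_fpu(). */
static bool should_preload(bool used_math, bool eager_fpu,
                           unsigned char fpu_counter)
{
        return used_math && (eager_fpu || fpu_counter > 5);
}
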
        unsigned long           iopl;
        /* Max allowed port in the bitmap, in bytes: */
        unsigned                io_bitmap_max;
+       /*
+        * fpu_counter contains the number of consecutive context switches
+        * during which the FPU is used. If this is over a threshold, the
+        * lazy FPU saving becomes unlazy to save the trap. This is an
+        * unsigned char so that after 256 times the counter wraps and the
+        * behavior turns lazy again; this is to deal with bursty apps that
+        * only use the FPU for a short time.
+        */
+       unsigned char fpu_counter;
 };
 
 /*
 
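The comment added to thread_struct documents the heuristic the counter
drives: it is deliberately an unsigned char, so a long-running FPU
streak eventually wraps and the task drops back to the lazy path. A
small stand-alone program demonstrating the wrap (names are
illustrative):

#include <stdio.h>

int main(void)
{
        unsigned char fpu_counter = 255;        /* same width as the field */

        fpu_counter++;  /* the 256th consecutive increment wraps to 0... */
        /* ...so the "fpu_counter > 5" preload test fails until the task
         * rebuilds a streak of more than 5 FPU-using switches. */
        printf("counter after wrap: %d\n", fpu_counter);
        return 0;
}
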
                __save_init_fpu(tsk);
                __thread_fpu_end(tsk);
        } else
-               tsk->fpu_counter = 0;
+               tsk->thread.fpu_counter = 0;
        preempt_enable();
 }
 EXPORT_SYMBOL(unlazy_fpu);
 
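unlazy_fpu() above shows the other half of the bookkeeping: a task
found without live FPU state has its count zeroed, which is what makes
the counter mean "consecutive" FPU-using switches rather than a
lifetime total. A hedged sketch of that rule (illustrative helper, not
a kernel function):

static unsigned char fpu_counter;

static void account_switch(int used_fpu)
{
        if (used_fpu)
                fpu_counter++;          /* extend the streak */
        else
                fpu_counter = 0;        /* streak broken: fully lazy again */
}
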
                childregs->orig_ax = -1;
                childregs->cs = __KERNEL_CS | get_kernel_rpl();
                childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
-               p->fpu_counter = 0;
+               p->thread.fpu_counter = 0;
                p->thread.io_bitmap_ptr = NULL;
                memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
                return 0;
        p->thread.ip = (unsigned long) ret_from_fork;
        task_user_gs(p) = get_user_gs(current_pt_regs());
 
-       p->fpu_counter = 0;
+       p->thread.fpu_counter = 0;
        p->thread.io_bitmap_ptr = NULL;
        tsk = current;
        err = -ENOMEM;
 
        p->thread.sp = (unsigned long) childregs;
        p->thread.usersp = me->thread.usersp;
        set_tsk_thread_flag(p, TIF_FORK);
-       p->fpu_counter = 0;
+       p->thread.fpu_counter = 0;
        p->thread.io_bitmap_ptr = NULL;
 
        savesegment(gs, p->thread.gsindex);
 
                return;
        }
 
-       tsk->fpu_counter++;
+       tsk->thread.fpu_counter++;
 }
 EXPORT_SYMBOL_GPL(math_state_restore);
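
math_state_restore() is the producer side of the heuristic: it runs
from the device-not-available (#NM) trap when a lazily switched task
first touches the FPU, and each such restore lengthens the streak that
switch_fpu_prepare() tests. A runnable toy model tying the pieces of
this patch together (the names and the exact increment points are
simplifications, not the kernel control flow):

#include <stdio.h>

static unsigned char fpu_counter;

/* One simulated context switch: test the streak the way
 * switch_fpu_prepare() does, then bump or reset it. */
static int context_switch(int task_uses_fpu)
{
        int preload = task_uses_fpu && fpu_counter > 5;

        if (task_uses_fpu)
                fpu_counter++;
        else
                fpu_counter = 0;
        return preload;
}

int main(void)
{
        int i;

        /* A bursty task (3 FPU-using switches, then none) never crosses
         * the threshold and stays on the cheap lazy path. */
        for (i = 0; i < 3; i++)
                context_switch(1);
        printf("bursty task preloads: %d\n", context_switch(1));
        context_switch(0);      /* burst over: streak resets */

        /* A steady FPU user starts preloading from its 7th switch on. */
        for (i = 0; i < 6; i++)
                context_switch(1);
        printf("steady task preloads: %d\n", context_switch(1));
        return 0;
}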