s390/fpu: limit save and restore to used registers
author Heiko Carstens <hca@linux.ibm.com>
Sat, 3 Feb 2024 10:45:18 +0000 (11:45 +0100)
committer Heiko Carstens <hca@linux.ibm.com>
Fri, 16 Feb 2024 13:30:16 +0000 (14:30 +0100)
The first invocation of kernel_fpu_begin() after switching from user to
kernel context will save all vector registers, even if only a subset of
the vector registers is used within the kernel fpu context. Given that
saving and restoring all vector registers is quite expensive, change the
current approach in several ways:

- Instead of saving and restoring all user registers, limit this to those
  registers which are actually used within a kernel fpu context.

- On context switch, save all remaining user fpu registers so they can be
  restored when the task is rescheduled.

- Saving user registers within kernel_fpu_begin() is done without disabling
  and enabling interrupts, which also slightly reduces runtime. In the worst
  case (e.g. an interrupt context uses the same registers) this may lead to
  registers being saved several times; however, the assumption is that this
  will not happen frequently, so the new method is faster in nearly all
  cases.

- save_user_fpu_regs() can still be called from all contexts and saves all
  (or all remaining) user registers to a task's ufpu user fpu save area.

Overall this reduces the time required to save and restore the user fpu
context in nearly all cases.
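
As an illustration, here is a minimal sketch of an in-kernel fpu user under
the new scheme (not part of this patch; the on-stack declaration macro and
the flag selection shown are assumptions about a typical caller): only the
register sets passed to kernel_fpu_begin() need to be saved lazily, instead
of the complete user vector register set.

    /*
     * Illustrative kernel fpu section which only uses V0-V15 and the
     * floating point control register. With this change only these
     * user registers are saved on the first kernel_fpu_begin() after
     * entering the kernel; V16-V31 stay untouched until a context
     * switch or until another kernel fpu section requests them.
     */
    DECLARE_KERNEL_FPU_ONSTACK16(vxstate);

    kernel_fpu_begin(&vxstate, KERNEL_FPC | KERNEL_VXR_LOW);
    /* ... use V0-V15 ... */
    kernel_fpu_end(&vxstate, KERNEL_FPC | KERNEL_VXR_LOW);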

Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
arch/s390/include/asm/entry-common.h
arch/s390/include/asm/fpu.h
arch/s390/include/asm/processor.h
arch/s390/kernel/fpu.c

diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h
index 659e07d7f31a8df23ba01f44145ae40b1834e533..7f5004065e8aa2b1598f37f501e0c2537d6529a2 100644
@@ -41,8 +41,7 @@ static __always_inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
 
 static __always_inline void arch_exit_to_user_mode(void)
 {
-       if (test_thread_flag(TIF_FPU))
-               __load_user_fpu_regs();
+       load_user_fpu_regs();
 
        if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
                debug_user_asce(1);
diff --git a/arch/s390/include/asm/fpu.h b/arch/s390/include/asm/fpu.h
index c1b3920092a1c96127b06ce3a4448f2709436c5f..c84cb33913e2946f55cea4228fe5a241ad35e86d 100644
@@ -58,10 +58,6 @@ static inline bool cpu_has_vx(void)
        return likely(test_facility(129));
 }
 
-void save_user_fpu_regs(void);
-void load_user_fpu_regs(void);
-void __load_user_fpu_regs(void);
-
 enum {
        KERNEL_FPC_BIT = 0,
        KERNEL_VXR_V0V7_BIT,
@@ -83,6 +79,8 @@ enum {
 #define KERNEL_VXR             (KERNEL_VXR_LOW    | KERNEL_VXR_HIGH)
 #define KERNEL_FPR             (KERNEL_FPC        | KERNEL_VXR_LOW)
 
+void load_fpu_state(struct fpu *state, int flags);
+void save_fpu_state(struct fpu *state, int flags);
 void __kernel_fpu_begin(struct kernel_fpu *state, int flags);
 void __kernel_fpu_end(struct kernel_fpu *state, int flags);
 
@@ -162,26 +160,57 @@ static __always_inline void load_fp_regs_vx(__vector128 *vxrs)
        __load_fp_regs(fprs, sizeof(__vector128) / sizeof(freg_t));
 }
 
-static inline void _kernel_fpu_begin(struct kernel_fpu *state, int flags)
+static inline void load_user_fpu_regs(void)
+{
+       struct thread_struct *thread = &current->thread;
+
+       if (!thread->ufpu_flags)
+               return;
+       load_fpu_state(&thread->ufpu, thread->ufpu_flags);
+       thread->ufpu_flags = 0;
+}
+
+static __always_inline void __save_user_fpu_regs(struct thread_struct *thread, int flags)
 {
-       state->hdr.mask = READ_ONCE(current->thread.kfpu_flags);
-       if (!test_thread_flag(TIF_FPU)) {
-               /* Save user space FPU state and register contents */
-               save_user_fpu_regs();
-       } else if (state->hdr.mask & flags) {
-               /* Save FPU/vector register in-use by the kernel */
+       save_fpu_state(&thread->ufpu, flags);
+       __atomic_or(flags, &thread->ufpu_flags);
+}
+
+static inline void save_user_fpu_regs(void)
+{
+       struct thread_struct *thread = &current->thread;
+       int mask, flags;
+
+       mask = __atomic_or(KERNEL_FPC | KERNEL_VXR, &thread->kfpu_flags);
+       flags = ~READ_ONCE(thread->ufpu_flags) & (KERNEL_FPC | KERNEL_VXR);
+       if (flags)
+               __save_user_fpu_regs(thread, flags);
+       barrier();
+       WRITE_ONCE(thread->kfpu_flags, mask);
+}
+
+static __always_inline void _kernel_fpu_begin(struct kernel_fpu *state, int flags)
+{
+       struct thread_struct *thread = &current->thread;
+       int mask, uflags;
+
+       mask = __atomic_or(flags, &thread->kfpu_flags);
+       state->hdr.mask = mask;
+       uflags = READ_ONCE(thread->ufpu_flags);
+       if ((uflags & flags) != flags)
+               __save_user_fpu_regs(thread, ~uflags & flags);
+       if (mask & flags)
                __kernel_fpu_begin(state, flags);
-       }
-       __atomic_or(flags, &current->thread.kfpu_flags);
 }
 
-static inline void _kernel_fpu_end(struct kernel_fpu *state, int flags)
+static __always_inline void _kernel_fpu_end(struct kernel_fpu *state, int flags)
 {
-       WRITE_ONCE(current->thread.kfpu_flags, state->hdr.mask);
-       if (state->hdr.mask & flags) {
-               /* Restore FPU/vector register in-use by the kernel */
+       int mask = state->hdr.mask;
+
+       if (mask & flags)
                __kernel_fpu_end(state, flags);
-       }
+       barrier();
+       WRITE_ONCE(current->thread.kfpu_flags, mask);
 }
 
 void __kernel_fpu_invalid_size(void);
@@ -222,28 +251,16 @@ static __always_inline void kernel_fpu_check_size(int flags, unsigned int size)
 
 static inline void save_kernel_fpu_regs(struct thread_struct *thread)
 {
-       struct fpu *state = &thread->kfpu;
-
        if (!thread->kfpu_flags)
                return;
-       fpu_stfpc(&state->fpc);
-       if (likely(cpu_has_vx()))
-               save_vx_regs(state->vxrs);
-       else
-               save_fp_regs_vx(state->vxrs);
+       save_fpu_state(&thread->kfpu, thread->kfpu_flags);
 }
 
 static inline void restore_kernel_fpu_regs(struct thread_struct *thread)
 {
-       struct fpu *state = &thread->kfpu;
-
        if (!thread->kfpu_flags)
                return;
-       fpu_lfpc(&state->fpc);
-       if (likely(cpu_has_vx()))
-               load_vx_regs(state->vxrs);
-       else
-               load_fp_regs_vx(state->vxrs);
+       load_fpu_state(&thread->kfpu, thread->kfpu_flags);
 }
 
 static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs)
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index ecce58abf3dba9b8a6d3385e6b6e0dc7f8fbf70f..7cf00cf8fb0bc6197721987037c5f53ded7870bf 100644
@@ -166,6 +166,7 @@ struct thread_struct {
        unsigned int gmap_write_flag;           /* gmap fault write indication */
        unsigned int gmap_int_code;             /* int code of last gmap fault */
        unsigned int gmap_pfault;               /* signal of a pending guest pfault */
+       int ufpu_flags;                         /* user fpu flags */
        int kfpu_flags;                         /* kernel fpu flags */
 
        /* Per-thread information related to debugging */
diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c
index 62e9befe7890a2806d0515ad3660eddde6d7946c..fa90bbdc5ef9467c70dcba01f11e2d2ed273e115 100644
@@ -107,45 +107,87 @@ void __kernel_fpu_end(struct kernel_fpu *state, int flags)
 }
 EXPORT_SYMBOL(__kernel_fpu_end);
 
-void __load_user_fpu_regs(void)
+void load_fpu_state(struct fpu *state, int flags)
 {
-       struct fpu *state = &current->thread.ufpu;
-
-       fpu_lfpc_safe(&state->fpc);
-       if (likely(cpu_has_vx()))
-               load_vx_regs(state->vxrs);
-       else
-               load_fp_regs_vx(state->vxrs);
-       clear_thread_flag(TIF_FPU);
-}
+       __vector128 *vxrs = &state->vxrs[0];
+       int mask;
 
-void load_user_fpu_regs(void)
-{
-       raw_local_irq_disable();
-       __load_user_fpu_regs();
-       raw_local_irq_enable();
+       if (flags & KERNEL_FPC)
+               fpu_lfpc(&state->fpc);
+       if (!cpu_has_vx()) {
+               if (flags & KERNEL_VXR_V0V7)
+                       load_fp_regs_vx(state->vxrs);
+               return;
+       }
+       mask = flags & KERNEL_VXR;
+       if (mask == KERNEL_VXR) {
+               fpu_vlm(0, 15, &vxrs[0]);
+               fpu_vlm(16, 31, &vxrs[16]);
+               return;
+       }
+       if (mask == KERNEL_VXR_MID) {
+               fpu_vlm(8, 23, &vxrs[8]);
+               return;
+       }
+       mask = flags & KERNEL_VXR_LOW;
+       if (mask) {
+               if (mask == KERNEL_VXR_LOW)
+                       fpu_vlm(0, 15, &vxrs[0]);
+               else if (mask == KERNEL_VXR_V0V7)
+                       fpu_vlm(0, 7, &vxrs[0]);
+               else
+                       fpu_vlm(8, 15, &vxrs[8]);
+       }
+       mask = flags & KERNEL_VXR_HIGH;
+       if (mask) {
+               if (mask == KERNEL_VXR_HIGH)
+                       fpu_vlm(16, 31, &vxrs[16]);
+               else if (mask == KERNEL_VXR_V16V23)
+                       fpu_vlm(16, 23, &vxrs[16]);
+               else
+                       fpu_vlm(24, 31, &vxrs[24]);
+       }
 }
-EXPORT_SYMBOL(load_user_fpu_regs);
 
-void save_user_fpu_regs(void)
+void save_fpu_state(struct fpu *state, int flags)
 {
-       unsigned long flags;
-       struct fpu *state;
-
-       local_irq_save(flags);
-
-       if (test_thread_flag(TIF_FPU))
-               goto out;
-
-       state = &current->thread.ufpu;
+       __vector128 *vxrs = &state->vxrs[0];
+       int mask;
 
-       fpu_stfpc(&state->fpc);
-       if (likely(cpu_has_vx()))
-               save_vx_regs(state->vxrs);
-       else
-               save_fp_regs_vx(state->vxrs);
-       set_thread_flag(TIF_FPU);
-out:
-       local_irq_restore(flags);
+       if (flags & KERNEL_FPC)
+               fpu_stfpc(&state->fpc);
+       if (!cpu_has_vx()) {
+               if (flags & KERNEL_VXR_LOW)
+                       save_fp_regs_vx(state->vxrs);
+               return;
+       }
+       mask = flags & KERNEL_VXR;
+       if (mask == KERNEL_VXR) {
+               fpu_vstm(0, 15, &vxrs[0]);
+               fpu_vstm(16, 31, &vxrs[16]);
+               return;
+       }
+       if (mask == KERNEL_VXR_MID) {
+               fpu_vstm(8, 23, &vxrs[8]);
+               return;
+       }
+       mask = flags & KERNEL_VXR_LOW;
+       if (mask) {
+               if (mask == KERNEL_VXR_LOW)
+                       fpu_vstm(0, 15, &vxrs[0]);
+               else if (mask == KERNEL_VXR_V0V7)
+                       fpu_vstm(0, 7, &vxrs[0]);
+               else
+                       fpu_vstm(8, 15, &vxrs[8]);
+       }
+       mask = flags & KERNEL_VXR_HIGH;
+       if (mask) {
+               if (mask == KERNEL_VXR_HIGH)
+                       fpu_vstm(16, 31, &vxrs[16]);
+               else if (mask == KERNEL_VXR_V16V23)
+                       fpu_vstm(16, 23, &vxrs[16]);
+               else
+                       fpu_vstm(24, 31, &vxrs[24]);
+       }
 }
-EXPORT_SYMBOL(save_user_fpu_regs);
+EXPORT_SYMBOL(save_fpu_state);