www.infradead.org Git - users/hch/dma-mapping.git/commitdiff
s390/fpu: remove anonymous union from struct fpu
author: Heiko Carstens <hca@linux.ibm.com>
Sat, 3 Feb 2024 10:45:15 +0000 (11:45 +0100)
committer: Heiko Carstens <hca@linux.ibm.com>
Fri, 16 Feb 2024 13:30:16 +0000 (14:30 +0100)
The anonymous union within struct fpu contains a floating point register
array and a vector register array. Given that the vector register array is
always present, remove the floating point register array. For configurations
without vector registers, save the floating point register contents within
their corresponding vector register locations.

This allows removing the union, and also simplifies the ptrace and perf code.

Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
arch/s390/include/asm/fpu-types.h
arch/s390/include/asm/fpu.h
arch/s390/kernel/fpu.c
arch/s390/kernel/perf_regs.c
arch/s390/kernel/ptrace.c

index f5b6fab304013a8df51f0a88f4258c165200c4a7..8e6927c23bdc4f0be4820b2d195d21fe79c095b3 100644 (file)
 #include <asm/sigcontext.h>
 
 struct fpu {
-       __u32 fpc;              /* Floating-point control */
-       union {
-               /* Floating-point register save area */
-               freg_t fprs[__NUM_FPRS];
-               /* Vector register save area */
-               __vector128 vxrs[__NUM_VXRS];
-       };
+       u32 fpc;
+       __vector128 vxrs[__NUM_VXRS] __aligned(8);
 };
 
 /* In-kernel FPU state structure */
 struct kernel_fpu {
        int         mask;
        u32         fpc;
-       union {
-               freg_t fprs[__NUM_FPRS];
-               __vector128 vxrs[__NUM_VXRS];
-       };
+       __vector128 vxrs[__NUM_VXRS] __aligned(8);
 };
 
 #define DECLARE_KERNEL_FPU_ONSTACK(name)       \
index 4300eef243f9b0d0e059b453e952bfeb9ff30449..e706af26c5d07fa84a333b36239d609f0df66f89 100644 (file)
@@ -98,44 +98,68 @@ static __always_inline void load_vx_regs(__vector128 *vxrs)
        fpu_vlm(16, 31, &vxrs[16]);
 }
 
+static __always_inline void __save_fp_regs(freg_t *fprs, unsigned int offset)
+{
+       fpu_std(0, &fprs[0 * offset]);
+       fpu_std(1, &fprs[1 * offset]);
+       fpu_std(2, &fprs[2 * offset]);
+       fpu_std(3, &fprs[3 * offset]);
+       fpu_std(4, &fprs[4 * offset]);
+       fpu_std(5, &fprs[5 * offset]);
+       fpu_std(6, &fprs[6 * offset]);
+       fpu_std(7, &fprs[7 * offset]);
+       fpu_std(8, &fprs[8 * offset]);
+       fpu_std(9, &fprs[9 * offset]);
+       fpu_std(10, &fprs[10 * offset]);
+       fpu_std(11, &fprs[11 * offset]);
+       fpu_std(12, &fprs[12 * offset]);
+       fpu_std(13, &fprs[13 * offset]);
+       fpu_std(14, &fprs[14 * offset]);
+       fpu_std(15, &fprs[15 * offset]);
+}
+
+static __always_inline void __load_fp_regs(freg_t *fprs, unsigned int offset)
+{
+       fpu_ld(0, &fprs[0 * offset]);
+       fpu_ld(1, &fprs[1 * offset]);
+       fpu_ld(2, &fprs[2 * offset]);
+       fpu_ld(3, &fprs[3 * offset]);
+       fpu_ld(4, &fprs[4 * offset]);
+       fpu_ld(5, &fprs[5 * offset]);
+       fpu_ld(6, &fprs[6 * offset]);
+       fpu_ld(7, &fprs[7 * offset]);
+       fpu_ld(8, &fprs[8 * offset]);
+       fpu_ld(9, &fprs[9 * offset]);
+       fpu_ld(10, &fprs[10 * offset]);
+       fpu_ld(11, &fprs[11 * offset]);
+       fpu_ld(12, &fprs[12 * offset]);
+       fpu_ld(13, &fprs[13 * offset]);
+       fpu_ld(14, &fprs[14 * offset]);
+       fpu_ld(15, &fprs[15 * offset]);
+}
+
 static __always_inline void save_fp_regs(freg_t *fprs)
 {
-       fpu_std(0, &fprs[0]);
-       fpu_std(1, &fprs[1]);
-       fpu_std(2, &fprs[2]);
-       fpu_std(3, &fprs[3]);
-       fpu_std(4, &fprs[4]);
-       fpu_std(5, &fprs[5]);
-       fpu_std(6, &fprs[6]);
-       fpu_std(7, &fprs[7]);
-       fpu_std(8, &fprs[8]);
-       fpu_std(9, &fprs[9]);
-       fpu_std(10, &fprs[10]);
-       fpu_std(11, &fprs[11]);
-       fpu_std(12, &fprs[12]);
-       fpu_std(13, &fprs[13]);
-       fpu_std(14, &fprs[14]);
-       fpu_std(15, &fprs[15]);
+       __save_fp_regs(fprs, sizeof(freg_t) / sizeof(freg_t));
 }
 
 static __always_inline void load_fp_regs(freg_t *fprs)
 {
-       fpu_ld(0, &fprs[0]);
-       fpu_ld(1, &fprs[1]);
-       fpu_ld(2, &fprs[2]);
-       fpu_ld(3, &fprs[3]);
-       fpu_ld(4, &fprs[4]);
-       fpu_ld(5, &fprs[5]);
-       fpu_ld(6, &fprs[6]);
-       fpu_ld(7, &fprs[7]);
-       fpu_ld(8, &fprs[8]);
-       fpu_ld(9, &fprs[9]);
-       fpu_ld(10, &fprs[10]);
-       fpu_ld(11, &fprs[11]);
-       fpu_ld(12, &fprs[12]);
-       fpu_ld(13, &fprs[13]);
-       fpu_ld(14, &fprs[14]);
-       fpu_ld(15, &fprs[15]);
+       __load_fp_regs(fprs, sizeof(freg_t) / sizeof(freg_t));
+}
+
+static __always_inline void save_fp_regs_vx(__vector128 *vxrs)
+{
+       freg_t *fprs = (freg_t *)&vxrs[0].high;
+
+       __save_fp_regs(fprs, sizeof(__vector128) / sizeof(freg_t));
+}
+
+static __always_inline void load_fp_regs_vx(__vector128 *vxrs)
+{
+       freg_t *fprs = (freg_t *)&vxrs[0].high;
+
+       __load_fp_regs(fprs, sizeof(__vector128) / sizeof(freg_t));
 }
 
 static inline void kernel_fpu_begin(struct kernel_fpu *state, int flags)
@@ -170,7 +194,7 @@ static inline void save_kernel_fpu_regs(struct thread_struct *thread)
        if (likely(cpu_has_vx()))
                save_vx_regs(state->vxrs);
        else
-               save_fp_regs(state->fprs);
+               save_fp_regs_vx(state->vxrs);
 }
 
 static inline void restore_kernel_fpu_regs(struct thread_struct *thread)
@@ -183,7 +207,7 @@ static inline void restore_kernel_fpu_regs(struct thread_struct *thread)
        if (likely(cpu_has_vx()))
                load_vx_regs(state->vxrs);
        else
-               load_fp_regs(state->fprs);
+               load_fp_regs_vx(state->vxrs);
 }
 
 static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs)
@@ -206,19 +230,13 @@ static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
 {
        fpregs->pad = 0;
        fpregs->fpc = fpu->fpc;
-       if (cpu_has_vx())
-               convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs);
-       else
-               memcpy((freg_t *)&fpregs->fprs, fpu->fprs, sizeof(fpregs->fprs));
+       convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs);
 }
 
 static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu)
 {
        fpu->fpc = fpregs->fpc;
-       if (cpu_has_vx())
-               convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs);
-       else
-               memcpy(fpu->fprs, (freg_t *)&fpregs->fprs, sizeof(fpregs->fprs));
+       convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs);
 }
 
 #endif /* _ASM_S390_FPU_H */
index a0ef3fc5d90f3d8c68278db9f4db9184834994c8..733e188951b78ed529becee21f47e80d38e3aae5 100644 (file)
@@ -24,7 +24,7 @@ void __kernel_fpu_begin(struct kernel_fpu *state, int flags)
                fpu_stfpc(&state->fpc);
        if (!cpu_has_vx()) {
                if (flags & KERNEL_VXR_LOW)
-                       save_fp_regs(state->fprs);
+                       save_fp_regs_vx(state->vxrs);
                return;
        }
        mask = flags & KERNEL_VXR;
@@ -73,7 +73,7 @@ void __kernel_fpu_end(struct kernel_fpu *state, int flags)
                fpu_lfpc(&state->fpc);
        if (!cpu_has_vx()) {
                if (flags & KERNEL_VXR_LOW)
-                       load_fp_regs(state->fprs);
+                       load_fp_regs_vx(state->vxrs);
                return;
        }
        mask = flags & KERNEL_VXR;
@@ -115,7 +115,7 @@ void __load_user_fpu_regs(void)
        if (likely(cpu_has_vx()))
                load_vx_regs(state->vxrs);
        else
-               load_fp_regs(state->fprs);
+               load_fp_regs_vx(state->vxrs);
        clear_thread_flag(TIF_FPU);
 }
 
@@ -143,7 +143,7 @@ void save_user_fpu_regs(void)
        if (likely(cpu_has_vx()))
                save_vx_regs(state->vxrs);
        else
-               save_fp_regs(state->fprs);
+               save_fp_regs_vx(state->vxrs);
        set_thread_flag(TIF_FPU);
 out:
        local_irq_restore(flags);
index 511349b8bc5cd65cba50540ade0b4cfff98dc664..a6b058ee4a36c0f097958723f6e34bc09623ca6e 100644 (file)
@@ -19,10 +19,7 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
                        return 0;
 
                idx -= PERF_REG_S390_FP0;
-               if (cpu_has_vx())
-                       fp = *(freg_t *)(current->thread.ufpu.vxrs + idx);
-               else
-                       fp = current->thread.ufpu.fprs[idx];
+               fp = *(freg_t *)(current->thread.ufpu.vxrs + idx);
                return fp.ui;
        }
 
index f1ca7907317316eda03e7e1378c579a47bcf9d71..1cfed8b710b8c81f41350b1167a17886be30584a 100644 (file)
@@ -252,17 +252,10 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr)
 
        } else if (addr < offsetof(struct user, regs.fp_regs) + sizeof(s390_fp_regs)) {
                /*
-                * floating point regs. are either in child->thread.ufpu
-                * or the child->thread.ufpu.vxrs array
+                * floating point regs. are in the child->thread.ufpu.vxrs array
                 */
                offset = addr - offsetof(struct user, regs.fp_regs.fprs);
-               if (cpu_has_vx())
-                       tmp = *(addr_t *)
-                              ((addr_t)child->thread.ufpu.vxrs + 2 * offset);
-               else
-                       tmp = *(addr_t *)
-                              ((addr_t)child->thread.ufpu.fprs + offset);
-
+               tmp = *(addr_t *)((addr_t)child->thread.ufpu.vxrs + 2 * offset);
        } else if (addr < offsetof(struct user, regs.per_info) + sizeof(per_struct)) {
                /*
                 * Handle access to the per_info structure.
@@ -400,17 +393,10 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
 
        } else if (addr < offsetof(struct user, regs.fp_regs) + sizeof(s390_fp_regs)) {
                /*
-                * floating point regs. are either in child->thread.ufpu
-                * or the child->thread.ufpu.vxrs array
+                * floating point regs. are in the child->thread.ufpu.vxrs array
                 */
                offset = addr - offsetof(struct user, regs.fp_regs.fprs);
-               if (cpu_has_vx())
-                       *(addr_t *)((addr_t)
-                               child->thread.ufpu.vxrs + 2 * offset) = data;
-               else
-                       *(addr_t *)((addr_t)
-                               child->thread.ufpu.fprs + offset) = data;
-
+               *(addr_t *)((addr_t)child->thread.ufpu.vxrs + 2 * offset) = data;
        } else if (addr < offsetof(struct user, regs.per_info) + sizeof(per_struct)) {
                /*
                 * Handle access to the per_info structure.
@@ -627,17 +613,10 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr)
 
        } else if (addr < offsetof(struct compat_user, regs.fp_regs) + sizeof(s390_fp_regs)) {
                /*
-                * floating point regs. are either in child->thread.ufpu
-                * or the child->thread.ufpu.vxrs array
+                * floating point regs. are in the child->thread.ufpu.vxrs array
                 */
                offset = addr - offsetof(struct compat_user, regs.fp_regs.fprs);
-               if (cpu_has_vx())
-                       tmp = *(__u32 *)
-                              ((addr_t)child->thread.ufpu.vxrs + 2 * offset);
-               else
-                       tmp = *(__u32 *)
-                              ((addr_t)child->thread.ufpu.fprs + offset);
-
+               tmp = *(__u32 *)((addr_t)child->thread.ufpu.vxrs + 2 * offset);
        } else if (addr < offsetof(struct compat_user, regs.per_info) + sizeof(struct compat_per_struct_kernel)) {
                /*
                 * Handle access to the per_info structure.
@@ -753,17 +732,10 @@ static int __poke_user_compat(struct task_struct *child,
 
        } else if (addr < offsetof(struct compat_user, regs.fp_regs) + sizeof(s390_fp_regs)) {
                /*
-                * floating point regs. are either in child->thread.ufpu
-                * or the child->thread.ufpu.vxrs array
+                * floating point regs. are in the child->thread.ufpu.vxrs array
                 */
                offset = addr - offsetof(struct compat_user, regs.fp_regs.fprs);
-               if (cpu_has_vx())
-                       *(__u32 *)((addr_t)
-                               child->thread.ufpu.vxrs + 2 * offset) = tmp;
-               else
-                       *(__u32 *)((addr_t)
-                               child->thread.ufpu.fprs + offset) = tmp;
-
+               *(__u32 *)((addr_t)child->thread.ufpu.vxrs + 2 * offset) = tmp;
        } else if (addr < offsetof(struct compat_user, regs.per_info) + sizeof(struct compat_per_struct_kernel)) {
                /*
                 * Handle access to the per_info structure.
@@ -912,12 +884,7 @@ static int s390_fpregs_set(struct task_struct *target,
 
        if (target == current)
                save_user_fpu_regs();
-
-       if (cpu_has_vx())
-               convert_vx_to_fp(fprs, target->thread.ufpu.vxrs);
-       else
-               memcpy(&fprs, target->thread.ufpu.fprs, sizeof(fprs));
-
+       convert_vx_to_fp(fprs, target->thread.ufpu.vxrs);
        if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) {
                u32 ufpc[2] = { target->thread.ufpu.fpc, 0 };
                rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ufpc,
@@ -934,12 +901,7 @@ static int s390_fpregs_set(struct task_struct *target,
                                        fprs, offsetof(s390_fp_regs, fprs), -1);
        if (rc)
                return rc;
-
-       if (cpu_has_vx())
-               convert_fp_to_vx(target->thread.ufpu.vxrs, fprs);
-       else
-               memcpy(target->thread.ufpu.fprs, &fprs, sizeof(fprs));
-
+       convert_fp_to_vx(target->thread.ufpu.vxrs, fprs);
        return rc;
 }