return (char *)thread->sve_state + sve_ffr_offset(thread->sve_vl);
 }
 
-extern void sve_save_state(void *state, u32 *pfpsr);
+extern void sve_save_state(void *state, u32 *pfpsr, int save_ffr);
 extern void sve_load_state(void const *state, u32 const *pfpsr,
-                          unsigned long vq_minus_1);
-extern void sve_flush_live(unsigned long vq_minus_1);
+                          int restore_ffr, unsigned long vq_minus_1);
+extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1);
 extern unsigned int sve_get_vl(void);
 extern void sve_set_vq(unsigned long vq_minus_1);
 
 
 .macro sve_flush_z
  _for n, 0, 31, _sve_flush_z   \n
 .endm
-.macro sve_flush_p_ffr
+.macro sve_flush_p
  _for n, 0, 15, _sve_pfalse    \n
+.endm
+.macro sve_flush_ffr
                _sve_wrffr      0
 .endm
 
-.macro sve_save nxbase, xpfpsr, nxtmp
+.macro sve_save nxbase, xpfpsr, save_ffr, nxtmp
  _for n, 0, 31,        _sve_str_v      \n, \nxbase, \n - 34
  _for n, 0, 15,        _sve_str_p      \n, \nxbase, \n - 16
+               cbz             \save_ffr, 921f
                _sve_rdffr      0
                _sve_str_p      0, \nxbase
                _sve_ldr_p      0, \nxbase, -16
-
+               b               922f
+921:
+               str             xzr, [x\nxbase]         // Zero out FFR
+922:
                mrs             x\nxtmp, fpsr
                str             w\nxtmp, [\xpfpsr]
                mrs             x\nxtmp, fpcr
                str             w\nxtmp, [\xpfpsr, #4]
 .endm
 
-.macro __sve_load nxbase, xpfpsr, nxtmp
+.macro __sve_load nxbase, xpfpsr, restore_ffr, nxtmp
  _for n, 0, 31,        _sve_ldr_v      \n, \nxbase, \n - 34
+               cbz             \restore_ffr, 921f
                _sve_ldr_p      0, \nxbase
                _sve_wrffr      0
+921:
  _for n, 0, 15,        _sve_ldr_p      \n, \nxbase, \n - 16
 
                ldr             w\nxtmp, [\xpfpsr]
                msr             fpcr, x\nxtmp
 .endm
 
-.macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp, xtmp2
+.macro sve_load nxbase, xpfpsr, restore_ffr, xvqminus1, nxtmp, xtmp2
                sve_load_vq     \xvqminus1, x\nxtmp, \xtmp2
-               __sve_load      \nxbase, \xpfpsr, \nxtmp
+               __sve_load      \nxbase, \xpfpsr, \restore_ffr, \nxtmp
 .endm
 
  *
  * x0 - pointer to buffer for state
  * x1 - pointer to storage for FPSR
+ * x2 - Save FFR if non-zero
  */
 SYM_FUNC_START(sve_save_state)
-       sve_save 0, x1, 2
+       sve_save 0, x1, x2, 3
        ret
 SYM_FUNC_END(sve_save_state)
 
  *
  * x0 - pointer to buffer for state
  * x1 - pointer to storage for FPSR
- * x2 - VQ-1
+ * x2 - Restore FFR if non-zero
+ * x3 - VQ-1
  */
 SYM_FUNC_START(sve_load_state)
-       sve_load 0, x1, x2, 3, x4
+       sve_load 0, x1, x2, x3, 4, x5
        ret
 SYM_FUNC_END(sve_load_state)
 
  * VQ must already be configured by caller, any further updates of VQ
  * will need to ensure that the register state remains valid.
  *
- * x0 = VQ - 1
+ * x0 = include FFR?
+ * x1 = VQ - 1
  */
 SYM_FUNC_START(sve_flush_live)
-       cbz             x0, 1f  // A VQ-1 of 0 is 128 bits so no extra Z state
+       cbz             x1, 1f  // A VQ-1 of 0 is 128 bits so no extra Z state
        sve_flush_z
-1:     sve_flush_p_ffr
-       ret
+1:     sve_flush_p
+       tbz             x0, #0, 2f
+       sve_flush_ffr
+2:     ret
 SYM_FUNC_END(sve_flush_live)
 
 #endif /* CONFIG_ARM64_SVE */
 
 
        if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE))
                sve_load_state(sve_pffr(&current->thread),
-                              &current->thread.uw.fpsimd_state.fpsr,
+                              &current->thread.uw.fpsimd_state.fpsr, true,
                               sve_vq_from_vl(current->thread.sve_vl) - 1);
        else
                fpsimd_load_state(&current->thread.uw.fpsimd_state);
 
                sve_save_state((char *)last->sve_state +
                                        sve_ffr_offset(last->sve_vl),
-                              &last->st->fpsr);
+                              &last->st->fpsr, true);
        } else {
                fpsimd_save_state(last->st);
        }
                unsigned long vq_minus_one =
                        sve_vq_from_vl(current->thread.sve_vl) - 1;
                sve_set_vq(vq_minus_one);
-               sve_flush_live(vq_minus_one);
+               sve_flush_live(true, vq_minus_one);
                fpsimd_bind_task_to_cpu();
        } else {
                fpsimd_to_sve(current);
                        __this_cpu_write(efi_sve_state_used, true);
 
                        sve_save_state(sve_state + sve_ffr_offset(sve_max_vl),
-                                      &this_cpu_ptr(&efi_fpsimd_state)->fpsr);
+                                      &this_cpu_ptr(&efi_fpsimd_state)->fpsr,
+                                      true);
                } else {
                        fpsimd_save_state(this_cpu_ptr(&efi_fpsimd_state));
                }
 
                        sve_load_state(sve_state + sve_ffr_offset(sve_max_vl),
                                       &this_cpu_ptr(&efi_fpsimd_state)->fpsr,
+                                      true,
                                       sve_vq_from_vl(sve_get_vl()) - 1);
 
                        __this_cpu_write(efi_sve_state_used, false);
 
 SYM_FUNC_END(__fpsimd_restore_state)
 
 SYM_FUNC_START(__sve_restore_state)
-       __sve_load 0, x1, 2
+       mov     x2, #1
+       __sve_load 0, x1, x2, 3
        ret
 SYM_FUNC_END(__sve_restore_state)
 
 SYM_FUNC_START(__sve_save_state)
-       sve_save 0, x1, 2
+       mov     x2, #1
+       sve_save 0, x1, x2, 3
        ret
 SYM_FUNC_END(__sve_save_state)