Add a linker assertion and compute the 0xcc padding dynamically so that
__x86_return_thunk is always cacheline-aligned. Leave the SYM_START()
macro in as the untraining doesn't need ENDBR annotations anyway.
Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Link: https://lore.kernel.org/r/20230515140726.28689-1-bp@alien8.de
 
            "fixed_percpu_data is not at start of per-cpu area");
 #endif
 
+#ifdef CONFIG_RETHUNK
+. = ASSERT((__x86_return_thunk & 0x3f) == 0, "__x86_return_thunk not cacheline-aligned");
+#endif
+
 #endif /* CONFIG_X86_64 */
 
  *    from re-poisioning the BTB prediction.
  */
        .align 64
-       .skip 63, 0xcc
+       .skip 64 - (__x86_return_thunk - zen_untrain_ret), 0xcc
 SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
        ANNOTATE_NOENDBR
        /*