Starting with version 2.24.51.20140728 MIPS binutils complain loudly
about mixing soft-float and hard-float object files, leading to this
build failure since GCC is invoked with "-msoft-float" on MIPS:
{standard input}: Warning: .gnu_attribute 4,3 requires `softfloat'
  LD      arch/mips/alchemy/common/built-in.o
mipsel-softfloat-linux-gnu-ld: Warning: arch/mips/alchemy/common/built-in.o
 uses -msoft-float (set by arch/mips/alchemy/common/prom.o),
 arch/mips/alchemy/common/sleeper.o uses -mhard-float
To fix this, we detect whether GAS is new enough to support the "-msoft-float"
command-line option, and if it is, we let GCC pass it to GAS; but then we also
need to sprinkle the files which make use of floating point registers with the
necessary ".set hardfloat" directives.
Signed-off-by: Manuel Lauss <manuel.lauss@gmail.com>
Cc: Linux-MIPS <linux-mips@linux-mips.org>
Cc: Matthew Fortune <Matthew.Fortune@imgtec.com>
Cc: Markos Chandras <Markos.Chandras@imgtec.com>
Cc: Maciej W. Rozycki <macro@linux-mips.org>
Patchwork: https://patchwork.linux-mips.org/patch/8355/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
 
 KBUILD_AFLAGS_MODULE           += -mlong-calls
 KBUILD_CFLAGS_MODULE           += -mlong-calls
 
+#
+# Pass -msoft-float to GAS if it supports it.  However, on newer binutils
+# (specifically newer than 2.24.51.20140728) we then also need to explicitly
+# set ".set hardfloat" in all files which manipulate floating point registers.
+#
+ifneq ($(call as-option,-Wa$(comma)-msoft-float,),)
+       cflags-y                += -DGAS_HAS_SET_HARDFLOAT -Wa,-msoft-float
+endif
+
 cflags-y += -ffreestanding
 
 #
 
 #include <asm/mipsregs.h>
 
        .macro  fpu_save_single thread tmp=t0
+       .set push
+       SET_HARDFLOAT
        cfc1    \tmp,  fcr31
        swc1    $f0,  THREAD_FPR0_LS64(\thread)
        swc1    $f1,  THREAD_FPR1_LS64(\thread)
        swc1    $f30, THREAD_FPR30_LS64(\thread)
        swc1    $f31, THREAD_FPR31_LS64(\thread)
        sw      \tmp, THREAD_FCR31(\thread)
+       .set pop
        .endm
 
        .macro  fpu_restore_single thread tmp=t0
+       .set push
+       SET_HARDFLOAT
        lw      \tmp, THREAD_FCR31(\thread)
        lwc1    $f0,  THREAD_FPR0_LS64(\thread)
        lwc1    $f1,  THREAD_FPR1_LS64(\thread)
        lwc1    $f30, THREAD_FPR30_LS64(\thread)
        lwc1    $f31, THREAD_FPR31_LS64(\thread)
        ctc1    \tmp, fcr31
+       .set pop
        .endm
 
        .macro  cpu_save_nonscratch thread
 
 #endif /* CONFIG_CPU_MIPSR2 */
 
        .macro  fpu_save_16even thread tmp=t0
+       .set    push
+       SET_HARDFLOAT
        cfc1    \tmp, fcr31
        sdc1    $f0,  THREAD_FPR0_LS64(\thread)
        sdc1    $f2,  THREAD_FPR2_LS64(\thread)
        sdc1    $f28, THREAD_FPR28_LS64(\thread)
        sdc1    $f30, THREAD_FPR30_LS64(\thread)
        sw      \tmp, THREAD_FCR31(\thread)
+       .set    pop
        .endm
 
        .macro  fpu_save_16odd thread
        .set    push
        .set    mips64r2
+       SET_HARDFLOAT
        sdc1    $f1,  THREAD_FPR1_LS64(\thread)
        sdc1    $f3,  THREAD_FPR3_LS64(\thread)
        sdc1    $f5,  THREAD_FPR5_LS64(\thread)
        .endm
 
        .macro  fpu_restore_16even thread tmp=t0
+       .set    push
+       SET_HARDFLOAT
        lw      \tmp, THREAD_FCR31(\thread)
        ldc1    $f0,  THREAD_FPR0_LS64(\thread)
        ldc1    $f2,  THREAD_FPR2_LS64(\thread)
        .macro  fpu_restore_16odd thread
        .set    push
        .set    mips64r2
+       SET_HARDFLOAT
        ldc1    $f1,  THREAD_FPR1_LS64(\thread)
        ldc1    $f3,  THREAD_FPR3_LS64(\thread)
        ldc1    $f5,  THREAD_FPR5_LS64(\thread)
        .macro  cfcmsa  rd, cs
        .set    push
        .set    noat
+       SET_HARDFLOAT
        .insn
        .word   CFC_MSA_INSN | (\cs << 11)
        move    \rd, $1
        .macro  ctcmsa  cd, rs
        .set    push
        .set    noat
+       SET_HARDFLOAT
        move    $1, \rs
        .word   CTC_MSA_INSN | (\cd << 6)
        .set    pop
        .macro  ld_d    wd, off, base
        .set    push
        .set    noat
+       SET_HARDFLOAT
        add     $1, \base, \off
        .word   LDD_MSA_INSN | (\wd << 6)
        .set    pop
        .macro  st_d    wd, off, base
        .set    push
        .set    noat
+       SET_HARDFLOAT
        add     $1, \base, \off
        .word   STD_MSA_INSN | (\wd << 6)
        .set    pop
        .macro  copy_u_w        rd, ws, n
        .set    push
        .set    noat
+       SET_HARDFLOAT
        .insn
        .word   COPY_UW_MSA_INSN | (\n << 16) | (\ws << 11)
        /* move triggers an assembler bug... */
        .macro  copy_u_d        rd, ws, n
        .set    push
        .set    noat
+       SET_HARDFLOAT
        .insn
        .word   COPY_UD_MSA_INSN | (\n << 16) | (\ws << 11)
        /* move triggers an assembler bug... */
        .macro  insert_w        wd, n, rs
        .set    push
        .set    noat
+       SET_HARDFLOAT
        /* move triggers an assembler bug... */
        or      $1, \rs, zero
        .word   INSERT_W_MSA_INSN | (\n << 16) | (\wd << 6)
        .macro  insert_d        wd, n, rs
        .set    push
        .set    noat
+       SET_HARDFLOAT
        /* move triggers an assembler bug... */
        or      $1, \rs, zero
        .word   INSERT_D_MSA_INSN | (\n << 16) | (\wd << 6)
        st_d    31, THREAD_FPR31, \thread
        .set    push
        .set    noat
+       SET_HARDFLOAT
        cfcmsa  $1, MSA_CSR
        sw      $1, THREAD_MSA_CSR(\thread)
        .set    pop
        .macro  msa_restore_all thread
        .set    push
        .set    noat
+       SET_HARDFLOAT
        lw      $1, THREAD_MSA_CSR(\thread)
        ctcmsa  MSA_CSR, $1
        .set    pop
        .macro  msa_init_all_upper
        .set    push
        .set    noat
+       SET_HARDFLOAT
        not     $1, zero
        msa_init_upper  0
        .set    pop
 
 
 #include <asm/sgidefs.h>
 
+/*
+ * Starting with binutils 2.24.51.20140729, MIPS binutils warn about mixing
+ * hardfloat and softfloat object files.  The kernel build uses soft-float by
+ * default, so we also need to pass -msoft-float along to GAS if it supports it.
+ * But this in turn causes assembler errors in files which access hardfloat
+ * registers.  We detect if GAS supports "-msoft-float" in the Makefile and
+ * explicitly put ".set hardfloat" where floating point registers are touched.
+ */
+#ifdef GAS_HAS_SET_HARDFLOAT
+#define SET_HARDFLOAT .set hardfloat
+#else
+#define SET_HARDFLOAT
+#endif
+
 #if _MIPS_SIM == _MIPS_SIM_ABI32
 
 /*
 
        if (is_msa_enabled()) {
                if (save) {
                        save_msa(current);
-                       asm volatile("cfc1 %0, $31"
-                               : "=r"(current->thread.fpu.fcr31));
+                       current->thread.fpu.fcr31 =
+                                       read_32bit_cp1_register(CP1_STATUS);
                }
                disable_msa();
                clear_thread_flag(TIF_USEDMSA);
 
 /*
  * Macros to access the floating point coprocessor control registers
  */
-#define read_32bit_cp1_register(source)                                        \
+#define _read_32bit_cp1_register(source, gas_hardfloat)                        \
 ({                                                                     \
        int __res;                                                      \
                                                                        \
        "       # gas fails to assemble cfc1 for some archs,    \n"     \
        "       # like Octeon.                                  \n"     \
        "       .set    mips1                                   \n"     \
+       "       "STR(gas_hardfloat)"                            \n"     \
        "       cfc1    %0,"STR(source)"                        \n"     \
        "       .set    pop                                     \n"     \
        : "=r" (__res));                                                \
        __res;                                                          \
 })
 
+#ifdef GAS_HAS_SET_HARDFLOAT
+#define read_32bit_cp1_register(source)                                        \
+       _read_32bit_cp1_register(source, .set hardfloat)
+#else
+#define read_32bit_cp1_register(source)                                        \
+       _read_32bit_cp1_register(source, )
+#endif
+
 #ifdef HAVE_AS_DSP
 #define rddsp(mask)                                                    \
 ({                                                                     \
 
                case mm_bc1t_op:
                        preempt_disable();
                        if (is_fpu_owner())
-                               asm volatile("cfc1\t%0,$31" : "=r" (fcr31));
+                               fcr31 = read_32bit_cp1_register(CP1_STATUS);
                        else
                                fcr31 = current->thread.fpu.fcr31;
                        preempt_enable();
        case cop1_op:
                preempt_disable();
                if (is_fpu_owner())
-                       asm volatile(
-                               ".set push\n"
-                               "\t.set mips1\n"
-                               "\tcfc1\t%0,$31\n"
-                               "\t.set pop" : "=r" (fcr31));
+                       fcr31 = read_32bit_cp1_register(CP1_STATUS);
                else
                        fcr31 = current->thread.fpu.fcr31;
                preempt_enable();
 
        .set    push
        /* gas fails to assemble cfc1 for some archs (octeon).*/ \
        .set    mips1
+       SET_HARDFLOAT
        cfc1    a1, fcr31
        li      a2, ~(0x3f << 12)
        and     a2, a1
 
        .set    mips1
        /* Save floating point context */
 LEAF(_save_fp_context)
+       .set    push
+       SET_HARDFLOAT
        li      v0, 0                                   # assume success
        cfc1    t1,fcr31
        EX(swc1 $f0,(SC_FPREGS+0)(a0))
        EX(sw   t1,(SC_FPC_CSR)(a0))
        cfc1    t0,$0                           # implementation/version
        jr      ra
+       .set    pop
        .set    nomacro
         EX(sw  t0,(SC_FPC_EIR)(a0))
        .set    macro
  * stack frame which might have been changed by the user.
  */
 LEAF(_restore_fp_context)
+       .set    push
+       SET_HARDFLOAT
        li      v0, 0                                   # assume success
        EX(lw t0,(SC_FPC_CSR)(a0))
        EX(lwc1 $f0,(SC_FPREGS+0)(a0))
        EX(lwc1 $f31,(SC_FPREGS+248)(a0))
        jr      ra
         ctc1   t0,fcr31
+       .set    pop
        END(_restore_fp_context)
        .set    reorder
 
 
 
 #define FPU_DEFAULT  0x00000000
 
+       .set push
+       SET_HARDFLOAT
+
 LEAF(_init_fpu)
        mfc0    t0, CP0_STATUS
        li      t1, ST0_CU1
        mtc1    t0, $f31
        jr      ra
        END(_init_fpu)
+
+       .set pop
 
 #include <asm/asm-offsets.h>
 #include <asm/regdef.h>
 
+/* preprocessor replaces the fp in ".set fp=64" with $30 otherwise */
+#undef fp
+
        .macro  EX insn, reg, src
        .set    push
+       SET_HARDFLOAT
        .set    nomacro
 .ex\@: \insn   \reg, \src
        .set    pop
        .set    arch=r4000
 
 LEAF(_save_fp_context)
+       .set    push
+       SET_HARDFLOAT
        cfc1    t1, fcr31
+       .set    pop
 
 #if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPS32_R2)
        .set    push
+       SET_HARDFLOAT
 #ifdef CONFIG_CPU_MIPS32_R2
-       .set    mips64r2
+       .set    mips32r2
+       .set    fp=64
        mfc0    t0, CP0_STATUS
        sll     t0, t0, 5
        bgez    t0, 1f                  # skip storing odd if FR=0
 1:     .set    pop
 #endif
 
+       .set push
+       SET_HARDFLOAT
        /* Store the 16 even double precision registers */
        EX      sdc1 $f0, SC_FPREGS+0(a0)
        EX      sdc1 $f2, SC_FPREGS+16(a0)
        EX      sw t1, SC_FPC_CSR(a0)
        jr      ra
         li     v0, 0                                   # success
+       .set pop
        END(_save_fp_context)
 
 #ifdef CONFIG_MIPS32_COMPAT
        /* Save 32-bit process floating point context */
 LEAF(_save_fp_context32)
+       .set push
+       SET_HARDFLOAT
        cfc1    t1, fcr31
 
        mfc0    t0, CP0_STATUS
        EX      sw t1, SC32_FPC_CSR(a0)
        cfc1    t0, $0                          # implementation/version
        EX      sw t0, SC32_FPC_EIR(a0)
+       .set pop
 
        jr      ra
         li     v0, 0                                   # success
 
 #if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPS32_R2)
        .set    push
+       SET_HARDFLOAT
 #ifdef CONFIG_CPU_MIPS32_R2
-       .set    mips64r2
+       .set    mips32r2
+       .set    fp=64
        mfc0    t0, CP0_STATUS
        sll     t0, t0, 5
        bgez    t0, 1f                  # skip loading odd if FR=0
        EX      ldc1 $f31, SC_FPREGS+248(a0)
 1:     .set pop
 #endif
+       .set push
+       SET_HARDFLOAT
        EX      ldc1 $f0, SC_FPREGS+0(a0)
        EX      ldc1 $f2, SC_FPREGS+16(a0)
        EX      ldc1 $f4, SC_FPREGS+32(a0)
        EX      ldc1 $f28, SC_FPREGS+224(a0)
        EX      ldc1 $f30, SC_FPREGS+240(a0)
        ctc1    t1, fcr31
+       .set pop
        jr      ra
         li     v0, 0                                   # success
        END(_restore_fp_context)
 #ifdef CONFIG_MIPS32_COMPAT
 LEAF(_restore_fp_context32)
        /* Restore an o32 sigcontext.  */
+       .set push
+       SET_HARDFLOAT
        EX      lw t1, SC32_FPC_CSR(a0)
 
        mfc0    t0, CP0_STATUS
        ctc1    t1, fcr31
        jr      ra
         li     v0, 0                                   # success
+       .set pop
        END(_restore_fp_context32)
 #endif
 
 
 
 #include <asm/asmmacro.h>
 
+/* preprocessor replaces the fp in ".set fp=64" with $30 otherwise */
+#undef fp
+
 /*
  * Offset to the current process status flags, the first 32 bytes of the
  * stack are not used.
        bgtz    a3, 1f
 
        /* Save 128b MSA vector context + scalar FP control & status. */
+       .set push
+       SET_HARDFLOAT
        cfc1    t1, fcr31
        msa_save_all    a0
+       .set pop        /* SET_HARDFLOAT */
+
        sw      t1, THREAD_FCR31(a0)
        b       2f
 
 
 #define FPU_DEFAULT  0x00000000
 
+       .set push
+       SET_HARDFLOAT
+
 LEAF(_init_fpu)
        mfc0    t0, CP0_STATUS
        li      t1, ST0_CU1
 
 #ifdef CONFIG_CPU_MIPS32_R2
        .set    push
-       .set    mips64r2
+       .set    mips32r2
+       .set    fp=64
        sll     t0, t0, 5                       # is Status.FR set?
        bgez    t0, 1f                          # no: skip setting upper 32b
 
 #endif
        jr      ra
        END(_init_fpu)
+
+       .set pop        /* SET_HARDFLOAT */
 
 
        .set    noreorder
        .set    mips2
+       .set    push
+       SET_HARDFLOAT
+
        /* Save floating point context */
        LEAF(_save_fp_context)
        mfc0    t0,CP0_STATUS
 1:     jr      ra
         nop
        END(_restore_fp_context)
+
+       .set pop        /* SET_HARDFLOAT */
 
                if (insn.i_format.rs == bc_op) {
                        preempt_disable();
                        if (is_fpu_owner())
-                               asm volatile(
-                                       ".set push\n"
-                                       "\t.set mips1\n"
-                                       "\tcfc1\t%0,$31\n"
-                                       "\t.set pop" : "=r" (fcr31));
+                               fcr31 = read_32bit_cp1_register(CP1_STATUS);
                        else
                                fcr31 = current->thread.fpu.fcr31;
                        preempt_enable();