sparc64: revert pause instruction patch for atomic backoff and cpu_relax()
author     Babu Moger <babu.moger@oracle.com>
           Tue, 27 Jun 2017 15:51:45 +0000 (09:51 -0600)
committer  Allen Pais <allen.pais@oracle.com>
           Tue, 11 Jul 2017 07:36:48 +0000 (13:06 +0530)
This patch reverts commit e9b9eb59ffcdee09ec96b040f85c919618f4043e
("sparc64: Use pause instruction when available").

This all started with our TPCC results on UEK4. During T7 testing, the TPCC results
were much lower compared to UEK2. Atomic calls like atomic_add and atomic_sub
were showing up at the top of the perf results. Karl found that this was caused by
upstream commit e9b9eb59ffcdee09ec96b040f85c919618f4043e
("sparc64: Use pause instruction when available"). After reverting this commit on UEK4,
the TPCC numbers were back at the UEK2 level. However, things changed after Atish's
scheduler fixes on UEK4: the TPCC numbers improved, and the upstream commit
("sparc64: Use pause instruction when available") did not seem to make any difference.
So Karl's "revert pause instruction" patch was removed from UEK4.

Now, with T8 testing, we are seeing the same old behaviour again. Atomic calls
like atomic_add and atomic_sub are showing up at the top of the perf results. After
applying Karl's patch (revert pause instruction patch for atomic backoff), the TPCC numbers
improved (about 25% better than T7) and the atomic calls no longer dominate the perf profile.
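
The symptom is easiest to picture with a small user-space sketch (illustrative only,
not part of this patch; the real workload is TPCC, and the thread/iteration counts
below are arbitrary): many threads updating one shared counter, which is the kind of
contention that makes atomic_add/atomic_sub dominate the profile:

    /* contended-counter sketch; build with: gcc -O2 -pthread hammer.c */
    #include <pthread.h>
    #include <stdio.h>

    #define NTHREADS 16
    #define ITERS    (1 << 22)

    static long counter;                    /* shared, heavily contended */

    static void *hammer(void *arg)
    {
            for (long i = 0; i < ITERS; i++)
                    __atomic_fetch_add(&counter, 1, __ATOMIC_RELAXED);
            return arg;
    }

    int main(void)
    {
            pthread_t tid[NTHREADS];

            for (int i = 0; i < NTHREADS; i++)
                    pthread_create(&tid[i], NULL, hammer, NULL);
            for (int i = 0; i < NTHREADS; i++)
                    pthread_join(tid[i], NULL);
            printf("counter = %ld\n", counter);
            return 0;
    }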

So we are adding this patch back again. This is a temporary fix; a long-term solution
is still under discussion. The original patch is from Karl:
http://ca-git.us.oracle.com/?p=linux-uek-sparc.git;a=commit;h=f214eebf2223d23a2b1499be5b54719bdd7651e3

All credit goes to Karl. The patch has been rebased onto the latest sparc tree.

Orabug: 26306832

Signed-off-by: Karl Volz <karl.volz@Oracle.com>
Reviewed-by: Atish Patra <atish.patra@oracle.com>
Signed-off-by: Henry Willard <henry.willard@oracle.com>
Signed-off-by: Babu Moger <babu.moger@oracle.com>
Reviewed-by: Karl Volz <karl.volz@Oracle.com>
Signed-off-by: Allen Pais <allen.pais@oracle.com>
arch/sparc/include/asm/backoff.h
arch/sparc/include/asm/processor_64.h
arch/sparc/kernel/entry.h
arch/sparc/kernel/setup_64.c
arch/sparc/kernel/vmlinux.lds.S

index 4e02086b839cf2146431265ce77dbb984840d33e..5653a6fc11696a2cf44092d2a5fe0a8cbc4a659c 100644 (file)
  * between 40 and 50 cpu cycles.
  *
  * For SPARC-T4 and later we have a special "pause" instruction
- * available.  This is implemented using writes to register %asr27.
- * The cpu will block the number of cycles written into the register,
- * unless a disrupting trap happens first.  SPARC-T4 specifically
- * implements pause with a granularity of 8 cycles.  Each strand has
- * an internal pause counter which decrements every 8 cycles.  So the
- * chip shifts the %asr27 value down by 3 bits, and writes the result
- * into the pause counter.  If a value smaller than 8 is written, the
- * chip blocks for 1 cycle.
+ * available.  NOTE: pause is currently not used due to performance degradation
+ * in M7/M8 platforms.
  *
- * To achieve the same amount of backoff as the three %ccr reads give
- * on earlier chips, we shift the backoff value up by 7 bits.  (Three
- * %ccr reads block for about 128 cycles, 1 << 7 == 128) We write the
- * whole amount we want to block into the pause register, rather than
- * loop writing 128 each time.
  */
 
 #define BACKOFF_LIMIT  (4 * 1024)
 #define BACKOFF_LABEL(spin_label, continue_label) \
        spin_label
 
-#define BACKOFF_SPIN(reg, tmp, label)          \
-       mov             reg, tmp;               \
-88:    rd              %ccr, %g0;              \
-       rd              %ccr, %g0;              \
-       rd              %ccr, %g0;              \
-       .section        .pause_3insn_patch,"ax";\
-       .word           88b;                    \
-       sllx            tmp, 7, tmp;            \
-       wr              tmp, 0, %asr27;         \
-       clr             tmp;                    \
-       .previous;                              \
-       brnz,pt         tmp, 88b;               \
-        sub            tmp, 1, tmp;            \
-       set             BACKOFF_LIMIT, tmp;     \
-       cmp             reg, tmp;               \
-       bg,pn           %xcc, label;            \
-        nop;                                   \
-       ba,pt           %xcc, label;            \
-        sllx           reg, 1, reg;
+#define BACKOFF_SPIN(reg, tmp, label)          \
+       mov             reg, tmp;               \
+88:    brnz,pt         tmp, 88b;               \
+        sub            tmp, 1, tmp;            \
+       set             BACKOFF_LIMIT, tmp;     \
+       cmp             reg, tmp;               \
+       bg,pn           %xcc, label;            \
+        nop;                                   \
+       ba,pt           %xcc, label;            \
+        sllx           reg, 1, reg;
 
 #else
 
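With the pause patching gone, BACKOFF_SPIN in the hunk above reduces to a plain
count-down delay followed by an exponential increase of the backoff value, capped at
BACKOFF_LIMIT. A rough C model of that behaviour (illustrative only; the kernel does
this in assembly, and the volatile counter is just to keep a compiler from eliding
the empty loop):

    #define BACKOFF_LIMIT   (4 * 1024)

    static void backoff_spin_model(unsigned long *backoff)
    {
            volatile unsigned long tmp;

            /* Busy-wait for roughly *backoff iterations (no pause instruction). */
            for (tmp = *backoff; tmp != 0; tmp--)
                    ;

            /* Double the backoff for the next retry, capped at BACKOFF_LIMIT. */
            if (*backoff <= BACKOFF_LIMIT)
                    *backoff <<= 1;
    }
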
index ebb6fea53b842893c6debcdadc2c653b5c880186..5edfa483444872893fcaa269e36b422d49f23924 100644 (file)
@@ -205,25 +205,12 @@ unsigned long get_wchan(struct task_struct *task);
  * To make a long story short, we are trying to yield the current cpu
  * strand during busy loops.
  */
-#ifdef BUILD_VDSO
 #define cpu_relax()    asm volatile("\n99:\n\t"                        \
                                     "rd        %%ccr, %%g0\n\t"        \
                                     "rd        %%ccr, %%g0\n\t"        \
                                     "rd        %%ccr, %%g0\n\t"        \
                                     ::: "memory")
-#else /* ! BUILD_VDSO */
-#define cpu_relax()    asm volatile("\n99:\n\t"                        \
-                                    "rd        %%ccr, %%g0\n\t"        \
-                                    "rd        %%ccr, %%g0\n\t"        \
-                                    "rd        %%ccr, %%g0\n\t"        \
-                                    ".section  .pause_3insn_patch,\"ax\"\n\t"\
-                                    ".word     99b\n\t"                \
-                                    "wr        %%g0, 128, %%asr27\n\t" \
-                                    "nop\n\t"                          \
-                                    "nop\n\t"                          \
-                                    ".previous"                        \
-                                    ::: "memory")
-#endif
+
 #define cpu_relax_lowlatency() cpu_relax()
 
 /* Prefetch support.  This is tuned for UltraSPARC-III and later.
index ab30aa21d33fb39ea3c0c9e1b32eecdff669a09b..4f4384460617fca0801f2eb8f5400486fafadf69 100644 (file)
@@ -58,13 +58,6 @@ struct popc_6insn_patch_entry {
 extern struct popc_6insn_patch_entry __popc_6insn_patch,
        __popc_6insn_patch_end;
 
-struct pause_patch_entry {
-       unsigned int    addr;
-       unsigned int    insns[3];
-};
-extern struct pause_patch_entry __pause_3insn_patch,
-       __pause_3insn_patch_end;
-
 void sun4v_patch_1insn_range(struct sun4v_1insn_patch_entry *,
                             struct sun4v_1insn_patch_entry *);
 void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *,
index 410a09a107d610fdaf4187269507916048dfc7d0..a4fddcbfc9e45b7112a12bbc21928c4ba02b2fdf 100644 (file)
@@ -331,25 +331,6 @@ static void __init popc_patch(void)
        }
 }
 
-static void __init pause_patch(void)
-{
-       struct pause_patch_entry *p;
-
-       p = &__pause_3insn_patch;
-       while (p < &__pause_3insn_patch_end) {
-               unsigned long i, addr = p->addr;
-
-               for (i = 0; i < 3; i++) {
-                       *(unsigned int *) (addr +  (i * 4)) = p->insns[i];
-                       wmb();
-                       __asm__ __volatile__("flush     %0"
-                                            : : "r" (addr +  (i * 4)));
-               }
-
-               p++;
-       }
-}
-
 void __init start_early_boot(void)
 {
        int cpu;
@@ -627,8 +608,6 @@ static void __init init_sparc64_elf_hwcap(void)
 
        if (sparc64_elf_hwcap & AV_SPARC_POPC)
                popc_patch();
-       if (sparc64_elf_hwcap & AV_SPARC_PAUSE)
-               pause_patch();
 }
 
 void __init alloc_irqstack_bootmem(void)
index ec931a3b362d93cbed8bd67039ed99ae864e1924..f0f4ea82ec787d5b9e676ea2803b656b25607723 100644 (file)
@@ -133,11 +133,6 @@ SECTIONS
                *(.popc_6insn_patch)
                __popc_6insn_patch_end = .;
        }
-       .pause_3insn_patch : {
-               __pause_3insn_patch = .;
-               *(.pause_3insn_patch)
-               __pause_3insn_patch_end = .;
-       }
        .sun4v_adi_1insn_patch : {
                __sun4v_adi_1insn_patch = .;
                *(.sun4v_adi_1insn_patch)