The initial fix for making sure that we can access percpu variables
in all C code (commit 10617bbe84628eb18ab5f723d3ba35005adde143)
inadvertently allocated the memory in the "percpu" section of
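the vmlinux ELF executable.  This confused kexec/dump.

Allocate the cpu0 per-cpu space in the ".data" section instead, and
export its address as __cpu0_per_cpu.
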
Signed-off-by: Tony Luck <tony.luck@intel.com>
 #include <asm-generic/sections.h>
 
 extern char __per_cpu_start[], __per_cpu_end[], __phys_per_cpu_start[];
+#ifdef CONFIG_SMP
+extern char __cpu0_per_cpu[];
+#endif
 extern char __start___vtop_patchlist[], __end___vtop_patchlist[];
 extern char __start___rse_patchlist[], __end___rse_patchlist[];
 extern char __start___mckinley_e9_bundles[], __end___mckinley_e9_bundles[];
 
        ;;
 #else
 (isAP) br.few 2f
-       mov r20=r19
-       sub r19=r19,r18
+       movl r20=__cpu0_per_cpu
        ;;
        shr.u r18=r18,3
 1:
-       ld8 r21=[r20],8;;
-       st8[r19]=r21,8
+       ld8 r21=[r19],8;;
+       st8[r20]=r21,8
        adds r18=-1,r18;;
        cmp4.lt p7,p6=0,r18
 (p7)   br.cond.dptk.few 1b
+       mov r19=r20
+       ;;
 2:
 #endif
        tpa r19=r19
 
   /* Per-cpu data: */
   percpu : { } :percpu
   . = ALIGN(PERCPU_PAGE_SIZE);
-#ifdef CONFIG_SMP
-  . = . + PERCPU_PAGE_SIZE;    /* cpu0 per-cpu space */
-#endif
   __phys_per_cpu_start = .;
   .data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - LOAD_OFFSET)
        {
   data : { } :data
   .data : AT(ADDR(.data) - LOAD_OFFSET)
        {
+#ifdef CONFIG_SMP
+  . = ALIGN(PERCPU_PAGE_SIZE);
+               __cpu0_per_cpu = .;
+  . = . + PERCPU_PAGE_SIZE;    /* cpu0 per-cpu space */
+#endif
                DATA_DATA
                *(.data1)
                *(.gnu.linkonce.d*)
 
         * get_zeroed_page().
         */
        if (first_time) {
-               void *cpu0_data = __phys_per_cpu_start - PERCPU_PAGE_SIZE;
+               void *cpu0_data = __cpu0_per_cpu;
 
                first_time=0;
 
 
 
        for_each_possible_early_cpu(cpu) {
                if (cpu == 0) {
-                       void *cpu0_data = __phys_per_cpu_start - PERCPU_PAGE_SIZE;
+                       void *cpu0_data = __cpu0_per_cpu;
                        __per_cpu_offset[cpu] = (char*)cpu0_data -
                                __per_cpu_start;
                } else if (node == node_cpuid[cpu].nid) {