Impact: __per_cpu_load available on all SMP capable archs
Percpu now requires three symbols to be defined - __per_cpu_load,
__per_cpu_start and __per_cpu_end.  There were three archs which
didn't have it.  Update them as follows.
* powerpc: can use generic PERCPU() macro.  Compile tested for
  powerpc32, compile/boot tested for powerpc64.
* ia64: can use generic PERCPU_VADDR() macro.  __phys_per_cpu_start is
  identical to __per_cpu_load.  Compile tested and symbol table looks
  identical after the change except for the additional __per_cpu_load.
* arm: added explicit __per_cpu_load definition.  Currently uses
  unified .init output section so can't use the generic macro.  Dunno
  whether the unified .init ouput section is required by arch
  peculiarity so I left it alone.  Please break it up and use PERCPU()
  if possible.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Pat Gefre <pfg@sgi.com>
Cc: Russell King <rmk@arm.linux.org.uk>
                __initramfs_end = .;
 #endif
                . = ALIGN(4096);
+               __per_cpu_load = .;
                __per_cpu_start = .;
                        *(.data.percpu.page_aligned)
                        *(.data.percpu)
 
         { *(.data.cacheline_aligned) }
 
   /* Per-cpu data: */
-  percpu : { } :percpu
   . = ALIGN(PERCPU_PAGE_SIZE);
-  __phys_per_cpu_start = .;
-  .data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - LOAD_OFFSET)
-       {
-               __per_cpu_start = .;
-               *(.data.percpu.page_aligned)
-               *(.data.percpu)
-               *(.data.percpu.shared_aligned)
-               __per_cpu_end = .;
-       }
+  PERCPU_VADDR(PERCPU_ADDR, :percpu)
+  __phys_per_cpu_start = __per_cpu_load;
   . = __phys_per_cpu_start + PERCPU_PAGE_SIZE; /* ensure percpu data fits
                                                 * into percpu page size
                                                 */
 
                __initramfs_end = .;
        }
 #endif
-       . = ALIGN(PAGE_SIZE);
-       .data.percpu  : AT(ADDR(.data.percpu) - LOAD_OFFSET) {
-               __per_cpu_start = .;
-               *(.data.percpu.page_aligned)
-               *(.data.percpu)
-               *(.data.percpu.shared_aligned)
-               __per_cpu_end = .;
-       }
+       PERCPU(PAGE_SIZE)
 
        . = ALIGN(8);
        .machine.desc : AT(ADDR(.machine.desc) - LOAD_OFFSET) {