/* Add all physical memory to the bootmem map, mark each area
         * present.
         */
-       for (i=0; i < lmb.memory.cnt; i++) {
-               unsigned long base, size;
-               unsigned long start_pfn, end_pfn;
-
-               base = lmb.memory.region[i].base;
-               size = lmb.memory.region[i].size;
-
-               start_pfn = base >> PAGE_SHIFT;
-               end_pfn = start_pfn + (size >> PAGE_SHIFT);
-               memory_present(0, start_pfn, end_pfn);
-
-               free_bootmem(base, size);
-       }
+       /*
+        * free_bootmem() is handed the region's base byte address, exactly
+        * as the code being replaced did.  lmb_start_pfn() returns the base
+        * shifted down by PAGE_SHIFT and must not be used here.
+        */
+       for (i=0; i < lmb.memory.cnt; i++)
+               free_bootmem(lmb.memory.region[i].base,
+                            lmb_size_bytes(&lmb.memory, i));
 
        /* reserve the sections we're already using */
-       for (i=0; i < lmb.reserved.cnt; i++) {
-               unsigned long base = lmb.reserved.region[i].base;
-               unsigned long size = lmb.reserved.region[i].size;
+       /*
+        * As with the previous code, reserve_bootmem() gets the region's
+        * base byte address; a page frame number would reserve the wrong
+        * range.
+        */
+       for (i=0; i < lmb.reserved.cnt; i++)
+               reserve_bootmem(lmb.reserved.region[i].base,
+                               lmb_size_bytes(&lmb.reserved, i));
 
-               reserve_bootmem(base, size);
-       }
+       for (i=0; i < lmb.memory.cnt; i++)
+               memory_present(0, lmb_start_pfn(&lmb.memory, i),
+                              lmb_end_pfn(&lmb.memory, i));
 }
 
 /*
 
                for (i = start ; i < (start+size); i += MEMORY_INCREMENT)
                        numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] =
                                numa_domain;
-               memory_present(numa_domain, start >> PAGE_SHIFT,
-                                               (start + size) >> PAGE_SHIFT);
 
                if (--ranges)
                        goto new_range;
 
        for (i = 0 ; i < top_of_ram; i += MEMORY_INCREMENT)
                numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 0;
-       memory_present(0, 0, init_node_data[0].node_end_pfn);
 }
 
 static void __init dump_numa_topology(void)
                                                     size);
                        }
                }
+               /*
+                * This loop may look familiar, but we have to do it again
+                * after marking our reserved memory to mark memory present
+                * for sparsemem.
+                */
+               addr_cells = get_mem_addr_cells();
+               size_cells = get_mem_size_cells();
+               memory = NULL;
+               while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
+                       unsigned long mem_start, mem_size;
+                       int numa_domain, ranges;
+                       unsigned int *memcell_buf;
+                       unsigned int len;
+
+                       memcell_buf = (unsigned int *)get_property(memory, "reg", &len);
+                       if (!memcell_buf || len <= 0)
+                               continue;
+
+                       ranges = memory->n_addrs;       /* ranges in cell */
+new_range2:
+                       mem_start = read_n_cells(addr_cells, &memcell_buf);
+                       mem_size = read_n_cells(size_cells, &memcell_buf);
+                       if (numa_enabled) {
+                               numa_domain = of_node_numa_domain(memory);
+                               if (numa_domain  >= MAX_NUMNODES)
+                                       numa_domain = 0;
+                       } else
+                               numa_domain =  0;
+
+                       if (numa_domain != nid)
+                               continue;
+
+                       mem_size = numa_enforce_memory_limit(mem_start, mem_size);
+                       memory_present(numa_domain, mem_start >> PAGE_SHIFT,
+                                      (mem_start + mem_size) >> PAGE_SHIFT);
+
+                       if (--ranges)           /* process all ranges in cell */
+                               goto new_range2;
+               }
+
        }
 }
 
 
 
 extern unsigned long io_hole_start;
 
+/* Size field of entry region_nr in the given lmb region list. */
+static inline unsigned long
+lmb_size_bytes(struct lmb_region *type, unsigned long region_nr)
+{
+       unsigned long bytes = type->region[region_nr].size;
+
+       return bytes;
+}
+/*
+ * Size of entry region_nr expressed in pages: the byte size shifted right
+ * by PAGE_SHIFT, so any partial trailing page is dropped.
+ */
+static inline unsigned long
+lmb_size_pages(struct lmb_region *type, unsigned long region_nr)
+{
+       unsigned long bytes = lmb_size_bytes(type, region_nr);
+
+       return bytes >> PAGE_SHIFT;
+}
+/*
+ * Page frame number at which entry region_nr begins: the entry's base
+ * shifted right by PAGE_SHIFT.  This is a pfn, not a byte address.
+ */
+static inline unsigned long
+lmb_start_pfn(struct lmb_region *type, unsigned long region_nr)
+{
+       unsigned long first_pfn = type->region[region_nr].base >> PAGE_SHIFT;
+
+       return first_pfn;
+}
+/*
+ * Page frame number one past the end of entry region_nr, computed as the
+ * starting pfn plus the entry's size in pages.
+ */
+static inline unsigned long
+lmb_end_pfn(struct lmb_region *type, unsigned long region_nr)
+{
+       unsigned long first_pfn = lmb_start_pfn(type, region_nr);
+       unsigned long nr_pages = lmb_size_pages(type, region_nr);
+
+       return first_pfn + nr_pages;
+}
+
 #endif /* _PPC64_LMB_H */
 
        unsigned long section_mem_map;
 };
 
+#ifdef CONFIG_ARCH_SPARSEMEM_EXTREME
+/*
+ * Should we ever require GCC 4 or later then the flat array scheme
+ * can be eliminated and a uniform solution for EXTREME and !EXTREME can
+ * be arrived at.
+ */
+/*
+ * Each root holds (1UL << SECTION_ROOT_SHIFT) mem_section entries.
+ * NOTE(review): PAGE_SHIFT-3 presumably assumes an 8-byte struct mem_section
+ * so that one root fills exactly one page -- confirm against the struct.
+ *
+ * NR_SECTION_ROOTS is rounded up so that a section count which is not a
+ * multiple of the per-root count still gets a root slot for its tail
+ * sections instead of indexing past the end of mem_section[].
+ */
+#define SECTION_ROOT_SHIFT     (PAGE_SHIFT-3)
+#define SECTION_ROOT_MASK      ((1UL<<SECTION_ROOT_SHIFT) - 1)
+#define SECTION_TO_ROOT(_sec)  ((_sec) >> SECTION_ROOT_SHIFT)
+#define NR_SECTION_ROOTS       \
+       ((NR_MEM_SECTIONS + SECTION_ROOT_MASK) >> SECTION_ROOT_SHIFT)
+
+extern struct mem_section *mem_section[NR_SECTION_ROOTS];
+
+/*
+ * Two-level lookup of section number nr.  Returns NULL when the root slot
+ * covering nr has not been populated, otherwise a pointer to the entry
+ * inside that root.
+ */
+static inline struct mem_section *__nr_to_section(unsigned long nr)
+{
+       struct mem_section *root = mem_section[SECTION_TO_ROOT(nr)];
+
+       return root ? &root[nr & SECTION_ROOT_MASK] : NULL;
+}
+
+#else
+
 extern struct mem_section mem_section[NR_MEM_SECTIONS];
 
 static inline struct mem_section *__nr_to_section(unsigned long nr)
        return &mem_section[nr];
 }
 
+#define sparse_index_init(_sec, _nid)  do {} while (0)
+
+#endif
+
 /*
  * We use the lower bits of the mem_map pointer to store
  * a little bit of information.  There should be at least
 
+/*
+ * Non-zero only when section is non-NULL and has SECTION_MARKED_PRESENT
+ * set in its section_mem_map word.
+ */
 static inline int valid_section(struct mem_section *section)
 {
-       return (section->section_mem_map & SECTION_MARKED_PRESENT);
+       if (!section)
+               return 0;
+       return (section->section_mem_map & SECTION_MARKED_PRESENT) != 0;
 }
 
+/*
+ * Non-zero only when section is non-NULL and has SECTION_HAS_MEM_MAP set
+ * in its section_mem_map word.
+ */
 static inline int section_has_mem_map(struct mem_section *section)
 {
-       return (section->section_mem_map & SECTION_HAS_MEM_MAP);
+       if (!section)
+               return 0;
+       return (section->section_mem_map & SECTION_HAS_MEM_MAP) != 0;
 }
 
 static inline int valid_section_nr(unsigned long nr)
 
 config HAVE_MEMORY_PRESENT
        def_bool y
        depends on ARCH_HAVE_MEMORY_PRESENT || SPARSEMEM
+
+#
+# Architecture platforms which require a two level mem_section in SPARSEMEM
+# must select this option. This is usually for architecture platforms with
+# an extremely sparse physical address space.
+#
+config ARCH_SPARSEMEM_EXTREME
+       def_bool n
+       depends on SPARSEMEM && 64BIT
 
  *
  * 1) mem_section      - memory sections, mem_map's for valid memory
  */
-struct mem_section mem_section[NR_MEM_SECTIONS];
+#ifdef CONFIG_ARCH_SPARSEMEM_EXTREME
+struct mem_section *mem_section[NR_SECTION_ROOTS]
+       ____cacheline_maxaligned_in_smp;
+
+/*
+ * Make sure the root slot covering the given section number is populated.
+ * If it is already set nothing happens; otherwise one page is taken from
+ * the bootmem allocator for node nid, zeroed and installed.  Panics if the
+ * allocation returns NULL.
+ */
+static void sparse_index_init(unsigned long section, int nid)
+{
+       struct mem_section **slot = &mem_section[SECTION_TO_ROOT(section)];
+
+       if (*slot)
+               return;
+
+       *slot = alloc_bootmem_node(NODE_DATA(nid), PAGE_SIZE);
+       if (!*slot)
+               panic("memory_present: NO MEMORY\n");
+
+       memset(*slot, 0, PAGE_SIZE);
+}
+#else
+struct mem_section mem_section[NR_MEM_SECTIONS]
+       ____cacheline_maxaligned_in_smp;
+#endif
 EXPORT_SYMBOL(mem_section);
 
 /* Record a memory area against a node. */
        start &= PAGE_SECTION_MASK;
        for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
                unsigned long section = pfn_to_section_nr(pfn);
-               if (!mem_section[section].section_mem_map)
-                       mem_section[section].section_mem_map = SECTION_MARKED_PRESENT;
+               struct mem_section *ms;
+
+               sparse_index_init(section, nid);
+
+               ms = __nr_to_section(section);
+               if (!ms->section_mem_map)
+                       ms->section_mem_map = SECTION_MARKED_PRESENT;
        }
 }
 
 {
        struct page *map;
        int nid = early_pfn_to_nid(section_nr_to_pfn(pnum));
+       struct mem_section *ms = __nr_to_section(pnum);
 
        map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION);
        if (map)
                return map;
 
        printk(KERN_WARNING "%s: allocation failed\n", __FUNCTION__);
-       mem_section[pnum].section_mem_map = 0;
+       ms->section_mem_map = 0;
        return NULL;
 }
 
                        continue;
 
                map = sparse_early_mem_map_alloc(pnum);
-               if (map)
-                       sparse_init_one_section(&mem_section[pnum], pnum, map);
+               if (!map)
+                       continue;
+               sparse_init_one_section(__nr_to_section(pnum), pnum, map);
        }
 }