Testing with a threaded version of mmap_bench which allocate 1G chunks and
with large number of threads we find:
without patch
    32.72%  mmap_bench  [kernel.vmlinux]            [k] do_raw_spin_lock
            |
            ---do_raw_spin_lock
               |
                --32.68%--0
                          |
                          |--15.82%--pte_fragment_alloc
                          |          |
                          |           --15.79%--do_huge_pmd_anonymous_page
                          |                     __handle_mm_fault
                          |                     handle_mm_fault
                          |                     __do_page_fault
                          |                     handle_page_fault
                          |                     test_mmap
                          |                     test_mmap
                          |                     start_thread
                          |                     __clone
                          |
                          |--14.95%--do_huge_pmd_anonymous_page
                          |          __handle_mm_fault
                          |          handle_mm_fault
                          |          __do_page_fault
                          |          handle_page_fault
                          |          test_mmap
                          |          test_mmap
                          |          start_thread
                          |          __clone
                          |
with patch
    12.89%  mmap_bench  [kernel.vmlinux]            [k] do_raw_spin_lock
            |
            ---do_raw_spin_lock
               |
                --12.83%--0
                          |
                          |--3.21%--pagevec_lru_move_fn
                          |          __lru_cache_add
                          |          |
                          |           --2.74%--do_huge_pmd_anonymous_page
                          |                     __handle_mm_fault
                          |                     handle_mm_fault
                          |                     __do_page_fault
                          |                     handle_page_fault
                          |                     test_mmap
                          |                     test_mmap
                          |                     start_thread
                          |                     __clone
                          |
                          |--3.11%--do_huge_pmd_anonymous_page
                          |          __handle_mm_fault
                          |          handle_mm_fault
                          |          __do_page_fault
                          |          handle_page_fault
                          |          test_mmap
                          |          test_mmap
                          |          start_thread
                          |          __clone
.....
                          |
                           --0.55%--pte_fragment_alloc
                                     |
                                      --0.55%--do_huge_pmd_anonymous_page
                                                __handle_mm_fault
                                                handle_mm_fault
                                                __do_page_fault
                                                handle_page_fault
                                                test_mmap
                                                test_mmap
                                                start_thread
                                                __clone
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
        def_bool y
        depends on PPC_STD_MMU && PPC32
 
+config ARCH_ENABLE_SPLIT_PMD_PTLOCK
+       def_bool y
+       depends on PPC_BOOK3S_64
+
 config PPC_RADIX_MMU
        bool "Radix MMU Support"
        depends on PPC_BOOK3S_64