From: Chuck Anderson Date: Mon, 23 Jan 2017 06:40:29 +0000 (-0800) Subject: Merge branch topic/uek-4.1/upstream-cherry-picks of git://ca-git.us.oracle.com/linux... X-Git-Tag: v4.1.12-92~16 X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=6bbce8b460ef3285c10a188a0e87e544c059bab9;p=users%2Fjedix%2Flinux-maple.git Merge branch topic/uek-4.1/upstream-cherry-picks of git://ca-git.us.oracle.com/linux-uek into uek/uek-4.1 * topic/uek-4.1/upstream-cherry-picks: (55 commits) userfaultfd: fix SIGBUS resulting from false rwsem wakeups userfaultfd: hugetlbfs: fix add copy_huge_page_from_user for hugetlb userfaultfd support userfaultfd: hugetlbfs: reserve count on error in __mcopy_atomic_hugetlb userfaultfd: hugetlbfs: gup: support VM_FAULT_RETRY userfaultfd: hugetlbfs: userfaultfd_huge_must_wait for hugepmd ranges userfaultfd: hugetlbfs: add userfaultfd_hugetlb test userfaultfd: hugetlbfs: allow registration of ranges containing huge pages userfaultfd: hugetlbfs: add userfaultfd hugetlb hook userfaultfd: hugetlbfs: fix __mcopy_atomic_hugetlb retry/error processing userfaultfd: hugetlbfs: add __mcopy_atomic_hugetlb for huge page UFFDIO_COPY userfaultfd: hugetlbfs: add hugetlb_mcopy_atomic_pte for userfaultfd support userfaultfd: hugetlbfs: add copy_huge_page_from_user for hugetlb userfaultfd support mm/hugetlb: fix huge page reservation leak in private mapping error paths mm/hugetlb: fix huge page reserve accounting for private mappings userfaultfd: don't pin the user memory in userfaultfd_file_create() userfaultfd: don't block on the last VM updates at exit time sparc: add waitfd to 32 bit system call tables userfaultfd: remove kernel header include from uapi header userfaultfd: register uapi generic syscall (aarch64) userfaultfd: selftest: don't error out if pthread_mutex_t isn't identical ... Conflicts: arch/x86/syscalls/syscall_32.tbl arch/x86/syscalls/syscall_64.tbl fs/Makefile include/linux/mm_types.h mm/hugetlb.c --- 6bbce8b460ef3285c10a188a0e87e544c059bab9 diff --cc arch/x86/syscalls/syscall_32.tbl index da49ccb036bdb,dcc18ea75412d..65dc702571314 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@@ -365,6 -365,4 +365,7 @@@ 356 i386 memfd_create sys_memfd_create 357 i386 bpf sys_bpf 358 i386 execveat sys_execveat stub32_execveat + 359 i386 userfaultfd sys_userfaultfd +# This one is a temporary number, designed for no clashes. +# Nothing but DTrace should use it. +473 i386 waitfd sys_waitfd diff --cc arch/x86/syscalls/syscall_64.tbl index afa9c1378c3d8,81c490634db99..5c7cc9fbfc342 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@@ -329,9 -329,7 +329,10 @@@ 320 common kexec_file_load sys_kexec_file_load 321 common bpf sys_bpf 322 64 execveat stub_execveat + 323 common userfaultfd sys_userfaultfd +# This one is a temporary number, designed for no clashes. +# Nothing but DTrace should use it. +473 common waitfd sys_waitfd # # x32-specific system call numbers start at 512 to avoid cache impact diff --cc fs/Makefile index 5401bd793ae91,53e59b212a105..583ac5ff044b7 --- a/fs/Makefile +++ b/fs/Makefile @@@ -27,7 -27,7 +27,8 @@@ obj-$(CONFIG_ANON_INODES) += anon_inode obj-$(CONFIG_SIGNALFD) += signalfd.o obj-$(CONFIG_TIMERFD) += timerfd.o obj-$(CONFIG_EVENTFD) += eventfd.o +obj-$(CONFIG_WAITFD) += waitfd.o + obj-$(CONFIG_USERFAULTFD) += userfaultfd.o obj-$(CONFIG_AIO) += aio.o obj-$(CONFIG_FS_DAX) += dax.o obj-$(CONFIG_FILE_LOCKING) += locks.o diff --cc mm/huge_memory.c index 3e9b9a3a2682b,0028ba2c9cc8a..e5a8cce84a50e --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@@ -827,57 -869,10 +865,51 @@@ int do_huge_pmd_anonymous_page(struct m count_vm_event(THP_FAULT_FALLBACK); return VM_FAULT_FALLBACK; } - if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page, gfp))) { - put_page(page); - count_vm_event(THP_FAULT_FALLBACK); - return VM_FAULT_FALLBACK; - } - - count_vm_event(THP_FAULT_ALLOC); - return 0; + return __do_huge_pmd_anonymous_page(mm, vma, address, pmd, page, gfp, + flags); } +static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, + pmd_t *pmd, unsigned long pfn, pgprot_t prot, bool write) +{ + struct mm_struct *mm = vma->vm_mm; + pmd_t entry; + spinlock_t *ptl; + + ptl = pmd_lock(mm, pmd); + entry = pmd_mkhuge(pfn_pmd(pfn, prot)); + if (write) { + entry = pmd_mkyoung(pmd_mkdirty(entry)); + entry = maybe_pmd_mkwrite(entry, vma); + } + set_pmd_at(mm, addr, pmd, entry); + update_mmu_cache_pmd(vma, addr, pmd); + spin_unlock(ptl); +} + +int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, + pmd_t *pmd, unsigned long pfn, bool write) +{ + pgprot_t pgprot = vma->vm_page_prot; + /* + * If we had pmd_special, we could avoid all these restrictions, + * but we need to be consistent with PTEs and architectures that + * can't support a 'special' bit. + */ + BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))); + BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) == + (VM_PFNMAP|VM_MIXEDMAP)); + BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags)); + BUG_ON((vma->vm_flags & VM_MIXEDMAP) && pfn_valid(pfn)); + + if (addr < vma->vm_start || addr >= vma->vm_end) + return VM_FAULT_SIGBUS; + if (track_pfn_insert(vma, &pgprot, pfn)) + return VM_FAULT_SIGBUS; + insert_pfn_pmd(vma, addr, pmd, pfn, pgprot, write); + return VM_FAULT_NOPAGE; +} + int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr, struct vm_area_struct *vma)