return 0;
 }
 
-static int memory_failure_hugetlb(unsigned long pfn, int flags)
+/*
+ * Called from hugetlb code with hugetlb_lock held.
+ *
+ * Return values:
+ *   0             - free hugepage
+ *   1             - in-use hugepage
+ *   2             - not a hugepage
+ *   -EBUSY        - the hugepage is busy (try to retry)
+ *   -EHWPOISON    - the hugepage is already hwpoisoned
+ */
+int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
+{
+       struct page *page = pfn_to_page(pfn);
+       struct page *head = compound_head(page);
+       int ret = 2;    /* fallback to normal page handling */
+       bool count_increased = false;
+
+       if (!PageHeadHuge(head))
+               goto out;
+
+       if (flags & MF_COUNT_INCREASED) {
+               ret = 1;
+               count_increased = true;
+       } else if (HPageFreed(head) || HPageMigratable(head)) {
+               ret = get_page_unless_zero(head);
+               if (ret)
+                       count_increased = true;
+       } else {
+               ret = -EBUSY;
+               goto out;
+       }
+
+       if (TestSetPageHWPoison(head)) {
+               ret = -EHWPOISON;
+               goto out;
+       }
+
+       return ret;
+out:
+       if (count_increased)
+               put_page(head);
+       return ret;
+}
+
+#ifdef CONFIG_HUGETLB_PAGE
+/*
+ * Taking refcount of hugetlb pages needs extra care about race conditions
+ * with basic operations like hugepage allocation/free/demotion.
+ * So some of prechecks for hwpoison (pinning, and testing/setting
+ * PageHWPoison) should be done in single hugetlb_lock range.
+ */
+static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb)
 {
-       struct page *p = pfn_to_page(pfn);
-       struct page *head = compound_head(p);
        int res;
+       struct page *p = pfn_to_page(pfn);
+       struct page *head;
        unsigned long page_flags;
+       bool retry = true;
 
-       if (TestSetPageHWPoison(head)) {
-               pr_err("Memory failure: %#lx: already hardware poisoned\n",
-                      pfn);
-               res = -EHWPOISON;
-               if (flags & MF_ACTION_REQUIRED)
+       *hugetlb = 1;
+retry:
+       res = get_huge_page_for_hwpoison(pfn, flags);
+       if (res == 2) { /* fallback to normal page handling */
+               *hugetlb = 0;
+               return 0;
+       } else if (res == -EHWPOISON) {
+               pr_err("Memory failure: %#lx: already hardware poisoned\n", pfn);
+               if (flags & MF_ACTION_REQUIRED) {
+                       head = compound_head(p);
                        res = kill_accessing_process(current, page_to_pfn(head), flags);
+               }
                return res;
+       } else if (res == -EBUSY) {
+               if (retry) {
+                       retry = false;
+                       goto retry;
+               }
+               action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);
+               return res;
+       }
+
+       head = compound_head(p);
+       lock_page(head);
+
+       if (hwpoison_filter(p)) {
+               ClearPageHWPoison(head);
+               res = -EOPNOTSUPP;
+               goto out;
        }
 
        num_poisoned_pages_inc();
 
-       if (!(flags & MF_COUNT_INCREASED)) {
-               res = get_hwpoison_page(p, flags);
-               if (!res) {
-                       lock_page(head);
-                       if (hwpoison_filter(p)) {
-                               if (TestClearPageHWPoison(head))
-                                       num_poisoned_pages_dec();
-                               unlock_page(head);
-                               return -EOPNOTSUPP;
-                       }
-                       unlock_page(head);
-                       res = MF_FAILED;
-                       if (__page_handle_poison(p)) {
-                               page_ref_inc(p);
-                               res = MF_RECOVERED;
-                       }
-                       action_result(pfn, MF_MSG_FREE_HUGE, res);
-                       return res == MF_RECOVERED ? 0 : -EBUSY;
-               } else if (res < 0) {
-                       action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);
-                       return -EBUSY;
+       /*
+        * Handling free hugepage.  The possible race with hugepage allocation
+        * or demotion can be prevented by PageHWPoison flag.
+        */
+       if (res == 0) {
+               unlock_page(head);
+               res = MF_FAILED;
+               if (__page_handle_poison(p)) {
+                       page_ref_inc(p);
+                       res = MF_RECOVERED;
                }
+               action_result(pfn, MF_MSG_FREE_HUGE, res);
+               return res == MF_RECOVERED ? 0 : -EBUSY;
        }
 
-       lock_page(head);
-
        /*
         * The page could have changed compound pages due to race window.
         * If this happens just bail out.
 
        page_flags = head->flags;
 
-       if (hwpoison_filter(p)) {
-               if (TestClearPageHWPoison(head))
-                       num_poisoned_pages_dec();
-               put_page(p);
-               res = -EOPNOTSUPP;
-               goto out;
-       }
-
        /*
         * TODO: hwpoison for pud-sized hugetlb doesn't work right now, so
         * simply disable it. In order to make it work properly, we need
        unlock_page(head);
        return res;
 }
+#else
+static inline int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb)
+{
+       return 0;
+}
+#endif
 
 static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
                struct dev_pagemap *pgmap)
        int res = 0;
        unsigned long page_flags;
        bool retry = true;
+       int hugetlb = 0;
 
        if (!sysctl_memory_failure_recovery)
                panic("Memory failure on page %lx", pfn);
        }
 
 try_again:
-       if (PageHuge(p)) {
-               res = memory_failure_hugetlb(pfn, flags);
+       res = try_memory_failure_hugetlb(pfn, flags, &hugetlb);
+       if (hugetlb)
                goto unlock_mutex;
-       }
 
        if (TestSetPageHWPoison(p)) {
                pr_err("Memory failure: %#lx: already hardware poisoned\n",