 module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status");
 
-static unsigned int scrub_timeout = NFIT_ARS_TIMEOUT;
-module_param(scrub_timeout, uint, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(scrub_timeout, "Initial scrub timeout in seconds");
-
-/* after three payloads of overflow, it's dead jim */
-static unsigned int scrub_overflow_abort = 3;
-module_param(scrub_overflow_abort, uint, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(scrub_overflow_abort,
-               "Number of times we overflow ARS results before abort");
-
 static bool disable_vendor_specific;
 module_param(disable_vendor_specific, bool, S_IRUGO);
 MODULE_PARM_DESC(disable_vendor_specific,
 
                mutex_lock(&acpi_desc->init_mutex);
                rc = sprintf(buf, "%d%s", acpi_desc->scrub_count,
-                               work_busy(&acpi_desc->work)
+                               work_busy(&acpi_desc->dwork.work)
                                && !acpi_desc->cancel ? "+\n" : "\n");
                mutex_unlock(&acpi_desc->init_mutex);
        }
        memset(&ars_start, 0, sizeof(ars_start));
        ars_start.address = spa->address;
        ars_start.length = spa->length;
-       ars_start.flags = acpi_desc->ars_start_flags;
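+       /* a short ARS asks firmware for results from a previous scrub */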
+       if (test_bit(ARS_SHORT, &nfit_spa->ars_state))
+               ars_start.flags = ND_ARS_RETURN_PREV_DATA;
        if (nfit_spa_type(spa) == NFIT_SPA_PM)
                ars_start.type = ND_ARS_PERSISTENT;
        else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE)
        return cmd_rc;
 }
 
+static void ars_complete(struct acpi_nfit_desc *acpi_desc,
+               struct nfit_spa *nfit_spa)
+{
+       struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
+       struct acpi_nfit_system_address *spa = nfit_spa->spa;
+       struct nd_region *nd_region = nfit_spa->nd_region;
+       struct device *dev;
+
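+       /* ignore results that do not cover this spa range to its end */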
+       if ((ars_status->address >= spa->address && ars_status->address
+                               < spa->address + spa->length)
+                       || (ars_status->address < spa->address)) {
+               /*
+                * Assume that if a scrub starts at an offset from the
+                * start of nfit_spa, we are in the continuation case.
+                *
+                * Otherwise, if the scrub covers the spa range, mark
+                * any pending request complete.
+                */
+               if (ars_status->address + ars_status->length
+                               >= spa->address + spa->length)
+                       /* complete */;
+               else
+                       return;
+       } else
+               return;
+
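+       /* act on a completion only once, and only if a request was pending */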
+       if (test_bit(ARS_DONE, &nfit_spa->ars_state))
+               return;
+
+       if (!test_and_clear_bit(ARS_REQ, &nfit_spa->ars_state))
+               return;
+
+       if (nd_region) {
+               dev = nd_region_dev(nd_region);
+               nvdimm_region_notify(nd_region, NVDIMM_REVALIDATE_POISON);
+       } else
+               dev = acpi_desc->dev;
+
+       dev_dbg(dev, "ARS: range %d %s complete\n", spa->range_index,
+                       test_bit(ARS_SHORT, &nfit_spa->ars_state)
+                       ? "short" : "long");
+       clear_bit(ARS_SHORT, &nfit_spa->ars_state);
+       set_bit(ARS_DONE, &nfit_spa->ars_state);
+}
+
 static int ars_status_process_records(struct acpi_nfit_desc *acpi_desc)
 {
        struct nvdimm_bus *nvdimm_bus = acpi_desc->nvdimm_bus;
                return -ENOMEM;
 
        rc = ars_get_status(acpi_desc);
+
        if (rc < 0 && rc != -ENOSPC)
                return rc;
 
        return 0;
 }
 
-static void acpi_nfit_async_scrub(struct acpi_nfit_desc *acpi_desc,
-               struct nfit_spa *nfit_spa)
+static int ars_register(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa,
+               int *query_rc)
 {
-       struct acpi_nfit_system_address *spa = nfit_spa->spa;
-       unsigned int overflow_retry = scrub_overflow_abort;
-       u64 init_ars_start = 0, init_ars_len = 0;
-       struct device *dev = acpi_desc->dev;
-       unsigned int tmo = scrub_timeout;
-       int rc;
-
-       if (!test_bit(ARS_REQ, &nfit_spa->ars_state) || !nfit_spa->nd_region)
-               return;
-
-       rc = ars_start(acpi_desc, nfit_spa);
-       /*
-        * If we timed out the initial scan we'll still be busy here,
-        * and will wait another timeout before giving up permanently.
-        */
-       if (rc < 0 && rc != -EBUSY)
-               return;
+       int rc = *query_rc;
 
-       do {
-               u64 ars_start, ars_len;
-
-               if (acpi_desc->cancel)
-                       break;
-               rc = acpi_nfit_query_poison(acpi_desc);
-               if (rc == -ENOTTY)
-                       break;
-               if (rc == -EBUSY && !tmo) {
-                       dev_warn(dev, "range %d ars timeout, aborting\n",
-                                       spa->range_index);
-                       break;
-               }
+       set_bit(ARS_REQ, &nfit_spa->ars_state);
+       set_bit(ARS_SHORT, &nfit_spa->ars_state);
 
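+       /*
+        * query_rc is the caller's initial ars_status query; 0 or
+        * -EAGAIN (no prior ARS data) means a new short request can
+        * be started immediately.
+        */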
+       switch (rc) {
+       case 0:
+       case -EAGAIN:
+               rc = ars_start(acpi_desc, nfit_spa);
                if (rc == -EBUSY) {
-                       /*
-                        * Note, entries may be appended to the list
-                        * while the lock is dropped, but the workqueue
-                        * being active prevents entries being deleted /
-                        * freed.
-                        */
-                       mutex_unlock(&acpi_desc->init_mutex);
-                       ssleep(1);
-                       tmo--;
-                       mutex_lock(&acpi_desc->init_mutex);
-                       continue;
-               }
-
-               /* we got some results, but there are more pending... */
-               if (rc == -ENOSPC && overflow_retry--) {
-                       if (!init_ars_len) {
-                               init_ars_len = acpi_desc->ars_status->length;
-                               init_ars_start = acpi_desc->ars_status->address;
-                       }
-                       rc = ars_continue(acpi_desc);
-               }
-
-               if (rc < 0) {
-                       dev_warn(dev, "range %d ars continuation failed\n",
-                                       spa->range_index);
+                       *query_rc = rc;
                        break;
-               }
-
-               if (init_ars_len) {
-                       ars_start = init_ars_start;
-                       ars_len = init_ars_len;
+               } else if (rc == 0) {
+                       rc = acpi_nfit_query_poison(acpi_desc);
                } else {
-                       ars_start = acpi_desc->ars_status->address;
-                       ars_len = acpi_desc->ars_status->length;
+                       set_bit(ARS_FAILED, &nfit_spa->ars_state);
+                       break;
                }
-               dev_dbg(dev, "spa range: %d ars from %#llx + %#llx complete\n",
-                               spa->range_index, ars_start, ars_len);
-               /* notify the region about new poison entries */
-               nvdimm_region_notify(nfit_spa->nd_region,
-                               NVDIMM_REVALIDATE_POISON);
+               if (rc == -EAGAIN)
+                       clear_bit(ARS_SHORT, &nfit_spa->ars_state);
+               else if (rc == 0)
+                       ars_complete(acpi_desc, nfit_spa);
+               break;
+       case -EBUSY:
+       case -ENOSPC:
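+               /* ARS is in flight or results are pending; leave ARS_REQ set */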
                break;
-       } while (1);
+       default:
+               set_bit(ARS_FAILED, &nfit_spa->ars_state);
+               break;
+       }
+
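+       /* a completed short request re-arms ARS_REQ for a long follow-up */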
+       if (test_and_clear_bit(ARS_DONE, &nfit_spa->ars_state))
+               set_bit(ARS_REQ, &nfit_spa->ars_state);
+
+       return acpi_nfit_register_region(acpi_desc, nfit_spa);
 }
 
-static void acpi_nfit_scrub(struct work_struct *work)
+static void ars_complete_all(struct acpi_nfit_desc *acpi_desc)
 {
-       struct device *dev;
-       u64 init_scrub_length = 0;
        struct nfit_spa *nfit_spa;
-       u64 init_scrub_address = 0;
-       bool init_ars_done = false;
-       struct acpi_nfit_desc *acpi_desc;
-       unsigned int tmo = scrub_timeout;
-       unsigned int overflow_retry = scrub_overflow_abort;
-
-       acpi_desc = container_of(work, typeof(*acpi_desc), work);
-       dev = acpi_desc->dev;
-
-       /*
-        * We scrub in 2 phases.  The first phase waits for any platform
-        * firmware initiated scrubs to complete and then we go search for the
-        * affected spa regions to mark them scanned.  In the second phase we
-        * initiate a directed scrub for every range that was not scrubbed in
-        * phase 1. If we're called for a 'rescan', we harmlessly pass through
-        * the first phase, but really only care about running phase 2, where
-        * regions can be notified of new poison.
-        */
 
-       /* process platform firmware initiated scrubs */
- retry:
-       mutex_lock(&acpi_desc->init_mutex);
        list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-               struct nd_cmd_ars_status *ars_status;
-               struct acpi_nfit_system_address *spa;
-               u64 ars_start, ars_len;
-               int rc;
-
-               if (acpi_desc->cancel)
-                       break;
-
-               if (nfit_spa->nd_region)
-                       continue;
-
-               if (init_ars_done) {
-                       /*
-                        * No need to re-query, we're now just
-                        * reconciling all the ranges covered by the
-                        * initial scrub
-                        */
-                       rc = 0;
-               } else
-                       rc = acpi_nfit_query_poison(acpi_desc);
-
-               if (rc == -ENOTTY) {
-                       /* no ars capability, just register spa and move on */
-                       acpi_nfit_register_region(acpi_desc, nfit_spa);
+               if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
                        continue;
-               }
-
-               if (rc == -EBUSY && !tmo) {
-                       /* fallthrough to directed scrub in phase 2 */
-                       dev_warn(dev, "timeout awaiting ars results, continuing...\n");
-                       break;
-               } else if (rc == -EBUSY) {
-                       mutex_unlock(&acpi_desc->init_mutex);
-                       ssleep(1);
-                       tmo--;
-                       goto retry;
-               }
+               ars_complete(acpi_desc, nfit_spa);
+       }
+}
 
-               /* we got some results, but there are more pending... */
-               if (rc == -ENOSPC && overflow_retry--) {
-                       ars_status = acpi_desc->ars_status;
-                       /*
-                        * Record the original scrub range, so that we
-                        * can recall all the ranges impacted by the
-                        * initial scrub.
-                        */
-                       if (!init_scrub_length) {
-                               init_scrub_length = ars_status->length;
-                               init_scrub_address = ars_status->address;
-                       }
-                       rc = ars_continue(acpi_desc);
-                       if (rc == 0) {
-                               mutex_unlock(&acpi_desc->init_mutex);
-                               goto retry;
-                       }
-               }
+static unsigned int __acpi_nfit_scrub(struct acpi_nfit_desc *acpi_desc,
+               int query_rc)
+{
+       unsigned int tmo = acpi_desc->scrub_tmo;
+       struct device *dev = acpi_desc->dev;
+       struct nfit_spa *nfit_spa;
 
-               if (rc < 0) {
-                       /*
-                        * Initial scrub failed, we'll give it one more
-                        * try below...
-                        */
-                       break;
-               }
+       if (acpi_desc->cancel)
+               return 0;
 
-               /* We got some final results, record completed ranges */
-               ars_status = acpi_desc->ars_status;
-               if (init_scrub_length) {
-                       ars_start = init_scrub_address;
-                       ars_len = ars_start + init_scrub_length;
-               } else {
-                       ars_start = ars_status->address;
-                       ars_len = ars_status->length;
-               }
-               spa = nfit_spa->spa;
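+       /* ARS busy: back off exponentially, capped at 30 minutes */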
+       if (query_rc == -EBUSY) {
+               dev_dbg(dev, "ARS: ARS busy\n");
+               return min(30U * 60U, tmo * 2);
+       }
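+       /* results overflowed the buffer: reap them and continue the scrub */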
+       if (query_rc == -ENOSPC) {
+               dev_dbg(dev, "ARS: ARS continue\n");
+               ars_continue(acpi_desc);
+               return 1;
+       }
+       if (query_rc && query_rc != -EAGAIN) {
+               unsigned long long addr, end;
 
-               if (!init_ars_done) {
-                       init_ars_done = true;
-                       dev_dbg(dev, "init scrub %#llx + %#llx complete\n",
-                                       ars_start, ars_len);
-               }
-               if (ars_start <= spa->address && ars_start + ars_len
-                               >= spa->address + spa->length)
-                       acpi_nfit_register_region(acpi_desc, nfit_spa);
+               addr = acpi_desc->ars_status->address;
+               end = addr + acpi_desc->ars_status->length;
+               dev_dbg(dev, "ARS: %llx-%llx failed (%d)\n", addr, end,
+                               query_rc);
        }
 
-       /*
-        * For all the ranges not covered by an initial scrub we still
-        * want to see if there are errors, but it's ok to discover them
-        * asynchronously.
-        */
+       ars_complete_all(acpi_desc);
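+       /* start the next pending request; ranges are scrubbed one at a time */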
        list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-               /*
-                * Flag all the ranges that still need scrubbing, but
-                * register them now to make data available.
-                */
-               if (!nfit_spa->nd_region) {
-                       set_bit(ARS_REQ, &nfit_spa->ars_state);
-                       acpi_nfit_register_region(acpi_desc, nfit_spa);
+               if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
+                       continue;
+               if (test_bit(ARS_REQ, &nfit_spa->ars_state)) {
+                       int rc = ars_start(acpi_desc, nfit_spa);
+
+                       clear_bit(ARS_DONE, &nfit_spa->ars_state);
+                       dev = nd_region_dev(nfit_spa->nd_region);
+                       dev_dbg(dev, "ARS: range %d ARS start (%d)\n",
+                                       nfit_spa->spa->range_index, rc);
+                       if (rc == 0 || rc == -EBUSY)
+                               return 1;
+                       dev_err(dev, "ARS: range %d ARS failed (%d)\n",
+                                       nfit_spa->spa->range_index, rc);
+                       set_bit(ARS_FAILED, &nfit_spa->ars_state);
                }
        }
-       acpi_desc->init_complete = 1;
+       return 0;
+}
 
-       list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
-               acpi_nfit_async_scrub(acpi_desc, nfit_spa);
-       acpi_desc->scrub_count++;
-       acpi_desc->ars_start_flags = 0;
-       if (acpi_desc->scrub_count_state)
-               sysfs_notify_dirent(acpi_desc->scrub_count_state);
+static void acpi_nfit_scrub(struct work_struct *work)
+{
+       struct acpi_nfit_desc *acpi_desc;
+       unsigned int tmo;
+       int query_rc;
+
+       acpi_desc = container_of(work, typeof(*acpi_desc), dwork.work);
+       mutex_lock(&acpi_desc->init_mutex);
+       query_rc = acpi_nfit_query_poison(acpi_desc);
+       tmo = __acpi_nfit_scrub(acpi_desc, query_rc);
+       if (tmo) {
+               queue_delayed_work(nfit_wq, &acpi_desc->dwork, tmo * HZ);
+               acpi_desc->scrub_tmo = tmo;
+       } else {
+               acpi_desc->scrub_count++;
+               if (acpi_desc->scrub_count_state)
+                       sysfs_notify_dirent(acpi_desc->scrub_count_state);
+       }
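+       /* clear the stale ars_status payload before the next pass */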
+       memset(acpi_desc->ars_status, 0, acpi_desc->max_ars);
        mutex_unlock(&acpi_desc->init_mutex);
 }
 
        nfit_spa->max_ars = ars_cap.max_ars_out;
        nfit_spa->clear_err_unit = ars_cap.clear_err_unit;
        acpi_desc->max_ars = max(nfit_spa->max_ars, acpi_desc->max_ars);
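+       /* ARS is supported: clear the failed state and request a scrub */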
+       clear_bit(ARS_FAILED, &nfit_spa->ars_state);
+       set_bit(ARS_REQ, &nfit_spa->ars_state);
 }
 
-
 static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
 {
        struct nfit_spa *nfit_spa;
+       int rc, query_rc;
 
        list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-               int rc, type = nfit_spa_type(nfit_spa->spa);
-
-               /* PMEM and VMEM will be registered by the ARS workqueue */
-               if (type == NFIT_SPA_PM || type == NFIT_SPA_VOLATILE) {
+               set_bit(ARS_FAILED, &nfit_spa->ars_state);
+               switch (nfit_spa_type(nfit_spa->spa)) {
+               case NFIT_SPA_VOLATILE:
+               case NFIT_SPA_PM:
                        acpi_nfit_init_ars(acpi_desc, nfit_spa);
-                       continue;
+                       break;
                }
-               /* BLK apertures belong to BLK region registration below */
-               if (type == NFIT_SPA_BDW)
-                       continue;
-               /* BLK regions don't need to wait for ARS results */
-               rc = acpi_nfit_register_region(acpi_desc, nfit_spa);
-               if (rc)
-                       return rc;
        }
 
-       acpi_desc->ars_start_flags = 0;
-       if (!acpi_desc->cancel)
-               queue_work(nfit_wq, &acpi_desc->work);
+       /*
+        * Reap any results that might be pending before starting new
+        * short requests.
+        */
+       query_rc = acpi_nfit_query_poison(acpi_desc);
+       if (query_rc == 0)
+               ars_complete_all(acpi_desc);
+
+       list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
+               switch (nfit_spa_type(nfit_spa->spa)) {
+               case NFIT_SPA_VOLATILE:
+               case NFIT_SPA_PM:
+                       /* register regions and kick off initial ARS run */
+                       rc = ars_register(acpi_desc, nfit_spa, &query_rc);
+                       if (rc)
+                               return rc;
+                       break;
+               case NFIT_SPA_BDW:
+                       /* nothing to register */
+                       break;
+               case NFIT_SPA_DCR:
+               case NFIT_SPA_VDISK:
+               case NFIT_SPA_VCD:
+               case NFIT_SPA_PDISK:
+               case NFIT_SPA_PCD:
+                       /* register known regions that don't support ARS */
+                       rc = acpi_nfit_register_region(acpi_desc, nfit_spa);
+                       if (rc)
+                               return rc;
+                       break;
+               default:
+                       /* don't register unknown regions */
+                       break;
+               }
+
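+       /* kick the scrub state machine to service the requests staged above */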
+       queue_delayed_work(nfit_wq, &acpi_desc->dwork, 0);
        return 0;
 }
 
 }
 EXPORT_SYMBOL_GPL(acpi_nfit_init);
 
-struct acpi_nfit_flush_work {
-       struct work_struct work;
-       struct completion cmp;
-};
-
-static void flush_probe(struct work_struct *work)
-{
-       struct acpi_nfit_flush_work *flush;
-
-       flush = container_of(work, typeof(*flush), work);
-       complete(&flush->cmp);
-}
-
 static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc)
 {
        struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
        struct device *dev = acpi_desc->dev;
-       struct acpi_nfit_flush_work flush;
-       int rc;
 
-       /* bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
+       /* Bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
        device_lock(dev);
        device_unlock(dev);
 
-       /* bounce the init_mutex to make init_complete valid */
+       /* Bounce the init_mutex to complete initial registration */
        mutex_lock(&acpi_desc->init_mutex);
-       if (acpi_desc->cancel || acpi_desc->init_complete) {
-               mutex_unlock(&acpi_desc->init_mutex);
-               return 0;
-       }
-
-       /*
-        * Scrub work could take 10s of seconds, userspace may give up so we
-        * need to be interruptible while waiting.
-        */
-       INIT_WORK_ONSTACK(&flush.work, flush_probe);
-       init_completion(&flush.cmp);
-       queue_work(nfit_wq, &flush.work);
        mutex_unlock(&acpi_desc->init_mutex);
 
-       rc = wait_for_completion_interruptible(&flush.cmp);
-       cancel_work_sync(&flush.work);
-       return rc;
+       return 0;
 }
 
 static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
         * just needs guarantees that any ars it initiates are not
         * interrupted by any intervening start requests from userspace.
         */
-       if (work_busy(&acpi_desc->work))
+       if (work_busy(&acpi_desc->dwork.work))
                return -EBUSY;
 
        return 0;
 int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags)
 {
        struct device *dev = acpi_desc->dev;
+       int scheduled = 0, busy = 0;
        struct nfit_spa *nfit_spa;
 
-       if (work_busy(&acpi_desc->work))
-               return -EBUSY;
-
        mutex_lock(&acpi_desc->init_mutex);
        if (acpi_desc->cancel) {
                mutex_unlock(&acpi_desc->init_mutex);
                return 0;
        }
 
        list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-               struct acpi_nfit_system_address *spa = nfit_spa->spa;
+               int type = nfit_spa_type(nfit_spa->spa);
 
-               if (nfit_spa_type(spa) != NFIT_SPA_PM)
+               if (type != NFIT_SPA_PM && type != NFIT_SPA_VOLATILE)
+                       continue;
+               if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
                        continue;
 
-               set_bit(ARS_REQ, &nfit_spa->ars_state);
+               if (test_and_set_bit(ARS_REQ, &nfit_spa->ars_state))
+                       busy++;
+               else {
+                       if (test_bit(ARS_SHORT, &flags))
+                               set_bit(ARS_SHORT, &nfit_spa->ars_state);
+                       scheduled++;
+               }
+       }
+       if (scheduled) {
+               queue_delayed_work(nfit_wq, &acpi_desc->dwork, 0);
+               dev_dbg(dev, "ars_scan triggered\n");
        }
-       acpi_desc->ars_start_flags = 0;
-       if (test_bit(ARS_SHORT, &flags))
-               acpi_desc->ars_start_flags |= ND_ARS_RETURN_PREV_DATA;
-       queue_work(nfit_wq, &acpi_desc->work);
-       dev_dbg(dev, "ars_scan triggered\n");
        mutex_unlock(&acpi_desc->init_mutex);
 
-       return 0;
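+       /* distinguish an ARS already in progress from no ARS support at all */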
+       if (scheduled)
+               return 0;
+       if (busy)
+               return -EBUSY;
+       return -ENOTTY;
 }
 
 void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
        INIT_LIST_HEAD(&acpi_desc->dimms);
        INIT_LIST_HEAD(&acpi_desc->list);
        mutex_init(&acpi_desc->init_mutex);
-       INIT_WORK(&acpi_desc->work, acpi_nfit_scrub);
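+       /* start with a 1 second poll; the scrub worker backs off when busy */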
+       acpi_desc->scrub_tmo = 1;
+       INIT_DELAYED_WORK(&acpi_desc->dwork, acpi_nfit_scrub);
 }
 EXPORT_SYMBOL_GPL(acpi_nfit_desc_init);
 
 
        mutex_lock(&acpi_desc->init_mutex);
        acpi_desc->cancel = 1;
+       cancel_delayed_work_sync(&acpi_desc->dwork);
        mutex_unlock(&acpi_desc->init_mutex);
 
        /*