{
        void __iomem *base = host->base;
        bool sbc = (cmd == host->mrq->sbc);
+       bool busy_resp = host->variant->busy_detect &&
+                       (cmd->flags & MMC_RSP_BUSY);
+
+       /* Check if we need to wait for busy completion. */
+       if (host->busy_status && (status & MCI_ST_CARDBUSY))
+               return;
+
+       /* Enable busy completion if needed and supported. */
+       if (!host->busy_status && busy_resp &&
+               !(status & (MCI_CMDCRCFAIL|MCI_CMDTIMEOUT)) &&
+               (readl(base + MMCISTATUS) & MCI_ST_CARDBUSY)) {
+               writel(readl(base + MMCIMASK0) | MCI_ST_BUSYEND,
+                       base + MMCIMASK0);
+               host->busy_status = status & (MCI_CMDSENT|MCI_CMDRESPEND);
+               return;
+       }
+
+       /* At busy completion, mask the IRQ and complete the request. */
+       if (host->busy_status) {
+               writel(readl(base + MMCIMASK0) & ~MCI_ST_BUSYEND,
+                       base + MMCIMASK0);
+               host->busy_status = 0;
+       }
 
        host->cmd = NULL;
 
                        status &= ~MCI_IRQ1MASK;
                }
 
+               /*
+                * We intentionally clear the MCI_ST_CARDBUSY IRQ here (if it's
+                * enabled) since the HW seems to be triggering the IRQ on both
+                * edges while monitoring DAT0 for busy completion.
+                */
                status &= readl(host->base + MMCIMASK0);
                writel(status, host->base + MMCICLEAR);
 
                dev_dbg(mmc_dev(host->mmc), "irq0 (data+cmd) %08x\n", status);
 
                cmd = host->cmd;
-               if (status & (MCI_CMDCRCFAIL|MCI_CMDTIMEOUT|MCI_CMDSENT|
-                             MCI_CMDRESPEND) && cmd)
+               if ((status|host->busy_status) & (MCI_CMDCRCFAIL|MCI_CMDTIMEOUT|
+                       MCI_CMDSENT|MCI_CMDRESPEND) && cmd)
                        mmci_cmd_irq(host, cmd, status);
 
                data = host->data;
                              MCI_DATABLOCKEND) && data)
                        mmci_data_irq(host, data, status);
 
+               /* Don't poll for busy completion in irq context. */
+               if (host->busy_status)
+                       status &= ~MCI_ST_CARDBUSY;
+
                ret = 1;
        } while (status);
 
                goto clk_disable;
        }
 
-       if (variant->busy_detect) {
-               mmci_ops.card_busy = mmci_card_busy;
-               mmci_write_datactrlreg(host, MCI_ST_DPSM_BUSYMODE);
-       }
-
-       mmc->ops = &mmci_ops;
        /*
         * The ARM and ST versions of the block have slightly different
         * clock divider equations which means that the minimum divider
        mmc->caps = plat->capabilities;
        mmc->caps2 = plat->capabilities2;
 
+       if (variant->busy_detect) {
+               mmci_ops.card_busy = mmci_card_busy;
+               mmci_write_datactrlreg(host, MCI_ST_DPSM_BUSYMODE);
+               mmc->caps |= MMC_CAP_WAIT_WHILE_BUSY;
+               mmc->max_busy_timeout = 0;
+       }
+
+       mmc->ops = &mmci_ops;
+
        /* We support these PM capabilities. */
        mmc->pm_caps = MMC_PM_KEEP_POWER;