#include <linux/bio.h>
 #include <linux/dma-mapping.h>
 #include <linux/idr.h>
+#include <linux/kthread.h>
 #include <../drivers/ata/ahci.h>
 #include "mtip32xx.h"
 
 };
 #endif
 
-static int mtip_exec_internal_command(struct mtip_port *port,
-                               void *fis,
-                               int fisLen,
-                               dma_addr_t buffer,
-                               int bufLen,
-                               u32 opts,
-                               gfp_t atomic,
-                               unsigned long timeout);
-
 /*
  * This function check_for_surprise_removal is called
  * while card is removed from the system and it will
                         port->mmio + PORT_FIS_ADDR_HI);
        }
 
-       writel(port->command_list_dma & 0xffffffff,
+       writel(port->command_list_dma & 0xFFFFFFFF,
                        port->mmio + PORT_LST_ADDR);
-       writel(port->rxfis_dma & 0xffffffff, port->mmio + PORT_FIS_ADDR);
+       writel(port->rxfis_dma & 0xFFFFFFFF, port->mmio + PORT_FIS_ADDR);
 
        /* Clear SError */
        writel(readl(port->mmio + PORT_SCR_ERR), port->mmio + PORT_SCR_ERR);
                if (atomic_read(&port->commands[tag].active) &&
                   (time_after(jiffies, port->commands[tag].comp_time))) {
                        group = tag >> 5;
-                       bit = tag & 0x1f;
+                       bit = tag & 0x1F;
 
                        command = &port->commands[tag];
                        fis = (struct host_to_dev_fis *) command->command;
 
                        cmdto_cnt++;
                        if (cmdto_cnt == 1)
-                               atomic_inc(&port->dd->eh_active);
+                               set_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags);
 
                        /*
                         * Clear the completed bit. This should prevent
                        "%d commands timed out: restarting port",
                        cmdto_cnt);
                mtip_restart_port(port);
-               atomic_dec(&port->dd->eh_active);
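+               /*
+                * Error handling is done; wake the service thread so it can
+                * issue any deferred commands.
+                */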
+               clear_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags);
+               wake_up_interruptible(&port->svc_wait);
        }
 
        /* Restart the timer */
        del_timer(&port->cmd_timer);
 
        /* Set eh_active */
-       atomic_inc(&dd->eh_active);
+       set_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags);
 
        /* Loop through all the groups */
        for (group = 0; group < dd->slot_groups; group++) {
        }
        print_tags(dd, "TFE tags reissued:", tagaccum);
 
-       /* Decrement eh_active */
-       atomic_dec(&dd->eh_active);
+       /* Clear eh_active and wake the service thread */
+       clear_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags);
+       wake_up_interruptible(&port->svc_wait);
 
        mod_timer(&port->cmd_timer,
                 jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD));
                                        continue;
 
                                command = &port->commands[tag];
-
                                /* make internal callback */
                                if (likely(command->comp_func)) {
                                        command->comp_func(
        struct mtip_port *port = dd->port;
        struct mtip_cmd *cmd = &port->commands[MTIP_TAG_INTERNAL];
 
-       if (port->internal_cmd_in_progress &&
-           cmd != NULL &&
-           !(readl(port->cmd_issue[MTIP_TAG_INTERNAL])
+       if (test_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags) &&
+           (cmd != NULL) && !(readl(port->cmd_issue[MTIP_TAG_INTERNAL])
                & (1 << MTIP_TAG_INTERNAL))) {
                if (cmd->comp_func) {
                        cmd->comp_func(port,
 
        to = jiffies + msecs_to_jiffies(timeout);
        do {
+               if (test_bit(MTIP_FLAG_SVC_THD_ACTIVE_BIT, &port->flags)) {
+                       msleep(20);
+                       continue; /* svc thd is actively issuing commands */
+               }
                /*
                 * Ignore s_active bit 0 of array element 0.
                 * This bit will always be set
                 */
-               active = readl(port->s_active[0]) & 0xfffffffe;
+               active = readl(port->s_active[0]) & 0xFFFFFFFE;
                for (n = 1; n < port->dd->slot_groups; n++)
                        active |= readl(port->s_active[n]);
 
  *
  * @port    Pointer to the port data structure.
  * @fis     Pointer to the FIS that describes the command.
- * @fisLen  Length in WORDS of the FIS.
+ * @fis_len  Length in WORDS of the FIS.
  * @buffer  DMA accessible for command data.
- * @bufLen  Length, in bytes, of the data buffer.
+ * @buf_len  Length, in bytes, of the data buffer.
  * @opts    Command header options, excluding the FIS length
  *             and the number of PRD entries.
  * @timeout Time in ms to wait for the command to complete.
  */
 static int mtip_exec_internal_command(struct mtip_port *port,
                                        void *fis,
-                                       int fisLen,
+                                       int fis_len,
                                        dma_addr_t buffer,
-                                       int bufLen,
+                                       int buf_len,
                                        u32 opts,
                                        gfp_t atomic,
                                        unsigned long timeout)
                        "Internal command already active\n");
                return -EBUSY;
        }
-       port->internal_cmd_in_progress = 1;
+       set_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags);
 
        if (atomic == GFP_KERNEL) {
                /* wait for io to complete if non atomic */
                        dev_warn(&port->dd->pdev->dev,
                                "Failed to quiesce IO\n");
                        release_slot(port, MTIP_TAG_INTERNAL);
-                       port->internal_cmd_in_progress = 0;
+                       clear_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags);
+                       wake_up_interruptible(&port->svc_wait);
                        return -EBUSY;
                }
 
        }
 
        /* Copy the command to the command table */
-       memcpy(int_cmd->command, fis, fisLen*4);
+       memcpy(int_cmd->command, fis, fis_len*4);
 
        /* Populate the SG list */
        int_cmd->command_header->opts =
-                cpu_to_le32(opts | fisLen);
-       if (bufLen) {
+                __force_bit2int cpu_to_le32(opts | fis_len);
+       if (buf_len) {
                command_sg = int_cmd->command + AHCI_CMD_TBL_HDR_SZ;
 
-               command_sg->info = cpu_to_le32((bufLen-1) & 0x3fffff);
-               command_sg->dba = cpu_to_le32(buffer & 0xffffffff);
-               command_sg->dba_upper = cpu_to_le32((buffer >> 16) >> 16);
+               command_sg->info =
+                       __force_bit2int cpu_to_le32((buf_len-1) & 0x3FFFFF);
+               command_sg->dba =
+                       __force_bit2int cpu_to_le32(buffer & 0xFFFFFFFF);
+               command_sg->dba_upper =
+                       __force_bit2int cpu_to_le32((buffer >> 16) >> 16);
 
-               int_cmd->command_header->opts |= cpu_to_le32((1 << 16));
+               int_cmd->command_header->opts |=
+                       __force_bit2int cpu_to_le32((1 << 16));
        }
 
        /* Populate the command header */
                                &wait,
                                msecs_to_jiffies(timeout)) == 0) {
                        dev_err(&port->dd->pdev->dev,
-                               "Internal command did not complete [%d]\n",
-                               atomic);
+                               "Internal command did not complete [%d] "
+                               "within timeout of %lu ms\n",
+                               atomic, timeout);
                        rv = -EAGAIN;
                }
 
        /* Clear the allocated and active bits for the internal command. */
        atomic_set(&int_cmd->active, 0);
        release_slot(port, MTIP_TAG_INTERNAL);
-       port->internal_cmd_in_progress = 0;
+       clear_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags);
+       wake_up_interruptible(&port->svc_wait);
 
        return rv;
 }
        int rv = 0;
        struct host_to_dev_fis fis;
 
-       down_write(&port->dd->internal_sem);
-
        /* Build the FIS. */
        memset(&fis, 0, sizeof(struct host_to_dev_fis));
        fis.type        = 0x27;
        }
 
 out:
-       up_write(&port->dd->internal_sem);
        return rv;
 }
 
        int rv;
        struct host_to_dev_fis  fis;
 
-       down_write(&port->dd->internal_sem);
-
        /* Build the FIS. */
        memset(&fis, 0, sizeof(struct host_to_dev_fis));
        fis.type        = 0x27;
                                        GFP_KERNEL,
                                        15000);
 
-       up_write(&port->dd->internal_sem);
-
        return rv;
 }
 
                         ((u64)sectors) * ATA_SECT_SIZE >> 20);
 
        pci_read_config_word(port->dd->pdev, PCI_REVISION_ID, &revid);
-       switch (revid & 0xff) {
+       switch (revid & 0xFF) {
        case 0x1:
                strlcpy(cbuf, "A0", 3);
                break;
                if (dma_len > 0x400000)
                        dev_err(&dd->pdev->dev,
                                "DMA segment length truncated\n");
-               command_sg->info = cpu_to_le32((dma_len-1) & 0x3fffff);
-#if (BITS_PER_LONG == 64)
-               *((unsigned long *) &command_sg->dba) =
-                        cpu_to_le64(sg_dma_address(sg));
-#else
-               command_sg->dba = cpu_to_le32(sg_dma_address(sg));
-               command_sg->dba_upper   =
-                        cpu_to_le32((sg_dma_address(sg) >> 16) >> 16);
-#endif
+               command_sg->info = __force_bit2int
+                       cpu_to_le32((dma_len-1) & 0x3FFFFF);
+               command_sg->dba = __force_bit2int
+                       cpu_to_le32(sg_dma_address(sg));
+               command_sg->dba_upper = __force_bit2int
+                       cpu_to_le32((sg_dma_address(sg) >> 16) >> 16);
                command_sg++;
                sg++;
        }
        struct host_to_dev_fis  fis;
        struct host_to_dev_fis *reply = (port->rxfis + RX_FIS_D2H_REG);
 
-       /* Lock the internal command semaphore. */
-       down_write(&port->dd->internal_sem);
-
        /* Build the FIS. */
        memset(&fis, 0, sizeof(struct host_to_dev_fis));
        fis.type        = 0x27;
                                 0,
                                 GFP_KERNEL,
                                 MTIP_IOCTL_COMMAND_TIMEOUT_MS) < 0) {
-               up_write(&port->dd->internal_sem);
                return -1;
        }
 
                command[4],
                command[5]);
 
-       up_write(&port->dd->internal_sem);
        return 0;
 }
 
        struct host_to_dev_fis  fis;
        struct host_to_dev_fis *reply = (port->rxfis + RX_FIS_D2H_REG);
 
-       /* Lock the internal command semaphore. */
-       down_write(&port->dd->internal_sem);
-
        /* Build the FIS. */
        memset(&fis, 0, sizeof(struct host_to_dev_fis));
        fis.type                = 0x27;
        fis.sect_count  = command[3];
        if (fis.command == ATA_CMD_SMART) {
                fis.sector      = command[1];
-               fis.cyl_low     = 0x4f;
-               fis.cyl_hi      = 0xc2;
+               fis.cyl_low     = 0x4F;
+               fis.cyl_hi      = 0xC2;
        }
 
        dbg_printk(MTIP_DRV_NAME
                                 GFP_KERNEL,
                                 MTIP_IOCTL_COMMAND_TIMEOUT_MS)
                                 < 0) {
-               up_write(&port->dd->internal_sem);
                return -1;
        }
 
                if (copy_to_user(user_buffer,
                                 port->sector_buffer,
                                 ATA_SECT_SIZE * command[3])) {
-                       up_write(&port->dd->internal_sem);
                        return -EFAULT;
                }
        }
 
-       up_write(&port->dd->internal_sem);
        return 0;
 }
 
 
        /* list of commands that have an implicit sector count of 1 */
        switch (command) {
-       case 0xF1:
-       case 0xF2:
-       case 0xF3:
-       case 0xF4:
-       case 0xF5:
-       case 0xF6:
-       case 0xE4:
-       case 0xE8:
+       case ATA_CMD_SEC_SET_PASS:
+       case ATA_CMD_SEC_UNLOCK:
+       case ATA_CMD_SEC_ERASE_PREP:
+       case ATA_CMD_SEC_ERASE_UNIT:
+       case ATA_CMD_SEC_FREEZE_LOCK:
+       case ATA_CMD_SEC_DISABLE_PASS:
+       case ATA_CMD_PMP_READ:
+       case ATA_CMD_PMP_WRITE:
                rv = 1;
                break;
-       case 0xF9:
-               if (features == 0x03)
+       case ATA_CMD_SET_MAX:
+               if (features == ATA_SET_MAX_UNLOCK)
                        rv = 1;
                break;
-       case 0xB0:
-               if ((features == 0xD0) || (features == 0xD1))
+       case ATA_CMD_SMART:
+               if ((features == ATA_SMART_READ_VALUES) ||
+                               (features == ATA_SMART_READ_THRESHOLDS))
                        rv = 1;
                break;
-       case 0xB1:
-               if ((features == 0xC2) || (features == 0xC3))
+       case ATA_CMD_CONF_OVERLAY:
+               if ((features == ATA_DCO_IDENTIFY) ||
+                               (features == ATA_DCO_SET))
                        rv = 1;
                break;
        }
                goto abort;
        }
 
-       /* Lock the internal command semaphore. */
-       down_write(&dd->internal_sem);
-
        /* Build the FIS. */
        memset(&fis, 0, sizeof(struct host_to_dev_fis));
 
                                dev_warn(&dd->pdev->dev,
                                        "data movement but "
                                        "sect_count is 0\n");
-                                       up_write(&dd->internal_sem);
                                        err = -EINVAL;
                                        goto abort;
                        }
                fis.device);
 
        switch (fis.command) {
-       case 0x92: /* Change timeout for Download Microcode to 60 seconds.*/
+       case ATA_CMD_DOWNLOAD_MICRO:
+               /* Change timeout for Download Microcode to 60 seconds.*/
                timeout = 60000;
                break;
-       case 0xf4: /* Change timeout for Security Erase Unit to 4 minutes.*/
+       case ATA_CMD_SEC_ERASE_UNIT:
+               /* Change timeout for Security Erase Unit to 4 minutes.*/
                timeout = 240000;
                break;
-       case 0xe0: /* Change timeout for standby immediate to 10 seconds.*/
+       case ATA_CMD_STANDBYNOW1:
+               /* Change timeout for standby immediate to 10 seconds.*/
                timeout = 10000;
                break;
-       case 0xf7: /* Change timeout for vendor unique command to 10 secs */
+       case 0xF7:
+       case 0xFA:
+               /* Change timeout for vendor unique command to 10 secs */
                timeout = 10000;
                break;
-       case 0xfa: /* Change timeout for vendor unique command to 10 secs */
+       case ATA_CMD_SMART:
+               /* Change timeout for SMART command to 10 secs */
                timeout = 10000;
                break;
        default:
                                 0,
                                 GFP_KERNEL,
                                 timeout) < 0) {
-               up_write(&dd->internal_sem);
                err = -EIO;
                goto abort;
        }
        }
 
        /* Com rest after secure erase or lowlevel format */
-       if (((fis.command == 0xF4) ||
+       if (((fis.command == ATA_CMD_SEC_ERASE_UNIT) ||
                ((fis.command == 0xFC) &&
                        (fis.features == 0x27 || fis.features == 0x72 ||
                         fis.features == 0x62 || fis.features == 0x26))) &&
                req_task->io_ports[5],
                req_task->io_ports[6]);
 
-       up_write(&dd->internal_sem);
-
        if (taskout) {
                if (copy_to_user(buf + outtotal, outbuf, taskout)) {
                        err = -EFAULT;
                ret = exec_drive_taskfile(dd, (void __user *) arg,
                                                &req_task, outtotal);
 
-               if (copy_to_user((void __user *) arg, &req_task, sizeof(req_task)))
+               if (copy_to_user((void __user *) arg, &req_task,
+                                                       sizeof(req_task)))
                        return -EFAULT;
 
                return ret;
        fis->opts        = 1 << 7;
        fis->command     =
                (dir == READ ? ATA_CMD_FPDMA_READ : ATA_CMD_FPDMA_WRITE);
-       *((unsigned int *) &fis->lba_low) = (start & 0xffffff);
-       *((unsigned int *) &fis->lba_low_ex) = ((start >> 24) & 0xffffff);
+       *((unsigned int *) &fis->lba_low) = (start & 0xFFFFFF);
+       *((unsigned int *) &fis->lba_low_ex) = ((start >> 24) & 0xFFFFFF);
        fis->device      = 1 << 6;
        if (barrier)
                fis->device |= FUA_BIT;
-       fis->features    = nsect & 0xff;
-       fis->features_ex = (nsect >> 8) & 0xff;
+       fis->features    = nsect & 0xFF;
+       fis->features_ex = (nsect >> 8) & 0xFF;
        fis->sect_count  = ((tag << 3) | (tag >> 5));
        fis->sect_cnt_ex = 0;
        fis->control     = 0;
        fill_command_sg(dd, command, nents);
 
        /* Populate the command header */
-       command->command_header->opts = cpu_to_le32(
-                       (nents << 16) | 5 | AHCI_CMD_PREFETCH);
+       command->command_header->opts =
+                       __force_bit2int cpu_to_le32(
+                               (nents << 16) | 5 | AHCI_CMD_PREFETCH);
        command->command_header->byte_count = 0;
 
        /*
        command->async_callback = callback;
 
        /*
-        * Lock used to prevent this command from being issued
-        * if an internal command is in progress.
+        * Do not issue this command if an internal command is in progress
+        * or error handling is active; queue it for the service thread.
         */
-       down_read(&port->dd->internal_sem);
+       if (unlikely(test_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags) ||
+                       test_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags))) {
+               set_bit(tag, port->cmds_to_issue);
+               set_bit(MTIP_FLAG_ISSUE_CMDS_BIT, &port->flags);
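+               /* the service thread issues the command once it is woken */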
+               return;
+       }
 
        /* Issue the command to the hardware */
        mtip_issue_ncq_command(port, tag);
        /* Set the command's timeout value.*/
        port->commands[tag].comp_time = jiffies + msecs_to_jiffies(
                                        MTIP_NCQ_COMMAND_TIMEOUT_MS);
-
-       up_read(&port->dd->internal_sem);
 }
 
 /*
        timeout = jiffies + msecs_to_jiffies(MTIP_FTL_REBUILD_TIMEOUT_MS);
 
        do {
-#ifdef CONFIG_HOTPLUG
                if (mtip_check_surprise_removal(dd->pdev))
                        return -EFAULT;
-#endif
+
                if (mtip_get_identify(dd->port, NULL) < 0)
                        return -EFAULT;
 
        return 0;
 }
 
+/*
+ * Service thread that issues queued (deferred) commands.
+ *
+ * @data Pointer to the driver data structure.
+ *
+ * return value
+ *     0
+ */
+static int mtip_service_thread(void *data)
+{
+       struct driver_data *dd = (struct driver_data *)data;
+       unsigned long slot, slot_start, slot_wrap;
+       unsigned int num_cmd_slots = dd->slot_groups * 32;
+       struct mtip_port *port = dd->port;
+
+       while (1) {
+               /*
+                * Wait until at least one port flag is set and neither an
+                * internal command is in progress nor error handling is
+                * active.
+                */
+               wait_event_interruptible(port->svc_wait, (port->flags) &&
+                       !test_bit(MTIP_FLAG_IC_ACTIVE_BIT, &port->flags) &&
+                       !test_bit(MTIP_FLAG_EH_ACTIVE_BIT, &port->flags));
+
+               if (kthread_should_stop())
+                       break;
+
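+               /*
+                * Issue any commands that were deferred while an internal
+                * command or error handling was active.
+                */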
+               if (test_bit(MTIP_FLAG_ISSUE_CMDS_BIT, &port->flags)) {
+                       set_bit(MTIP_FLAG_SVC_THD_ACTIVE_BIT, &port->flags);
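+                       /* slot 0 is reserved for internal commands */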
+                       slot = 1;
+                       /* slot_start limits the scan to one full pass */
+                       slot_start = num_cmd_slots;
+                       slot_wrap = 0;
+                       while (1) {
+                               slot = find_next_bit(port->cmds_to_issue,
+                                               num_cmd_slots, slot);
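+                               /* after a wrap, stop at the first slot issued */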
+                               if (slot_wrap == 1) {
+                                       if ((slot_start >= slot) ||
+                                               (slot >= num_cmd_slots))
+                                               break;
+                               }
+                               if (unlikely(slot_start == num_cmd_slots))
+                                       slot_start = slot;
+
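+                               /* no bits set above 'slot'; wrap around */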
+                               if (unlikely(slot == num_cmd_slots)) {
+                                       slot = 1;
+                                       slot_wrap = 1;
+                                       continue;
+                               }
+
+                               /* Issue the command to the hardware */
+                               mtip_issue_ncq_command(port, slot);
+
+                               /* Set the command's timeout value.*/
+                               port->commands[slot].comp_time = jiffies +
+                               msecs_to_jiffies(MTIP_NCQ_COMMAND_TIMEOUT_MS);
+
+                               clear_bit(slot, port->cmds_to_issue);
+                       }
+
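+                       /* all deferred commands have been issued */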
+                       clear_bit(MTIP_FLAG_ISSUE_CMDS_BIT, &port->flags);
+                       clear_bit(MTIP_FLAG_SVC_THD_ACTIVE_BIT, &port->flags);
+               }
+       }
+       return 0;
+}
+
 /*
  * Called once for each card.
  *
 
        hba_setup(dd);
 
-       /*
-        * Initialize the internal semaphore
-        * Use a rw semaphore to enable prioritization of
-        * mgmnt ioctl traffic during heavy IO load
-        */
-       init_rwsem(&dd->internal_sem);
-
        tasklet_init(&dd->tasklet, mtip_tasklet, (unsigned long)dd);
 
        dd->port = kzalloc(sizeof(struct mtip_port), GFP_KERNEL);
 
                if (readl(dd->mmio + HOST_CAP) & HOST_CAP_64)
                        dd->port->commands[i].command_header->ctbau =
-                       cpu_to_le32(
+                       __force_bit2int cpu_to_le32(
                        (dd->port->commands[i].command_dma >> 16) >> 16);
-               dd->port->commands[i].command_header->ctba = cpu_to_le32(
-                       dd->port->commands[i].command_dma & 0xffffffff);
+               dd->port->commands[i].command_header->ctba =
+                       __force_bit2int cpu_to_le32(
+                       dd->port->commands[i].command_dma & 0xFFFFFFFF);
 
                /*
                 * If this is not done, a bug is reported by the stock
                                        dd->mmio + HOST_CTL);
 
        init_timer(&dd->port->cmd_timer);
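+       /* wait queue the service thread sleeps on */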
+       init_waitqueue_head(&dd->port->svc_wait);
+
        dd->port->cmd_timer.data = (unsigned long int) dd->port;
        dd->port->cmd_timer.function = mtip_timeout_function;
        mod_timer(&dd->port->cmd_timer,
 
        del_timer_sync(&dd->port->cmd_timer);
 
-       /* Stop the bottom half tasklet. */
-       tasklet_kill(&dd->tasklet);
-
        /* Release the IRQ. */
        devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd);
 
+       /*
+        * Stop the bottom half tasklet; the IRQ has already been freed,
+        * so the tasklet cannot be rescheduled.
+        */
+       tasklet_kill(&dd->tasklet);
+
        /* Free the command/command header memory. */
        dmam_free_coherent(&dd->pdev->dev,
                        HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 2),
 
        switch (cmd) {
        case BLKFLSBUF:
-               return 0;
+               return -ENOTTY;
        default:
                return mtip_hw_ioctl(dd, cmd, arg);
        }
 
        switch (cmd) {
        case BLKFLSBUF:
-               return 0;
+               return -ENOTTY;
        case HDIO_DRIVE_TASKFILE: {
-               struct mtip_compat_ide_task_request_s *compat_req_task;
+               struct mtip_compat_ide_task_request_s __user *compat_req_task;
                ide_task_request_t req_task;
                int compat_tasksize, outtotal, ret;
 
-               compat_tasksize = sizeof(struct mtip_compat_ide_task_request_s);
+               compat_tasksize =
+                       sizeof(struct mtip_compat_ide_task_request_s);
 
                compat_req_task =
                        (struct mtip_compat_ide_task_request_s __user *) arg;
 
                if (copy_from_user(&req_task, (void __user *) arg,
-                               compat_tasksize - (2 * sizeof(compat_long_t))))
+                       compat_tasksize - (2 * sizeof(compat_long_t))))
                        return -EFAULT;
 
                if (get_user(req_task.out_size, &compat_req_task->out_size))
 
        geo->heads = 224;
        geo->sectors = 56;
-#if BITS_PER_LONG == 64
-       geo->cylinders = capacity / (geo->heads * geo->sectors);
-#else
-       do_div(capacity, (geo->heads * geo->sectors));
+       sector_div(capacity, (geo->heads * geo->sectors));
        geo->cylinders = capacity;
-#endif
        return 0;
 }
 
                return;
        }
 
-       if (unlikely(atomic_read(&dd->eh_active))) {
-               bio_endio(bio, -EBUSY);
-               return;
-       }
-
        sg = mtip_hw_get_scatterlist(dd, &tag);
        if (likely(sg != NULL)) {
                blk_queue_bounce(queue, &bio);
                                tag,
                                bio_endio,
                                bio,
-                               bio->bi_rw & REQ_FLUSH,
+                               bio->bi_rw & REQ_FUA,
                                bio_data_dir(bio));
        } else
                bio_io_error(bio);
        sector_t capacity;
        unsigned int index = 0;
        struct kobject *kobj;
+       unsigned char thd_name[16];
 
        /* Initialize the protocol layer. */
        rv = mtip_hw_init(dd);
        blk_queue_max_segments(dd->queue, MTIP_MAX_SG);
        blk_queue_physical_block_size(dd->queue, 4096);
        blk_queue_io_min(dd->queue, 4096);
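+       /* do not advertise REQ_FLUSH/REQ_FUA support to the block layer */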
+       blk_queue_flush(dd->queue, 0);
 
        dd->disk = alloc_disk(MTIP_MAX_MINORS);
        if (dd->disk  == NULL) {
                kobject_put(kobj);
        }
 
+       sprintf(thd_name, "mtip_svc_thd_%02d", index);
+
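+       /* start the service thread that issues deferred commands */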
+       dd->mtip_svc_handler = kthread_run(mtip_service_thread,
+                                               dd, thd_name);
+
+       if (IS_ERR(dd->mtip_svc_handler)) {
+               dev_err(&dd->pdev->dev, "service thread failed to start\n");
+               dd->mtip_svc_handler = NULL;
+               rv = -EFAULT;
+               goto read_capacity_error;
+       }
+
        return rv;
 
 read_capacity_error:
 static int mtip_block_remove(struct driver_data *dd)
 {
        struct kobject *kobj;
+
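+       /* stop the service thread before tearing down the device */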
+       if (dd->mtip_svc_handler) {
+               set_bit(MTIP_FLAG_SVC_THD_SHOULD_STOP_BIT, &dd->port->flags);
+               wake_up_interruptible(&dd->port->svc_wait);
+               kthread_stop(dd->mtip_svc_handler);
+       }
+
        /* Clean up the sysfs attributes managed by the protocol layer. */
        kobj = kobject_get(&disk_to_dev(dd->disk)->kobj);
        if (kobj) {
        atomic_set(&dd->drv_cleanup_done, true);
 
        atomic_set(&dd->resumeflag, false);
-       atomic_set(&dd->eh_active, 0);
 
        /* Attach the private data to this PCI device.  */
        pci_set_drvdata(pdev, dd);
 
        /* Copy the info we may need later into the private data structure. */
        dd->major       = mtip_major;
-       dd->protocol    = ent->driver_data;
        dd->instance    = instance;
        dd->pdev        = pdev;