return ret;
 }
 
+/*
+ * Allocate one zeroed page and store it in iommu->cmd_sem.
+ *
+ * Returns 0 on success, or -ENOMEM when no page could be obtained (in
+ * which case iommu->cmd_sem is left NULL).
+ */
+static int __init alloc_cwwb_sem(struct amd_iommu *iommu)
+{
+       void *page = (void *)get_zeroed_page(GFP_KERNEL);
+
+       iommu->cmd_sem = page;
+       if (!page)
+               return -ENOMEM;
+
+       return 0;
+}
+
+/*
+ * Release the page referenced by iommu->cmd_sem, if one was allocated.
+ * The pointer itself is left untouched.
+ */
+static void __init free_cwwb_sem(struct amd_iommu *iommu)
+{
+       if (!iommu->cmd_sem)
+               return;
+
+       free_page((unsigned long)iommu->cmd_sem);
+}
+
 static void iommu_enable_xt(struct amd_iommu *iommu)
 {
 #ifdef CONFIG_IRQ_REMAP
 
 static void __init free_iommu_one(struct amd_iommu *iommu)
 {
+       free_cwwb_sem(iommu);
        free_command_buffer(iommu);
        free_event_buffer(iommu);
        free_ppr_log(iommu);
        int ret;
 
        raw_spin_lock_init(&iommu->lock);
+       iommu->cmd_sem_val = 0;
 
        /* Add IOMMU to internal data structures */
        list_add_tail(&iommu->list, &amd_iommu_list);
        if (!iommu->mmio_base)
                return -ENOMEM;
 
+       if (alloc_cwwb_sem(iommu))
+               return -ENOMEM;
+
        if (alloc_command_buffer(iommu))
                return -ENOMEM;
 
 
  *
  ****************************************************************************/
 
-static int wait_on_sem(volatile u64 *sem)
+static int wait_on_sem(struct amd_iommu *iommu, u64 data)
 {
        int i = 0;
 
-       while (*sem == 0 && i < LOOP_TIMEOUT) {
+       while (*iommu->cmd_sem != data && i < LOOP_TIMEOUT) {
                udelay(1);
                i += 1;
        }
        writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
 }
 
-static void build_completion_wait(struct iommu_cmd *cmd, u64 address)
+/*
+ * Fill @cmd with a CMD_COMPL_WAIT command whose store address is the
+ * physical address of iommu->cmd_sem and whose store value is @data.
+ *
+ * @cmd:   command to initialise; it is zeroed first
+ * @iommu: IOMMU whose cmd_sem location receives the store
+ * @data:  64-bit value the caller later polls for in *iommu->cmd_sem
+ *
+ * The value is split across data[2] (low half) and data[3] (high half) so
+ * that the full 64 bits the waiter compares against are programmed; a plain
+ * assignment to data[2] would silently drop the upper 32 bits.
+ * NOTE(review): assumes struct iommu_cmd has four 32-bit data words - confirm.
+ */
+static void build_completion_wait(struct iommu_cmd *cmd,
+                                 struct amd_iommu *iommu,
+                                 u64 data)
 {
-       u64 paddr = iommu_virt_to_phys((void *)address);
-
-       WARN_ON(address & 0x7ULL);
+       u64 paddr = iommu_virt_to_phys((void *)iommu->cmd_sem);
 
        memset(cmd, 0, sizeof(*cmd));
        cmd->data[0] = lower_32_bits(paddr) | CMD_COMPL_WAIT_STORE_MASK;
        cmd->data[1] = upper_32_bits(paddr);
-       cmd->data[2] = 1;
+       cmd->data[2] = lower_32_bits(data);
+       cmd->data[3] = upper_32_bits(data);
        CMD_SET_TYPE(cmd, CMD_COMPL_WAIT);
 }
 
        struct iommu_cmd cmd;
        unsigned long flags;
        int ret;
+       u64 data;
 
        if (!iommu->need_sync)
                return 0;
 
-
-       build_completion_wait(&cmd, (u64)&iommu->cmd_sem);
-
        raw_spin_lock_irqsave(&iommu->lock, flags);
 
-       iommu->cmd_sem = 0;
+       data = ++iommu->cmd_sem_val;
+       build_completion_wait(&cmd, iommu, data);
 
        ret = __iommu_queue_command_sync(iommu, &cmd, false);
        if (ret)
                goto out_unlock;
 
-       ret = wait_on_sem(&iommu->cmd_sem);
+       ret = wait_on_sem(iommu, data);
 
 out_unlock:
        raw_spin_unlock_irqrestore(&iommu->lock, flags);