/* maximum number of VMIDs */
 #define AMDGPU_NUM_VM  16
 
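+/* Maximum number of PTEs the hardware can write with one command.
+ * 0x3FFFF entries of 8 bytes each is 0x1FFFF8 bytes, the per-packet byte
+ * limit previously applied when copying PTEs with sDMA, and it is below the
+ * 0x7FFFF entry limit previously applied in the *_vm_set_pte_pde functions.
+ */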
+#define AMDGPU_VM_MAX_UPDATE_SIZE      0x3FFFF
+
 /* number of entries in page table */
 #define AMDGPU_VM_PTE_COUNT (1 << amdgpu_vm_block_size)
 
 
 
                pde = pd_addr + pt_idx * 8;
                if (((last_pde + 8 * count) != pde) ||
-                   ((last_pt + incr * count) != pt)) {
+                   ((last_pt + incr * count) != pt) ||
+                   (count == AMDGPU_VM_MAX_UPDATE_SIZE)) {
 
                        if (count) {
                                amdgpu_vm_update_pages(&params, last_pde,
                next_pe_start = amdgpu_bo_gpu_offset(pt);
                next_pe_start += (addr & mask) * 8;
 
-               if ((cur_pe_start + 8 * cur_nptes) == next_pe_start) {
+               if ((cur_pe_start + 8 * cur_nptes) == next_pe_start &&
+                   ((cur_nptes + nptes) <= AMDGPU_VM_MAX_UPDATE_SIZE)) {
                        /* The next ptb is consecutive to current ptb.
                         * Don't call amdgpu_vm_update_pages now.
                         * Will update two ptbs together in future.
 
                                 uint64_t pe, uint64_t src,
                                 unsigned count)
 {
-       while (count) {
-               unsigned bytes = count * 8;
-               if (bytes > 0x1FFFF8)
-                       bytes = 0x1FFFF8;
-
-               ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY,
-                       SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
-               ib->ptr[ib->length_dw++] = bytes;
-               ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
-               ib->ptr[ib->length_dw++] = lower_32_bits(src);
-               ib->ptr[ib->length_dw++] = upper_32_bits(src);
-               ib->ptr[ib->length_dw++] = lower_32_bits(pe);
-               ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-
-               pe += bytes;
-               src += bytes;
-               count -= bytes / 8;
-       }
+       unsigned bytes = count * 8;
+
+       ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY,
+               SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
+       ib->ptr[ib->length_dw++] = bytes;
+       ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+       ib->ptr[ib->length_dw++] = lower_32_bits(src);
+       ib->ptr[ib->length_dw++] = upper_32_bits(src);
+       ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+       ib->ptr[ib->length_dw++] = upper_32_bits(pe);
 }
 
 /**
  *
  * Update the page tables using sDMA (CIK).
  */
-static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib,
-                                   uint64_t pe,
+static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
                                    uint64_t addr, unsigned count,
                                    uint32_t incr, uint32_t flags)
 {
-       uint64_t value;
-       unsigned ndw;
-
-       while (count) {
-               ndw = count;
-               if (ndw > 0x7FFFF)
-                       ndw = 0x7FFFF;
-
-               if (flags & AMDGPU_PTE_VALID)
-                       value = addr;
-               else
-                       value = 0;
-
-               /* for physically contiguous pages (vram) */
-               ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
-               ib->ptr[ib->length_dw++] = pe; /* dst addr */
-               ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-               ib->ptr[ib->length_dw++] = flags; /* mask */
-               ib->ptr[ib->length_dw++] = 0;
-               ib->ptr[ib->length_dw++] = value; /* value */
-               ib->ptr[ib->length_dw++] = upper_32_bits(value);
-               ib->ptr[ib->length_dw++] = incr; /* increment size */
-               ib->ptr[ib->length_dw++] = 0;
-               ib->ptr[ib->length_dw++] = ndw; /* number of entries */
-
-               pe += ndw * 8;
-               addr += ndw * incr;
-               count -= ndw;
-       }
+       /* for physically contiguous pages (vram)
+        *
+        * Appends exactly one GENERATE_PTE_PDE packet (10 dwords) to the IB.
+        * count is written to the packet unchanged; this function no longer
+        * splits or clamps the request. addr is written as the value
+        * regardless of whether AMDGPU_PTE_VALID is set in flags.
+        * NOTE(review): presumably callers keep count within
+        * AMDGPU_VM_MAX_UPDATE_SIZE - confirm for every call site.
+        */
+       ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
+       ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
+       ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+       ib->ptr[ib->length_dw++] = flags; /* mask */
+       ib->ptr[ib->length_dw++] = 0;
+       ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
+       ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+       ib->ptr[ib->length_dw++] = incr; /* increment size */
+       ib->ptr[ib->length_dw++] = 0;
+       ib->ptr[ib->length_dw++] = count; /* number of entries, not clamped here */
 }
 
 /**
 
                                  uint64_t pe, uint64_t src,
                                  unsigned count)
 {
-       while (count) {
-               unsigned bytes = count * 8;
-               if (bytes > 0x1FFFF8)
-                       bytes = 0x1FFFF8;
-
-               ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
-                       SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
-               ib->ptr[ib->length_dw++] = bytes;
-               ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
-               ib->ptr[ib->length_dw++] = lower_32_bits(src);
-               ib->ptr[ib->length_dw++] = upper_32_bits(src);
-               ib->ptr[ib->length_dw++] = lower_32_bits(pe);
-               ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-
-               pe += bytes;
-               src += bytes;
-               count -= bytes / 8;
-       }
+       unsigned bytes = count * 8;
+
+       ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+               SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+       ib->ptr[ib->length_dw++] = bytes;
+       ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+       ib->ptr[ib->length_dw++] = lower_32_bits(src);
+       ib->ptr[ib->length_dw++] = upper_32_bits(src);
+       ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+       ib->ptr[ib->length_dw++] = upper_32_bits(pe);
 }
 
 /**
  *
  * Update the page tables using sDMA (CIK).
  */
-static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib,
-                                    uint64_t pe,
+static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
                                     uint64_t addr, unsigned count,
                                     uint32_t incr, uint32_t flags)
 {
-       uint64_t value;
-       unsigned ndw;
-
-       while (count) {
-               ndw = count;
-               if (ndw > 0x7FFFF)
-                       ndw = 0x7FFFF;
-
-               if (flags & AMDGPU_PTE_VALID)
-                       value = addr;
-               else
-                       value = 0;
-
-               /* for physically contiguous pages (vram) */
-               ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
-               ib->ptr[ib->length_dw++] = pe; /* dst addr */
-               ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-               ib->ptr[ib->length_dw++] = flags; /* mask */
-               ib->ptr[ib->length_dw++] = 0;
-               ib->ptr[ib->length_dw++] = value; /* value */
-               ib->ptr[ib->length_dw++] = upper_32_bits(value);
-               ib->ptr[ib->length_dw++] = incr; /* increment size */
-               ib->ptr[ib->length_dw++] = 0;
-               ib->ptr[ib->length_dw++] = ndw; /* number of entries */
-
-               pe += ndw * 8;
-               addr += ndw * incr;
-               count -= ndw;
-       }
+       /* for physically contiguous pages (vram)
+        *
+        * Appends exactly one GEN_PTEPDE packet (10 dwords) to the IB.
+        * count is written to the packet unchanged; this function no longer
+        * splits or clamps the request. addr is written as the value
+        * regardless of whether AMDGPU_PTE_VALID is set in flags.
+        * NOTE(review): presumably callers keep count within
+        * AMDGPU_VM_MAX_UPDATE_SIZE - confirm for every call site.
+        */
+       ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
+       ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
+       ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+       ib->ptr[ib->length_dw++] = flags; /* mask */
+       ib->ptr[ib->length_dw++] = 0;
+       ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
+       ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+       ib->ptr[ib->length_dw++] = incr; /* increment size */
+       ib->ptr[ib->length_dw++] = 0;
+       ib->ptr[ib->length_dw++] = count; /* number of entries, not clamped here */
 }
 
 /**
 
                                  uint64_t pe, uint64_t src,
                                  unsigned count)
 {
-       while (count) {
-               unsigned bytes = count * 8;
-               if (bytes > 0x1FFFF8)
-                       bytes = 0x1FFFF8;
-
-               ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
-                       SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
-               ib->ptr[ib->length_dw++] = bytes;
-               ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
-               ib->ptr[ib->length_dw++] = lower_32_bits(src);
-               ib->ptr[ib->length_dw++] = upper_32_bits(src);
-               ib->ptr[ib->length_dw++] = lower_32_bits(pe);
-               ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-
-               pe += bytes;
-               src += bytes;
-               count -= bytes / 8;
-       }
+       unsigned bytes = count * 8;
+
+       ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+               SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+       ib->ptr[ib->length_dw++] = bytes;
+       ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+       ib->ptr[ib->length_dw++] = lower_32_bits(src);
+       ib->ptr[ib->length_dw++] = upper_32_bits(src);
+       ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+       ib->ptr[ib->length_dw++] = upper_32_bits(pe);
 }
 
 /**
  *
  * Update the page tables using sDMA (CIK).
  */
-static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib,
-                                    uint64_t pe,
+static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
                                     uint64_t addr, unsigned count,
                                     uint32_t incr, uint32_t flags)
 {
-       uint64_t value;
-       unsigned ndw;
-
-       while (count) {
-               ndw = count;
-               if (ndw > 0x7FFFF)
-                       ndw = 0x7FFFF;
-
-               if (flags & AMDGPU_PTE_VALID)
-                       value = addr;
-               else
-                       value = 0;
-
-               /* for physically contiguous pages (vram) */
-               ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
-               ib->ptr[ib->length_dw++] = pe; /* dst addr */
-               ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-               ib->ptr[ib->length_dw++] = flags; /* mask */
-               ib->ptr[ib->length_dw++] = 0;
-               ib->ptr[ib->length_dw++] = value; /* value */
-               ib->ptr[ib->length_dw++] = upper_32_bits(value);
-               ib->ptr[ib->length_dw++] = incr; /* increment size */
-               ib->ptr[ib->length_dw++] = 0;
-               ib->ptr[ib->length_dw++] = ndw; /* number of entries */
-
-               pe += ndw * 8;
-               addr += ndw * incr;
-               count -= ndw;
-       }
+       /* for physically contiguous pages (vram)
+        *
+        * Appends exactly one GEN_PTEPDE packet (10 dwords) to the IB.
+        * count is written to the packet unchanged; this function no longer
+        * splits or clamps the request. addr is written as the value
+        * regardless of whether AMDGPU_PTE_VALID is set in flags.
+        * NOTE(review): presumably callers keep count within
+        * AMDGPU_VM_MAX_UPDATE_SIZE - confirm for every call site.
+        */
+       ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
+       ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
+       ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+       ib->ptr[ib->length_dw++] = flags; /* mask */
+       ib->ptr[ib->length_dw++] = 0;
+       ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
+       ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+       ib->ptr[ib->length_dw++] = incr; /* increment size */
+       ib->ptr[ib->length_dw++] = 0;
+       ib->ptr[ib->length_dw++] = count; /* number of entries, not clamped here */
 }
 
 /**