#include "cmd.h"
 
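+/* Outcomes of a single CQ poll in mlx5vf_cq_poll_one() */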
+enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };
+
 static int mlx5vf_cmd_get_vhca_id(struct mlx5_core_dev *mdev, u16 function_id,
                                  u16 *vhca_id);
 static void
                VFIO_MIGRATION_STOP_COPY |
                VFIO_MIGRATION_P2P;
        mvdev->core_device.vdev.mig_ops = mig_ops;
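+       /* Completed by mlx5vf_cq_complete() when a tracker CQE arrives */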
+       init_completion(&mvdev->tracker_comp);
 
 end:
        mlx5_vf_put_core_dev(mvdev->mdev);
        return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
 }
 
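+/*
+ * Modify the page-tracker object: narrow reporting to [iova, iova + length)
+ * and move the tracker to @tracker_state (e.g. REPORTING).
+ */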
+static int mlx5vf_cmd_modify_tracker(struct mlx5_core_dev *mdev,
+                                    u32 tracker_id, unsigned long iova,
+                                    unsigned long length, u32 tracker_state)
+{
+       u32 in[MLX5_ST_SZ_DW(modify_page_track_obj_in)] = {};
+       u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
+       void *obj_context;
+       void *cmd_hdr;
+
+       cmd_hdr = MLX5_ADDR_OF(modify_page_track_obj_in, in, general_obj_in_cmd_hdr);
+       MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
+       MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_PAGE_TRACK);
+       MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, tracker_id);
+
+       obj_context = MLX5_ADDR_OF(modify_page_track_obj_in, in, obj_context);
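+       /* modify_field_select picks which object fields the FW updates */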
+       MLX5_SET64(page_track, obj_context, modify_field_select, 0x3);
+       MLX5_SET64(page_track, obj_context, range_start_address, iova);
+       MLX5_SET64(page_track, obj_context, length, length);
+       MLX5_SET(page_track, obj_context, state, tracker_state);
+
+       return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
 static int alloc_cq_frag_buf(struct mlx5_core_dev *mdev,
                             struct mlx5_vhca_cq_buf *buf, int nent,
                             int cqe_size)
        mlx5_db_free(mdev, &cq->db);
 }
 
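+/*
+ * CQ completion callback, run from the device's EQ handler; wakes the
+ * thread sleeping in mlx5vf_tracker_read_and_clear().
+ */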
+static void mlx5vf_cq_complete(struct mlx5_core_cq *mcq,
+                              struct mlx5_eqe *eqe)
+{
+       struct mlx5vf_pci_core_device *mvdev =
+               container_of(mcq, struct mlx5vf_pci_core_device,
+                            tracker.cq.mcq);
+
+       complete(&mvdev->tracker_comp);
+}
+
 static int mlx5vf_create_cq(struct mlx5_core_dev *mdev,
                            struct mlx5_vhca_page_tracker *tracker,
                            size_t ncqe)
        MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma);
        pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
        mlx5_fill_page_frag_array(&cq->buf.frag_buf, pas);
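+       /* Route completion events for this CQ to the tracker handler */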
+       cq->mcq.comp = mlx5vf_cq_complete;
        err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out));
        if (err)
                goto err_vec;
 
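+       /* Arm the CQ so the first completion generates an event */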
+       mlx5_cq_arm(&cq->mcq, MLX5_CQ_DB_REQ_NOT, tracker->uar->map,
+                   cq->mcq.cons_index);
        kvfree(in);
        return 0;
 
        mlx5vf_state_mutex_unlock(mvdev);
        return err;
 }
+
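+/*
+ * Translate one report message in the receive buffer page at @index into
+ * iova_bitmap_set() calls on the caller's dirty bitmap.
+ */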
+static void
+set_report_output(u32 size, int index, struct mlx5_vhca_qp *qp,
+                 struct iova_bitmap *dirty)
+{
+       u32 entry_size = MLX5_ST_SZ_BYTES(page_track_report_entry);
+       u32 nent = size / entry_size;
+       struct page *page;
+       u64 addr;
+       u64 *buf;
+       int i;
+
+       if (WARN_ON(index >= qp->recv_buf.npages ||
+                   (nent > qp->max_msg_size / entry_size)))
+               return;
+
+       page = qp->recv_buf.page_list[index];
+       buf = kmap_local_page(page);
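+       /* Each entry holds a dirty IOVA split into low/high 32-bit fields */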
+       for (i = 0; i < nent; i++) {
+               addr = MLX5_GET(page_track_report_entry, buf + i,
+                               dirty_address_low);
+               addr |= (u64)MLX5_GET(page_track_report_entry, buf + i,
+                                     dirty_address_high) << 32;
+               iova_bitmap_set(dirty, addr, qp->tracked_page_size);
+       }
+       kunmap_local(buf);
+}
+
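+/*
+ * Handle a receive completion: the tracker status travels in the top
+ * nibble of the immediate data, and the payload (if any) is a dirty-page
+ * report. Repost the receive buffer once it has been consumed.
+ */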
+static void
+mlx5vf_rq_cqe(struct mlx5_vhca_qp *qp, struct mlx5_cqe64 *cqe,
+             struct iova_bitmap *dirty, int *tracker_status)
+{
+       u32 size;
+       int ix;
+
+       qp->rq.cc++;
+       *tracker_status = be32_to_cpu(cqe->immediate) >> 28;
+       size = be32_to_cpu(cqe->byte_cnt);
+       ix = be16_to_cpu(cqe->wqe_counter) & (qp->rq.wqe_cnt - 1);
+
+       /* a zero-length CQE carries no report data and is only valid once
+        * the tracker has left the REPORTING state
+        */
+       WARN_ON(!size && *tracker_status == MLX5_PAGE_TRACK_STATE_REPORTING);
+       if (size)
+               set_report_output(size, ix, qp, dirty);
+
+       qp->recv_buf.next_rq_offset = ix * qp->max_msg_size;
+       mlx5vf_post_recv(qp);
+}
+
+static void *get_cqe(struct mlx5_vhca_cq *cq, int n)
+{
+       return mlx5_frag_buf_get_wqe(&cq->buf.fbc, n);
+}
+
+static struct mlx5_cqe64 *get_sw_cqe(struct mlx5_vhca_cq *cq, int n)
+{
+       void *cqe = get_cqe(cq, n & (cq->ncqe - 1));
+       struct mlx5_cqe64 *cqe64;
+
+       cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
+
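+       /* Valid only when the owner bit matches the consumer's current pass */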
+       if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
+           !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ncqe)))) {
+               return cqe64;
+       } else {
+               return NULL;
+       }
+}
+
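+/*
+ * Poll a single CQE: CQ_OK when a report completion was consumed, CQ_EMPTY
+ * when none is pending, CQ_POLL_ERR on any unexpected opcode.
+ */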
+static int
+mlx5vf_cq_poll_one(struct mlx5_vhca_cq *cq, struct mlx5_vhca_qp *qp,
+                  struct iova_bitmap *dirty, int *tracker_status)
+{
+       struct mlx5_cqe64 *cqe;
+       u8 opcode;
+
+       cqe = get_sw_cqe(cq, cq->mcq.cons_index);
+       if (!cqe)
+               return CQ_EMPTY;
+
+       ++cq->mcq.cons_index;
+       /*
+        * Make sure we read CQ entry contents after we've checked the
+        * ownership bit.
+        */
+       rmb();
+       opcode = get_cqe_opcode(cqe);
+       switch (opcode) {
+       case MLX5_CQE_RESP_SEND_IMM:
+               mlx5vf_rq_cqe(qp, cqe, dirty, tracker_status);
+               return CQ_OK;
+       default:
+               return CQ_POLL_ERR;
+       }
+}
+
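+/*
+ * Switch the tracker to REPORTING for the given range, then drain report
+ * CQEs into @dirty until the device moves the tracker out of that state.
+ */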
+int mlx5vf_tracker_read_and_clear(struct vfio_device *vdev, unsigned long iova,
+                                 unsigned long length,
+                                 struct iova_bitmap *dirty)
+{
+       struct mlx5vf_pci_core_device *mvdev = container_of(
+               vdev, struct mlx5vf_pci_core_device, core_device.vdev);
+       struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker;
+       struct mlx5_vhca_cq *cq = &tracker->cq;
+       struct mlx5_core_dev *mdev;
+       int poll_err, err;
+
+       mutex_lock(&mvdev->state_mutex);
+       if (!mvdev->log_active) {
+               err = -EINVAL;
+               goto end;
+       }
+
+       if (mvdev->mdev_detach) {
+               err = -ENOTCONN;
+               goto end;
+       }
+
+       mdev = mvdev->mdev;
+       err = mlx5vf_cmd_modify_tracker(mdev, tracker->id, iova, length,
+                                       MLX5_PAGE_TRACK_STATE_REPORTING);
+       if (err)
+               goto end;
+
+       tracker->status = MLX5_PAGE_TRACK_STATE_REPORTING;
+       while (tracker->status == MLX5_PAGE_TRACK_STATE_REPORTING) {
+               poll_err = mlx5vf_cq_poll_one(cq, tracker->host_qp, dirty,
+                                             &tracker->status);
+               if (poll_err == CQ_EMPTY) {
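+                       /*
+                        * Re-arm and poll again to close the race of a CQE
+                        * landing after the miss but before the arm.
+                        */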
+                       mlx5_cq_arm(&cq->mcq, MLX5_CQ_DB_REQ_NOT, tracker->uar->map,
+                                   cq->mcq.cons_index);
+                       poll_err = mlx5vf_cq_poll_one(cq, tracker->host_qp,
+                                                     dirty, &tracker->status);
+                       if (poll_err == CQ_EMPTY) {
+                               wait_for_completion(&mvdev->tracker_comp);
+                               continue;
+                       }
+               }
+               if (poll_err == CQ_POLL_ERR) {
+                       err = -EIO;
+                       goto end;
+               }
+               mlx5_cq_set_ci(&cq->mcq);
+       }
+
+       if (tracker->status == MLX5_PAGE_TRACK_STATE_ERROR)
+               err = -EIO;
+
+end:
+       mlx5vf_state_mutex_unlock(mvdev);
+       return err;
+}