enum send_info_type {
        WRITE_ICM = 0,
+       GTA_ARG   = 1,
 };
 
 struct postsend_info {
        dr_qp->rq.wqe_cnt = 4;
        dr_qp->sq.pc = 0;
        dr_qp->sq.cc = 0;
+       dr_qp->sq.head = 0;
        dr_qp->sq.wqe_cnt = roundup_pow_of_two(attr->max_send_wr);
 
        MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
        mlx5_write64(ctrl, dr_qp->uar->map + MLX5_BF_OFFSET);
 }
 
-static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
-                            u32 rkey, struct dr_data_seg *data_seg,
-                            u32 opcode, bool notify_hw)
+static void
+dr_rdma_handle_flow_access_arg_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl,
+                                       u32 remote_addr,
+                                       struct dr_data_seg *data_seg,
+                                       int *size)
 {
-       struct mlx5_wqe_raddr_seg *wq_raddr;
-       struct mlx5_wqe_ctrl_seg *wq_ctrl;
-       struct mlx5_wqe_data_seg *wq_dseg;
-       unsigned int size;
-       unsigned int idx;
+       struct mlx5_wqe_header_modify_argument_update_seg *wq_arg_seg;
+       struct mlx5_wqe_flow_update_ctrl_seg *wq_flow_seg;
 
-       size = sizeof(*wq_ctrl) / 16 + sizeof(*wq_dseg) / 16 +
-               sizeof(*wq_raddr) / 16;
+       wq_ctrl->general_id = cpu_to_be32(remote_addr);
+       wq_flow_seg = (void *)(wq_ctrl + 1);
 
-       idx = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1);
+       /* mlx5_wqe_flow_update_ctrl_seg - all reserved */
+       memset(wq_flow_seg, 0, sizeof(*wq_flow_seg));
+       wq_arg_seg = (void *)(wq_flow_seg + 1);
+
+       memcpy(wq_arg_seg->argument_list,
+              (void *)(uintptr_t)data_seg->addr,
+              data_seg->length);
+
+       *size = (sizeof(*wq_ctrl) +      /* WQE ctrl segment */
+                sizeof(*wq_flow_seg) +  /* WQE flow update ctrl seg - reserved */
+                sizeof(*wq_arg_seg)) /  /* WQE hdr modify arg seg - data */
+               MLX5_SEND_WQE_DS;
+}
+
+static void
+dr_rdma_handle_icm_write_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl,
+                                 u64 remote_addr,
+                                 u32 rkey,
+                                 struct dr_data_seg *data_seg,
+                                 unsigned int *size)
+{
+       struct mlx5_wqe_raddr_seg *wq_raddr;
+       struct mlx5_wqe_data_seg *wq_dseg;
 
-       wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx);
-       wq_ctrl->imm = 0;
-       wq_ctrl->fm_ce_se = (data_seg->send_flags) ?
-               MLX5_WQE_CTRL_CQ_UPDATE : 0;
-       wq_ctrl->opmod_idx_opcode = cpu_to_be32(((dr_qp->sq.pc & 0xffff) << 8) |
-                                               opcode);
-       wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->qpn << 8);
        wq_raddr = (void *)(wq_ctrl + 1);
+
        wq_raddr->raddr = cpu_to_be64(remote_addr);
        wq_raddr->rkey = cpu_to_be32(rkey);
        wq_raddr->reserved = 0;
 
        wq_dseg = (void *)(wq_raddr + 1);
+
        wq_dseg->byte_count = cpu_to_be32(data_seg->length);
        wq_dseg->lkey = cpu_to_be32(data_seg->lkey);
        wq_dseg->addr = cpu_to_be64(data_seg->addr);
 
-       dr_qp->sq.wqe_head[idx] = dr_qp->sq.pc++;
+       *size = (sizeof(*wq_ctrl) +    /* WQE ctrl segment */
+                sizeof(*wq_dseg) +    /* WQE data segment */
+                sizeof(*wq_raddr)) /  /* WQE remote addr segment */
+               MLX5_SEND_WQE_DS;
+}
+
+static void dr_set_ctrl_seg(struct mlx5_wqe_ctrl_seg *wq_ctrl,
+                           struct dr_data_seg *data_seg)
+{
+       wq_ctrl->signature = 0;
+       wq_ctrl->rsvd[0] = 0;
+       wq_ctrl->rsvd[1] = 0;
+       wq_ctrl->fm_ce_se = data_seg->send_flags & IB_SEND_SIGNALED ?
+                               MLX5_WQE_CTRL_CQ_UPDATE : 0;
+       wq_ctrl->imm = 0;
+}
+
+static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
+                            u32 rkey, struct dr_data_seg *data_seg,
+                            u32 opcode, bool notify_hw)
+{
+       struct mlx5_wqe_ctrl_seg *wq_ctrl;
+       int opcode_mod = 0;
+       unsigned int size;
+       unsigned int idx;
+
+       idx = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1);
+
+       wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx);
+       dr_set_ctrl_seg(wq_ctrl, data_seg);
+
+       switch (opcode) {
+       case MLX5_OPCODE_RDMA_READ:
+       case MLX5_OPCODE_RDMA_WRITE:
+               dr_rdma_handle_icm_write_segments(wq_ctrl, remote_addr,
+                                                 rkey, data_seg, &size);
+               break;
+       case MLX5_OPCODE_FLOW_TBL_ACCESS:
+               opcode_mod = MLX5_CMD_OP_MOD_UPDATE_HEADER_MODIFY_ARGUMENT;
+               dr_rdma_handle_flow_access_arg_segments(wq_ctrl, remote_addr,
+                                                       data_seg, &size);
+               break;
+       default:
+               WARN(true, "illegal opcode %d", opcode);
+               return;
+       }
+
+       /* --------------------------------------------------------
+        * |opcode_mod (8 bit)|wqe_index (16 bits)| opcod (8 bits)|
+        * --------------------------------------------------------
+        */
+       wq_ctrl->opmod_idx_opcode =
+               cpu_to_be32((opcode_mod << 24) |
+                           ((dr_qp->sq.pc & 0xffff) << 8) |
+                           opcode);
+       wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->qpn << 8);
+
+       dr_qp->sq.pc += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
+       dr_qp->sq.wqe_head[idx] = dr_qp->sq.head++;
 
        if (notify_hw)
                dr_cmd_notify_hw(dr_qp, wq_ctrl);
                                 &send_info->write, MLX5_OPCODE_RDMA_WRITE, false);
                dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
                                 &send_info->read, MLX5_OPCODE_RDMA_READ, true);
+       } else { /* GTA_ARG */
+               dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
+                                &send_info->write, MLX5_OPCODE_FLOW_TBL_ACCESS, true);
        }
+
 }
 
 /**
                } else if (ne == 1) {
                        send_ring->pending_wqe -= send_ring->signal_th;
                }
-       } while (is_drain && send_ring->pending_wqe);
+       } while (ne == 1 ||
+                (is_drain && send_ring->pending_wqe  >= send_ring->signal_th));
 
        return 0;
 }
 
+static void dr_fill_write_args_segs(struct mlx5dr_send_ring *send_ring,
+                                   struct postsend_info *send_info)
+{
+       send_ring->pending_wqe++;
+
+       if (send_ring->pending_wqe % send_ring->signal_th == 0)
+               send_info->write.send_flags |= IB_SEND_SIGNALED;
+       else
+               send_info->write.send_flags = 0;
+}
+
 static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn,
                                   struct mlx5dr_send_ring *send_ring,
                                   struct postsend_info *send_info)
 {
        if (send_info->type == WRITE_ICM)
                dr_fill_write_icm_segs(dmn, send_ring, send_info);
+       else /* args */
+               dr_fill_write_args_segs(send_ring, send_info);
 }
 
 static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
        return 0;
 }
 
+int mlx5dr_send_postsend_args(struct mlx5dr_domain *dmn, u64 arg_id,
+                             u16 num_of_actions, u8 *actions_data)
+{
+       int data_len, iter = 0, cur_sent;
+       u64 addr;
+       int ret;
+
+       addr = (uintptr_t)actions_data;
+       data_len = num_of_actions * DR_MODIFY_ACTION_SIZE;
+
+       do {
+               struct postsend_info send_info = {};
+
+               send_info.type = GTA_ARG;
+               send_info.write.addr = addr;
+               cur_sent = min_t(u32, data_len, DR_ACTION_CACHE_LINE_SIZE);
+               send_info.write.length = cur_sent;
+               send_info.write.lkey = 0;
+               send_info.remote_addr = arg_id + iter;
+
+               ret = dr_postsend_icm_data(dmn, &send_info);
+               if (ret)
+                       goto out;
+
+               iter++;
+               addr += cur_sent;
+               data_len -= cur_sent;
+       } while (data_len > 0);
+
+out:
+       return ret;
+}
+
 static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev,
                                 struct mlx5dr_qp *dr_qp,
                                 int port)