break;
        }
+       phba->fcp_embed_io = 0; /* SLI4 FC support only */
 
        rc = lpfc_sli_config_port(phba, mode);
 
 
        mqe = &mboxq->u.mqe;
        phba->sli_rev = bf_get(lpfc_mbx_rd_rev_sli_lvl, &mqe->un.read_rev);
-       if (bf_get(lpfc_mbx_rd_rev_fcoe, &mqe->un.read_rev))
+       if (bf_get(lpfc_mbx_rd_rev_fcoe, &mqe->un.read_rev)) {
                phba->hba_flag |= HBA_FCOE_MODE;
-       else
+               phba->fcp_embed_io = 0; /* SLI4 FC support only */
+       } else {
                phba->hba_flag &= ~HBA_FCOE_MODE;
+       }
 
        if (bf_get(lpfc_mbx_rd_rev_cee_ver, &mqe->un.read_rev) ==
                LPFC_DCBX_CEE_MODE)
        else
                command_type = ELS_COMMAND_NON_FIP;
 
+       if (phba->fcp_embed_io)
+               memset(wqe, 0, sizeof(union lpfc_wqe128));
        /* Some of the fields are in the right position already */
        memcpy(wqe, &iocbq->iocb, sizeof(union lpfc_wqe));
-       abort_tag = (uint32_t) iocbq->iotag;
-       xritag = iocbq->sli4_xritag;
        wqe->generic.wqe_com.word7 = 0; /* The ct field has moved so reset */
        wqe->generic.wqe_com.word10 = 0;
+
+       abort_tag = (uint32_t) iocbq->iotag;
+       xritag = iocbq->sli4_xritag;
        /* words0-2 bpl convert bde */
        if (iocbq->iocb.un.genreq64.bdl.bdeFlags == BUFF_TYPE_BLP_64) {
                numBdes = iocbq->iocb.un.genreq64.bdl.bdeSize /
                       iocbq->iocb.ulpFCP2Rcvy);
                bf_set(wqe_lnk, &wqe->fcp_iwrite.wqe_com, iocbq->iocb.ulpXS);
                /* Always open the exchange */
-               bf_set(wqe_xc, &wqe->fcp_iwrite.wqe_com, 0);
                bf_set(wqe_iod, &wqe->fcp_iwrite.wqe_com, LPFC_WQE_IOD_WRITE);
                bf_set(wqe_lenloc, &wqe->fcp_iwrite.wqe_com,
                       LPFC_WQE_LENLOC_WORD4);
-               bf_set(wqe_ebde_cnt, &wqe->fcp_iwrite.wqe_com, 0);
                bf_set(wqe_pu, &wqe->fcp_iwrite.wqe_com, iocbq->iocb.ulpPU);
                bf_set(wqe_dbde, &wqe->fcp_iwrite.wqe_com, 1);
                if (iocbq->iocb_flag & LPFC_IO_OAS) {
                                       (phba->cfg_XLanePriority << 1));
                        }
                }
+               /* Note, word 10 is already initialized to 0 */
+
+               if (phba->fcp_embed_io) {
+                       struct lpfc_scsi_buf *lpfc_cmd;
+                       struct sli4_sge *sgl;
+                       union lpfc_wqe128 *wqe128;
+                       struct fcp_cmnd *fcp_cmnd;
+                       uint32_t *ptr;
+
+                       /* 128 byte wqe support here */
+                       wqe128 = (union lpfc_wqe128 *)wqe;
+
+                       lpfc_cmd = iocbq->context1;
+                       sgl = (struct sli4_sge *)lpfc_cmd->fcp_bpl;
+                       fcp_cmnd = lpfc_cmd->fcp_cmnd;
+
+                       /* Word 0-2 - FCP_CMND */
+                       wqe128->generic.bde.tus.f.bdeFlags =
+                               BUFF_TYPE_BDE_IMMED;
+                       wqe128->generic.bde.tus.f.bdeSize = sgl->sge_len;
+                       wqe128->generic.bde.addrHigh = 0;
+                       wqe128->generic.bde.addrLow =  88;  /* Word 22 */
+
+                       bf_set(wqe_wqes, &wqe128->fcp_iwrite.wqe_com, 1);
+
+                       /* Words 22-29 - FCP_CMND payload */
+                       ptr = &wqe128->words[22];
+                       memcpy(ptr, fcp_cmnd, sizeof(struct fcp_cmnd));
+               }
                break;
        case CMD_FCP_IREAD64_CR:
                /* word3 iocb=iotag wqe=payload_offset_len */
                       iocbq->iocb.ulpFCP2Rcvy);
                bf_set(wqe_lnk, &wqe->fcp_iread.wqe_com, iocbq->iocb.ulpXS);
                /* Always open the exchange */
-               bf_set(wqe_xc, &wqe->fcp_iread.wqe_com, 0);
                bf_set(wqe_iod, &wqe->fcp_iread.wqe_com, LPFC_WQE_IOD_READ);
                bf_set(wqe_lenloc, &wqe->fcp_iread.wqe_com,
                       LPFC_WQE_LENLOC_WORD4);
-               bf_set(wqe_ebde_cnt, &wqe->fcp_iread.wqe_com, 0);
                bf_set(wqe_pu, &wqe->fcp_iread.wqe_com, iocbq->iocb.ulpPU);
                bf_set(wqe_dbde, &wqe->fcp_iread.wqe_com, 1);
                if (iocbq->iocb_flag & LPFC_IO_OAS) {
                                       (phba->cfg_XLanePriority << 1));
                        }
                }
+               /* Note, word 10 is already initialized to 0 */
+
+               if (phba->fcp_embed_io) {
+                       struct lpfc_scsi_buf *lpfc_cmd;
+                       struct sli4_sge *sgl;
+                       union lpfc_wqe128 *wqe128;
+                       struct fcp_cmnd *fcp_cmnd;
+                       uint32_t *ptr;
+
+                       /* 128 byte wqe support here */
+                       wqe128 = (union lpfc_wqe128 *)wqe;
+
+                       lpfc_cmd = iocbq->context1;
+                       sgl = (struct sli4_sge *)lpfc_cmd->fcp_bpl;
+                       fcp_cmnd = lpfc_cmd->fcp_cmnd;
+
+                       /* Word 0-2 - FCP_CMND */
+                       wqe128->generic.bde.tus.f.bdeFlags =
+                               BUFF_TYPE_BDE_IMMED;
+                       wqe128->generic.bde.tus.f.bdeSize = sgl->sge_len;
+                       wqe128->generic.bde.addrHigh = 0;
+                       wqe128->generic.bde.addrLow =  88;  /* Word 22 */
+
+                       bf_set(wqe_wqes, &wqe128->fcp_iread.wqe_com, 1);
+
+                       /* Words 22-29 - FCP_CMND payload */
+                       ptr = &wqe128->words[22];
+                       memcpy(ptr, fcp_cmnd, sizeof(struct fcp_cmnd));
+               }
                break;
        case CMD_FCP_ICMND64_CR:
                /* word3 iocb=iotag wqe=payload_offset_len */
                /* word3 iocb=IO_TAG wqe=reserved */
                bf_set(wqe_pu, &wqe->fcp_icmd.wqe_com, 0);
                /* Always open the exchange */
-               bf_set(wqe_xc, &wqe->fcp_icmd.wqe_com, 0);
                bf_set(wqe_dbde, &wqe->fcp_icmd.wqe_com, 1);
                bf_set(wqe_iod, &wqe->fcp_icmd.wqe_com, LPFC_WQE_IOD_WRITE);
                bf_set(wqe_qosd, &wqe->fcp_icmd.wqe_com, 1);
                bf_set(wqe_lenloc, &wqe->fcp_icmd.wqe_com,
                       LPFC_WQE_LENLOC_NONE);
-               bf_set(wqe_ebde_cnt, &wqe->fcp_icmd.wqe_com, 0);
                bf_set(wqe_erp, &wqe->fcp_icmd.wqe_com,
                       iocbq->iocb.ulpFCP2Rcvy);
                if (iocbq->iocb_flag & LPFC_IO_OAS) {
                                       (phba->cfg_XLanePriority << 1));
                        }
                }
+               /* Note, word 10 is already initialized to 0 */
+
+               if (phba->fcp_embed_io) {
+                       struct lpfc_scsi_buf *lpfc_cmd;
+                       struct sli4_sge *sgl;
+                       union lpfc_wqe128 *wqe128;
+                       struct fcp_cmnd *fcp_cmnd;
+                       uint32_t *ptr;
+
+                       /* 128 byte wqe support here */
+                       wqe128 = (union lpfc_wqe128 *)wqe;
+
+                       lpfc_cmd = iocbq->context1;
+                       sgl = (struct sli4_sge *)lpfc_cmd->fcp_bpl;
+                       fcp_cmnd = lpfc_cmd->fcp_cmnd;
+
+                       /* Word 0-2 - FCP_CMND */
+                       wqe128->generic.bde.tus.f.bdeFlags =
+                               BUFF_TYPE_BDE_IMMED;
+                       wqe128->generic.bde.tus.f.bdeSize = sgl->sge_len;
+                       wqe128->generic.bde.addrHigh = 0;
+                       wqe128->generic.bde.addrLow =  88;  /* Word 22 */
+
+                       bf_set(wqe_wqes, &wqe128->fcp_icmd.wqe_com, 1);
+
+                       /* Words 22-29 - FCP_CMND payload */
+                       ptr = &wqe128->words[22];
+                       memcpy(ptr, fcp_cmnd, sizeof(struct fcp_cmnd));
+               }
                break;
        case CMD_GEN_REQUEST64_CR:
                /* For this command calculate the xmit length of the
                         struct lpfc_iocbq *piocb, uint32_t flag)
 {
        struct lpfc_sglq *sglq;
-       union lpfc_wqe wqe;
+       union lpfc_wqe *wqe;
+       union lpfc_wqe128 wqe128;
        struct lpfc_queue *wq;
        struct lpfc_sli_ring *pring = &phba->sli.ring[ring_number];
 
        lockdep_assert_held(&phba->hbalock);
 
+       /*
+        * The WQE can be either 64 or 128 bytes, so reserve stack space
+        * for the larger (128 byte) form and access it through a
+        * union lpfc_wqe pointer.
+        */
+       wqe = (union lpfc_wqe *)&wqe128;
+
        if (piocb->sli4_xritag == NO_XRI) {
                if (piocb->iocb.ulpCommand == CMD_ABORT_XRI_CN ||
                    piocb->iocb.ulpCommand == CMD_CLOSE_XRI_CN)
                        return IOCB_ERROR;
        }
 
-       if (lpfc_sli4_iocb2wqe(phba, piocb, &wqe))
+       if (lpfc_sli4_iocb2wqe(phba, piocb, wqe))
                return IOCB_ERROR;
 
        if ((piocb->iocb_flag & LPFC_IO_FCP) ||
                } else {
                        wq = phba->sli4_hba.oas_wq;
                }
-               if (lpfc_sli4_wq_put(wq, &wqe))
+               if (lpfc_sli4_wq_put(wq, wqe))
                        return IOCB_ERROR;
        } else {
                if (unlikely(!phba->sli4_hba.els_wq))
                        return IOCB_ERROR;
-               if (lpfc_sli4_wq_put(phba->sli4_hba.els_wq, &wqe))
+               if (lpfc_sli4_wq_put(phba->sli4_hba.els_wq, wqe))
                        return IOCB_ERROR;
        }
        lpfc_sli_ringtxcmpl_put(phba, pring, piocb);
  * pointer from the lpfc_hba struct.
  *
  * Return codes:
- *     IOCB_ERROR - Error
- *     IOCB_SUCCESS - Success
- *     IOCB_BUSY - Busy
+ * IOCB_ERROR - Error
+ * IOCB_SUCCESS - Success
+ * IOCB_BUSY - Busy
  **/
 int
 __lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number,