transferred = ur->length - length;
        ur->actual += transferred;
 
-       if (dwc->ep0_bounced) {
+       if ((IS_ALIGNED(ur->length, ep0->endpoint.maxpacket) &&
+            ur->length && ur->zero) || dwc->ep0_bounced) {
                trb++;
                trb->ctrl &= ~DWC3_TRB_CTRL_HWO;
+               trace_dwc3_complete_trb(ep0, trb);
                ep0->trb_enqueue = 0;
                dwc->ep0_bounced = false;
        }
 
-       if ((epnum & 1) && ur->actual < ur->length) {
-               /* for some reason we did not get everything out */
-
+       if ((epnum & 1) && ur->actual < ur->length)
                dwc3_ep0_stall_and_restart(dwc);
-       } else {
+       else
                dwc3_gadget_giveback(ep0, r, 0);
-
-               if (IS_ALIGNED(ur->length, ep0->endpoint.maxpacket) &&
-                               ur->length && ur->zero) {
-                       struct dwc3_ep *dep;
-                       int ret;
-
-                       dwc->ep0_next_event = DWC3_EP0_COMPLETE;
-
-                       dep = dwc->eps[epnum];
-                       dwc3_ep0_prepare_one_trb(dep, dwc->ep0_trb_addr,
-                                       0, DWC3_TRBCTL_CONTROL_DATA, false);
-                       ret = dwc3_ep0_start_trans(dep);
-                       WARN_ON(ret < 0);
-               }
-       }
 }
 
 static void dwc3_ep0_complete_status(struct dwc3 *dwc,
                                         DWC3_TRBCTL_CONTROL_DATA,
                                         false);
                ret = dwc3_ep0_start_trans(dep);
+       } else if (IS_ALIGNED(req->request.length, dep->endpoint.maxpacket) &&
+                  req->request.length && req->request.zero) {
+               u32     maxpacket;
+               u32     rem;
+
+               ret = usb_gadget_map_request_by_dev(dwc->sysdev,
+                               &req->request, dep->number);
+               if (ret)
+                       return;
+
+               maxpacket = dep->endpoint.maxpacket;
+               rem = req->request.length % maxpacket;
+
+               /* prepare normal TRB */
+               dwc3_ep0_prepare_one_trb(dep, req->request.dma,
+                                        req->request.length,
+                                        DWC3_TRBCTL_CONTROL_DATA,
+                                        true);
+
+               /* Now prepare one extra TRB to handle ZLP */
+               dwc3_ep0_prepare_one_trb(dep, dwc->bounce_addr,
+                                        0, DWC3_TRBCTL_CONTROL_DATA,
+                                        false);
+               ret = dwc3_ep0_start_trans(dep);
        } else {
                ret = usb_gadget_map_request_by_dev(dwc->sysdev,
                                &req->request, dep->number);
 
                                false, 0, req->request.stream_id,
                                req->request.short_not_ok,
                                req->request.no_interrupt);
+       } else if (req->request.zero && req->request.length &&
+                  (IS_ALIGNED(req->request.length,dep->endpoint.maxpacket))) {
+               struct dwc3     *dwc = dep->dwc;
+               struct dwc3_trb *trb;
+
+               req->zero = true;
+
+               /* prepare normal TRB */
+               dwc3_prepare_one_trb(dep, req, true, 0);
+
+               /* Now prepare one extra TRB to handle ZLP */
+               trb = &dep->trb_pool[dep->trb_enqueue];
+               __dwc3_prepare_one_trb(dep, trb, dwc->bounce_addr, 0,
+                               false, 0, req->request.stream_id,
+                               req->request.short_not_ok,
+                               req->request.no_interrupt);
        } else {
                dwc3_prepare_one_trb(dep, req, false, 0);
        }
        return ret;
 }
 
-static void __dwc3_gadget_ep_zlp_complete(struct usb_ep *ep,
-               struct usb_request *request)
-{
-       dwc3_gadget_ep_free_request(ep, request);
-}
-
-static int __dwc3_gadget_ep_queue_zlp(struct dwc3 *dwc, struct dwc3_ep *dep)
-{
-       struct dwc3_request             *req;
-       struct usb_request              *request;
-       struct usb_ep                   *ep = &dep->endpoint;
-
-       request = dwc3_gadget_ep_alloc_request(ep, GFP_ATOMIC);
-       if (!request)
-               return -ENOMEM;
-
-       request->length = 0;
-       request->buf = dwc->zlp_buf;
-       request->complete = __dwc3_gadget_ep_zlp_complete;
-
-       req = to_dwc3_request(request);
-
-       return __dwc3_gadget_ep_queue(dep, req);
-}
-
 static int dwc3_gadget_ep_queue(struct usb_ep *ep, struct usb_request *request,
        gfp_t gfp_flags)
 {
 
        spin_lock_irqsave(&dwc->lock, flags);
        ret = __dwc3_gadget_ep_queue(dep, req);
-
-       /*
-        * Okay, here's the thing, if gadget driver has requested for a ZLP by
-        * setting request->zero, instead of doing magic, we will just queue an
-        * extra usb_request ourselves so that it gets handled the same way as
-        * any other request.
-        */
-       if (ret == 0 && request->zero && request->length &&
-           (request->length % ep->maxpacket == 0))
-               ret = __dwc3_gadget_ep_queue_zlp(dwc, dep);
-
        spin_unlock_irqrestore(&dwc->lock, flags);
 
        return ret;
                                        dwc3_ep_inc_deq(dep);
                                }
 
-                               if (r->unaligned) {
+                               if (r->unaligned || r->zero) {
                                        trb = r->trb + r->num_pending_sgs + 1;
                                        trb->ctrl &= ~DWC3_TRB_CTRL_HWO;
                                        dwc3_ep_inc_deq(dep);
                                trb->ctrl &= ~DWC3_TRB_CTRL_HWO;
                                dwc3_ep_inc_deq(dep);
 
-                               if (r->unaligned) {
+                               if (r->unaligned || r->zero) {
                                        trb = r->trb + 1;
                                        trb->ctrl &= ~DWC3_TRB_CTRL_HWO;
                                        dwc3_ep_inc_deq(dep);
         * with one TRB pending in the ring. We need to manually clear HWO bit
         * from that TRB.
         */
-       if (req->unaligned && (trb->ctrl & DWC3_TRB_CTRL_HWO)) {
+       if ((req->zero || req->unaligned) && (trb->ctrl & DWC3_TRB_CTRL_HWO)) {
                trb->ctrl &= ~DWC3_TRB_CTRL_HWO;
                return 1;
        }
                                        event, status, chain);
                }
 
-               if (req->unaligned) {
+               if (req->unaligned || req->zero) {
                        trb = &dep->trb_pool[dep->trb_dequeue];
                        ret = __dwc3_cleanup_done_trbs(dwc, dep, req, trb,
                                        event, status, false);
                        req->unaligned = false;
+                       req->zero = false;
                }
 
                req->request.actual = length - req->remaining;
                goto err1;
        }
 
-       dwc->zlp_buf = kzalloc(DWC3_ZLP_BUF_SIZE, GFP_KERNEL);
-       if (!dwc->zlp_buf) {
-               ret = -ENOMEM;
-               goto err2;
-       }
-
        dwc->bounce = dma_alloc_coherent(dwc->sysdev, DWC3_BOUNCE_SIZE,
                        &dwc->bounce_addr, GFP_KERNEL);
        if (!dwc->bounce) {
                ret = -ENOMEM;
-               goto err3;
+               goto err2;
        }
 
        init_completion(&dwc->ep0_in_setup);
 
        ret = dwc3_gadget_init_endpoints(dwc, dwc->num_eps);
        if (ret)
-               goto err4;
+               goto err3;
 
        ret = usb_add_gadget_udc(dwc->dev, &dwc->gadget);
        if (ret) {
                dev_err(dwc->dev, "failed to register udc\n");
-               goto err5;
+               goto err4;
        }
 
        return 0;
-err5:
-       dwc3_gadget_free_endpoints(dwc);
 
 err4:
-       dma_free_coherent(dwc->sysdev, DWC3_BOUNCE_SIZE, dwc->bounce,
-                       dwc->bounce_addr);
+       dwc3_gadget_free_endpoints(dwc);
 
 err3:
-       kfree(dwc->zlp_buf);
+       dma_free_coherent(dwc->sysdev, DWC3_BOUNCE_SIZE, dwc->bounce,
+                       dwc->bounce_addr);
 
 err2:
        kfree(dwc->setup_buf);
 void dwc3_gadget_exit(struct dwc3 *dwc)
 {
        usb_del_gadget_udc(&dwc->gadget);
-
        dwc3_gadget_free_endpoints(dwc);
-
        dma_free_coherent(dwc->sysdev, DWC3_BOUNCE_SIZE, dwc->bounce,
-                       dwc->bounce_addr);
+                         dwc->bounce_addr);
        kfree(dwc->setup_buf);
-       kfree(dwc->zlp_buf);
-
        dma_free_coherent(dwc->sysdev, sizeof(*dwc->ep0_trb) * 2,
-                       dwc->ep0_trb, dwc->ep0_trb_addr);
+                         dwc->ep0_trb, dwc->ep0_trb_addr);
 }
 
 int dwc3_gadget_suspend(struct dwc3 *dwc)