From e0059eaef18dbdc65ee420a337aecfa555e8d493 Mon Sep 17 00:00:00 2001 From: Lukasz Majewski Date: Wed, 5 Feb 2014 10:10:44 +0100 Subject: [PATCH] usb:udc:samsung: Zero copy approach for data passed to Samsung's UDC driver The Samsung's UDC driver is not anymore copying data from USB requests to aligned internal buffers. Now it works directly in data allocated in the upper layers like UMS, DFU, THOR. This change is possible since those gadgets now must take care to allocate buffers aligned to cache line (CONFIG_SYS_CACHELINE_SIZE). This can be achieved by using DEFINE_CACHE_ALIGN_BUFFER() or ALLOC_CACHE_ALIGN_BUFFER() macros. Those take care to allocate buffer aligned to cache line in both starting address and its size. Sometimes it is enough to just use memalign() with size being a multiplication of cache line size. Test condition - test HW + measurement: Trats - Exynos4210 rev.1 - test HW Trats2 - Exynos4412 rev.1 400 MiB compressed rootfs image download with `thor 0 mmc 0` Measurement: Transmission speed: 27.04 MiB/s Signed-off-by: Lukasz Majewski Cc: Marek Vasut --- drivers/usb/gadget/s3c_udc_otg.c | 19 ++++--- drivers/usb/gadget/s3c_udc_otg_xfer_dma.c | 63 ++++++++++++----------- include/usb/s3c_udc.h | 5 +- 3 files changed, 43 insertions(+), 44 deletions(-) diff --git a/drivers/usb/gadget/s3c_udc_otg.c b/drivers/usb/gadget/s3c_udc_otg.c index ba17a04265..63d4487a9b 100644 --- a/drivers/usb/gadget/s3c_udc_otg.c +++ b/drivers/usb/gadget/s3c_udc_otg.c @@ -843,7 +843,7 @@ static struct s3c_udc memory = { int s3c_udc_probe(struct s3c_plat_otg_data *pdata) { struct s3c_udc *dev = &memory; - int retval = 0, i; + int retval = 0; debug("%s: %p\n", __func__, pdata); @@ -864,16 +864,15 @@ int s3c_udc_probe(struct s3c_plat_otg_data *pdata) the_controller = dev; - for (i = 0; i < S3C_MAX_ENDPOINTS+1; i++) { - dev->dma_buf[i] = memalign(CONFIG_SYS_CACHELINE_SIZE, - DMA_BUFFER_SIZE); - dev->dma_addr[i] = (dma_addr_t) dev->dma_buf[i]; - invalidate_dcache_range((unsigned long) dev->dma_buf[i], - (unsigned long) (dev->dma_buf[i] - + DMA_BUFFER_SIZE)); + usb_ctrl = memalign(CONFIG_SYS_CACHELINE_SIZE, + ROUND(sizeof(struct usb_ctrlrequest), + CONFIG_SYS_CACHELINE_SIZE)); + if (!usb_ctrl) { + error("No memory available for UDC!\n"); + return -ENOMEM; } - usb_ctrl = dev->dma_buf[0]; - usb_ctrl_dma_addr = dev->dma_addr[0]; + + usb_ctrl_dma_addr = (dma_addr_t) usb_ctrl; udc_reinit(dev); diff --git a/drivers/usb/gadget/s3c_udc_otg_xfer_dma.c b/drivers/usb/gadget/s3c_udc_otg_xfer_dma.c index 7c7176bfd3..06dfeed905 100644 --- a/drivers/usb/gadget/s3c_udc_otg_xfer_dma.c +++ b/drivers/usb/gadget/s3c_udc_otg_xfer_dma.c @@ -110,8 +110,7 @@ static int setdma_rx(struct s3c_ep *ep, struct s3c_request *req) ctrl = readl(®->out_endp[ep_num].doepctl); - writel(the_controller->dma_addr[ep_index(ep)+1], - ®->out_endp[ep_num].doepdma); + writel((unsigned int) ep->dma_buf, ®->out_endp[ep_num].doepdma); writel(DOEPT_SIZ_PKT_CNT(pktcnt) | DOEPT_SIZ_XFER_SIZE(length), ®->out_endp[ep_num].doeptsiz); writel(DEPCTL_EPENA|DEPCTL_CNAK|ctrl, ®->out_endp[ep_num].doepctl); @@ -134,7 +133,6 @@ int setdma_tx(struct s3c_ep *ep, struct s3c_request *req) u32 *buf, ctrl = 0; u32 length, pktcnt; u32 ep_num = ep_index(ep); - u32 *p = the_controller->dma_buf[ep_index(ep)+1]; buf = req->req.buf + req->req.actual; length = req->req.length - req->req.actual; @@ -144,10 +142,10 @@ int setdma_tx(struct s3c_ep *ep, struct s3c_request *req) ep->len = length; ep->dma_buf = buf; - memcpy(p, ep->dma_buf, length); - flush_dcache_range((unsigned long) p , - (unsigned long) p + DMA_BUFFER_SIZE); + flush_dcache_range((unsigned long) ep->dma_buf, + (unsigned long) ep->dma_buf + + ROUND(ep->len, CONFIG_SYS_CACHELINE_SIZE)); if (length == 0) pktcnt = 1; @@ -160,8 +158,7 @@ int setdma_tx(struct s3c_ep *ep, struct s3c_request *req) while (readl(®->grstctl) & TX_FIFO_FLUSH) ; - writel(the_controller->dma_addr[ep_index(ep)+1], - ®->in_endp[ep_num].diepdma); + writel((unsigned long) ep->dma_buf, ®->in_endp[ep_num].diepdma); writel(DIEPT_SIZ_PKT_CNT(pktcnt) | DIEPT_SIZ_XFER_SIZE(length), ®->in_endp[ep_num].dieptsiz); @@ -194,7 +191,6 @@ static void complete_rx(struct s3c_udc *dev, u8 ep_num) struct s3c_ep *ep = &dev->ep[ep_num]; struct s3c_request *req = NULL; u32 ep_tsr = 0, xfer_size = 0, is_short = 0; - u32 *p = the_controller->dma_buf[ep_index(ep)+1]; if (list_empty(&ep->queue)) { debug_cond(DEBUG_OUT_EP != 0, @@ -214,10 +210,23 @@ static void complete_rx(struct s3c_udc *dev, u8 ep_num) xfer_size = ep->len - xfer_size; - invalidate_dcache_range((unsigned long) p, - (unsigned long) p + DMA_BUFFER_SIZE); - - memcpy(ep->dma_buf, p, ep->len); + /* + * NOTE: + * + * Please be careful with proper buffer allocation for USB request, + * which needs to be aligned to CONFIG_SYS_CACHELINE_SIZE, not only + * with starting address, but also its size shall be a cache line + * multiplication. + * + * This will prevent from corruption of data allocated immediatelly + * before or after the buffer. + * + * For armv7, the cache_v7.c provides proper code to emit "ERROR" + * message to warn users. + */ + invalidate_dcache_range((unsigned long) ep->dma_buf, + (unsigned long) ep->dma_buf + + ROUND(xfer_size, CONFIG_SYS_CACHELINE_SIZE)); req->req.actual += min(xfer_size, req->req.length - req->req.actual); is_short = (xfer_size < ep->ep.maxpacket); @@ -711,19 +720,14 @@ static int write_fifo_ep0(struct s3c_ep *ep, struct s3c_request *req) int s3c_fifo_read(struct s3c_ep *ep, u32 *cp, int max) { - u32 bytes; - - bytes = sizeof(struct usb_ctrlrequest); - - invalidate_dcache_range((unsigned long) ep->dev->dma_buf[ep_index(ep)], - (unsigned long) ep->dev->dma_buf[ep_index(ep)] - + DMA_BUFFER_SIZE); + invalidate_dcache_range((unsigned long)cp, (unsigned long)cp + + ROUND(max, CONFIG_SYS_CACHELINE_SIZE)); debug_cond(DEBUG_EP0 != 0, - "%s: bytes=%d, ep_index=%d %p\n", __func__, - bytes, ep_index(ep), ep->dev->dma_buf[ep_index(ep)]); + "%s: bytes=%d, ep_index=%d 0x%p\n", __func__, + max, ep_index(ep), cp); - return bytes; + return max; } /** @@ -855,14 +859,12 @@ static int s3c_ep0_write(struct s3c_udc *dev) return 1; } -u16 g_status; - int s3c_udc_get_status(struct s3c_udc *dev, struct usb_ctrlrequest *crq) { u8 ep_num = crq->wIndex & 0x7F; + u16 g_status = 0; u32 ep_ctrl; - u32 *p = the_controller->dma_buf[1]; debug_cond(DEBUG_SETUP != 0, "%s: *** USB_REQ_GET_STATUS\n", __func__); @@ -900,12 +902,13 @@ int s3c_udc_get_status(struct s3c_udc *dev, return 1; } - memcpy(p, &g_status, sizeof(g_status)); + memcpy(usb_ctrl, &g_status, sizeof(g_status)); - flush_dcache_range((unsigned long) p, - (unsigned long) p + DMA_BUFFER_SIZE); + flush_dcache_range((unsigned long) usb_ctrl, + (unsigned long) usb_ctrl + + ROUND(sizeof(g_status), CONFIG_SYS_CACHELINE_SIZE)); - writel(the_controller->dma_addr[1], ®->in_endp[EP0_CON].diepdma); + writel(usb_ctrl_dma_addr, ®->in_endp[EP0_CON].diepdma); writel(DIEPT_SIZ_PKT_CNT(1) | DIEPT_SIZ_XFER_SIZE(2), ®->in_endp[EP0_CON].dieptsiz); diff --git a/include/usb/s3c_udc.h b/include/usb/s3c_udc.h index 734c6cd2f7..6dead2fb46 100644 --- a/include/usb/s3c_udc.h +++ b/include/usb/s3c_udc.h @@ -19,7 +19,7 @@ /*-------------------------------------------------------------------------*/ /* DMA bounce buffer size, 16K is enough even for mass storage */ -#define DMA_BUFFER_SIZE (4096*4) +#define DMA_BUFFER_SIZE (16*SZ_1K) #define EP0_FIFO_SIZE 64 #define EP_FIFO_SIZE 512 @@ -81,9 +81,6 @@ struct s3c_udc { struct s3c_plat_otg_data *pdata; - void *dma_buf[S3C_MAX_ENDPOINTS+1]; - dma_addr_t dma_addr[S3C_MAX_ENDPOINTS+1]; - int ep0state; struct s3c_ep ep[S3C_MAX_ENDPOINTS]; -- 2.25.1