From 367e4b3994543e40437e4e73305af197c21c1055 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 28 May 2012 02:55:59 +0000 Subject: [PATCH] ar71xx: improve rx performance of the ethernet driver by using build_skb to deliver a cache-hot skb to the network stack SVN-Revision: 31934 --- .../net/ethernet/atheros/ag71xx/ag71xx.h | 6 +- .../net/ethernet/atheros/ag71xx/ag71xx_main.c | 84 ++++++++++--------- 2 files changed, 49 insertions(+), 41 deletions(-) diff --git a/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx.h b/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx.h index 881741660b..b9d95adaf6 100644 --- a/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx.h +++ b/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx.h @@ -53,6 +53,7 @@ #define AG71XX_TX_MTU_LEN 1540 #define AG71XX_RX_PKT_SIZE \ (ETH_FRAME_LEN + ETH_FCS_LEN + VLAN_HLEN) +#define AG71XX_RX_BUF_SIZE (AG71XX_RX_PKT_SIZE + NET_SKB_PAD + NET_IP_ALIGN) #define AG71XX_TX_RING_SIZE_DEFAULT 64 #define AG71XX_RX_RING_SIZE_DEFAULT 128 @@ -85,7 +86,10 @@ struct ag71xx_desc { } __attribute__((aligned(4))); struct ag71xx_buf { - struct sk_buff *skb; + union { + struct sk_buff *skb; + void *rx_buf; + }; struct ag71xx_desc *desc; dma_addr_t dma_addr; unsigned long timestamp; diff --git a/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx_main.c b/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx_main.c index 6d1aff7f7e..fb99d27281 100644 --- a/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx_main.c +++ b/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx_main.c @@ -189,15 +189,17 @@ static void ag71xx_ring_rx_clean(struct ag71xx *ag) return; for (i = 0; i < ring->size; i++) - if (ring->buf[i].skb) { + if (ring->buf[i].rx_buf) { dma_unmap_single(&ag->dev->dev, ring->buf[i].dma_addr, - AG71XX_RX_PKT_SIZE, DMA_FROM_DEVICE); - kfree_skb(ring->buf[i].skb); + AG71XX_RX_BUF_SIZE, DMA_FROM_DEVICE); + kfree(ring->buf[i].rx_buf); } } -struct sk_buff *ag71xx_rx_alloc(struct ag71xx *ag) +static int ag71xx_buffer_offset(struct ag71xx *ag) { + int offset = NET_SKB_PAD; + /* * On AR71xx/AR91xx packets must be 4-byte aligned. * @@ -205,17 +207,35 @@ struct sk_buff *ag71xx_rx_alloc(struct ag71xx *ag) * so we don't need any extra alignment in that case. */ if (!ag71xx_get_pdata(ag)->is_ar724x || ag71xx_has_ar8216(ag)) - return netdev_alloc_skb(ag->dev, AG71XX_RX_PKT_SIZE); + return offset; - return netdev_alloc_skb_ip_align(ag->dev, AG71XX_RX_PKT_SIZE); + return offset + NET_IP_ALIGN; } +static bool ag71xx_fill_rx_buf(struct ag71xx *ag, struct ag71xx_buf *buf, + int offset) +{ + void *data; + + data = kmalloc(AG71XX_RX_BUF_SIZE + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), + GFP_ATOMIC); + if (!data) + return false; + + buf->rx_buf = data; + buf->dma_addr = dma_map_single(&ag->dev->dev, data, + AG71XX_RX_BUF_SIZE, DMA_FROM_DEVICE); + buf->desc->data = (u32) buf->dma_addr + offset; + return true; +} static int ag71xx_ring_rx_init(struct ag71xx *ag) { struct ag71xx_ring *ring = &ag->rx_ring; unsigned int i; int ret; + int offset = ag71xx_buffer_offset(ag); ret = 0; for (i = 0; i < ring->size; i++) { @@ -228,22 +248,11 @@ static int ag71xx_ring_rx_init(struct ag71xx *ag) } for (i = 0; i < ring->size; i++) { - struct sk_buff *skb; - dma_addr_t dma_addr; - - skb = ag71xx_rx_alloc(ag); - if (!skb) { + if (!ag71xx_fill_rx_buf(ag, &ring->buf[i], offset)) { ret = -ENOMEM; break; } - skb->dev = ag->dev; - dma_addr = dma_map_single(&ag->dev->dev, skb->data, - AG71XX_RX_PKT_SIZE, - DMA_FROM_DEVICE); - ring->buf[i].skb = skb; - ring->buf[i].dma_addr = dma_addr; - ring->buf[i].desc->data = (u32) dma_addr; ring->buf[i].desc->ctrl = DESC_EMPTY; } @@ -260,6 +269,7 @@ static int ag71xx_ring_rx_refill(struct ag71xx *ag) { struct ag71xx_ring *ring = &ag->rx_ring; unsigned int count; + int offset = ag71xx_buffer_offset(ag); count = 0; for (; ring->curr - ring->dirty > 0; ring->dirty++) { @@ -267,24 +277,9 @@ static int ag71xx_ring_rx_refill(struct ag71xx *ag) i = ring->dirty % ring->size; - if (ring->buf[i].skb == NULL) { - dma_addr_t dma_addr; - struct sk_buff *skb; - - skb = ag71xx_rx_alloc(ag); - if (skb == NULL) - break; - - skb->dev = ag->dev; - - dma_addr = dma_map_single(&ag->dev->dev, skb->data, - AG71XX_RX_PKT_SIZE, - DMA_FROM_DEVICE); - - ring->buf[i].skb = skb; - ring->buf[i].dma_addr = dma_addr; - ring->buf[i].desc->data = (u32) dma_addr; - } + if (!ring->buf[i].rx_buf && + !ag71xx_fill_rx_buf(ag, &ring->buf[i], offset)) + break; ring->buf[i].desc->ctrl = DESC_EMPTY; count++; @@ -863,6 +858,7 @@ static int ag71xx_rx_packets(struct ag71xx *ag, int limit) { struct net_device *dev = ag->dev; struct ag71xx_ring *ring = &ag->rx_ring; + int offset = ag71xx_buffer_offset(ag); int done = 0; DBG("%s: rx packets, limit=%d, curr=%u, dirty=%u\n", @@ -885,18 +881,25 @@ static int ag71xx_rx_packets(struct ag71xx *ag, int limit) ag71xx_wr(ag, AG71XX_REG_RX_STATUS, RX_STATUS_PR); - skb = ring->buf[i].skb; pktlen = ag71xx_desc_pktlen(desc); pktlen -= ETH_FCS_LEN; dma_unmap_single(&dev->dev, ring->buf[i].dma_addr, - AG71XX_RX_PKT_SIZE, DMA_FROM_DEVICE); + AG71XX_RX_BUF_SIZE, DMA_FROM_DEVICE); dev->last_rx = jiffies; dev->stats.rx_packets++; dev->stats.rx_bytes += pktlen; + skb = build_skb(ring->buf[i].rx_buf); + if (!skb) { + kfree(ring->buf[i].rx_buf); + goto next; + } + + skb_reserve(skb, offset); skb_put(skb, pktlen); + if (ag71xx_has_ar8216(ag)) err = ag71xx_remove_ar8216_header(ag, skb, pktlen); @@ -910,7 +913,8 @@ static int ag71xx_rx_packets(struct ag71xx *ag, int limit) netif_receive_skb(skb); } - ring->buf[i].skb = NULL; +next: + ring->buf[i].rx_buf = NULL; done++; ring->curr++; @@ -944,7 +948,7 @@ static int ag71xx_poll(struct napi_struct *napi, int limit) ag71xx_debugfs_update_napi_stats(ag, rx_done, tx_done); rx_ring = &ag->rx_ring; - if (rx_ring->buf[rx_ring->dirty % rx_ring->size].skb == NULL) + if (rx_ring->buf[rx_ring->dirty % rx_ring->size].rx_buf == NULL) goto oom; status = ag71xx_rr(ag, AG71XX_REG_RX_STATUS); -- 2.25.1