summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/google/gve/gve_rx.c
diff options
context:
space:
mode:
authorCatherine Sullivan <csully@google.com>2020-12-08 01:45:24 +0300
committerDavid S. Miller <davem@davemloft.net>2020-12-09 03:06:28 +0300
commitede3fcf5ec67f717e297f060ad00b524a074c4e0 (patch)
tree0f5217467597c45d4092e8ccbe9d2d28866c0200 /drivers/net/ethernet/google/gve/gve_rx.c
parent4944db80ac1133fe4acb0c9756758da088338b26 (diff)
downloadlinux-ede3fcf5ec67f717e297f060ad00b524a074c4e0.tar.xz
gve: Add support for raw addressing to the rx path
Add support to use raw dma addresses in the rx path. Due to this new support we can alloc a new buffer instead of making a copy. RX buffers are handed to the networking stack and are re-allocated as needed, avoiding the need to use skb_copy_to_linear_data() as in "qpl" mode. Reviewed-by: Yangchun Fu <yangchun@google.com> Signed-off-by: Catherine Sullivan <csully@google.com> Signed-off-by: David Awogbemila <awogbemila@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/ethernet/google/gve/gve_rx.c')
-rw-r--r--drivers/net/ethernet/google/gve/gve_rx.c205
1 files changed, 158 insertions, 47 deletions
diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c
index 008fa897a3e6..596772f5e29a 100644
--- a/drivers/net/ethernet/google/gve/gve_rx.c
+++ b/drivers/net/ethernet/google/gve/gve_rx.c
@@ -16,12 +16,39 @@ static void gve_rx_remove_from_block(struct gve_priv *priv, int queue_idx)
block->rx = NULL;
}
+static void gve_rx_free_buffer(struct device *dev,
+ struct gve_rx_slot_page_info *page_info,
+ union gve_rx_data_slot *data_slot)
+{
+ dma_addr_t dma = (dma_addr_t)(be64_to_cpu(data_slot->addr) &
+ GVE_DATA_SLOT_ADDR_PAGE_MASK);
+
+ gve_free_page(dev, page_info->page, dma, DMA_FROM_DEVICE);
+}
+
+static void gve_rx_unfill_pages(struct gve_priv *priv, struct gve_rx_ring *rx)
+{
+ if (rx->data.raw_addressing) {
+ u32 slots = rx->mask + 1;
+ int i;
+
+ for (i = 0; i < slots; i++)
+ gve_rx_free_buffer(&priv->pdev->dev, &rx->data.page_info[i],
+ &rx->data.data_ring[i]);
+ } else {
+ gve_unassign_qpl(priv, rx->data.qpl->id);
+ rx->data.qpl = NULL;
+ }
+ kvfree(rx->data.page_info);
+ rx->data.page_info = NULL;
+}
+
static void gve_rx_free_ring(struct gve_priv *priv, int idx)
{
struct gve_rx_ring *rx = &priv->rx[idx];
struct device *dev = &priv->pdev->dev;
+ u32 slots = rx->mask + 1;
size_t bytes;
- u32 slots;
gve_rx_remove_from_block(priv, idx);
@@ -33,11 +60,8 @@ static void gve_rx_free_ring(struct gve_priv *priv, int idx)
rx->q_resources, rx->q_resources_bus);
rx->q_resources = NULL;
- gve_unassign_qpl(priv, rx->data.qpl->id);
- rx->data.qpl = NULL;
- kvfree(rx->data.page_info);
+ gve_rx_unfill_pages(priv, rx);
- slots = rx->mask + 1;
bytes = sizeof(*rx->data.data_ring) * slots;
dma_free_coherent(dev, bytes, rx->data.data_ring,
rx->data.data_bus);
@@ -46,19 +70,35 @@ static void gve_rx_free_ring(struct gve_priv *priv, int idx)
}
static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info,
- struct gve_rx_data_slot *slot,
- dma_addr_t addr, struct page *page)
+ dma_addr_t addr, struct page *page, __be64 *slot_addr)
{
page_info->page = page;
page_info->page_offset = 0;
page_info->page_address = page_address(page);
- slot->qpl_offset = cpu_to_be64(addr);
+ *slot_addr = cpu_to_be64(addr);
+}
+
+static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev,
+ struct gve_rx_slot_page_info *page_info,
+ union gve_rx_data_slot *data_slot)
+{
+ struct page *page;
+ dma_addr_t dma;
+ int err;
+
+ err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE);
+ if (err)
+ return err;
+
+ gve_setup_rx_buffer(page_info, dma, page, &data_slot->addr);
+ return 0;
}
static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
{
struct gve_priv *priv = rx->gve;
u32 slots;
+ int err;
int i;
/* Allocate one page per Rx queue slot. Each page is split into two
@@ -71,17 +111,30 @@ static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
if (!rx->data.page_info)
return -ENOMEM;
- rx->data.qpl = gve_assign_rx_qpl(priv);
-
+ if (!rx->data.raw_addressing)
+ rx->data.qpl = gve_assign_rx_qpl(priv);
for (i = 0; i < slots; i++) {
- struct page *page = rx->data.qpl->pages[i];
- dma_addr_t addr = i * PAGE_SIZE;
+ if (!rx->data.raw_addressing) {
+ struct page *page = rx->data.qpl->pages[i];
+ dma_addr_t addr = i * PAGE_SIZE;
- gve_setup_rx_buffer(&rx->data.page_info[i],
- &rx->data.data_ring[i], addr, page);
+ gve_setup_rx_buffer(&rx->data.page_info[i], addr, page,
+ &rx->data.data_ring[i].qpl_offset);
+ continue;
+ }
+ err = gve_rx_alloc_buffer(priv, &priv->pdev->dev, &rx->data.page_info[i],
+ &rx->data.data_ring[i]);
+ if (err)
+ goto alloc_err;
}
return slots;
+alloc_err:
+ while (i--)
+ gve_rx_free_buffer(&priv->pdev->dev,
+ &rx->data.page_info[i],
+ &rx->data.data_ring[i]);
+ return err;
}
static void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx)
@@ -110,8 +163,9 @@ static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
rx->gve = priv;
rx->q_num = idx;
- slots = priv->rx_pages_per_qpl;
+ slots = priv->rx_data_slot_cnt;
rx->mask = slots - 1;
+ rx->data.raw_addressing = priv->raw_addressing;
/* alloc rx data ring */
bytes = sizeof(*rx->data.data_ring) * slots;
@@ -156,8 +210,8 @@ static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
err = -ENOMEM;
goto abort_with_q_resources;
}
- rx->mask = slots - 1;
rx->cnt = 0;
+ rx->db_threshold = priv->rx_desc_cnt / 2;
rx->desc.seqno = 1;
gve_rx_add_to_block(priv, idx);
@@ -168,7 +222,7 @@ abort_with_q_resources:
rx->q_resources, rx->q_resources_bus);
rx->q_resources = NULL;
abort_filled:
- kvfree(rx->data.page_info);
+ gve_rx_unfill_pages(priv, rx);
abort_with_slots:
bytes = sizeof(*rx->data.data_ring) * slots;
dma_free_coherent(hdev, bytes, rx->data.data_ring, rx->data.data_bus);
@@ -233,7 +287,7 @@ static struct sk_buff *gve_rx_copy(struct gve_rx_ring *rx,
{
struct sk_buff *skb = napi_alloc_skb(napi, len);
void *va = page_info->page_address + GVE_RX_PAD +
- page_info->page_offset;
+ (page_info->page_offset ? PAGE_SIZE / 2 : 0);
if (unlikely(!skb))
return NULL;
@@ -251,8 +305,7 @@ static struct sk_buff *gve_rx_copy(struct gve_rx_ring *rx,
return skb;
}
-static struct sk_buff *gve_rx_add_frags(struct net_device *dev,
- struct napi_struct *napi,
+static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi,
struct gve_rx_slot_page_info *page_info,
u16 len)
{
@@ -262,20 +315,19 @@ static struct sk_buff *gve_rx_add_frags(struct net_device *dev,
return NULL;
skb_add_rx_frag(skb, 0, page_info->page,
- page_info->page_offset +
+ (page_info->page_offset ? PAGE_SIZE / 2 : 0) +
GVE_RX_PAD, len, PAGE_SIZE / 2);
return skb;
}
-static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info,
- struct gve_rx_data_slot *data_ring)
+static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info, __be64 *slot_addr)
{
- u64 addr = be64_to_cpu(data_ring->qpl_offset);
+ const __be64 offset = cpu_to_be64(PAGE_SIZE / 2);
- page_info->page_offset ^= PAGE_SIZE / 2;
- addr ^= PAGE_SIZE / 2;
- data_ring->qpl_offset = cpu_to_be64(addr);
+ /* "flip" to other packet buffer on this page */
+ page_info->page_offset ^= 0x1;
+ *(slot_addr) ^= offset;
}
static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
@@ -285,7 +337,9 @@ static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
struct gve_priv *priv = rx->gve;
struct napi_struct *napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
struct net_device *dev = priv->dev;
- struct sk_buff *skb;
+ union gve_rx_data_slot *data_slot;
+ struct sk_buff *skb = NULL;
+ dma_addr_t page_bus;
int pagecount;
u16 len;
@@ -294,18 +348,18 @@ static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
u64_stats_update_begin(&rx->statss);
rx->rx_desc_err_dropped_pkt++;
u64_stats_update_end(&rx->statss);
- return true;
+ return false;
}
len = be16_to_cpu(rx_desc->len) - GVE_RX_PAD;
page_info = &rx->data.page_info[idx];
- dma_sync_single_for_cpu(&priv->pdev->dev, rx->data.qpl->page_buses[idx],
- PAGE_SIZE, DMA_FROM_DEVICE);
- /* gvnic can only receive into registered segments. If the buffer
- * can't be recycled, our only choice is to copy the data out of
- * it so that we can return it to the device.
- */
+ data_slot = &rx->data.data_ring[idx];
+ page_bus = (rx->data.raw_addressing) ?
+ be64_to_cpu(data_slot->addr) & GVE_DATA_SLOT_ADDR_PAGE_MASK :
+ rx->data.qpl->page_buses[idx];
+ dma_sync_single_for_cpu(&priv->pdev->dev, page_bus,
+ PAGE_SIZE, DMA_FROM_DEVICE);
if (PAGE_SIZE == 4096) {
if (len <= priv->rx_copybreak) {
@@ -316,6 +370,10 @@ static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
u64_stats_update_end(&rx->statss);
goto have_skb;
}
+ if (rx->data.raw_addressing) {
+ skb = gve_rx_add_frags(napi, page_info, len);
+ goto have_skb;
+ }
if (unlikely(!gve_can_recycle_pages(dev))) {
skb = gve_rx_copy(rx, dev, napi, page_info, len);
goto have_skb;
@@ -326,17 +384,17 @@ static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
* the page fragment to a new SKB and pass it up the
* stack.
*/
- skb = gve_rx_add_frags(dev, napi, page_info, len);
+ skb = gve_rx_add_frags(napi, page_info, len);
if (!skb) {
u64_stats_update_begin(&rx->statss);
rx->rx_skb_alloc_fail++;
u64_stats_update_end(&rx->statss);
- return true;
+ return false;
}
/* Make sure the kernel stack can't release the page */
get_page(page_info->page);
/* "flip" to other packet buffer on this page */
- gve_rx_flip_buff(page_info, &rx->data.data_ring[idx]);
+ gve_rx_flip_buff(page_info, &rx->data.data_ring[idx].qpl_offset);
} else if (pagecount >= 2) {
/* We have previously passed the other half of this
* page up the stack, but it has not yet been freed.
@@ -347,7 +405,10 @@ static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
return false;
}
} else {
- skb = gve_rx_copy(rx, dev, napi, page_info, len);
+ if (rx->data.raw_addressing)
+ skb = gve_rx_add_frags(napi, page_info, len);
+ else
+ skb = gve_rx_copy(rx, dev, napi, page_info, len);
}
have_skb:
@@ -358,7 +419,7 @@ have_skb:
u64_stats_update_begin(&rx->statss);
rx->rx_skb_alloc_fail++;
u64_stats_update_end(&rx->statss);
- return true;
+ return false;
}
if (likely(feat & NETIF_F_RXCSUM)) {
@@ -399,19 +460,48 @@ static bool gve_rx_work_pending(struct gve_rx_ring *rx)
return (GVE_SEQNO(flags_seq) == rx->desc.seqno);
}
+static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx)
+{
+ int refill_target = rx->mask + 1;
+ u32 fill_cnt = rx->fill_cnt;
+
+ while (fill_cnt - rx->cnt < refill_target) {
+ struct gve_rx_slot_page_info *page_info;
+ struct device *dev = &priv->pdev->dev;
+ union gve_rx_data_slot *data_slot;
+ u32 idx = fill_cnt & rx->mask;
+
+ page_info = &rx->data.page_info[idx];
+ data_slot = &rx->data.data_ring[idx];
+ gve_rx_free_buffer(dev, page_info, data_slot);
+ page_info->page = NULL;
+ if (gve_rx_alloc_buffer(priv, dev, page_info, data_slot)) {
+ u64_stats_update_begin(&rx->statss);
+ rx->rx_buf_alloc_fail++;
+ u64_stats_update_end(&rx->statss);
+ break;
+ }
+ fill_cnt++;
+ }
+ rx->fill_cnt = fill_cnt;
+ return true;
+}
+
bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
netdev_features_t feat)
{
struct gve_priv *priv = rx->gve;
+ u32 work_done = 0, packets = 0;
struct gve_rx_desc *desc;
u32 cnt = rx->cnt;
u32 idx = cnt & rx->mask;
- u32 work_done = 0;
u64 bytes = 0;
desc = rx->desc.desc_ring + idx;
while ((GVE_SEQNO(desc->flags_seq) == rx->desc.seqno) &&
work_done < budget) {
+ bool dropped;
+
netif_info(priv, rx_status, priv->dev,
"[%d] idx=%d desc=%p desc->flags_seq=0x%x\n",
rx->q_num, idx, desc, desc->flags_seq);
@@ -419,9 +509,11 @@ bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
"[%d] seqno=%d rx->desc.seqno=%d\n",
rx->q_num, GVE_SEQNO(desc->flags_seq),
rx->desc.seqno);
- bytes += be16_to_cpu(desc->len) - GVE_RX_PAD;
- if (!gve_rx(rx, desc, feat, idx))
- gve_schedule_reset(priv);
+ dropped = !gve_rx(rx, desc, feat, idx);
+ if (!dropped) {
+ bytes += be16_to_cpu(desc->len) - GVE_RX_PAD;
+ packets++;
+ }
cnt++;
idx = cnt & rx->mask;
desc = rx->desc.desc_ring + idx;
@@ -429,15 +521,34 @@ bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
work_done++;
}
- if (!work_done)
+ if (!work_done && rx->fill_cnt - cnt > rx->db_threshold)
return false;
u64_stats_update_begin(&rx->statss);
- rx->rpackets += work_done;
+ rx->rpackets += packets;
rx->rbytes += bytes;
u64_stats_update_end(&rx->statss);
rx->cnt = cnt;
- rx->fill_cnt += work_done;
+
+ /* restock ring slots */
+ if (!rx->data.raw_addressing) {
+ /* In QPL mode buffs are refilled as the desc are processed */
+ rx->fill_cnt += work_done;
+ } else if (rx->fill_cnt - cnt <= rx->db_threshold) {
+ /* In raw addressing mode buffs are only refilled if the avail
+ * falls below a threshold.
+ */
+ if (!gve_rx_refill_buffers(priv, rx))
+ return false;
+
+ /* If we were not able to completely refill buffers, we'll want
+ * to schedule this queue for work again to refill buffers.
+ */
+ if (rx->fill_cnt - cnt <= rx->db_threshold) {
+ gve_rx_write_doorbell(priv, rx);
+ return true;
+ }
+ }
gve_rx_write_doorbell(priv, rx);
return gve_rx_work_pending(rx);