diff options
author | David S. Miller <davem@davemloft.net> | 2022-02-09 16:15:35 +0300 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2022-02-09 16:15:35 +0300 |
commit | 62b5b162e472cb67a5446068858c073c4f661dd1 (patch) | |
tree | 5d6cfba7abcb62170083a9b87fc6ec8d6590f123 /drivers | |
parent | dc178d31b9428f5b8520fdfd07210c3595f06cb0 (diff) | |
parent | 86ec882f59a070e07d1e74c5b03340180ad90a1e (diff) | |
download | linux-62b5b162e472cb67a5446068858c073c4f661dd1.tar.xz |
Merge branch 'dpaa2-eth-sw-TSO'
Ioana Ciornei says:
====================
dpaa2-eth: add support for software TSO
This series adds support for driver level TSO in the dpaa2-eth driver.
The first 5 patches lay the groundwork for the actual feature:
rearranging some variable declarations, cleaning up the interaction with
the S/G Table buffer cache, etc.
The 6th patch adds the actual driver level software TSO support by using
the usual tso_build_hdr()/tso_build_data() APIs and creates the S/G FDs.
With this patch set we can see the following improvement in a TCP flow
running on a single A72@2.2GHz of the LX2160A SoC:
before: 6.38Gbit/s
after: 8.48Gbit/s
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 343 | ||||
-rw-r--r-- | drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h | 18 | ||||
-rw-r--r-- | drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c | 2 | ||||
-rw-r--r-- | drivers/soc/fsl/dpio/qbman-portal.c | 8 |
4 files changed, 301 insertions, 70 deletions
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index e985ae008a97..88534aa29af2 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -18,6 +18,7 @@ #include <linux/ptp_classify.h> #include <net/pkt_cls.h> #include <net/sock.h> +#include <net/tso.h> #include "dpaa2-eth.h" @@ -760,6 +761,39 @@ static void dpaa2_eth_enable_tx_tstamp(struct dpaa2_eth_priv *priv, } } +static void *dpaa2_eth_sgt_get(struct dpaa2_eth_priv *priv) +{ + struct dpaa2_eth_sgt_cache *sgt_cache; + void *sgt_buf = NULL; + int sgt_buf_size; + + sgt_cache = this_cpu_ptr(priv->sgt_cache); + sgt_buf_size = priv->tx_data_offset + + DPAA2_ETH_SG_ENTRIES_MAX * sizeof(struct dpaa2_sg_entry); + + if (sgt_cache->count == 0) + sgt_buf = napi_alloc_frag_align(sgt_buf_size, DPAA2_ETH_TX_BUF_ALIGN); + else + sgt_buf = sgt_cache->buf[--sgt_cache->count]; + if (!sgt_buf) + return NULL; + + memset(sgt_buf, 0, sgt_buf_size); + + return sgt_buf; +} + +static void dpaa2_eth_sgt_recycle(struct dpaa2_eth_priv *priv, void *sgt_buf) +{ + struct dpaa2_eth_sgt_cache *sgt_cache; + + sgt_cache = this_cpu_ptr(priv->sgt_cache); + if (sgt_cache->count >= DPAA2_ETH_SGT_CACHE_SIZE) + skb_free_frag(sgt_buf); + else + sgt_cache->buf[sgt_cache->count++] = sgt_buf; +} + /* Create a frame descriptor based on a fragmented skb */ static int dpaa2_eth_build_sg_fd(struct dpaa2_eth_priv *priv, struct sk_buff *skb, @@ -805,12 +839,11 @@ static int dpaa2_eth_build_sg_fd(struct dpaa2_eth_priv *priv, /* Prepare the HW SGT structure */ sgt_buf_size = priv->tx_data_offset + sizeof(struct dpaa2_sg_entry) * num_dma_bufs; - sgt_buf = napi_alloc_frag_align(sgt_buf_size, DPAA2_ETH_TX_BUF_ALIGN); + sgt_buf = dpaa2_eth_sgt_get(priv); if (unlikely(!sgt_buf)) { err = -ENOMEM; goto sgt_buf_alloc_failed; } - memset(sgt_buf, 0, sgt_buf_size); sgt = (struct dpaa2_sg_entry *)(sgt_buf + priv->tx_data_offset); @@ -846,6 +879,7 @@ 
static int dpaa2_eth_build_sg_fd(struct dpaa2_eth_priv *priv, err = -ENOMEM; goto dma_map_single_failed; } + memset(fd, 0, sizeof(struct dpaa2_fd)); dpaa2_fd_set_offset(fd, priv->tx_data_offset); dpaa2_fd_set_format(fd, dpaa2_fd_sg); dpaa2_fd_set_addr(fd, addr); @@ -855,7 +889,7 @@ static int dpaa2_eth_build_sg_fd(struct dpaa2_eth_priv *priv, return 0; dma_map_single_failed: - skb_free_frag(sgt_buf); + dpaa2_eth_sgt_recycle(priv, sgt_buf); sgt_buf_alloc_failed: dma_unmap_sg(dev, scl, num_sg, DMA_BIDIRECTIONAL); dma_map_sg_failed: @@ -875,7 +909,6 @@ static int dpaa2_eth_build_sg_fd_single_buf(struct dpaa2_eth_priv *priv, void **swa_addr) { struct device *dev = priv->net_dev->dev.parent; - struct dpaa2_eth_sgt_cache *sgt_cache; struct dpaa2_sg_entry *sgt; struct dpaa2_eth_swa *swa; dma_addr_t addr, sgt_addr; @@ -884,18 +917,10 @@ static int dpaa2_eth_build_sg_fd_single_buf(struct dpaa2_eth_priv *priv, int err; /* Prepare the HW SGT structure */ - sgt_cache = this_cpu_ptr(priv->sgt_cache); sgt_buf_size = priv->tx_data_offset + sizeof(struct dpaa2_sg_entry); - - if (sgt_cache->count == 0) - sgt_buf = kzalloc(sgt_buf_size + DPAA2_ETH_TX_BUF_ALIGN, - GFP_ATOMIC); - else - sgt_buf = sgt_cache->buf[--sgt_cache->count]; + sgt_buf = dpaa2_eth_sgt_get(priv); if (unlikely(!sgt_buf)) return -ENOMEM; - - sgt_buf = PTR_ALIGN(sgt_buf, DPAA2_ETH_TX_BUF_ALIGN); sgt = (struct dpaa2_sg_entry *)(sgt_buf + priv->tx_data_offset); addr = dma_map_single(dev, skb->data, skb->len, DMA_BIDIRECTIONAL); @@ -923,6 +948,7 @@ static int dpaa2_eth_build_sg_fd_single_buf(struct dpaa2_eth_priv *priv, goto sgt_map_failed; } + memset(fd, 0, sizeof(struct dpaa2_fd)); dpaa2_fd_set_offset(fd, priv->tx_data_offset); dpaa2_fd_set_format(fd, dpaa2_fd_sg); dpaa2_fd_set_addr(fd, sgt_addr); @@ -934,10 +960,7 @@ static int dpaa2_eth_build_sg_fd_single_buf(struct dpaa2_eth_priv *priv, sgt_map_failed: dma_unmap_single(dev, addr, skb->len, DMA_BIDIRECTIONAL); data_map_failed: - if (sgt_cache->count >= 
DPAA2_ETH_SGT_CACHE_SIZE) - kfree(sgt_buf); - else - sgt_cache->buf[sgt_cache->count++] = sgt_buf; + dpaa2_eth_sgt_recycle(priv, sgt_buf); return err; } @@ -978,6 +1001,7 @@ static int dpaa2_eth_build_single_fd(struct dpaa2_eth_priv *priv, if (unlikely(dma_mapping_error(dev, addr))) return -ENOMEM; + memset(fd, 0, sizeof(struct dpaa2_fd)); dpaa2_fd_set_addr(fd, addr); dpaa2_fd_set_offset(fd, (u16)(skb->data - buffer_start)); dpaa2_fd_set_len(fd, skb->len); @@ -1005,9 +1029,9 @@ static void dpaa2_eth_free_tx_fd(struct dpaa2_eth_priv *priv, struct dpaa2_eth_swa *swa; u8 fd_format = dpaa2_fd_get_format(fd); u32 fd_len = dpaa2_fd_get_len(fd); - - struct dpaa2_eth_sgt_cache *sgt_cache; struct dpaa2_sg_entry *sgt; + int should_free_skb = 1; + int i; fd_addr = dpaa2_fd_get_addr(fd); buffer_start = dpaa2_iova_to_virt(priv->iommu_domain, fd_addr); @@ -1039,6 +1063,28 @@ static void dpaa2_eth_free_tx_fd(struct dpaa2_eth_priv *priv, /* Unmap the SGT buffer */ dma_unmap_single(dev, fd_addr, swa->sg.sgt_size, DMA_BIDIRECTIONAL); + } else if (swa->type == DPAA2_ETH_SWA_SW_TSO) { + skb = swa->tso.skb; + + sgt = (struct dpaa2_sg_entry *)(buffer_start + + priv->tx_data_offset); + + /* Unmap and free the header */ + dma_unmap_single(dev, dpaa2_sg_get_addr(sgt), TSO_HEADER_SIZE, + DMA_TO_DEVICE); + kfree(dpaa2_iova_to_virt(priv->iommu_domain, dpaa2_sg_get_addr(sgt))); + + /* Unmap the other SG entries for the data */ + for (i = 1; i < swa->tso.num_sg; i++) + dma_unmap_single(dev, dpaa2_sg_get_addr(&sgt[i]), + dpaa2_sg_get_len(&sgt[i]), DMA_TO_DEVICE); + + /* Unmap the SGT buffer */ + dma_unmap_single(dev, fd_addr, swa->sg.sgt_size, + DMA_BIDIRECTIONAL); + + if (!swa->tso.is_last_fd) + should_free_skb = 0; } else { skb = swa->single.skb; @@ -1067,55 +1113,195 @@ static void dpaa2_eth_free_tx_fd(struct dpaa2_eth_priv *priv, } /* Get the timestamp value */ - if (skb->cb[0] == TX_TSTAMP) { - struct skb_shared_hwtstamps shhwtstamps; - __le64 *ts = dpaa2_get_ts(buffer_start, true); - u64 
ns; - - memset(&shhwtstamps, 0, sizeof(shhwtstamps)); - - ns = DPAA2_PTP_CLK_PERIOD_NS * le64_to_cpup(ts); - shhwtstamps.hwtstamp = ns_to_ktime(ns); - skb_tstamp_tx(skb, &shhwtstamps); - } else if (skb->cb[0] == TX_TSTAMP_ONESTEP_SYNC) { - mutex_unlock(&priv->onestep_tstamp_lock); + if (swa->type != DPAA2_ETH_SWA_SW_TSO) { + if (skb->cb[0] == TX_TSTAMP) { + struct skb_shared_hwtstamps shhwtstamps; + __le64 *ts = dpaa2_get_ts(buffer_start, true); + u64 ns; + + memset(&shhwtstamps, 0, sizeof(shhwtstamps)); + + ns = DPAA2_PTP_CLK_PERIOD_NS * le64_to_cpup(ts); + shhwtstamps.hwtstamp = ns_to_ktime(ns); + skb_tstamp_tx(skb, &shhwtstamps); + } else if (skb->cb[0] == TX_TSTAMP_ONESTEP_SYNC) { + mutex_unlock(&priv->onestep_tstamp_lock); + } } /* Free SGT buffer allocated on tx */ - if (fd_format != dpaa2_fd_single) { - sgt_cache = this_cpu_ptr(priv->sgt_cache); - if (swa->type == DPAA2_ETH_SWA_SG) { - skb_free_frag(buffer_start); - } else { - if (sgt_cache->count >= DPAA2_ETH_SGT_CACHE_SIZE) - kfree(buffer_start); - else - sgt_cache->buf[sgt_cache->count++] = buffer_start; + if (fd_format != dpaa2_fd_single) + dpaa2_eth_sgt_recycle(priv, buffer_start); + + /* Move on with skb release. If we are just confirming multiple FDs + * from the same TSO skb then only the last one will need to free the + * skb. 
+ */ + if (should_free_skb) + napi_consume_skb(skb, in_napi); +} + +static int dpaa2_eth_build_gso_fd(struct dpaa2_eth_priv *priv, + struct sk_buff *skb, struct dpaa2_fd *fd, + int *num_fds, u32 *total_fds_len) +{ + struct device *dev = priv->net_dev->dev.parent; + int hdr_len, total_len, data_left, fd_len; + int num_sge, err, i, sgt_buf_size; + struct dpaa2_fd *fd_start = fd; + struct dpaa2_sg_entry *sgt; + struct dpaa2_eth_swa *swa; + dma_addr_t sgt_addr, addr; + dma_addr_t tso_hdr_dma; + unsigned int index = 0; + struct tso_t tso; + char *tso_hdr; + void *sgt_buf; + + /* Initialize the TSO handler, and prepare the first payload */ + hdr_len = tso_start(skb, &tso); + *total_fds_len = 0; + + total_len = skb->len - hdr_len; + while (total_len > 0) { + /* Prepare the HW SGT structure for this frame */ + sgt_buf = dpaa2_eth_sgt_get(priv); + if (unlikely(!sgt_buf)) { + netdev_err(priv->net_dev, "dpaa2_eth_sgt_get() failed\n"); + err = -ENOMEM; + goto err_sgt_get; } + sgt = (struct dpaa2_sg_entry *)(sgt_buf + priv->tx_data_offset); + + /* Determine the data length of this frame */ + data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len); + total_len -= data_left; + fd_len = data_left + hdr_len; + + /* Prepare packet headers: MAC + IP + TCP */ + tso_hdr = kmalloc(TSO_HEADER_SIZE, GFP_ATOMIC); + if (!tso_hdr) { + err = -ENOMEM; + goto err_alloc_tso_hdr; + } + + tso_build_hdr(skb, tso_hdr, &tso, data_left, total_len == 0); + tso_hdr_dma = dma_map_single(dev, tso_hdr, TSO_HEADER_SIZE, DMA_TO_DEVICE); + if (dma_mapping_error(dev, tso_hdr_dma)) { + netdev_err(priv->net_dev, "dma_map_single(tso_hdr) failed\n"); + err = -ENOMEM; + goto err_map_tso_hdr; + } + + /* Setup the SG entry for the header */ + dpaa2_sg_set_addr(sgt, tso_hdr_dma); + dpaa2_sg_set_len(sgt, hdr_len); + dpaa2_sg_set_final(sgt, data_left > 0 ? 
false : true); + + /* Compose the SG entries for each fragment of data */ + num_sge = 1; + while (data_left > 0) { + int size; + + /* Move to the next SG entry */ + sgt++; + size = min_t(int, tso.size, data_left); + + addr = dma_map_single(dev, tso.data, size, DMA_TO_DEVICE); + if (dma_mapping_error(dev, addr)) { + netdev_err(priv->net_dev, "dma_map_single(tso.data) failed\n"); + err = -ENOMEM; + goto err_map_data; + } + dpaa2_sg_set_addr(sgt, addr); + dpaa2_sg_set_len(sgt, size); + dpaa2_sg_set_final(sgt, size == data_left ? true : false); + + num_sge++; + + /* Build the data for the __next__ fragment */ + data_left -= size; + tso_build_data(skb, &tso, size); + } + + /* Store the skb backpointer in the SGT buffer */ + sgt_buf_size = priv->tx_data_offset + num_sge * sizeof(struct dpaa2_sg_entry); + swa = (struct dpaa2_eth_swa *)sgt_buf; + swa->type = DPAA2_ETH_SWA_SW_TSO; + swa->tso.skb = skb; + swa->tso.num_sg = num_sge; + swa->tso.sgt_size = sgt_buf_size; + swa->tso.is_last_fd = total_len == 0 ? 
1 : 0; + + /* Separately map the SGT buffer */ + sgt_addr = dma_map_single(dev, sgt_buf, sgt_buf_size, DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(dev, sgt_addr))) { + netdev_err(priv->net_dev, "dma_map_single(sgt_buf) failed\n"); + err = -ENOMEM; + goto err_map_sgt; + } + + /* Setup the frame descriptor */ + memset(fd, 0, sizeof(struct dpaa2_fd)); + dpaa2_fd_set_offset(fd, priv->tx_data_offset); + dpaa2_fd_set_format(fd, dpaa2_fd_sg); + dpaa2_fd_set_addr(fd, sgt_addr); + dpaa2_fd_set_len(fd, fd_len); + dpaa2_fd_set_ctrl(fd, FD_CTRL_PTA); + + *total_fds_len += fd_len; + /* Advance to the next frame descriptor */ + fd++; + index++; } - /* Move on with skb release */ - napi_consume_skb(skb, in_napi); + *num_fds = index; + + return 0; + +err_map_sgt: +err_map_data: + /* Unmap all the data S/G entries for the current FD */ + sgt = (struct dpaa2_sg_entry *)(sgt_buf + priv->tx_data_offset); + for (i = 1; i < num_sge; i++) + dma_unmap_single(dev, dpaa2_sg_get_addr(&sgt[i]), + dpaa2_sg_get_len(&sgt[i]), DMA_TO_DEVICE); + + /* Unmap the header entry */ + dma_unmap_single(dev, tso_hdr_dma, TSO_HEADER_SIZE, DMA_TO_DEVICE); +err_map_tso_hdr: + kfree(tso_hdr); +err_alloc_tso_hdr: + dpaa2_eth_sgt_recycle(priv, sgt_buf); +err_sgt_get: + /* Free all the other FDs that were already fully created */ + for (i = 0; i < index; i++) + dpaa2_eth_free_tx_fd(priv, NULL, &fd_start[i], false); + + return err; } static netdev_tx_t __dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev) { struct dpaa2_eth_priv *priv = netdev_priv(net_dev); - struct dpaa2_fd fd; - struct rtnl_link_stats64 *percpu_stats; + int total_enqueued = 0, retries = 0, enqueued; struct dpaa2_eth_drv_stats *percpu_extras; + struct rtnl_link_stats64 *percpu_stats; + unsigned int needed_headroom; + int num_fds = 1, max_retries; struct dpaa2_eth_fq *fq; struct netdev_queue *nq; + struct dpaa2_fd *fd; u16 queue_mapping; - unsigned int needed_headroom; - u32 fd_len; + void *swa = NULL; u8 prio = 0; int err, i; 
- void *swa; + u32 fd_len; percpu_stats = this_cpu_ptr(priv->percpu_stats); percpu_extras = this_cpu_ptr(priv->percpu_extras); + fd = (this_cpu_ptr(priv->fd))->array; needed_headroom = dpaa2_eth_needed_headroom(skb); @@ -1130,20 +1316,28 @@ static netdev_tx_t __dpaa2_eth_tx(struct sk_buff *skb, } /* Setup the FD fields */ - memset(&fd, 0, sizeof(fd)); - if (skb_is_nonlinear(skb)) { - err = dpaa2_eth_build_sg_fd(priv, skb, &fd, &swa); + if (skb_is_gso(skb)) { + err = dpaa2_eth_build_gso_fd(priv, skb, fd, &num_fds, &fd_len); + percpu_extras->tx_sg_frames += num_fds; + percpu_extras->tx_sg_bytes += fd_len; + percpu_extras->tx_tso_frames += num_fds; + percpu_extras->tx_tso_bytes += fd_len; + } else if (skb_is_nonlinear(skb)) { + err = dpaa2_eth_build_sg_fd(priv, skb, fd, &swa); percpu_extras->tx_sg_frames++; percpu_extras->tx_sg_bytes += skb->len; + fd_len = dpaa2_fd_get_len(fd); } else if (skb_headroom(skb) < needed_headroom) { - err = dpaa2_eth_build_sg_fd_single_buf(priv, skb, &fd, &swa); + err = dpaa2_eth_build_sg_fd_single_buf(priv, skb, fd, &swa); percpu_extras->tx_sg_frames++; percpu_extras->tx_sg_bytes += skb->len; percpu_extras->tx_converted_sg_frames++; percpu_extras->tx_converted_sg_bytes += skb->len; + fd_len = dpaa2_fd_get_len(fd); } else { - err = dpaa2_eth_build_single_fd(priv, skb, &fd, &swa); + err = dpaa2_eth_build_single_fd(priv, skb, fd, &swa); + fd_len = dpaa2_fd_get_len(fd); } if (unlikely(err)) { @@ -1151,11 +1345,12 @@ static netdev_tx_t __dpaa2_eth_tx(struct sk_buff *skb, goto err_build_fd; } - if (skb->cb[0]) - dpaa2_eth_enable_tx_tstamp(priv, &fd, swa, skb); + if (swa && skb->cb[0]) + dpaa2_eth_enable_tx_tstamp(priv, fd, swa, skb); /* Tracing point */ - trace_dpaa2_tx_fd(net_dev, &fd); + for (i = 0; i < num_fds; i++) + trace_dpaa2_tx_fd(net_dev, &fd[i]); /* TxConf FQ selection relies on queue id from the stack. 
* In case of a forwarded frame from another DPNI interface, we choose @@ -1175,27 +1370,32 @@ static netdev_tx_t __dpaa2_eth_tx(struct sk_buff *skb, queue_mapping %= dpaa2_eth_queue_count(priv); } fq = &priv->fq[queue_mapping]; - - fd_len = dpaa2_fd_get_len(&fd); nq = netdev_get_tx_queue(net_dev, queue_mapping); netdev_tx_sent_queue(nq, fd_len); /* Everything that happens after this enqueues might race with * the Tx confirmation callback for this frame */ - for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) { - err = priv->enqueue(priv, fq, &fd, prio, 1, NULL); - if (err != -EBUSY) - break; + max_retries = num_fds * DPAA2_ETH_ENQUEUE_RETRIES; + while (total_enqueued < num_fds && retries < max_retries) { + err = priv->enqueue(priv, fq, &fd[total_enqueued], + prio, num_fds - total_enqueued, &enqueued); + if (err == -EBUSY) { + retries++; + continue; + } + + total_enqueued += enqueued; } - percpu_extras->tx_portal_busy += i; + percpu_extras->tx_portal_busy += retries; + if (unlikely(err < 0)) { percpu_stats->tx_errors++; /* Clean up everything, including freeing the skb */ - dpaa2_eth_free_tx_fd(priv, fq, &fd, false); + dpaa2_eth_free_tx_fd(priv, fq, fd, false); netdev_tx_completed_queue(nq, 1, fd_len); } else { - percpu_stats->tx_packets++; + percpu_stats->tx_packets += total_enqueued; percpu_stats->tx_bytes += fd_len; } @@ -1523,7 +1723,7 @@ static void dpaa2_eth_sgt_cache_drain(struct dpaa2_eth_priv *priv) count = sgt_cache->count; for (i = 0; i < count; i++) - kfree(sgt_cache->buf[i]); + skb_free_frag(sgt_cache->buf[i]); sgt_cache->count = 0; } } @@ -4115,7 +4315,8 @@ static int dpaa2_eth_netdev_init(struct net_device *net_dev) net_dev->features = NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG | NETIF_F_HIGHDMA | - NETIF_F_LLTX | NETIF_F_HW_TC; + NETIF_F_LLTX | NETIF_F_HW_TC | NETIF_F_TSO; + net_dev->gso_max_segs = DPAA2_ETH_ENQUEUE_MAX_FDS; net_dev->hw_features = net_dev->features; if (priv->dpni_attrs.vlan_filter_entries) @@ -4397,6 +4598,13 @@ 
static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev) goto err_alloc_sgt_cache; } + priv->fd = alloc_percpu(*priv->fd); + if (!priv->fd) { + dev_err(dev, "alloc_percpu(fds) failed\n"); + err = -ENOMEM; + goto err_alloc_fds; + } + err = dpaa2_eth_netdev_init(net_dev); if (err) goto err_netdev_init; @@ -4484,6 +4692,8 @@ err_poll_thread: err_alloc_rings: err_csum: err_netdev_init: + free_percpu(priv->fd); +err_alloc_fds: free_percpu(priv->sgt_cache); err_alloc_sgt_cache: free_percpu(priv->percpu_extras); @@ -4539,6 +4749,7 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev) fsl_mc_free_irqs(ls_dev); dpaa2_eth_free_rings(priv); + free_percpu(priv->fd); free_percpu(priv->sgt_cache); free_percpu(priv->percpu_stats); free_percpu(priv->percpu_extras); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h index e54e70ebdd05..b79831cd1a94 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h @@ -122,6 +122,7 @@ enum dpaa2_eth_swa_type { DPAA2_ETH_SWA_SINGLE, DPAA2_ETH_SWA_SG, DPAA2_ETH_SWA_XDP, + DPAA2_ETH_SWA_SW_TSO, }; /* Must keep this struct smaller than DPAA2_ETH_SWA_SIZE */ @@ -142,6 +143,12 @@ struct dpaa2_eth_swa { int dma_size; struct xdp_frame *xdpf; } xdp; + struct { + struct sk_buff *skb; + int num_sg; + int sgt_size; + int is_last_fd; + } tso; }; }; @@ -354,6 +361,8 @@ struct dpaa2_eth_drv_stats { __u64 tx_conf_bytes; __u64 tx_sg_frames; __u64 tx_sg_bytes; + __u64 tx_tso_frames; + __u64 tx_tso_bytes; __u64 rx_sg_frames; __u64 rx_sg_bytes; /* Linear skbs sent as a S/G FD due to insufficient headroom */ @@ -493,8 +502,15 @@ struct dpaa2_eth_trap_data { struct dpaa2_eth_priv *priv; }; +#define DPAA2_ETH_SG_ENTRIES_MAX (PAGE_SIZE / sizeof(struct scatterlist)) + #define DPAA2_ETH_DEFAULT_COPYBREAK 512 +#define DPAA2_ETH_ENQUEUE_MAX_FDS 200 +struct dpaa2_eth_fds { + struct dpaa2_fd array[DPAA2_ETH_ENQUEUE_MAX_FDS]; +}; + /* Driver 
private data */ struct dpaa2_eth_priv { struct net_device *net_dev; @@ -577,6 +593,8 @@ struct dpaa2_eth_priv { struct devlink_port devlink_port; u32 rx_copybreak; + + struct dpaa2_eth_fds __percpu *fd; }; struct dpaa2_eth_devlink_priv { diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c index 3fdbf87dccb1..eea7d7a07c00 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c @@ -44,6 +44,8 @@ static char dpaa2_ethtool_extras[][ETH_GSTRING_LEN] = { "[drv] tx conf bytes", "[drv] tx sg frames", "[drv] tx sg bytes", + "[drv] tx tso frames", + "[drv] tx tso bytes", "[drv] rx sg frames", "[drv] rx sg bytes", "[drv] tx converted sg frames", diff --git a/drivers/soc/fsl/dpio/qbman-portal.c b/drivers/soc/fsl/dpio/qbman-portal.c index 058b78fac5e3..0a3fb6c115f4 100644 --- a/drivers/soc/fsl/dpio/qbman-portal.c +++ b/drivers/soc/fsl/dpio/qbman-portal.c @@ -743,8 +743,8 @@ int qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s, full_mask = s->eqcr.pi_ci_mask; if (!s->eqcr.available) { eqcr_ci = s->eqcr.ci; - p = s->addr_cena + QBMAN_CENA_SWP_EQCR_CI_MEMBACK; - s->eqcr.ci = *p & full_mask; + s->eqcr.ci = qbman_read_register(s, QBMAN_CINH_SWP_EQCR_CI); + s->eqcr.ci &= full_mask; s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size, eqcr_ci, s->eqcr.ci); if (!s->eqcr.available) { @@ -887,8 +887,8 @@ int qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s, full_mask = s->eqcr.pi_ci_mask; if (!s->eqcr.available) { eqcr_ci = s->eqcr.ci; - p = s->addr_cena + QBMAN_CENA_SWP_EQCR_CI_MEMBACK; - s->eqcr.ci = *p & full_mask; + s->eqcr.ci = qbman_read_register(s, QBMAN_CINH_SWP_EQCR_CI); + s->eqcr.ci &= full_mask; s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size, eqcr_ci, s->eqcr.ci); if (!s->eqcr.available) |