summaryrefslogtreecommitdiff
path: root/net/xdp/xsk.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/xdp/xsk.c')
-rw-r--r--net/xdp/xsk.c114
1 files changed, 110 insertions, 4 deletions
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 62504471fd20..ac4a317038f1 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -23,6 +23,7 @@
#include <linux/netdevice.h>
#include <linux/rculist.h>
#include <net/xdp_sock_drv.h>
+#include <net/busy_poll.h>
#include <net/xdp.h>
#include "xsk_queue.h"
@@ -240,6 +241,7 @@ static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp,
if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
return -EINVAL;
+ sk_mark_napi_id_once_xdp(&xs->sk, xdp);
len = xdp->data_end - xdp->data;
return xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL ?
@@ -341,6 +343,63 @@ out:
}
EXPORT_SYMBOL(xsk_tx_peek_desc);
+static u32 xsk_tx_peek_release_fallback(struct xsk_buff_pool *pool, struct xdp_desc *descs,
+ u32 max_entries)
+{
+ u32 nb_pkts = 0;
+
+ while (nb_pkts < max_entries && xsk_tx_peek_desc(pool, &descs[nb_pkts]))
+ nb_pkts++;
+
+ xsk_tx_release(pool);
+ return nb_pkts;
+}
+
+u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *descs,
+ u32 max_entries)
+{
+ struct xdp_sock *xs;
+ u32 nb_pkts;
+
+ rcu_read_lock();
+ if (!list_is_singular(&pool->xsk_tx_list)) {
+ /* Fallback to the non-batched version */
+ rcu_read_unlock();
+ return xsk_tx_peek_release_fallback(pool, descs, max_entries);
+ }
+
+ xs = list_first_or_null_rcu(&pool->xsk_tx_list, struct xdp_sock, tx_list);
+ if (!xs) {
+ nb_pkts = 0;
+ goto out;
+ }
+
+ nb_pkts = xskq_cons_peek_desc_batch(xs->tx, descs, pool, max_entries);
+ if (!nb_pkts) {
+ xs->tx->queue_empty_descs++;
+ goto out;
+ }
+
+ /* This is the backpressure mechanism for the Tx path. Try to
+ * reserve space in the completion queue for all packets, but
+ * if there are fewer slots available, just process that many
+ * packets. This avoids having to implement any buffering in
+ * the Tx path.
+ */
+ nb_pkts = xskq_prod_reserve_addr_batch(pool->cq, descs, nb_pkts);
+ if (!nb_pkts)
+ goto out;
+
+ xskq_cons_release_n(xs->tx, nb_pkts);
+ __xskq_cons_release(xs->tx);
+ xs->sk.sk_write_space(&xs->sk);
+
+out:
+ rcu_read_unlock();
+ return nb_pkts;
+}
+EXPORT_SYMBOL(xsk_tx_peek_release_desc_batch);
+
static int xsk_wakeup(struct xdp_sock *xs, u8 flags)
{
struct net_device *dev = xs->dev;
@@ -464,18 +523,65 @@ static int __xsk_sendmsg(struct sock *sk)
return xs->zc ? xsk_zc_xmit(xs) : xsk_generic_xmit(sk);
}
+static bool xsk_no_wakeup(struct sock *sk)
+{
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ /* Prefer busy-polling, skip the wakeup. */
+ return READ_ONCE(sk->sk_prefer_busy_poll) && READ_ONCE(sk->sk_ll_usec) &&
+ READ_ONCE(sk->sk_napi_id) >= MIN_NAPI_ID;
+#else
+ return false;
+#endif
+}
+
static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
{
bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
struct sock *sk = sock->sk;
struct xdp_sock *xs = xdp_sk(sk);
+ struct xsk_buff_pool *pool;
if (unlikely(!xsk_is_bound(xs)))
return -ENXIO;
if (unlikely(need_wait))
return -EOPNOTSUPP;
- return __xsk_sendmsg(sk);
+ if (sk_can_busy_loop(sk))
+ sk_busy_loop(sk, 1); /* only support non-blocking sockets */
+
+ if (xsk_no_wakeup(sk))
+ return 0;
+
+ pool = xs->pool;
+ if (pool->cached_need_wakeup & XDP_WAKEUP_TX)
+ return __xsk_sendmsg(sk);
+ return 0;
+}
+
+static int xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags)
+{
+ bool need_wait = !(flags & MSG_DONTWAIT);
+ struct sock *sk = sock->sk;
+ struct xdp_sock *xs = xdp_sk(sk);
+
+ if (unlikely(!xsk_is_bound(xs)))
+ return -ENXIO;
+ if (unlikely(!(xs->dev->flags & IFF_UP)))
+ return -ENETDOWN;
+ if (unlikely(!xs->rx))
+ return -ENOBUFS;
+ if (unlikely(need_wait))
+ return -EOPNOTSUPP;
+
+ if (sk_can_busy_loop(sk))
+ sk_busy_loop(sk, 1); /* only support non-blocking sockets */
+
+ if (xsk_no_wakeup(sk))
+ return 0;
+
+ if (xs->pool->cached_need_wakeup & XDP_WAKEUP_RX && xs->zc)
+ return xsk_wakeup(xs, XDP_WAKEUP_RX);
+ return 0;
}
static __poll_t xsk_poll(struct file *file, struct socket *sock,
@@ -554,7 +660,7 @@ static struct xsk_map *xsk_get_map_list_entry(struct xdp_sock *xs,
node = list_first_entry_or_null(&xs->map_list, struct xsk_map_node,
node);
if (node) {
- WARN_ON(xsk_map_inc(node->map));
+ bpf_map_inc(&node->map->map);
map = node->map;
*map_entry = node->map_entry;
}
@@ -584,7 +690,7 @@ static void xsk_delete_from_maps(struct xdp_sock *xs)
while ((map = xsk_get_map_list_entry(xs, &map_entry))) {
xsk_map_try_sock_delete(map, xs, map_entry);
- xsk_map_put(map);
+ bpf_map_put(&map->map);
}
}
@@ -1140,7 +1246,7 @@ static const struct proto_ops xsk_proto_ops = {
.setsockopt = xsk_setsockopt,
.getsockopt = xsk_getsockopt,
.sendmsg = xsk_sendmsg,
- .recvmsg = sock_no_recvmsg,
+ .recvmsg = xsk_recvmsg,
.mmap = xsk_mmap,
.sendpage = sock_no_sendpage,
};