From 675716400da6f15b9d3db04ef74ee74ca9a00af3 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Thu, 4 Jul 2019 17:25:03 +0300 Subject: xdp: fix possible cq entry leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Completion queue address reservation could not be undone. In case of a bad 'queue_id' or an skb allocation failure, the reserved entry will be leaked, reducing the total capacity of the completion queue. Fix that by moving the reservation to the point where failure is not possible. Additionally, the 'queue_id' check is moved out of the loop since there is no point in checking it there. Fixes: 35fcde7f8deb ("xsk: support for Tx") Signed-off-by: Ilya Maximets Acked-by: Björn Töpel Tested-by: William Tu Signed-off-by: Daniel Borkmann --- net/xdp/xsk.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index d4d6f10aa936..b994c32a664a 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -240,6 +240,9 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m, mutex_lock(&xs->mutex); + if (xs->queue_id >= xs->dev->real_num_tx_queues) + goto out; + while (xskq_peek_desc(xs->tx, &desc)) { char *buffer; u64 addr; @@ -250,12 +253,6 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m, goto out; } - if (xskq_reserve_addr(xs->umem->cq)) - goto out; - - if (xs->queue_id >= xs->dev->real_num_tx_queues) - goto out; - len = desc.len; skb = sock_alloc_send_skb(sk, len, 1, &err); if (unlikely(!skb)) { @@ -267,7 +264,7 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m, addr = desc.addr; buffer = xdp_umem_get_data(xs->umem, addr); err = skb_store_bits(skb, 0, buffer, len); - if (unlikely(err)) { + if (unlikely(err) || xskq_reserve_addr(xs->umem->cq)) { kfree_skb(skb); goto out; } -- cgit v1.2.3 From 5464c3a0e9a037b63d5229cdea08dddc01a98aac Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Mon, 8 Jul 2019 14:03:44 +0300 Subject: xdp: fix potential 
deadlock on socket mutex There are 2 call chains: a) xsk_bind --> xdp_umem_assign_dev b) unregister_netdevice_queue --> xsk_notifier with the following locking order: a) xs->mutex --> rtnl_lock b) rtnl_lock --> xdp.lock --> xs->mutex Taking 'xs->mutex' and 'rtnl_lock' in a different order could produce a deadlock here. Fix that by moving the 'rtnl_lock' before 'xs->mutex' in the bind call chain (a). Reported-by: syzbot+bf64ec93de836d7f4c2c@syzkaller.appspotmail.com Fixes: 455302d1c9ae ("xdp: fix hang while unregistering device bound to xdp socket") Signed-off-by: Ilya Maximets Acked-by: Jonathan Lemon Signed-off-by: Daniel Borkmann --- net/xdp/xdp_umem.c | 16 ++++++---------- net/xdp/xsk.c | 2 ++ 2 files changed, 8 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 20c91f02d3d8..83de74ca729a 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -87,21 +87,20 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev, struct netdev_bpf bpf; int err = 0; + ASSERT_RTNL(); + force_zc = flags & XDP_ZEROCOPY; force_copy = flags & XDP_COPY; if (force_zc && force_copy) return -EINVAL; - rtnl_lock(); - if (xdp_get_umem_from_qid(dev, queue_id)) { - err = -EBUSY; - goto out_rtnl_unlock; - } + if (xdp_get_umem_from_qid(dev, queue_id)) + return -EBUSY; err = xdp_reg_umem_at_qid(dev, umem, queue_id); if (err) - goto out_rtnl_unlock; + return err; umem->dev = dev; umem->queue_id = queue_id; @@ -110,7 +109,7 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev, if (force_copy) /* For copy-mode, we are done. 
*/ - goto out_rtnl_unlock; + return 0; if (!dev->netdev_ops->ndo_bpf || !dev->netdev_ops->ndo_xsk_async_xmit) { @@ -125,7 +124,6 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev, err = dev->netdev_ops->ndo_bpf(dev, &bpf); if (err) goto err_unreg_umem; - rtnl_unlock(); umem->zc = true; return 0; @@ -135,8 +133,6 @@ err_unreg_umem: err = 0; /* fallback to copy mode */ if (err) xdp_clear_umem_at_qid(dev, queue_id); -out_rtnl_unlock: - rtnl_unlock(); return err; } diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index b994c32a664a..59b57d708697 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -430,6 +430,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY)) return -EINVAL; + rtnl_lock(); mutex_lock(&xs->mutex); if (xs->state != XSK_READY) { err = -EBUSY; @@ -515,6 +516,7 @@ out_unlock: xs->state = XSK_BOUND; out_release: mutex_unlock(&xs->mutex); + rtnl_unlock(); return err; } -- cgit v1.2.3 From b43995469e5804636a55372e9bbb17ccb22441c5 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 15 Jul 2019 09:39:52 -0700 Subject: bpf: rename bpf_ctx_wide_store_ok to bpf_ctx_wide_access_ok Rename bpf_ctx_wide_store_ok to bpf_ctx_wide_access_ok to indicate that it can be used for both loads and stores. 
Cc: Yonghong Song Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- include/linux/filter.h | 2 +- net/core/filter.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/include/linux/filter.h b/include/linux/filter.h index 6d944369ca87..ff65d22cf336 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -747,7 +747,7 @@ bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default) return size <= size_default && (size & (size - 1)) == 0; } -#define bpf_ctx_wide_store_ok(off, size, type, field) \ +#define bpf_ctx_wide_access_ok(off, size, type, field) \ (size == sizeof(__u64) && \ off >= offsetof(type, field) && \ off + sizeof(__u64) <= offsetofend(type, field) && \ diff --git a/net/core/filter.c b/net/core/filter.c index 47f6386fb17a..c5983ddb1a9f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6890,14 +6890,14 @@ static bool sock_addr_is_valid_access(int off, int size, if (!bpf_ctx_narrow_access_ok(off, size, size_default)) return false; } else { - if (bpf_ctx_wide_store_ok(off, size, - struct bpf_sock_addr, - user_ip6)) + if (bpf_ctx_wide_access_ok(off, size, + struct bpf_sock_addr, + user_ip6)) return true; - if (bpf_ctx_wide_store_ok(off, size, - struct bpf_sock_addr, - msg_src_ip6)) + if (bpf_ctx_wide_access_ok(off, size, + struct bpf_sock_addr, + msg_src_ip6)) return true; if (size != size_default) -- cgit v1.2.3 From d4ecfeb15494ec261fef2d25d96eecba66f0b182 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 15 Jul 2019 09:39:53 -0700 Subject: bpf: allow wide aligned loads for bpf_sock_addr user_ip6 and msg_src_ip6 Add explicit check for u64 loads of user_ip6 and msg_src_ip6 and update the comment. 
Cc: Yonghong Song Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 4 ++-- net/core/filter.c | 12 +++++++++++- 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6f68438aa4ed..81be929b89fc 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3248,7 +3248,7 @@ struct bpf_sock_addr { __u32 user_ip4; /* Allows 1,2,4-byte read and 4-byte write. * Stored in network byte order. */ - __u32 user_ip6[4]; /* Allows 1,2,4-byte read and 4,8-byte write. + __u32 user_ip6[4]; /* Allows 1,2,4,8-byte read and 4,8-byte write. * Stored in network byte order. */ __u32 user_port; /* Allows 4-byte read and write. @@ -3260,7 +3260,7 @@ struct bpf_sock_addr { __u32 msg_src_ip4; /* Allows 1,2,4-byte read and 4-byte write. * Stored in network byte order. */ - __u32 msg_src_ip6[4]; /* Allows 1,2,4-byte read and 4,8-byte write. + __u32 msg_src_ip6[4]; /* Allows 1,2,4,8-byte read and 4,8-byte write. * Stored in network byte order. */ __bpf_md_ptr(struct bpf_sock *, sk); diff --git a/net/core/filter.c b/net/core/filter.c index c5983ddb1a9f..0f6854ccf894 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6884,9 +6884,19 @@ static bool sock_addr_is_valid_access(int off, int size, case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4): case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0], msg_src_ip6[3]): - /* Only narrow read access allowed for now. */ if (type == BPF_READ) { bpf_ctx_record_field_size(info, size_default); + + if (bpf_ctx_wide_access_ok(off, size, + struct bpf_sock_addr, + user_ip6)) + return true; + + if (bpf_ctx_wide_access_ok(off, size, + struct bpf_sock_addr, + msg_src_ip6)) + return true; + if (!bpf_ctx_narrow_access_ok(off, size, size_default)) return false; } else { -- cgit v1.2.3