From 60beb2eef0388d851e5515dfad296b9016d57d25 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 14 Dec 2015 14:08:53 -0800 Subject: net: fix IP early demux races [ Upstream commit 5037e9ef9454917b047f9f3a19b4dd179fbf7cd4 ] David Wilder reported crashes caused by dst reuse. I am seeing a crash on a distro V4.2.3 kernel caused by a double release of a dst_entry. In ipv4_dst_destroy() the call to list_empty() finds a poisoned next pointer, indicating the dst_entry has already been removed from the list and freed. The crash occurs 18 to 24 hours into a run of a network stress exerciser. Thanks to his detailed report and analysis, we were able to understand the core issue. IP early demux can associate a dst to skb, after a lookup in TCP/UDP sockets. When socket cache is not properly set, we want to store into sk->sk_dst_cache the dst for future IP early demux lookups, by acquiring a stable refcount on the dst. Problem is this acquisition is simply using an atomic_inc(), which works well, unless the dst was queued for destruction from dst_release() noticing dst refcount went to zero, if DST_NOCACHE was set on dst. We need to make sure current refcount is not zero before incrementing it, or risk double free as David reported. This patch, being a stable candidate, adds two new helpers, and use them only from IP early demux problematic paths. It might be possible to merge in net-next skb_dst_force() and skb_dst_force_safe(), but I prefer having the smallest patch for stable kernels : Maybe some skb_dst_force() callers do not expect skb->dst can suddenly be cleared. Can probably be backported back to linux-3.6 kernels Reported-by: David J. Wilder Tested-by: David J. Wilder Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Jiri Slaby --- include/net/dst.h | 33 +++++++++++++++++++++++++++++++++ include/net/sock.h | 2 +- 2 files changed, 34 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index 30cd2f9cd1dd..d30afbdc1a59 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -306,6 +306,39 @@ static inline void skb_dst_force(struct sk_buff *skb) } } +/** + * dst_hold_safe - Take a reference on a dst if possible + * @dst: pointer to dst entry + * + * This helper returns false if it could not safely + * take a reference on a dst. + */ +static inline bool dst_hold_safe(struct dst_entry *dst) +{ + if (dst->flags & DST_NOCACHE) + return atomic_inc_not_zero(&dst->__refcnt); + dst_hold(dst); + return true; +} + +/** + * skb_dst_force_safe - makes sure skb dst is refcounted + * @skb: buffer + * + * If dst is not yet refcounted and not destroyed, grab a ref on it. + */ +static inline void skb_dst_force_safe(struct sk_buff *skb) +{ + if (skb_dst_is_noref(skb)) { + struct dst_entry *dst = skb_dst(skb); + + if (!dst_hold_safe(dst)) + dst = NULL; + + skb->_skb_refdst = (unsigned long)dst; + } +} + /** * __skb_tunnel_rx - prepare skb for rx reinsert diff --git a/include/net/sock.h b/include/net/sock.h index 41d98f1d0459..6ed6df149bce 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -760,7 +760,7 @@ extern void sk_stream_write_space(struct sock *sk); static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb) { /* dont let skb dst not refcounted, we are going to leave rcu lock */ - skb_dst_force(skb); + skb_dst_force_safe(skb); if (!sk->sk_backlog.tail) sk->sk_backlog.head = skb; -- cgit v1.2.3