diff options
Diffstat (limited to 'net')
58 files changed, 482 insertions, 285 deletions
diff --git a/net/atm/lec.c b/net/atm/lec.c index d7f5cf5b7594..ad4f829193f0 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -710,7 +710,10 @@ static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg) static int lec_mcast_attach(struct atm_vcc *vcc, int arg) { - if (arg < 0 || arg >= MAX_LEC_ITF || !dev_lec[arg]) + if (arg < 0 || arg >= MAX_LEC_ITF) + return -EINVAL; + arg = array_index_nospec(arg, MAX_LEC_ITF); + if (!dev_lec[arg]) return -EINVAL; vcc->proto_data = dev_lec[arg]; return lec_mcast_make(netdev_priv(dev_lec[arg]), vcc); @@ -728,6 +731,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg) i = arg; if (arg >= MAX_LEC_ITF) return -EINVAL; + i = array_index_nospec(arg, MAX_LEC_ITF); if (!dev_lec[i]) { int size; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 9a580999ca57..d892b7c3cc42 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -523,12 +523,12 @@ static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, struct sock *sk = sock->sk; int err = 0; - BT_DBG("sk %p %pMR", sk, &sa->sco_bdaddr); - if (!addr || addr_len < sizeof(struct sockaddr_sco) || addr->sa_family != AF_BLUETOOTH) return -EINVAL; + BT_DBG("sk %p %pMR", sk, &sa->sco_bdaddr); + lock_sock(sk); if (sk->sk_state != BT_OPEN) { diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 5ea7e56119c1..ba303ee99b9b 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -197,13 +197,10 @@ static void __br_handle_local_finish(struct sk_buff *skb) /* note: already called with rcu_read_lock */ static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct net_bridge_port *p = br_port_get_rcu(skb->dev); - __br_handle_local_finish(skb); - BR_INPUT_SKB_CB(skb)->brdev = p->br->dev; - br_pass_frame_up(skb); - return 0; + /* return 1 to signal the okfn() was called so it's ok to use the skb */ + return 1; } /* @@ -280,10 +277,18 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) goto forward; } - /* Deliver packet to local host only */ - NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, dev_net(skb->dev), - NULL, skb, skb->dev, NULL, br_handle_local_finish); - return RX_HANDLER_CONSUMED; + /* The else clause should be hit when nf_hook(): + * - returns < 0 (drop/error) + * - returns = 0 (stolen/nf_queue) + * Thus return 1 from the okfn() to signal the skb is ok to pass + */ + if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, + dev_net(skb->dev), NULL, skb, skb->dev, NULL, + br_handle_local_finish) == 1) { + return RX_HANDLER_PASS; + } else { + return RX_HANDLER_CONSUMED; + } } forward: diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 9c07591b0232..7104cf13da84 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1441,7 +1441,7 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) nla_put_u8(skb, IFLA_BR_VLAN_STATS_ENABLED, br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) || nla_put_u8(skb, IFLA_BR_VLAN_STATS_PER_PORT, - br_opt_get(br, IFLA_BR_VLAN_STATS_PER_PORT))) + br_opt_get(br, BROPT_VLAN_STATS_PER_PORT))) return -EMSGSIZE; #endif #ifdef CONFIG_BRIDGE_IGMP_SNOOPING diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index eb15891f8b9f..3cad01ac64e4 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -2032,7 +2032,8 @@ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32, if (match_kern) match_kern->match_size = ret; - if (WARN_ON(type == EBT_COMPAT_TARGET && size_left)) + /* rule should have no remaining data after target */ + if (type == EBT_COMPAT_TARGET && size_left) return -EINVAL; match32 = (struct compat_ebt_entry_mwt *) buf; diff --git a/net/core/filter.c b/net/core/filter.c index fc92ebc4e200..27e61ffd9039 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4383,6 +4383,8 @@ BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr, * Only binding to IP is supported. */ err = -EINVAL; + if (addr_len < offsetofend(struct sockaddr, sa_family)) + return err; if (addr->sa_family == AF_INET) { if (addr_len < sizeof(struct sockaddr_in)) return err; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index f8f94303a1f5..8f8b7b6c2945 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1747,20 +1747,16 @@ int netdev_register_kobject(struct net_device *ndev) error = device_add(dev); if (error) - goto error_put_device; + return error; error = register_queue_kobjects(ndev); - if (error) - goto error_device_del; + if (error) { + device_del(dev); + return error; + } pm_runtime_set_memalloc_noio(dev, true); - return 0; - -error_device_del: - device_del(dev); -error_put_device: - put_device(dev); return error; } diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c index 703cf76aa7c2..7109c168b5e0 100644 --- a/net/core/ptp_classifier.c +++ b/net/core/ptp_classifier.c @@ -185,9 +185,10 @@ void __init ptp_classifier_init(void) { 0x16, 0, 0, 0x00000000 }, { 0x06, 0, 0, 0x00000000 }, }; - struct sock_fprog_kern ptp_prog = { - .len = ARRAY_SIZE(ptp_filter), .filter = ptp_filter, - }; + struct sock_fprog_kern ptp_prog; + + ptp_prog.len = ARRAY_SIZE(ptp_filter); + ptp_prog.filter = ptp_filter; BUG_ON(bpf_prog_create(&ptp_insns, &ptp_prog)); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a51cab95ba64..220c56e93659 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4948,7 +4948,7 @@ static int rtnl_valid_stats_req(const struct nlmsghdr *nlh, bool strict_check, { struct if_stats_msg *ifsm; - if (nlh->nlmsg_len < sizeof(*ifsm)) { + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifsm))) { NL_SET_ERR_MSG(extack, "Invalid header for stats dump"); return -EINVAL; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ef2cd5712098..40796b8bf820 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5083,7 +5083,8 @@ EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len); static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) { - int mac_len; + int mac_len, meta_len; + void *meta; if (skb_cow(skb, skb_headroom(skb)) < 0) { kfree_skb(skb); @@ -5095,6 +5096,13 @@ static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb), mac_len - VLAN_HLEN - ETH_TLEN); } + + meta_len = skb_metadata_len(skb); + if (meta_len) { + meta = skb_metadata_end(skb) - meta_len; + memmove(meta + VLAN_HLEN, meta, meta_len); + } + skb->mac_header += VLAN_HLEN; return skb; } diff --git a/net/core/sock.c b/net/core/sock.c index 782343bb925b..067878a1e4c5 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -348,7 +348,7 @@ static int sock_get_timeout(long timeo, void *optval, bool old_timeval) tv.tv_usec = ((timeo % HZ) * USEC_PER_SEC) / HZ; } - if (in_compat_syscall() && !COMPAT_USE_64BIT_TIME) { + if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) { struct old_timeval32 tv32 = { tv.tv_sec, tv.tv_usec }; *(struct old_timeval32 *)optval = tv32; return sizeof(tv32); @@ -372,7 +372,7 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen, bool { struct __kernel_sock_timeval tv; - if (in_compat_syscall() && !COMPAT_USE_64BIT_TIME) { + if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) { struct old_timeval32 tv32; if (optlen < sizeof(tv32)) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a5da63e5faa2..6fdf1c195d8e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1183,11 +1183,39 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) return dst; } +static void ipv4_send_dest_unreach(struct sk_buff *skb) +{ + struct ip_options opt; + int res; + + /* Recompile ip options since IPCB may not be valid anymore. + * Also check we have a reasonable ipv4 header. + */ + if (!pskb_network_may_pull(skb, sizeof(struct iphdr)) || + ip_hdr(skb)->version != 4 || ip_hdr(skb)->ihl < 5) + return; + + memset(&opt, 0, sizeof(opt)); + if (ip_hdr(skb)->ihl > 5) { + if (!pskb_network_may_pull(skb, ip_hdr(skb)->ihl * 4)) + return; + opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr); + + rcu_read_lock(); + res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL); + rcu_read_unlock(); + + if (res) + return; + } + __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt); +} + static void ipv4_link_failure(struct sk_buff *skb) { struct rtable *rt; - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); + ipv4_send_dest_unreach(skb); rt = skb_rtable(skb); if (rt) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index ba0fc4b18465..eeb4041fa5f9 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -49,6 +49,7 @@ static int ip_ping_group_range_min[] = { 0, 0 }; static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; static int comp_sack_nr_max = 255; static u32 u32_max_div_HZ = UINT_MAX / HZ; +static int one_day_secs = 24 * 3600; /* obsolete */ static int sysctl_tcp_low_latency __read_mostly; @@ -1151,7 +1152,9 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_tcp_min_rtt_wlen, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one_day_secs }, { .procname = "tcp_autocorking", diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 359da68d7c06..477cb4aa456c 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -49,9 +49,8 @@ #define DCTCP_MAX_ALPHA 1024U struct dctcp { - u32 acked_bytes_ecn; - u32 acked_bytes_total; - u32 prior_snd_una; + u32 old_delivered; + u32 old_delivered_ce; u32 prior_rcv_nxt; u32 dctcp_alpha; u32 next_seq; @@ -73,8 +72,8 @@ static void dctcp_reset(const struct tcp_sock *tp, struct dctcp *ca) { ca->next_seq = tp->snd_nxt; - ca->acked_bytes_ecn = 0; - ca->acked_bytes_total = 0; + ca->old_delivered = tp->delivered; + ca->old_delivered_ce = tp->delivered_ce; } static void dctcp_init(struct sock *sk) @@ -86,7 +85,6 @@ static void dctcp_init(struct sock *sk) sk->sk_state == TCP_CLOSE)) { struct dctcp *ca = inet_csk_ca(sk); - ca->prior_snd_una = tp->snd_una; ca->prior_rcv_nxt = tp->rcv_nxt; ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA); @@ -118,37 +116,25 @@ static void dctcp_update_alpha(struct sock *sk, u32 flags) { const struct tcp_sock *tp = tcp_sk(sk); struct dctcp *ca = inet_csk_ca(sk); - u32 acked_bytes = tp->snd_una - ca->prior_snd_una; - - /* If ack did not advance snd_una, count dupack as MSS size. - * If ack did update window, do not count it at all. - */ - if (acked_bytes == 0 && !(flags & CA_ACK_WIN_UPDATE)) - acked_bytes = inet_csk(sk)->icsk_ack.rcv_mss; - if (acked_bytes) { - ca->acked_bytes_total += acked_bytes; - ca->prior_snd_una = tp->snd_una; - - if (flags & CA_ACK_ECE) - ca->acked_bytes_ecn += acked_bytes; - } /* Expired RTT */ if (!before(tp->snd_una, ca->next_seq)) { - u64 bytes_ecn = ca->acked_bytes_ecn; + u32 delivered_ce = tp->delivered_ce - ca->old_delivered_ce; u32 alpha = ca->dctcp_alpha; /* alpha = (1 - g) * alpha + g * F */ alpha -= min_not_zero(alpha, alpha >> dctcp_shift_g); - if (bytes_ecn) { + if (delivered_ce) { + u32 delivered = tp->delivered - ca->old_delivered; + /* If dctcp_shift_g == 1, a 32bit value would overflow - * after 8 Mbytes. + * after 8 M packets. */ - bytes_ecn <<= (10 - dctcp_shift_g); - do_div(bytes_ecn, max(1U, ca->acked_bytes_total)); + delivered_ce <<= (10 - dctcp_shift_g); + delivered_ce /= max(1U, delivered); - alpha = min(alpha + (u32)bytes_ecn, DCTCP_MAX_ALPHA); + alpha = min(alpha + delivered_ce, DCTCP_MAX_ALPHA); } /* dctcp_alpha can be read from dctcp_get_info() without * synchro, so we ask compiler to not use dctcp_alpha @@ -200,6 +186,7 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr, union tcp_cc_info *info) { const struct dctcp *ca = inet_csk_ca(sk); + const struct tcp_sock *tp = tcp_sk(sk); /* Fill it also in case of VEGASINFO due to req struct limits. * We can still correctly retrieve it later. @@ -211,8 +198,10 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr, info->dctcp.dctcp_enabled = 1; info->dctcp.dctcp_ce_state = (u16) ca->ce_state; info->dctcp.dctcp_alpha = ca->dctcp_alpha; - info->dctcp.dctcp_ab_ecn = ca->acked_bytes_ecn; - info->dctcp.dctcp_ab_tot = ca->acked_bytes_total; + info->dctcp.dctcp_ab_ecn = tp->mss_cache * + (tp->delivered_ce - ca->old_delivered_ce); + info->dctcp.dctcp_ab_tot = tp->mss_cache * + (tp->delivered - ca->old_delivered); } *attr = INET_DIAG_DCTCPINFO; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5def3c48870e..731d3045b50a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -402,11 +402,12 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb) static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); + int room; + + room = min_t(int, tp->window_clamp, tcp_space(sk)) - tp->rcv_ssthresh; /* Check #1 */ - if (tp->rcv_ssthresh < tp->window_clamp && - (int)tp->rcv_ssthresh < tcp_space(sk) && - !tcp_under_memory_pressure(sk)) { + if (room > 0 && !tcp_under_memory_pressure(sk)) { int incr; /* Check #2. Increase window, if skb with such overhead @@ -419,8 +420,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) if (incr) { incr = max_t(int, incr, 2 * skb->len); - tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, - tp->window_clamp); + tp->rcv_ssthresh += min(room, incr); inet_csk(sk)->icsk_ack.quick |= 1; } } diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index d43d076c98f5..1766325423b5 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -476,7 +476,7 @@ static int ip6addrlbl_valid_dump_req(const struct nlmsghdr *nlh, } if (nlmsg_attrlen(nlh, sizeof(*ifal))) { - NL_SET_ERR_MSG_MOD(extack, "Invalid data after header for address label dump requewst"); + NL_SET_ERR_MSG_MOD(extack, "Invalid data after header for address label dump request"); return -EINVAL; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0302e0eb07af..7178e32eb15d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2330,6 +2330,10 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, rcu_read_lock(); from = rcu_dereference(rt6->from); + if (!from) { + rcu_read_unlock(); + return; + } nrt6 = ip6_rt_cache_alloc(from, daddr, saddr); if (nrt6) { rt6_do_update_pmtu(nrt6, mtu); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b444483cdb2b..622eeaf5732b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1047,6 +1047,8 @@ static void udp_v6_flush_pending_frames(struct sock *sk) static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { + if (addr_len < offsetofend(struct sockaddr, sa_family)) + return -EINVAL; /* The following checks are replicated from __ip6_datagram_connect() * and intended to prevent BPF program called below from accessing * bytes that are out of the bound specified by user in addr_len. diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index b99e73a7e7e0..2017b7d780f5 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -320,14 +320,13 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) struct llc_sap *sap; int rc = -EINVAL; - dprintk("%s: binding %02X\n", __func__, addr->sllc_sap); - lock_sock(sk); if (unlikely(!sock_flag(sk, SOCK_ZAPPED) || addrlen != sizeof(*addr))) goto out; rc = -EAFNOSUPPORT; if (unlikely(addr->sllc_family != AF_LLC)) goto out; + dprintk("%s: binding %02X\n", __func__, addr->sllc_sap); rc = -ENODEV; rcu_read_lock(); if (sk->sk_bound_dev_if) { diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index dc07fcc7938e..802db01e3075 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -11,6 +11,7 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/netdevice.h> +#include <linux/etherdevice.h> #include <linux/skbuff.h> #include <net/ncsi.h> @@ -667,7 +668,10 @@ static int ncsi_rsp_handler_oem_bcm_gma(struct ncsi_request *nr) ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; memcpy(saddr.sa_data, &rsp->data[BCM_MAC_ADDR_OFFSET], ETH_ALEN); /* Increase mac address by 1 for BMC's address */ - saddr.sa_data[ETH_ALEN - 1]++; + eth_addr_inc((u8 *)saddr.sa_data); + if (!is_valid_ether_addr((const u8 *)saddr.sa_data)) + return -ENXIO; + ret = ops->ndo_set_mac_address(ndev, &saddr); if (ret < 0) netdev_warn(ndev, "NCSI: 'Writing mac address to device failed\n"); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 43bbaa32b1d6..14457551bcb4 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1678,7 +1678,7 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related, if (!cp) { int v; - if (!sysctl_schedule_icmp(ipvs)) + if (ipip || !sysctl_schedule_icmp(ipvs)) return NF_ACCEPT; if (!ip_vs_try_to_schedule(ipvs, AF_INET, skb, pd, &v, &cp, &ciph)) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 82bfbeef46af..2a714527cde1 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -25,6 +25,7 @@ #include <linux/slab.h> #include <linux/random.h> #include <linux/jhash.h> +#include <linux/siphash.h> #include <linux/err.h> #include <linux/percpu.h> #include <linux/moduleparam.h> @@ -449,6 +450,40 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, } EXPORT_SYMBOL_GPL(nf_ct_invert_tuple); +/* Generate a almost-unique pseudo-id for a given conntrack. + * + * intentionally doesn't re-use any of the seeds used for hash + * table location, we assume id gets exposed to userspace. + * + * Following nf_conn items do not change throughout lifetime + * of the nf_conn after it has been committed to main hash table: + * + * 1. nf_conn address + * 2. nf_conn->ext address + * 3. nf_conn->master address (normally NULL) + * 4. tuple + * 5. the associated net namespace + */ +u32 nf_ct_get_id(const struct nf_conn *ct) +{ + static __read_mostly siphash_key_t ct_id_seed; + unsigned long a, b, c, d; + + net_get_random_once(&ct_id_seed, sizeof(ct_id_seed)); + + a = (unsigned long)ct; + b = (unsigned long)ct->master ^ net_hash_mix(nf_ct_net(ct)); + c = (unsigned long)ct->ext; + d = (unsigned long)siphash(&ct->tuplehash, sizeof(ct->tuplehash), + &ct_id_seed); +#ifdef CONFIG_64BIT + return siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &ct_id_seed); +#else + return siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &ct_id_seed); +#endif +} +EXPORT_SYMBOL_GPL(nf_ct_get_id); + static void clean_from_lists(struct nf_conn *ct) { @@ -982,12 +1017,9 @@ __nf_conntrack_confirm(struct sk_buff *skb) /* set conntrack timestamp, if enabled. */ tstamp = nf_conn_tstamp_find(ct); - if (tstamp) { - if (skb->tstamp == 0) - __net_timestamp(skb); + if (tstamp) + tstamp->start = ktime_get_real_ns(); - tstamp->start = ktime_to_ns(skb->tstamp); - } /* Since the lookup is lockless, hash insertion must be done after * starting the timer and setting the CONFIRMED bit. The RCU barriers * guarantee that no other CPU can find the conntrack before the above @@ -1350,6 +1382,7 @@ __nf_conntrack_alloc(struct net *net, /* save hash for reusing when confirming */ *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash; ct->status = 0; + ct->timeout = 0; write_pnet(&ct->ct_net, net); memset(&ct->__nfct_init_offset[0], 0, offsetof(struct nf_conn, proto) - diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 66c596d287a5..d7f61b0547c6 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -29,6 +29,7 @@ #include <linux/spinlock.h> #include <linux/interrupt.h> #include <linux/slab.h> +#include <linux/siphash.h> #include <linux/netfilter.h> #include <net/netlink.h> @@ -485,7 +486,9 @@ nla_put_failure: static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct) { - if (nla_put_be32(skb, CTA_ID, htonl((unsigned long)ct))) + __be32 id = (__force __be32)nf_ct_get_id(ct); + + if (nla_put_be32(skb, CTA_ID, id)) goto nla_put_failure; return 0; @@ -1286,8 +1289,9 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl, } if (cda[CTA_ID]) { - u_int32_t id = ntohl(nla_get_be32(cda[CTA_ID])); - if (id != (u32)(unsigned long)ct) { + __be32 id = nla_get_be32(cda[CTA_ID]); + + if (id != (__force __be32)nf_ct_get_id(ct)) { nf_ct_put(ct); return -ENOENT; } @@ -2692,6 +2696,25 @@ nla_put_failure: static const union nf_inet_addr any_addr; +static __be32 nf_expect_get_id(const struct nf_conntrack_expect *exp) +{ + static __read_mostly siphash_key_t exp_id_seed; + unsigned long a, b, c, d; + + net_get_random_once(&exp_id_seed, sizeof(exp_id_seed)); + + a = (unsigned long)exp; + b = (unsigned long)exp->helper; + c = (unsigned long)exp->master; + d = (unsigned long)siphash(&exp->tuple, sizeof(exp->tuple), &exp_id_seed); + +#ifdef CONFIG_64BIT + return (__force __be32)siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &exp_id_seed); +#else + return (__force __be32)siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &exp_id_seed); +#endif +} + static int ctnetlink_exp_dump_expect(struct sk_buff *skb, const struct nf_conntrack_expect *exp) @@ -2739,7 +2762,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, } #endif if (nla_put_be32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout)) || - nla_put_be32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp)) || + nla_put_be32(skb, CTA_EXPECT_ID, nf_expect_get_id(exp)) || nla_put_be32(skb, CTA_EXPECT_FLAGS, htonl(exp->flags)) || nla_put_be32(skb, CTA_EXPECT_CLASS, htonl(exp->class))) goto nla_put_failure; @@ -3044,7 +3067,8 @@ static int ctnetlink_get_expect(struct net *net, struct sock *ctnl, if (cda[CTA_EXPECT_ID]) { __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]); - if (ntohl(id) != (u32)(unsigned long)exp) { + + if (id != nf_expect_get_id(exp)) { nf_ct_expect_put(exp); return -ENOENT; } diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index b9403a266a2e..37bb530d848f 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -55,7 +55,7 @@ void nf_l4proto_log_invalid(const struct sk_buff *skb, struct va_format vaf; va_list args; - if (net->ct.sysctl_log_invalid != protonum || + if (net->ct.sysctl_log_invalid != protonum && net->ct.sysctl_log_invalid != IPPROTO_RAW) return; diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c index 7df477996b16..9becac953587 100644 --- a/net/netfilter/nf_conntrack_proto_icmp.c +++ b/net/netfilter/nf_conntrack_proto_icmp.c @@ -103,49 +103,94 @@ int nf_conntrack_icmp_packet(struct nf_conn *ct, return NF_ACCEPT; } -/* Returns conntrack if it dealt with ICMP, and filled in skb fields */ -static int -icmp_error_message(struct nf_conn *tmpl, struct sk_buff *skb, - const struct nf_hook_state *state) +/* Check inner header is related to any of the existing connections */ +int nf_conntrack_inet_error(struct nf_conn *tmpl, struct sk_buff *skb, + unsigned int dataoff, + const struct nf_hook_state *state, + u8 l4proto, union nf_inet_addr *outer_daddr) { struct nf_conntrack_tuple innertuple, origtuple; const struct nf_conntrack_tuple_hash *h; const struct nf_conntrack_zone *zone; enum ip_conntrack_info ctinfo; struct nf_conntrack_zone tmp; + union nf_inet_addr *ct_daddr; + enum ip_conntrack_dir dir; + struct nf_conn *ct; WARN_ON(skb_nfct(skb)); zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); /* Are they talking about one of our connections? */ - if (!nf_ct_get_tuplepr(skb, - skb_network_offset(skb) + ip_hdrlen(skb) - + sizeof(struct icmphdr), - PF_INET, state->net, &origtuple)) { - pr_debug("icmp_error_message: failed to get tuple\n"); + if (!nf_ct_get_tuplepr(skb, dataoff, + state->pf, state->net, &origtuple)) return -NF_ACCEPT; - } /* Ordinarily, we'd expect the inverted tupleproto, but it's been preserved inside the ICMP. */ - if (!nf_ct_invert_tuple(&innertuple, &origtuple)) { - pr_debug("icmp_error_message: no match\n"); + if (!nf_ct_invert_tuple(&innertuple, &origtuple)) return -NF_ACCEPT; - } - - ctinfo = IP_CT_RELATED; h = nf_conntrack_find_get(state->net, zone, &innertuple); - if (!h) { - pr_debug("icmp_error_message: no match\n"); + if (!h) + return -NF_ACCEPT; + + /* Consider: A -> T (=This machine) -> B + * Conntrack entry will look like this: + * Original: A->B + * Reply: B->T (SNAT case) OR A + * + * When this function runs, we got packet that looks like this: + * iphdr|icmphdr|inner_iphdr|l4header (tcp, udp, ..). + * + * Above nf_conntrack_find_get() makes lookup based on inner_hdr, + * so we should expect that destination of the found connection + * matches outer header destination address. + * + * In above example, we can consider these two cases: + * 1. Error coming in reply direction from B or M (middle box) to + * T (SNAT case) or A. + * Inner saddr will be B, dst will be T or A. + * The found conntrack will be reply tuple (B->T/A). + * 2. Error coming in original direction from A or M to B. + * Inner saddr will be A, inner daddr will be B. + * The found conntrack will be original tuple (A->B). + * + * In both cases, conntrack[dir].dst == inner.dst. + * + * A bogus packet could look like this: + * Inner: B->T + * Outer: B->X (other machine reachable by T). + * + * In this case, lookup yields connection A->B and will + * set packet from B->X as *RELATED*, even though no connection + * from X was ever seen. + */ + ct = nf_ct_tuplehash_to_ctrack(h); + dir = NF_CT_DIRECTION(h); + ct_daddr = &ct->tuplehash[dir].tuple.dst.u3; + if (!nf_inet_addr_cmp(outer_daddr, ct_daddr)) { + if (state->pf == AF_INET) { + nf_l4proto_log_invalid(skb, state->net, state->pf, + l4proto, + "outer daddr %pI4 != inner %pI4", + &outer_daddr->ip, &ct_daddr->ip); + } else if (state->pf == AF_INET6) { + nf_l4proto_log_invalid(skb, state->net, state->pf, + l4proto, + "outer daddr %pI6 != inner %pI6", + &outer_daddr->ip6, &ct_daddr->ip6); + } + nf_ct_put(ct); return -NF_ACCEPT; } - if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) + ctinfo = IP_CT_RELATED; + if (dir == IP_CT_DIR_REPLY) ctinfo += IP_CT_IS_REPLY; /* Update skb to refer to this connection */ - nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo); + nf_ct_set(skb, ct, ctinfo); return NF_ACCEPT; } @@ -162,11 +207,12 @@ int nf_conntrack_icmpv4_error(struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, const struct nf_hook_state *state) { + union nf_inet_addr outer_daddr; const struct icmphdr *icmph; struct icmphdr _ih; /* Not enough header? */ - icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih); + icmph = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih); if (icmph == NULL) { icmp_error_log(skb, state, "short packet"); return -NF_ACCEPT; @@ -199,7 +245,12 @@ int nf_conntrack_icmpv4_error(struct nf_conn *tmpl, icmph->type != ICMP_REDIRECT) return NF_ACCEPT; - return icmp_error_message(tmpl, skb, state); + memset(&outer_daddr, 0, sizeof(outer_daddr)); + outer_daddr.ip = ip_hdr(skb)->daddr; + + dataoff += sizeof(*icmph); + return nf_conntrack_inet_error(tmpl, skb, dataoff, state, + IPPROTO_ICMP, &outer_daddr); } #if IS_ENABLED(CONFIG_NF_CT_NETLINK) diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c index bec4a3211658..c63ee3612855 100644 --- a/net/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/netfilter/nf_conntrack_proto_icmpv6.c @@ -123,51 +123,6 @@ int nf_conntrack_icmpv6_packet(struct nf_conn *ct, return NF_ACCEPT; } -static int -icmpv6_error_message(struct net *net, struct nf_conn *tmpl, - struct sk_buff *skb, - unsigned int icmp6off) -{ - struct nf_conntrack_tuple intuple, origtuple; - const struct nf_conntrack_tuple_hash *h; - enum ip_conntrack_info ctinfo; - struct nf_conntrack_zone tmp; - - WARN_ON(skb_nfct(skb)); - - /* Are they talking about one of our connections? */ - if (!nf_ct_get_tuplepr(skb, - skb_network_offset(skb) - + sizeof(struct ipv6hdr) - + sizeof(struct icmp6hdr), - PF_INET6, net, &origtuple)) { - pr_debug("icmpv6_error: Can't get tuple\n"); - return -NF_ACCEPT; - } - - /* Ordinarily, we'd expect the inverted tupleproto, but it's - been preserved inside the ICMP. */ - if (!nf_ct_invert_tuple(&intuple, &origtuple)) { - pr_debug("icmpv6_error: Can't invert tuple\n"); - return -NF_ACCEPT; - } - - ctinfo = IP_CT_RELATED; - - h = nf_conntrack_find_get(net, nf_ct_zone_tmpl(tmpl, skb, &tmp), - &intuple); - if (!h) { - pr_debug("icmpv6_error: no match\n"); - return -NF_ACCEPT; - } else { - if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) - ctinfo += IP_CT_IS_REPLY; - } - - /* Update skb to refer to this connection */ - nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo); - return NF_ACCEPT; -} static void icmpv6_error_log(const struct sk_buff *skb, const struct nf_hook_state *state, @@ -182,6 +137,7 @@ int nf_conntrack_icmpv6_error(struct nf_conn *tmpl, unsigned int dataoff, const struct nf_hook_state *state) { + union nf_inet_addr outer_daddr; const struct icmp6hdr *icmp6h; struct icmp6hdr _ih; int type; @@ -210,7 +166,11 @@ int nf_conntrack_icmpv6_error(struct nf_conn *tmpl, if (icmp6h->icmp6_type >= 128) return NF_ACCEPT; - return icmpv6_error_message(state->net, tmpl, skb, dataoff); + memcpy(&outer_daddr.ip6, &ipv6_hdr(skb)->daddr, + sizeof(outer_daddr.ip6)); + dataoff += sizeof(*icmp6h); + return nf_conntrack_inet_error(tmpl, skb, dataoff, state, + IPPROTO_ICMPV6, &outer_daddr); } #if IS_ENABLED(CONFIG_NF_CT_NETLINK) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index af7dc6537758..000952719adf 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -415,9 +415,14 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple, case IPPROTO_ICMPV6: /* id is same for either direction... */ keyptr = &tuple->src.u.icmp.id; - min = range->min_proto.icmp.id; - range_size = ntohs(range->max_proto.icmp.id) - - ntohs(range->min_proto.icmp.id) + 1; + if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) { + min = 0; + range_size = 65536; + } else { + min = ntohs(range->min_proto.icmp.id); + range_size = ntohs(range->max_proto.icmp.id) - + ntohs(range->min_proto.icmp.id) + 1; + } goto find_free_id; #if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE) case IPPROTO_GRE: diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ef7772e976cc..1606eaa5ae0d 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1545,7 +1545,7 @@ static int nft_chain_parse_hook(struct net *net, if (IS_ERR(type)) return PTR_ERR(type); } - if (!(type->hook_mask & (1 << hook->num))) + if (hook->num > NF_MAX_HOOKS || !(type->hook_mask & (1 << hook->num))) return -EOPNOTSUPP; if (type->type == NFT_CHAIN_T_NAT && diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index b1f9c5303f02..0b3347570265 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -540,7 +540,7 @@ __build_packet_message(struct nfnl_log_net *log, goto nla_put_failure; } - if (skb->tstamp) { + if (hooknum <= NF_INET_FORWARD && skb->tstamp) { struct nfulnl_msg_packet_timestamp ts; struct timespec64 kts = ktime_to_timespec64(skb->tstamp); ts.sec = cpu_to_be64(kts.tv_sec); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 0dcc3592d053..e057b2961d31 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -582,7 +582,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, if (nfqnl_put_bridge(entry, skb) < 0) goto nla_put_failure; - if (entskb->tstamp) { + if (entry->state.hook <= NF_INET_FORWARD && entskb->tstamp) { struct nfqnl_msg_packet_timestamp ts; struct timespec64 kts = ktime_to_timespec64(entskb->tstamp); diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index c13bcd0ab491..8dbb4d48f2ed 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -163,19 +163,24 @@ time_mt(const struct sk_buff *skb, struct xt_action_param *par) s64 stamp; /* - * We cannot use get_seconds() instead of __net_timestamp() here. + * We need real time here, but we can neither use skb->tstamp + * nor __net_timestamp(). + * + * skb->tstamp and skb->skb_mstamp_ns overlap, however, they + * use different clock types (real vs monotonic). + * * Suppose you have two rules: - * 1. match before 13:00 - * 2. match after 13:00 + * 1. match before 13:00 + * 2. match after 13:00 + * * If you match against processing time (get_seconds) it * may happen that the same packet matches both rules if - * it arrived at the right moment before 13:00. + * it arrived at the right moment before 13:00, so it would be + * better to check skb->tstamp and set it via __net_timestamp() + * if needed. This however breaks outgoing packets tx timestamp, + * and causes them to get delayed forever by fq packet scheduler. */ - if (skb->tstamp == 0) - __net_timestamp((struct sk_buff *)skb); - - stamp = ktime_to_ns(skb->tstamp); - stamp = div_s64(stamp, NSEC_PER_SEC); + stamp = get_seconds(); if (info->flags & XT_TIME_LOCAL_TZ) /* Adjust for local timezone */ diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index f28e937320a3..216ab915dd54 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -988,7 +988,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, struct netlink_sock *nlk = nlk_sk(sk); struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; int err = 0; - unsigned long groups = nladdr->nl_groups; + unsigned long groups; bool bound; if (addr_len < sizeof(struct sockaddr_nl)) @@ -996,6 +996,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, if (nladdr->nl_family != AF_NETLINK) return -EINVAL; + groups = nladdr->nl_groups; /* Only superuser is allowed to listen multicasts */ if (groups) { diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 1d3144d19903..71ffd1a6dc7c 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1392,18 +1392,22 @@ static int __init nr_proto_init(void) int i; int rc = proto_register(&nr_proto, 0); - if (rc != 0) - goto out; + if (rc) + return rc; if (nr_ndevs > 0x7fffffff/sizeof(struct net_device *)) { - printk(KERN_ERR "NET/ROM: nr_proto_init - nr_ndevs parameter to large\n"); - return -1; + pr_err("NET/ROM: %s - nr_ndevs parameter too large\n", + __func__); + rc = -EINVAL; + goto unregister_proto; } dev_nr = kcalloc(nr_ndevs, sizeof(struct net_device *), GFP_KERNEL); - if (dev_nr == NULL) { - printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device array\n"); - return -1; + if (!dev_nr) { + pr_err("NET/ROM: %s - unable to allocate device array\n", + __func__); + rc = -ENOMEM; + goto unregister_proto; } for (i = 0; i < nr_ndevs; i++) { @@ -1413,13 +1417,13 @@ static int __init nr_proto_init(void) sprintf(name, "nr%d", i); dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, nr_setup); if (!dev) { - printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device structure\n"); + rc = -ENOMEM; goto fail; } dev->base_addr = i; - if (register_netdev(dev)) { - printk(KERN_ERR "NET/ROM: nr_proto_init - unable to register network device\n"); + rc = register_netdev(dev); + if (rc) { free_netdev(dev); goto fail; } @@ -1427,36 +1431,64 @@ static int __init nr_proto_init(void) dev_nr[i] = dev; } - if (sock_register(&nr_family_ops)) { - printk(KERN_ERR "NET/ROM: nr_proto_init - unable to register socket family\n"); + rc = sock_register(&nr_family_ops); + if (rc) goto fail; - } - register_netdevice_notifier(&nr_dev_notifier); + rc = register_netdevice_notifier(&nr_dev_notifier); + if (rc) + goto out_sock; ax25_register_pid(&nr_pid); ax25_linkfail_register(&nr_linkfail_notifier); #ifdef CONFIG_SYSCTL - nr_register_sysctl(); + rc = nr_register_sysctl(); + if (rc) + goto out_sysctl; #endif nr_loopback_init(); - proc_create_seq("nr", 0444, init_net.proc_net, &nr_info_seqops); - proc_create_seq("nr_neigh", 0444, init_net.proc_net, &nr_neigh_seqops); - proc_create_seq("nr_nodes", 0444, init_net.proc_net, &nr_node_seqops); -out: - return rc; + rc = -ENOMEM; + if (!proc_create_seq("nr", 0444, init_net.proc_net, &nr_info_seqops)) + goto proc_remove1; + if (!proc_create_seq("nr_neigh", 0444, init_net.proc_net, + &nr_neigh_seqops)) + goto proc_remove2; + if (!proc_create_seq("nr_nodes", 0444, init_net.proc_net, + &nr_node_seqops)) + goto proc_remove3; + + return 0; + +proc_remove3: + remove_proc_entry("nr_neigh", init_net.proc_net); +proc_remove2: + remove_proc_entry("nr", init_net.proc_net); +proc_remove1: + + nr_loopback_clear(); + nr_rt_free(); + +#ifdef CONFIG_SYSCTL + nr_unregister_sysctl(); +out_sysctl: +#endif + ax25_linkfail_release(&nr_linkfail_notifier); + ax25_protocol_release(AX25_P_NETROM); + unregister_netdevice_notifier(&nr_dev_notifier); +out_sock: + sock_unregister(PF_NETROM); fail: while (--i >= 0) { unregister_netdev(dev_nr[i]); free_netdev(dev_nr[i]); } kfree(dev_nr); +unregister_proto: proto_unregister(&nr_proto); - rc = -1; - goto out; + return rc; } module_init(nr_proto_init); diff --git a/net/netrom/nr_loopback.c b/net/netrom/nr_loopback.c index 215ad22a9647..93d13f019981 100644 --- a/net/netrom/nr_loopback.c +++ b/net/netrom/nr_loopback.c @@ -70,7 +70,7 @@ static void nr_loopback_timer(struct timer_list *unused) } } -void __exit nr_loopback_clear(void) +void nr_loopback_clear(void) { del_timer_sync(&loopback_timer); skb_queue_purge(&loopback_queue); diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 6485f593e2f0..b76aa668a94b 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -953,7 +953,7 @@ const struct seq_operations nr_neigh_seqops = { /* * Free all memory associated with the nodes and routes lists. */ -void __exit nr_rt_free(void) +void nr_rt_free(void) { struct nr_neigh *s = NULL; struct nr_node *t = NULL; diff --git a/net/netrom/sysctl_net_netrom.c b/net/netrom/sysctl_net_netrom.c index ba1c368b3f18..771011b84270 100644 --- a/net/netrom/sysctl_net_netrom.c +++ b/net/netrom/sysctl_net_netrom.c @@ -146,9 +146,12 @@ static struct ctl_table nr_table[] = { { } }; -void __init nr_register_sysctl(void) +int __init nr_register_sysctl(void) { nr_table_header = register_net_sysctl(&init_net, "net/netrom", nr_table); + if (!nr_table_header) + return -ENOMEM; + return 0; } void nr_unregister_sysctl(void) diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index d6cc97fbbbb0..2b969f99ef13 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -543,6 +543,9 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr, struct rds_sock *rs = rds_sk_to_rs(sk); int ret = 0; + if (addr_len < offsetofend(struct sockaddr, sa_family)) + return -EINVAL; + lock_sock(sk); switch (uaddr->sa_family) { diff --git a/net/rds/bind.c b/net/rds/bind.c index 17c9d9f0c848..0f4398e7f2a7 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -173,6 +173,8 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) /* We allow an RDS socket to be bound to either IPv4 or IPv6 * address. */ + if (addr_len < offsetofend(struct sockaddr, sa_family)) + return -EINVAL; if (uaddr->sa_family == AF_INET) { struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; diff --git a/net/rds/ib_fmr.c b/net/rds/ib_fmr.c index 31cf37da4510..93c0437e6a5f 100644 --- a/net/rds/ib_fmr.c +++ b/net/rds/ib_fmr.c @@ -44,6 +44,17 @@ struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev, int npages) else pool = rds_ibdev->mr_1m_pool; + if (atomic_read(&pool->dirty_count) >= pool->max_items / 10) + queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10); + + /* Switch pools if one of the pool is reaching upper limit */ + if (atomic_read(&pool->dirty_count) >= pool->max_items * 9 / 10) { + if (pool->pool_type == RDS_IB_MR_8K_POOL) + pool = rds_ibdev->mr_1m_pool; + else + pool = rds_ibdev->mr_8k_pool; + } + ibmr = rds_ib_try_reuse_ibmr(pool); if (ibmr) return ibmr; diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 63c8d107adcf..d664e9ade74d 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -454,9 +454,6 @@ struct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *pool) struct rds_ib_mr *ibmr = NULL; int iter = 0; - if (atomic_read(&pool->dirty_count) >= pool->max_items_soft / 10) - queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10); - while (1) { ibmr = rds_ib_reuse_mr(pool); if (ibmr) diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c index 7af4f99c4a93..094a6621f8e8 100644 --- a/net/rose/rose_loopback.c +++ b/net/rose/rose_loopback.c @@ -16,6 +16,7 @@ #include <linux/init.h> static struct sk_buff_head loopback_queue; +#define ROSE_LOOPBACK_LIMIT 1000 static struct timer_list loopback_timer; static void rose_set_loopback_timer(void); @@ -35,29 +36,27 @@ static int rose_loopback_running(void) int rose_loopback_queue(struct sk_buff *skb, struct rose_neigh *neigh) { - struct sk_buff *skbn; + struct sk_buff *skbn = NULL; - skbn = skb_clone(skb, GFP_ATOMIC); + if (skb_queue_len(&loopback_queue) < ROSE_LOOPBACK_LIMIT) + skbn = skb_clone(skb, GFP_ATOMIC); - kfree_skb(skb); - - if (skbn != NULL) { + if (skbn) { + consume_skb(skb); skb_queue_tail(&loopback_queue, skbn); if (!rose_loopback_running()) rose_set_loopback_timer(); + } else { + kfree_skb(skb); } return 1; } - static void rose_set_loopback_timer(void) { - del_timer(&loopback_timer); - - loopback_timer.expires = jiffies + 10; - add_timer(&loopback_timer); + mod_timer(&loopback_timer, jiffies + 10); } static void rose_loopback_timer(struct timer_list *unused) @@ -68,8 +67,12 @@ static void rose_loopback_timer(struct timer_list *unused) struct sock *sk; unsigned short frametype; unsigned int lci_i, lci_o; + int count; - while ((skb = skb_dequeue(&loopback_queue)) != NULL) { + for (count = 0; count < ROSE_LOOPBACK_LIMIT; count++) { + skb = skb_dequeue(&loopback_queue); + if (!skb) + return; if (skb->len < ROSE_MIN_LEN) { kfree_skb(skb); continue; @@ -106,6 +109,8 @@ static void rose_loopback_timer(struct timer_list *unused) kfree_skb(skb); } } + if (!skb_queue_empty(&loopback_queue)) + mod_timer(&loopback_timer, jiffies + 1); } void __exit rose_loopback_clear(void) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 96f2952bbdfd..ae8c5d7f3bf1 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -135,7 +135,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *)saddr; struct rxrpc_local *local; struct rxrpc_sock *rx = rxrpc_sk(sock->sk); - u16 service_id = srx->srx_service; + u16 service_id; int ret; _enter("%p,%p,%d", rx, saddr, len); @@ -143,6 +143,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) ret = rxrpc_validate_address(rx, srx, len); if (ret < 0) goto error; + service_id = srx->srx_service; lock_sock(&rx->sk); @@ -370,18 +371,22 @@ EXPORT_SYMBOL(rxrpc_kernel_end_call); * rxrpc_kernel_check_life - Check to see whether a call is still alive * @sock: The socket the call is on * @call: The call to check + * @_life: Where to store the life value * * Allow a kernel service to find out whether a call is still alive - ie. we're - * getting ACKs from the server. Returns a number representing the life state - * which can be compared to that returned by a previous call. + * getting ACKs from the server. Passes back in *_life a number representing + * the life state which can be compared to that returned by a previous call and + * return true if the call is still alive. * * If the life state stalls, rxrpc_kernel_probe_life() should be called and * then 2RTT waited. */ -u32 rxrpc_kernel_check_life(const struct socket *sock, - const struct rxrpc_call *call) +bool rxrpc_kernel_check_life(const struct socket *sock, + const struct rxrpc_call *call, + u32 *_life) { - return call->acks_latest; + *_life = call->acks_latest; + return call->state != RXRPC_CALL_COMPLETE; } EXPORT_SYMBOL(rxrpc_kernel_check_life); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 4b1a534d290a..062ca9dc29b8 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -654,6 +654,7 @@ struct rxrpc_call { u8 ackr_reason; /* reason to ACK */ u16 ackr_skew; /* skew on packet being ACK'd */ rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */ + rxrpc_serial_t ackr_first_seq; /* first sequence number received */ rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */ rxrpc_seq_t ackr_consumed; /* Highest packet shown consumed */ rxrpc_seq_t ackr_seen; /* Highest packet shown seen */ diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index b6fca8ebb117..8d31fb4c51e1 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -153,7 +153,8 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, * pass a connection-level abort onto all calls on that connection */ static void rxrpc_abort_calls(struct rxrpc_connection *conn, - enum rxrpc_call_completion compl) + enum rxrpc_call_completion compl, + rxrpc_serial_t serial) { struct rxrpc_call *call; int i; @@ -173,6 +174,9 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, call->call_id, 0, conn->abort_code, conn->error); + else + trace_rxrpc_rx_abort(call, serial, + conn->abort_code); if (rxrpc_set_call_completion(call, compl, conn->abort_code, conn->error)) @@ -213,8 +217,6 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, conn->state = RXRPC_CONN_LOCALLY_ABORTED; spin_unlock_bh(&conn->state_lock); - rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED); - msg.msg_name = &conn->params.peer->srx.transport; msg.msg_namelen = conn->params.peer->srx.transport_len; msg.msg_control = NULL; @@ -242,6 +244,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, len = iov[0].iov_len + iov[1].iov_len; serial = atomic_inc_return(&conn->serial); + rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, serial); whdr.serial = htonl(serial); _proto("Tx CONN ABORT %%%u { %d }", serial, conn->abort_code); @@ -321,7 +324,7 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, conn->error = -ECONNABORTED; conn->abort_code = abort_code; conn->state = RXRPC_CONN_REMOTELY_ABORTED; - rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED); + rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, sp->hdr.serial); return -ECONNABORTED; case RXRPC_PACKET_TYPE_CHALLENGE: diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 9128aa0e40aa..c2c35cf4e308 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -837,7 +837,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, u8 acks[RXRPC_MAXACKS]; } buf; rxrpc_serial_t acked_serial; - rxrpc_seq_t first_soft_ack, hard_ack; + rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt; int nr_acks, offset, ioffset; _enter(""); @@ -851,13 +851,14 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, acked_serial = ntohl(buf.ack.serial); first_soft_ack = ntohl(buf.ack.firstPacket); + prev_pkt = ntohl(buf.ack.previousPacket); hard_ack = first_soft_ack - 1; nr_acks = buf.ack.nAcks; summary.ack_reason = (buf.ack.reason < RXRPC_ACK__INVALID ? buf.ack.reason : RXRPC_ACK__INVALID); trace_rxrpc_rx_ack(call, sp->hdr.serial, acked_serial, - first_soft_ack, ntohl(buf.ack.previousPacket), + first_soft_ack, prev_pkt, summary.ack_reason, nr_acks); if (buf.ack.reason == RXRPC_ACK_PING_RESPONSE) @@ -878,8 +879,9 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, rxrpc_propose_ack_respond_to_ack); } - /* Discard any out-of-order or duplicate ACKs. */ - if (before_eq(sp->hdr.serial, call->acks_latest)) + /* Discard any out-of-order or duplicate ACKs (outside lock). */ + if (before(first_soft_ack, call->ackr_first_seq) || + before(prev_pkt, call->ackr_prev_seq)) return; buf.info.rxMTU = 0; @@ -890,12 +892,16 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, spin_lock(&call->input_lock); - /* Discard any out-of-order or duplicate ACKs. */ - if (before_eq(sp->hdr.serial, call->acks_latest)) + /* Discard any out-of-order or duplicate ACKs (inside lock). */ + if (before(first_soft_ack, call->ackr_first_seq) || + before(prev_pkt, call->ackr_prev_seq)) goto out; call->acks_latest_ts = skb->tstamp; call->acks_latest = sp->hdr.serial; + call->ackr_first_seq = first_soft_ack; + call->ackr_prev_seq = prev_pkt; + /* Parse rwind and mtu sizes if provided. */ if (buf.info.rxMTU) rxrpc_input_ackinfo(call, skb, &buf.info); @@ -1155,19 +1161,19 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) * handle data received on the local endpoint * - may be called in interrupt context * - * The socket is locked by the caller and this prevents the socket from being - * shut down and the local endpoint from going away, thus sk_user_data will not - * be cleared until this function returns. + * [!] Note that as this is called from the encap_rcv hook, the socket is not + * held locked by the caller and nothing prevents sk_user_data on the UDP from + * being cleared in the middle of processing this function. * * Called with the RCU read lock held from the IP layer via UDP. */ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) { + struct rxrpc_local *local = rcu_dereference_sk_user_data(udp_sk); struct rxrpc_connection *conn; struct rxrpc_channel *chan; struct rxrpc_call *call = NULL; struct rxrpc_skb_priv *sp; - struct rxrpc_local *local = udp_sk->sk_user_data; struct rxrpc_peer *peer = NULL; struct rxrpc_sock *rx = NULL; unsigned int channel; @@ -1175,6 +1181,10 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) _enter("%p", udp_sk); + if (unlikely(!local)) { + kfree_skb(skb); + return 0; + } if (skb->tstamp == 0) skb->tstamp = ktime_get_real(); diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 15cf42d5b53a..01959db51445 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -304,7 +304,8 @@ nomem: ret = -ENOMEM; sock_error: mutex_unlock(&rxnet->local_mutex); - kfree(local); + if (local) + call_rcu(&local->rcu, rxrpc_local_rcu); _leave(" = %d", ret); return ERR_PTR(ret); diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index bc05af89fc38..6e84d878053c 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -157,6 +157,11 @@ void rxrpc_error_report(struct sock *sk) _enter("%p{%d}", sk, local->debug_id); + /* Clear the outstanding error value on the socket so that it doesn't + * cause kernel_sendmsg() to return it later. + */ + sock_error(sk); + skb = sock_dequeue_err_skb(sk); if (!skb) { _leave("UDP socket errqueue empty"); diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 46c9312085b1..bec64deb7b0a 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -152,12 +152,13 @@ static void rxrpc_notify_end_tx(struct rxrpc_sock *rx, struct rxrpc_call *call, } /* - * Queue a DATA packet for transmission, set the resend timeout and send the - * packet immediately + * Queue a DATA packet for transmission, set the resend timeout and send + * the packet immediately. Returns the error from rxrpc_send_data_packet() + * in case the caller wants to do something with it. */ -static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, - struct sk_buff *skb, bool last, - rxrpc_notify_end_tx_t notify_end_tx) +static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, + struct sk_buff *skb, bool last, + rxrpc_notify_end_tx_t notify_end_tx) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); unsigned long now; @@ -250,7 +251,8 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, out: rxrpc_free_skb(skb, rxrpc_skb_tx_freed); - _leave(""); + _leave(" = %d", ret); + return ret; } /* @@ -423,9 +425,10 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, if (ret < 0) goto out; - rxrpc_queue_packet(rx, call, skb, - !msg_data_left(msg) && !more, - notify_end_tx); + ret = rxrpc_queue_packet(rx, call, skb, + !msg_data_left(msg) && !more, + notify_end_tx); + /* Should check for failure here */ skb = NULL; } } while (msg_data_left(msg) > 0); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9874e60c9b0d..4583fa914e62 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4847,7 +4847,8 @@ static int sctp_connect(struct sock *sk, struct sockaddr *addr, } /* Validate addr_len before calling common connect/connectx routine. */ - af = sctp_get_af_specific(addr->sa_family); + af = addr_len < offsetofend(struct sockaddr, sa_family) ? NULL : + sctp_get_af_specific(addr->sa_family); if (!af || addr_len < af->sockaddr_len) { err = -EINVAL; } else { diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 12bb23b8e0c5..261131dfa1f1 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -54,6 +54,7 @@ static void cache_init(struct cache_head *h, struct cache_detail *detail) h->last_refresh = now; } +static inline int cache_is_valid(struct cache_head *h); static void cache_fresh_locked(struct cache_head *head, time_t expiry, struct cache_detail *detail); static void cache_fresh_unlocked(struct cache_head *head, @@ -105,6 +106,8 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail, if (cache_is_expired(detail, tmp)) { hlist_del_init_rcu(&tmp->cache_list); detail->entries --; + if (cache_is_valid(tmp) == -EAGAIN) + set_bit(CACHE_NEGATIVE, &tmp->flags); cache_fresh_locked(tmp, 0, detail); freeme = tmp; break; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 187d10443a15..8ff11dc98d7f 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1540,7 +1540,6 @@ call_start(struct rpc_task *task) clnt->cl_stats->rpccnt++; task->tk_action = call_reserve; rpc_task_set_transport(task, clnt); - call_reserve(task); } /* @@ -1554,9 +1553,6 @@ call_reserve(struct rpc_task *task) task->tk_status = 0; task->tk_action = call_reserveresult; xprt_reserve(task); - if (rpc_task_need_resched(task)) - return; - call_reserveresult(task); } static void call_retry_reserve(struct rpc_task *task); @@ -1579,7 +1575,6 @@ call_reserveresult(struct rpc_task *task) if (status >= 0) { if (task->tk_rqstp) { task->tk_action = call_refresh; - call_refresh(task); return; } @@ -1605,7 +1600,6 @@ call_reserveresult(struct rpc_task *task) /* fall through */ case -EAGAIN: /* woken up; retry */ task->tk_action = call_retry_reserve; - call_retry_reserve(task); return; case -EIO: /* probably a shutdown */ break; @@ -1628,9 +1622,6 @@ call_retry_reserve(struct rpc_task *task) task->tk_status = 0; task->tk_action = call_reserveresult; xprt_retry_reserve(task); - if (rpc_task_need_resched(task)) - return; - call_reserveresult(task); } /* @@ -1645,9 +1636,6 @@ call_refresh(struct rpc_task *task) task->tk_status = 0; task->tk_client->cl_stats->rpcauthrefresh++; rpcauth_refreshcred(task); - if (rpc_task_need_resched(task)) - return; - call_refreshresult(task); } /* @@ -1666,7 +1654,6 @@ call_refreshresult(struct rpc_task *task) case 0: if (rpcauth_uptodatecred(task)) { task->tk_action = call_allocate; - call_allocate(task); return; } /* Use rate-limiting and a max number of retries if refresh @@ -1685,7 +1672,6 @@ call_refreshresult(struct rpc_task *task) task->tk_cred_retry--; dprintk("RPC: %5u %s: retry refresh creds\n", task->tk_pid, __func__); - call_refresh(task); return; } dprintk("RPC: %5u %s: refresh creds failed with error %d\n", @@ -1711,10 +1697,8 @@ call_allocate(struct rpc_task *task) task->tk_status = 0; task->tk_action = call_encode; - if (req->rq_buffer) { - call_encode(task); + if (req->rq_buffer) return; - } if (proc->p_proc != 0) { BUG_ON(proc->p_arglen == 0); @@ -1740,12 +1724,8 @@ call_allocate(struct rpc_task *task) status = xprt->ops->buf_alloc(task); xprt_inject_disconnect(xprt); - if (status == 0) { - if (rpc_task_need_resched(task)) - return; - call_encode(task); + if (status == 0) return; - } if (status != -ENOMEM) { rpc_exit(task, status); return; @@ -1828,8 +1808,12 @@ call_encode(struct rpc_task *task) xprt_request_enqueue_receive(task); xprt_request_enqueue_transmit(task); out: - task->tk_action = call_bind; - call_bind(task); + task->tk_action = call_transmit; + /* Check that the connection is OK */ + if (!xprt_bound(task->tk_xprt)) + task->tk_action = call_bind; + else if (!xprt_connected(task->tk_xprt)) + task->tk_action = call_connect; } /* @@ -1847,7 +1831,6 @@ rpc_task_handle_transmitted(struct rpc_task *task) { xprt_end_transmit(task); task->tk_action = call_transmit_status; - call_transmit_status(task); } /* @@ -1865,7 +1848,6 @@ call_bind(struct rpc_task *task) if (xprt_bound(xprt)) { task->tk_action = call_connect; - call_connect(task); return; } @@ -1896,7 +1878,6 @@ call_bind_status(struct rpc_task *task) dprint_status(task); task->tk_status = 0; task->tk_action = call_connect; - call_connect(task); return; } @@ -1981,7 +1962,6 @@ call_connect(struct rpc_task *task) if (xprt_connected(xprt)) { task->tk_action = call_transmit; - call_transmit(task); return; } @@ -2051,7 +2031,6 @@ call_connect_status(struct rpc_task *task) case 0: clnt->cl_stats->netreconn++; task->tk_action = call_transmit; - call_transmit(task); return; } rpc_exit(task, status); @@ -2087,9 +2066,6 @@ call_transmit(struct rpc_task *task) xprt_transmit(task); } xprt_end_transmit(task); - if (rpc_task_need_resched(task)) - return; - call_transmit_status(task); } /* @@ -2105,11 +2081,8 @@ call_transmit_status(struct rpc_task *task) * test first. */ if (rpc_task_transmitted(task)) { - if (task->tk_status == 0) - xprt_request_wait_receive(task); - if (rpc_task_need_resched(task)) - return; - call_status(task); + task->tk_status = 0; + xprt_request_wait_receive(task); return; } @@ -2170,7 +2143,6 @@ call_bc_encode(struct rpc_task *task) { xprt_request_enqueue_transmit(task); task->tk_action = call_bc_transmit; - call_bc_transmit(task); } /* @@ -2195,6 +2167,9 @@ call_bc_transmit_status(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; + if (rpc_task_transmitted(task)) + task->tk_status = 0; + dprint_status(task); switch (task->tk_status) { @@ -2261,7 +2236,6 @@ call_status(struct rpc_task *task) status = task->tk_status; if (status >= 0) { task->tk_action = call_decode; - call_decode(task); return; } diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 89a63391d4d4..30cfc0efe699 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -90,7 +90,7 @@ static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt) /* Flush Receives, then wait for deferred Reply work * to complete. */ - ib_drain_qp(ia->ri_id->qp); + ib_drain_rq(ia->ri_id->qp); drain_workqueue(buf->rb_completion_wq); /* Deferred Reply processing might have scheduled diff --git a/net/tipc/link.c b/net/tipc/link.c index 341ecd796aa4..131aa2f0fd27 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -869,6 +869,8 @@ void tipc_link_reset(struct tipc_link *l) __skb_queue_head_init(&list); l->in_session = false; + /* Force re-synch of peer session number before establishing */ + l->peer_session--; l->session++; l->mtu = l->advertised_mtu; diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c index 3481e4906bd6..9df82a573aa7 100644 --- a/net/tipc/sysctl.c +++ b/net/tipc/sysctl.c @@ -38,6 +38,8 @@ #include <linux/sysctl.h> +static int zero; +static int one = 1; static struct ctl_table_header *tipc_ctl_hdr; static struct ctl_table tipc_table[] = { @@ -46,14 +48,16 @@ static struct ctl_table tipc_table[] = { .data = &sysctl_tipc_rmem, .maxlen = sizeof(sysctl_tipc_rmem), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, }, { .procname = "named_timeout", .data = &sysctl_tipc_named_timeout, .maxlen = sizeof(sysctl_tipc_named_timeout), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, }, { .procname = "sk_filter", diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 9f3bdbc1e593..cc0256939eb6 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -904,7 +904,9 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) goto release_netdev; free_sw_resources: + up_read(&device_offload_lock); tls_sw_free_resources_rx(sk); + down_read(&device_offload_lock); release_ctx: ctx->priv_ctx_rx = NULL; release_netdev: @@ -939,8 +941,6 @@ void tls_device_offload_cleanup_rx(struct sock *sk) } out: up_read(&device_offload_lock); - kfree(tls_ctx->rx.rec_seq); - kfree(tls_ctx->rx.iv); tls_sw_release_resources_rx(sk); } diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index 54c3a758f2a7..a3ebd4b02714 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -194,6 +194,9 @@ static void update_chksum(struct sk_buff *skb, int headln) static void complete_skb(struct sk_buff *nskb, struct sk_buff *skb, int headln) { + struct sock *sk = skb->sk; + int delta; + skb_copy_header(nskb, skb); skb_put(nskb, skb->len); @@ -201,11 +204,15 @@ static void complete_skb(struct sk_buff *nskb, struct sk_buff *skb, int headln) update_chksum(nskb, headln); nskb->destructor = skb->destructor; - nskb->sk = skb->sk; + nskb->sk = sk; skb->destructor = NULL; skb->sk = NULL; - refcount_add(nskb->truesize - skb->truesize, - &nskb->sk->sk_wmem_alloc); + + delta = nskb->truesize - skb->truesize; + if (likely(delta < 0)) + WARN_ON_ONCE(refcount_sub_and_test(-delta, &sk->sk_wmem_alloc)); + else if (delta) + refcount_add(delta, &sk->sk_wmem_alloc); } /* This function may be called after the user socket is already diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 9547cea0ce3b..478603f43964 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -293,11 +293,8 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) #endif } - if (ctx->rx_conf == TLS_SW) { - kfree(ctx->rx.rec_seq); - kfree(ctx->rx.iv); + if (ctx->rx_conf == TLS_SW) tls_sw_free_resources_rx(sk); - } #ifdef CONFIG_TLS_DEVICE if (ctx->rx_conf == TLS_HW) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index b50ced862f6f..29d6af43dd24 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2078,6 +2078,9 @@ void tls_sw_release_resources_rx(struct sock *sk) struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); + kfree(tls_ctx->rx.rec_seq); + kfree(tls_ctx->rx.iv); + if (ctx->aead_recv) { kfree_skb(ctx->recv_pkt); ctx->recv_pkt = NULL; |