summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/bridge/br_netfilter_hooks.c1
-rw-r--r--net/caif/caif_socket.c1
-rw-r--r--net/can/isotp.c69
-rw-r--r--net/can/j1939/address-claim.c40
-rw-r--r--net/can/j1939/transport.c4
-rw-r--r--net/can/raw.c47
-rw-r--r--net/core/dev.c10
-rw-r--r--net/core/devlink.c14
-rw-r--r--net/core/gro.c9
-rw-r--r--net/core/neighbour.c18
-rw-r--r--net/core/net_namespace.c10
-rw-r--r--net/core/skbuff.c5
-rw-r--r--net/core/sock.c3
-rw-r--r--net/core/sock_map.c61
-rw-r--r--net/core/stream.c1
-rw-r--r--net/dccp/ipv6.c7
-rw-r--r--net/ipv4/af_inet.c1
-rw-r--r--net/ipv4/inet_connection_sock.c3
-rw-r--r--net/ipv4/tcp_bpf.c4
-rw-r--r--net/ipv6/addrconf.c59
-rw-r--r--net/ipv6/af_inet6.c1
-rw-r--r--net/ipv6/datagram.c2
-rw-r--r--net/ipv6/tcp_ipv6.c11
-rw-r--r--net/key/af_key.c2
-rw-r--r--net/mac802154/rx.c1
-rw-r--r--net/mctp/af_mctp.c6
-rw-r--r--net/mpls/af_mpls.c4
-rw-r--r--net/mptcp/pm_netlink.c10
-rw-r--r--net/mptcp/protocol.c9
-rw-r--r--net/mptcp/sockopt.c11
-rw-r--r--net/mptcp/subflow.c12
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c5
-rw-r--r--net/netrom/af_netrom.c5
-rw-r--r--net/openvswitch/datapath.c12
-rw-r--r--net/openvswitch/meter.c4
-rw-r--r--net/qrtr/ns.c5
-rw-r--r--net/rds/message.c6
-rw-r--r--net/rose/af_rose.c8
-rw-r--r--net/sched/act_ctinfo.c6
-rw-r--r--net/sched/cls_tcindex.c34
-rw-r--r--net/sched/sch_htb.c5
-rw-r--r--net/sctp/diag.c4
-rw-r--r--net/sctp/transport.c4
-rw-r--r--net/socket.c15
-rw-r--r--net/tipc/socket.c2
-rw-r--r--net/tls/tls_sw.c2
-rw-r--r--net/unix/af_unix.c8
-rw-r--r--net/xfrm/xfrm_compat.c4
-rw-r--r--net/xfrm/xfrm_input.c3
-rw-r--r--net/xfrm/xfrm_interface_core.c54
-rw-r--r--net/xfrm/xfrm_policy.c14
-rw-r--r--net/xfrm/xfrm_state.c18
52 files changed, 433 insertions, 221 deletions
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index f20f4373ff40..9554abcfd5b4 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -871,6 +871,7 @@ static unsigned int ip_sabotage_in(void *priv,
if (nf_bridge && !nf_bridge->in_prerouting &&
!netif_is_l3_master(skb->dev) &&
!netif_is_l3_slave(skb->dev)) {
+ nf_bridge_info_free(skb);
state->okfn(state->net, state->sk, skb);
return NF_STOLEN;
}
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 748be7253248..78c9729a6057 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -1015,6 +1015,7 @@ static void caif_sock_destructor(struct sock *sk)
return;
}
sk_stream_kill_queues(&cf_sk->sk);
+ WARN_ON_ONCE(sk->sk_forward_alloc);
caif_free_client(&cf_sk->layer);
}
diff --git a/net/can/isotp.c b/net/can/isotp.c
index 608f8c24ae46..fc81d77724a1 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -140,7 +140,7 @@ struct isotp_sock {
canid_t rxid;
ktime_t tx_gap;
ktime_t lastrxcf_tstamp;
- struct hrtimer rxtimer, txtimer;
+ struct hrtimer rxtimer, txtimer, txfrtimer;
struct can_isotp_options opt;
struct can_isotp_fc_options rxfc, txfc;
struct can_isotp_ll_options ll;
@@ -871,7 +871,7 @@ static void isotp_rcv_echo(struct sk_buff *skb, void *data)
}
/* start timer to send next consecutive frame with correct delay */
- hrtimer_start(&so->txtimer, so->tx_gap, HRTIMER_MODE_REL_SOFT);
+ hrtimer_start(&so->txfrtimer, so->tx_gap, HRTIMER_MODE_REL_SOFT);
}
static enum hrtimer_restart isotp_tx_timer_handler(struct hrtimer *hrtimer)
@@ -879,49 +879,39 @@ static enum hrtimer_restart isotp_tx_timer_handler(struct hrtimer *hrtimer)
struct isotp_sock *so = container_of(hrtimer, struct isotp_sock,
txtimer);
struct sock *sk = &so->sk;
- enum hrtimer_restart restart = HRTIMER_NORESTART;
- switch (so->tx.state) {
- case ISOTP_SENDING:
-
- /* cfecho should be consumed by isotp_rcv_echo() here */
- if (!so->cfecho) {
- /* start timeout for unlikely lost echo skb */
- hrtimer_set_expires(&so->txtimer,
- ktime_add(ktime_get(),
- ktime_set(ISOTP_ECHO_TIMEOUT, 0)));
- restart = HRTIMER_RESTART;
+ /* don't handle timeouts in IDLE state */
+ if (so->tx.state == ISOTP_IDLE)
+ return HRTIMER_NORESTART;
- /* push out the next consecutive frame */
- isotp_send_cframe(so);
- break;
- }
+ /* we did not get any flow control or echo frame in time */
- /* cfecho has not been cleared in isotp_rcv_echo() */
- pr_notice_once("can-isotp: cfecho %08X timeout\n", so->cfecho);
- fallthrough;
+ /* report 'communication error on send' */
+ sk->sk_err = ECOMM;
+ if (!sock_flag(sk, SOCK_DEAD))
+ sk_error_report(sk);
- case ISOTP_WAIT_FC:
- case ISOTP_WAIT_FIRST_FC:
+ /* reset tx state */
+ so->tx.state = ISOTP_IDLE;
+ wake_up_interruptible(&so->wait);
- /* we did not get any flow control frame in time */
+ return HRTIMER_NORESTART;
+}
- /* report 'communication error on send' */
- sk->sk_err = ECOMM;
- if (!sock_flag(sk, SOCK_DEAD))
- sk_error_report(sk);
+static enum hrtimer_restart isotp_txfr_timer_handler(struct hrtimer *hrtimer)
+{
+ struct isotp_sock *so = container_of(hrtimer, struct isotp_sock,
+ txfrtimer);
- /* reset tx state */
- so->tx.state = ISOTP_IDLE;
- wake_up_interruptible(&so->wait);
- break;
+ /* start echo timeout handling and cover below protocol error */
+ hrtimer_start(&so->txtimer, ktime_set(ISOTP_ECHO_TIMEOUT, 0),
+ HRTIMER_MODE_REL_SOFT);
- default:
- WARN_ONCE(1, "can-isotp: tx timer state %08X cfecho %08X\n",
- so->tx.state, so->cfecho);
- }
+ /* cfecho should be consumed by isotp_rcv_echo() here */
+ if (so->tx.state == ISOTP_SENDING && !so->cfecho)
+ isotp_send_cframe(so);
- return restart;
+ return HRTIMER_NORESTART;
}
static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
@@ -1162,6 +1152,10 @@ static int isotp_release(struct socket *sock)
/* wait for complete transmission of current pdu */
wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
+ /* force state machines to be idle also when a signal occurred */
+ so->tx.state = ISOTP_IDLE;
+ so->rx.state = ISOTP_IDLE;
+
spin_lock(&isotp_notifier_lock);
while (isotp_busy_notifier == so) {
spin_unlock(&isotp_notifier_lock);
@@ -1194,6 +1188,7 @@ static int isotp_release(struct socket *sock)
}
}
+ hrtimer_cancel(&so->txfrtimer);
hrtimer_cancel(&so->txtimer);
hrtimer_cancel(&so->rxtimer);
@@ -1597,6 +1592,8 @@ static int isotp_init(struct sock *sk)
so->rxtimer.function = isotp_rx_timer_handler;
hrtimer_init(&so->txtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT);
so->txtimer.function = isotp_tx_timer_handler;
+ hrtimer_init(&so->txfrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT);
+ so->txfrtimer.function = isotp_txfr_timer_handler;
init_waitqueue_head(&so->wait);
spin_lock_init(&so->rx_lock);
diff --git a/net/can/j1939/address-claim.c b/net/can/j1939/address-claim.c
index f33c47327927..ca4ad6cdd5cb 100644
--- a/net/can/j1939/address-claim.c
+++ b/net/can/j1939/address-claim.c
@@ -165,6 +165,46 @@ static void j1939_ac_process(struct j1939_priv *priv, struct sk_buff *skb)
* leaving this function.
*/
ecu = j1939_ecu_get_by_name_locked(priv, name);
+
+ if (ecu && ecu->addr == skcb->addr.sa) {
+ /* The ISO 11783-5 standard, in "4.5.2 - Address claim
+ * requirements", states:
+ * d) No CF shall begin, or resume, transmission on the
+ * network until 250 ms after it has successfully claimed
+ * an address except when responding to a request for
+ * address-claimed.
+ *
+ * But "Figure 6" and "Figure 7" in "4.5.4.2 - Address-claim
+ * prioritization" show that the CF begins the transmission
+ * after 250 ms from the first AC (address-claimed) message
+ * even if it sends another AC message during that time window
+ * to resolve the address contention with another CF.
+ *
+ * As stated in "4.4.2.3 - Address-claimed message":
+ * In order to successfully claim an address, the CF sending
+ * an address claimed message shall not receive a contending
+ * claim from another CF for at least 250 ms.
+ *
+ * As stated in "4.4.3.2 - NAME management (NM) message":
+ * 1) A commanding CF can
+ * d) request that a CF with a specified NAME transmit
+ * the address-claimed message with its current NAME.
+ * 2) A target CF shall
+ * d) send an address-claimed message in response to a
+ * request for a matching NAME
+ *
+ * Taking the above arguments into account, the 250 ms wait is
+ * requested only during network initialization.
+ *
+ * Do not restart the timer on AC message if both the NAME and
+ * the address match and so if the address has already been
+ * claimed (timer has expired) or the AC message has been sent
+ * to resolve the contention with another CF (timer is still
+ * running).
+ */
+ goto out_ecu_put;
+ }
+
if (!ecu && j1939_address_is_unicast(skcb->addr.sa))
ecu = j1939_ecu_create_locked(priv, name);
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index 5c722b55fe23..fce9b9ebf13f 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -1092,10 +1092,6 @@ static bool j1939_session_deactivate(struct j1939_session *session)
bool active;
j1939_session_list_lock(priv);
- /* This function should be called with a session ref-count of at
- * least 2.
- */
- WARN_ON_ONCE(kref_read(&session->kref) < 2);
active = j1939_session_deactivate_locked(session);
j1939_session_list_unlock(priv);
diff --git a/net/can/raw.c b/net/can/raw.c
index 81071cdb0301..ba86782ba8bb 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -132,8 +132,8 @@ static void raw_rcv(struct sk_buff *oskb, void *data)
return;
/* make sure to not pass oversized frames to the socket */
- if ((can_is_canfd_skb(oskb) && !ro->fd_frames && !ro->xl_frames) ||
- (can_is_canxl_skb(oskb) && !ro->xl_frames))
+ if ((!ro->fd_frames && can_is_canfd_skb(oskb)) ||
+ (!ro->xl_frames && can_is_canxl_skb(oskb)))
return;
/* eliminate multiple filter matches for the same skb */
@@ -670,6 +670,11 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
if (copy_from_sockptr(&ro->fd_frames, optval, optlen))
return -EFAULT;
+ /* Enabling CAN XL includes CAN FD */
+ if (ro->xl_frames && !ro->fd_frames) {
+ ro->fd_frames = ro->xl_frames;
+ return -EINVAL;
+ }
break;
case CAN_RAW_XL_FRAMES:
@@ -679,6 +684,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
if (copy_from_sockptr(&ro->xl_frames, optval, optlen))
return -EFAULT;
+ /* Enabling CAN XL includes CAN FD */
+ if (ro->xl_frames)
+ ro->fd_frames = ro->xl_frames;
break;
case CAN_RAW_JOIN_FILTERS:
@@ -786,6 +794,25 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
return 0;
}
+static bool raw_bad_txframe(struct raw_sock *ro, struct sk_buff *skb, int mtu)
+{
+ /* Classical CAN -> no checks for flags and device capabilities */
+ if (can_is_can_skb(skb))
+ return false;
+
+ /* CAN FD -> needs to be enabled and a CAN FD or CAN XL device */
+ if (ro->fd_frames && can_is_canfd_skb(skb) &&
+ (mtu == CANFD_MTU || can_is_canxl_dev_mtu(mtu)))
+ return false;
+
+ /* CAN XL -> needs to be enabled and a CAN XL device */
+ if (ro->xl_frames && can_is_canxl_skb(skb) &&
+ can_is_canxl_dev_mtu(mtu))
+ return false;
+
+ return true;
+}
+
static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
{
struct sock *sk = sock->sk;
@@ -833,20 +860,8 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
goto free_skb;
err = -EINVAL;
- if (ro->xl_frames && can_is_canxl_dev_mtu(dev->mtu)) {
- /* CAN XL, CAN FD and Classical CAN */
- if (!can_is_canxl_skb(skb) && !can_is_canfd_skb(skb) &&
- !can_is_can_skb(skb))
- goto free_skb;
- } else if (ro->fd_frames && dev->mtu == CANFD_MTU) {
- /* CAN FD and Classical CAN */
- if (!can_is_canfd_skb(skb) && !can_is_can_skb(skb))
- goto free_skb;
- } else {
- /* Classical CAN */
- if (!can_is_can_skb(skb))
- goto free_skb;
- }
+ if (raw_bad_txframe(ro, skb, dev->mtu))
+ goto free_skb;
sockcm_init(&sockc, sk);
if (msg->msg_controllen) {
diff --git a/net/core/dev.c b/net/core/dev.c
index b76fb37b381e..f23e287602b7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1869,14 +1869,6 @@ static void __move_netdevice_notifier_net(struct net *src_net,
__register_netdevice_notifier_net(dst_net, nb, true);
}
-void move_netdevice_notifier_net(struct net *src_net, struct net *dst_net,
- struct notifier_block *nb)
-{
- rtnl_lock();
- __move_netdevice_notifier_net(src_net, dst_net, nb);
- rtnl_unlock();
-}
-
int register_netdevice_notifier_dev_net(struct net_device *dev,
struct notifier_block *nb,
struct netdev_net_notifier *nn)
@@ -10375,7 +10367,7 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
BUILD_BUG_ON(n > sizeof(*stats64) / sizeof(u64));
for (i = 0; i < n; i++)
- dst[i] = atomic_long_read(&src[i]);
+ dst[i] = (unsigned long)atomic_long_read(&src[i]);
/* zero out counters that only exist in rtnl_link_stats64 */
memset((char *)stats64 + n * sizeof(u64), 0,
sizeof(*stats64) - n * sizeof(u64));
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 032d6d0a5ce6..0bfc144df8b9 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4742,11 +4742,8 @@ static int devlink_reload(struct devlink *devlink, struct net *dest_net,
if (err)
return err;
- if (dest_net && !net_eq(dest_net, curr_net)) {
- move_netdevice_notifier_net(curr_net, dest_net,
- &devlink->netdevice_nb);
+ if (dest_net && !net_eq(dest_net, curr_net))
write_pnet(&devlink->_net, dest_net);
- }
err = devlink->ops->reload_up(devlink, action, limit, actions_performed, extack);
devlink_reload_failed_set(devlink, !!err);
@@ -9979,7 +9976,7 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
goto err_xa_alloc;
devlink->netdevice_nb.notifier_call = devlink_netdevice_event;
- ret = register_netdevice_notifier_net(net, &devlink->netdevice_nb);
+ ret = register_netdevice_notifier(&devlink->netdevice_nb);
if (ret)
goto err_register_netdevice_notifier;
@@ -10171,8 +10168,7 @@ void devlink_free(struct devlink *devlink)
xa_destroy(&devlink->snapshot_ids);
xa_destroy(&devlink->ports);
- WARN_ON_ONCE(unregister_netdevice_notifier_net(devlink_net(devlink),
- &devlink->netdevice_nb));
+ WARN_ON_ONCE(unregister_netdevice_notifier(&devlink->netdevice_nb));
xa_erase(&devlinks, devlink->index);
@@ -10503,6 +10499,8 @@ static int devlink_netdevice_event(struct notifier_block *nb,
break;
case NETDEV_REGISTER:
case NETDEV_CHANGENAME:
+ if (devlink_net(devlink) != dev_net(netdev))
+ return NOTIFY_OK;
/* Set the netdev on top of previously set type. Note this
* event happens also during net namespace change so here
* we take into account netdev pointer appearing in this
@@ -10512,6 +10510,8 @@ static int devlink_netdevice_event(struct notifier_block *nb,
netdev);
break;
case NETDEV_UNREGISTER:
+ if (devlink_net(devlink) != dev_net(netdev))
+ return NOTIFY_OK;
/* Clear netdev pointer, but not the type. This event happens
* also during net namespace change so we need to clear
* pointer to netdev that is going to another net namespace.
diff --git a/net/core/gro.c b/net/core/gro.c
index 506f83d715f8..4bac7ea6e025 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -162,6 +162,15 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
struct sk_buff *lp;
int segs;
+ /* Do not splice page pool based packets w/ non-page pool
+ * packets. This can result in reference count issues as page
+ * pool pages will not decrement the reference count and will
+ * instead be immediately returned to the pool or have frag
+ * count decremented.
+ */
+ if (p->pp_recycle != skb->pp_recycle)
+ return -ETOOMANYREFS;
+
/* pairs with WRITE_ONCE() in netif_set_gro_max_size() */
gro_max_size = READ_ONCE(p->dev->gro_max_size);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index f00a79fc301b..4edd2176e238 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -269,7 +269,7 @@ static int neigh_forced_gc(struct neigh_table *tbl)
(n->nud_state == NUD_NOARP) ||
(tbl->is_multicast &&
tbl->is_multicast(n->primary_key)) ||
- time_after(tref, n->updated))
+ !time_in_range(n->updated, tref, jiffies))
remove = true;
write_unlock(&n->lock);
@@ -289,7 +289,17 @@ static int neigh_forced_gc(struct neigh_table *tbl)
static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
+ /* Use safe distance from the jiffies - LONG_MAX point while timer
+ * is running in DELAY/PROBE state but still show to user space
+ * large times in the past.
+ */
+ unsigned long mint = jiffies - (LONG_MAX - 86400 * HZ);
+
neigh_hold(n);
+ if (!time_in_range(n->confirmed, mint, jiffies))
+ n->confirmed = mint;
+ if (time_before(n->used, n->confirmed))
+ n->used = n->confirmed;
if (unlikely(mod_timer(&n->timer, when))) {
printk("NEIGH: BUG, double timer add, state is %x\n",
n->nud_state);
@@ -1001,12 +1011,14 @@ static void neigh_periodic_work(struct work_struct *work)
goto next_elt;
}
- if (time_before(n->used, n->confirmed))
+ if (time_before(n->used, n->confirmed) &&
+ time_is_before_eq_jiffies(n->confirmed))
n->used = n->confirmed;
if (refcount_read(&n->refcnt) == 1 &&
(state == NUD_FAILED ||
- time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
+ !time_in_range_open(jiffies, n->used,
+ n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
*np = n->next;
neigh_mark_dead(n);
write_unlock(&n->lock);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 078a0a420c8a..7b69cf882b8e 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -304,6 +304,12 @@ struct net *get_net_ns_by_id(const struct net *net, int id)
}
EXPORT_SYMBOL_GPL(get_net_ns_by_id);
+/* init code that must occur even if setup_net() is not called. */
+static __net_init void preinit_net(struct net *net)
+{
+ ref_tracker_dir_init(&net->notrefcnt_tracker, 128);
+}
+
/*
* setup_net runs the initializers for the network namespace object.
*/
@@ -316,7 +322,6 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
refcount_set(&net->ns.count, 1);
ref_tracker_dir_init(&net->refcnt_tracker, 128);
- ref_tracker_dir_init(&net->notrefcnt_tracker, 128);
refcount_set(&net->passive, 1);
get_random_bytes(&net->hash_mix, sizeof(u32));
@@ -472,6 +477,8 @@ struct net *copy_net_ns(unsigned long flags,
rv = -ENOMEM;
goto dec_ucounts;
}
+
+ preinit_net(net);
refcount_set(&net->passive, 1);
net->ucounts = ucounts;
get_user_ns(user_ns);
@@ -1118,6 +1125,7 @@ void __init net_ns_init(void)
init_net.key_domain = &init_net_key_domain;
#endif
down_write(&pernet_ops_rwsem);
+ preinit_net(&init_net);
if (setup_net(&init_net, &init_user_ns))
panic("Could not setup the initial network namespace");
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4a0eb5593275..a31ff4d83ecc 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4100,7 +4100,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
skb_shinfo(skb)->frag_list = NULL;
- do {
+ while (list_skb) {
nskb = list_skb;
list_skb = list_skb->next;
@@ -4146,8 +4146,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
if (skb_needs_linearize(nskb, features) &&
__skb_linearize(nskb))
goto err_linearize;
-
- } while (list_skb);
+ }
skb->truesize = skb->truesize - delta_truesize;
skb->data_len = skb->data_len - delta_len;
diff --git a/net/core/sock.c b/net/core/sock.c
index f954d5893e79..6f27c24016fe 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1531,6 +1531,8 @@ set_sndbuf:
ret = -EINVAL;
break;
}
+ if ((u8)val == SOCK_TXREHASH_DEFAULT)
+ val = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
/* Paired with READ_ONCE() in tcp_rtx_synack() */
WRITE_ONCE(sk->sk_txrehash, (u8)val);
break;
@@ -3451,7 +3453,6 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_pacing_rate = ~0UL;
WRITE_ONCE(sk->sk_pacing_shift, 10);
sk->sk_incoming_cpu = -1;
- sk->sk_txrehash = SOCK_TXREHASH_DEFAULT;
sk_rx_queue_clear(sk);
/*
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 22fa2c5bc6ec..a68a7290a3b2 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -1569,15 +1569,16 @@ void sock_map_unhash(struct sock *sk)
psock = sk_psock(sk);
if (unlikely(!psock)) {
rcu_read_unlock();
- if (sk->sk_prot->unhash)
- sk->sk_prot->unhash(sk);
- return;
+ saved_unhash = READ_ONCE(sk->sk_prot)->unhash;
+ } else {
+ saved_unhash = psock->saved_unhash;
+ sock_map_remove_links(sk, psock);
+ rcu_read_unlock();
}
-
- saved_unhash = psock->saved_unhash;
- sock_map_remove_links(sk, psock);
- rcu_read_unlock();
- saved_unhash(sk);
+ if (WARN_ON_ONCE(saved_unhash == sock_map_unhash))
+ return;
+ if (saved_unhash)
+ saved_unhash(sk);
}
EXPORT_SYMBOL_GPL(sock_map_unhash);
@@ -1590,17 +1591,18 @@ void sock_map_destroy(struct sock *sk)
psock = sk_psock_get(sk);
if (unlikely(!psock)) {
rcu_read_unlock();
- if (sk->sk_prot->destroy)
- sk->sk_prot->destroy(sk);
- return;
+ saved_destroy = READ_ONCE(sk->sk_prot)->destroy;
+ } else {
+ saved_destroy = psock->saved_destroy;
+ sock_map_remove_links(sk, psock);
+ rcu_read_unlock();
+ sk_psock_stop(psock);
+ sk_psock_put(sk, psock);
}
-
- saved_destroy = psock->saved_destroy;
- sock_map_remove_links(sk, psock);
- rcu_read_unlock();
- sk_psock_stop(psock);
- sk_psock_put(sk, psock);
- saved_destroy(sk);
+ if (WARN_ON_ONCE(saved_destroy == sock_map_destroy))
+ return;
+ if (saved_destroy)
+ saved_destroy(sk);
}
EXPORT_SYMBOL_GPL(sock_map_destroy);
@@ -1615,16 +1617,21 @@ void sock_map_close(struct sock *sk, long timeout)
if (unlikely(!psock)) {
rcu_read_unlock();
release_sock(sk);
- return sk->sk_prot->close(sk, timeout);
+ saved_close = READ_ONCE(sk->sk_prot)->close;
+ } else {
+ saved_close = psock->saved_close;
+ sock_map_remove_links(sk, psock);
+ rcu_read_unlock();
+ sk_psock_stop(psock);
+ release_sock(sk);
+ cancel_work_sync(&psock->work);
+ sk_psock_put(sk, psock);
}
-
- saved_close = psock->saved_close;
- sock_map_remove_links(sk, psock);
- rcu_read_unlock();
- sk_psock_stop(psock);
- release_sock(sk);
- cancel_work_sync(&psock->work);
- sk_psock_put(sk, psock);
+ /* Make sure we do not recurse. This is a bug.
+ * Leak the socket instead of crashing on a stack overflow.
+ */
+ if (WARN_ON_ONCE(saved_close == sock_map_close))
+ return;
saved_close(sk, timeout);
}
EXPORT_SYMBOL_GPL(sock_map_close);
diff --git a/net/core/stream.c b/net/core/stream.c
index cd06750dd329..434446ab14c5 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -209,7 +209,6 @@ void sk_stream_kill_queues(struct sock *sk)
sk_mem_reclaim_final(sk);
WARN_ON_ONCE(sk->sk_wmem_queued);
- WARN_ON_ONCE(sk->sk_forward_alloc);
/* It is _impossible_ for the backlog to contain anything
* when we get here. All user references to this socket
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 4260fe466993..b9d7c3dd1cb3 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -551,11 +551,9 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL);
/* Clone pktoptions received with SYN, if we own the req */
if (*own_req && ireq->pktopts) {
- newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC);
+ newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
consume_skb(ireq->pktopts);
ireq->pktopts = NULL;
- if (newnp->pktoptions)
- skb_set_owner_r(newnp->pktoptions, newsk);
}
return newsk;
@@ -615,7 +613,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
--ANK (980728)
*/
if (np->rxopt.all)
- opt_skb = skb_clone(skb, GFP_ATOMIC);
+ opt_skb = skb_clone_and_charge_r(skb, sk);
if (sk->sk_state == DCCP_OPEN) { /* Fast path */
if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len))
@@ -679,7 +677,6 @@ ipv6_pktoptions:
np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
if (ipv6_opt_accepted(sk, opt_skb,
&DCCP_SKB_CB(opt_skb)->header.h6)) {
- skb_set_owner_r(opt_skb, sk);
memmove(IP6CB(opt_skb),
&DCCP_SKB_CB(opt_skb)->header.h6,
sizeof(struct inet6_skb_parm));
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 6c0ec2789943..cf11f10927e1 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -347,6 +347,7 @@ lookup_protocol:
sk->sk_destruct = inet_sock_destruct;
sk->sk_protocol = protocol;
sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
+ sk->sk_txrehash = READ_ONCE(net->core.sysctl_txrehash);
inet->uc_ttl = -1;
inet->mc_loop = 1;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index d1f837579398..f2c43f67187d 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -1225,9 +1225,6 @@ int inet_csk_listen_start(struct sock *sk)
sk->sk_ack_backlog = 0;
inet_csk_delack_init(sk);
- if (sk->sk_txrehash == SOCK_TXREHASH_DEFAULT)
- sk->sk_txrehash = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
-
/* There is race window here: we announce ourselves listening,
* but this transition is still not validated by get_port().
* It is OK, because this socket enters to hash table only
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 94aad3870c5f..cf26d65ca389 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -6,6 +6,7 @@
#include <linux/bpf.h>
#include <linux/init.h>
#include <linux/wait.h>
+#include <linux/util_macros.h>
#include <net/inet_common.h>
#include <net/tls.h>
@@ -639,10 +640,9 @@ EXPORT_SYMBOL_GPL(tcp_bpf_update_proto);
*/
void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
{
- int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
struct proto *prot = newsk->sk_prot;
- if (prot == &tcp_bpf_prots[family][TCP_BPF_BASE])
+ if (is_insidevar(prot, tcp_bpf_prots))
newsk->sk_prot = sk->sk_prot_creator;
}
#endif /* CONFIG_BPF_SYSCALL */
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index f7a84a4acffc..faa47f9ea73a 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3127,17 +3127,17 @@ static void add_v4_addrs(struct inet6_dev *idev)
offset = sizeof(struct in6_addr) - 4;
memcpy(&addr.s6_addr32[3], idev->dev->dev_addr + offset, 4);
- if (idev->dev->flags&IFF_POINTOPOINT) {
+ if (!(idev->dev->flags & IFF_POINTOPOINT) && idev->dev->type == ARPHRD_SIT) {
+ scope = IPV6_ADDR_COMPATv4;
+ plen = 96;
+ pflags |= RTF_NONEXTHOP;
+ } else {
if (idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_NONE)
return;
addr.s6_addr32[0] = htonl(0xfe800000);
scope = IFA_LINK;
plen = 64;
- } else {
- scope = IPV6_ADDR_COMPATv4;
- plen = 96;
- pflags |= RTF_NONEXTHOP;
}
if (addr.s6_addr32[3]) {
@@ -3447,6 +3447,30 @@ static void addrconf_gre_config(struct net_device *dev)
}
#endif
+static void addrconf_init_auto_addrs(struct net_device *dev)
+{
+ switch (dev->type) {
+#if IS_ENABLED(CONFIG_IPV6_SIT)
+ case ARPHRD_SIT:
+ addrconf_sit_config(dev);
+ break;
+#endif
+#if IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE)
+ case ARPHRD_IP6GRE:
+ case ARPHRD_IPGRE:
+ addrconf_gre_config(dev);
+ break;
+#endif
+ case ARPHRD_LOOPBACK:
+ init_loopback(dev);
+ break;
+
+ default:
+ addrconf_dev_config(dev);
+ break;
+ }
+}
+
static int fixup_permanent_addr(struct net *net,
struct inet6_dev *idev,
struct inet6_ifaddr *ifp)
@@ -3615,26 +3639,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
run_pending = 1;
}
- switch (dev->type) {
-#if IS_ENABLED(CONFIG_IPV6_SIT)
- case ARPHRD_SIT:
- addrconf_sit_config(dev);
- break;
-#endif
-#if IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE)
- case ARPHRD_IP6GRE:
- case ARPHRD_IPGRE:
- addrconf_gre_config(dev);
- break;
-#endif
- case ARPHRD_LOOPBACK:
- init_loopback(dev);
- break;
-
- default:
- addrconf_dev_config(dev);
- break;
- }
+ addrconf_init_auto_addrs(dev);
if (!IS_ERR_OR_NULL(idev)) {
if (run_pending)
@@ -6397,7 +6402,7 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
if (idev->cnf.addr_gen_mode != new_val) {
idev->cnf.addr_gen_mode = new_val;
- addrconf_dev_config(idev->dev);
+ addrconf_init_auto_addrs(idev->dev);
}
} else if (&net->ipv6.devconf_all->addr_gen_mode == ctl->data) {
struct net_device *dev;
@@ -6408,7 +6413,7 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
if (idev &&
idev->cnf.addr_gen_mode != new_val) {
idev->cnf.addr_gen_mode = new_val;
- addrconf_dev_config(idev->dev);
+ addrconf_init_auto_addrs(idev->dev);
}
}
}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index fee9163382c2..847934763868 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -222,6 +222,7 @@ lookup_protocol:
np->pmtudisc = IPV6_PMTUDISC_WANT;
np->repflow = net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ESTABLISHED;
sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
+ sk->sk_txrehash = READ_ONCE(net->core.sysctl_txrehash);
/* Init the ipv4 part of the socket since we can have sockets
* using v6 API for ipv4.
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index e624497fa992..9b6818453afe 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -51,7 +51,7 @@ static void ip6_datagram_flow_key_init(struct flowi6 *fl6, struct sock *sk)
fl6->flowi6_mark = sk->sk_mark;
fl6->fl6_dport = inet->inet_dport;
fl6->fl6_sport = inet->inet_sport;
- fl6->flowlabel = np->flow_label;
+ fl6->flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
fl6->flowi6_uid = sk->sk_uid;
if (!oif)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 11b736a76bd7..a52a4f12f146 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -272,6 +272,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
fl6.flowi6_proto = IPPROTO_TCP;
fl6.daddr = sk->sk_v6_daddr;
fl6.saddr = saddr ? *saddr : np->saddr;
+ fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
fl6.flowi6_oif = sk->sk_bound_dev_if;
fl6.flowi6_mark = sk->sk_mark;
fl6.fl6_dport = usin->sin6_port;
@@ -1387,14 +1388,11 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
/* Clone pktoptions received with SYN, if we own the req */
if (ireq->pktopts) {
- newnp->pktoptions = skb_clone(ireq->pktopts,
- sk_gfp_mask(sk, GFP_ATOMIC));
+ newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
consume_skb(ireq->pktopts);
ireq->pktopts = NULL;
- if (newnp->pktoptions) {
+ if (newnp->pktoptions)
tcp_v6_restore_cb(newnp->pktoptions);
- skb_set_owner_r(newnp->pktoptions, newsk);
- }
}
} else {
if (!req_unhash && found_dup_sk) {
@@ -1466,7 +1464,7 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
--ANK (980728)
*/
if (np->rxopt.all)
- opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
+ opt_skb = skb_clone_and_charge_r(skb, sk);
reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
@@ -1552,7 +1550,6 @@ ipv6_pktoptions:
if (np->repflow)
np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
- skb_set_owner_r(opt_skb, sk);
tcp_v6_restore_cb(opt_skb);
opt_skb = xchg(&np->pktoptions, opt_skb);
} else {
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 2bdbcec781cd..a815f5ab4c49 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1261,7 +1261,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
const struct sadb_x_nat_t_type* n_type;
struct xfrm_encap_tmpl *natt;
- x->encap = kmalloc(sizeof(*x->encap), GFP_KERNEL);
+ x->encap = kzalloc(sizeof(*x->encap), GFP_KERNEL);
if (!x->encap) {
err = -ENOMEM;
goto out;
diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c
index c2aae2a6d6a6..97bb4401dd3e 100644
--- a/net/mac802154/rx.c
+++ b/net/mac802154/rx.c
@@ -213,7 +213,6 @@ __ieee802154_rx_handle_packet(struct ieee802154_local *local,
ret = ieee802154_parse_frame_start(skb, &hdr);
if (ret) {
pr_debug("got invalid frame\n");
- kfree_skb(skb);
return;
}
diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
index 45bbe3e54cc2..3150f3f0c872 100644
--- a/net/mctp/af_mctp.c
+++ b/net/mctp/af_mctp.c
@@ -587,6 +587,11 @@ static void mctp_sk_unhash(struct sock *sk)
del_timer_sync(&msk->key_expiry);
}
+static void mctp_sk_destruct(struct sock *sk)
+{
+ skb_queue_purge(&sk->sk_receive_queue);
+}
+
static struct proto mctp_proto = {
.name = "MCTP",
.owner = THIS_MODULE,
@@ -623,6 +628,7 @@ static int mctp_pf_create(struct net *net, struct socket *sock,
return -ENOMEM;
sock_init_data(sock, sk);
+ sk->sk_destruct = mctp_sk_destruct;
rc = 0;
if (sk->sk_prot->init)
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 35b5f806fdda..dc5165d3eec4 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1428,6 +1428,7 @@ static int mpls_dev_sysctl_register(struct net_device *dev,
free:
kfree(table);
out:
+ mdev->sysctl = NULL;
return -ENOBUFS;
}
@@ -1437,6 +1438,9 @@ static void mpls_dev_sysctl_unregister(struct net_device *dev,
struct net *net = dev_net(dev);
struct ctl_table *table;
+ if (!mdev->sysctl)
+ return;
+
table = mdev->sysctl->ctl_table_arg;
unregister_net_sysctl_table(mdev->sysctl);
kfree(table);
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 2ea7eae43bdb..10fe9771a852 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -998,8 +998,8 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
{
int addrlen = sizeof(struct sockaddr_in);
struct sockaddr_storage addr;
- struct mptcp_sock *msk;
struct socket *ssock;
+ struct sock *newsk;
int backlog = 1024;
int err;
@@ -1008,11 +1008,13 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
if (err)
return err;
- msk = mptcp_sk(entry->lsk->sk);
- if (!msk)
+ newsk = entry->lsk->sk;
+ if (!newsk)
return -EINVAL;
- ssock = __mptcp_nmpc_socket(msk);
+ lock_sock(newsk);
+ ssock = __mptcp_nmpc_socket(mptcp_sk(newsk));
+ release_sock(newsk);
if (!ssock)
return -EINVAL;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 8cd6cc67c2c5..bc6c1f62a690 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2897,6 +2897,7 @@ bool __mptcp_close(struct sock *sk, long timeout)
struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk = mptcp_sk(sk);
bool do_cancel_work = false;
+ int subflows_alive = 0;
sk->sk_shutdown = SHUTDOWN_MASK;
@@ -2922,6 +2923,8 @@ cleanup:
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
bool slow = lock_sock_fast_nested(ssk);
+ subflows_alive += ssk->sk_state != TCP_CLOSE;
+
/* since the close timeout takes precedence on the fail one,
* cancel the latter
*/
@@ -2937,6 +2940,12 @@ cleanup:
}
sock_orphan(sk);
+ /* all the subflows are closed, only timeout can change the msk
+ * state, let's not keep resources busy for no reasons
+ */
+ if (subflows_alive == 0)
+ inet_sk_state_store(sk, TCP_CLOSE);
+
sock_hold(sk);
pr_debug("msk=%p state=%d", sk, sk->sk_state);
if (msk->token)
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index d4b1e6ec1b36..7f2c3727ab23 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -760,14 +760,21 @@ static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname,
static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
+ struct sock *sk = (struct sock *)msk;
struct socket *sock;
+ int ret = -EINVAL;
/* Limit to first subflow, before the connection establishment */
+ lock_sock(sk);
sock = __mptcp_nmpc_socket(msk);
if (!sock)
- return -EINVAL;
+ goto unlock;
- return tcp_setsockopt(sock->sk, level, optname, optval, optlen);
+ ret = tcp_setsockopt(sock->sk, level, optname, optval, optlen);
+
+unlock:
+ release_sock(sk);
+ return ret;
}
static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index ec54413fb31f..32904c76c6a1 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1399,6 +1399,7 @@ void __mptcp_error_report(struct sock *sk)
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
int err = sock_error(ssk);
+ int ssk_state;
if (!err)
continue;
@@ -1409,7 +1410,14 @@ void __mptcp_error_report(struct sock *sk)
if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk))
continue;
- inet_sk_state_store(sk, inet_sk_state_load(ssk));
+ /* We need to propagate only transition to CLOSE state.
+ * Orphaned socket will see such state change via
+ * subflow_sched_work_if_closed() and that path will properly
+ * destroy the msk as needed.
+ */
+ ssk_state = inet_sk_state_load(ssk);
+ if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
+ inet_sk_state_store(sk, ssk_state);
sk->sk_err = -err;
/* This barrier is coupled with smp_rmb() in mptcp_poll() */
@@ -1679,7 +1687,7 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
if (err)
return err;
- lock_sock(sf->sk);
+ lock_sock_nested(sf->sk, SINGLE_DEPTH_NESTING);
/* the newly created socket has to be in the same cgroup as its parent */
mptcp_attach_cgroup(sk, sf->sk);
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 945dd40e7077..011d414038ea 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -142,10 +142,11 @@ static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
}
#endif
+/* do_basic_checks ensures sch->length > 0, do not use before */
#define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) \
for ((offset) = (dataoff) + sizeof(struct sctphdr), (count) = 0; \
- ((sch) = skb_header_pointer((skb), (offset), sizeof(_sch), &(_sch))) && \
- (sch)->length; \
+ (offset) < (skb)->len && \
+ ((sch) = skb_header_pointer((skb), (offset), sizeof(_sch), &(_sch))); \
(offset) += (ntohs((sch)->length) + 3) & ~3, (count)++)
/* Some validity checks to make sure the chunks are fine */
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 6f7f4392cffb..5a4cb796150f 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -400,6 +400,11 @@ static int nr_listen(struct socket *sock, int backlog)
struct sock *sk = sock->sk;
lock_sock(sk);
+ if (sock->state != SS_UNCONNECTED) {
+ release_sock(sk);
+ return -EINVAL;
+ }
+
if (sk->sk_state != TCP_LISTEN) {
memset(&nr_sk(sk)->user_addr, 0, AX25_ADDR_LEN);
sk->sk_max_ack_backlog = backlog;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index a71795355aec..fcee6012293b 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1004,14 +1004,14 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
key = kzalloc(sizeof(*key), GFP_KERNEL);
if (!key) {
error = -ENOMEM;
- goto err_kfree_key;
+ goto err_kfree_flow;
}
ovs_match_init(&match, key, false, &mask);
error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
a[OVS_FLOW_ATTR_MASK], log);
if (error)
- goto err_kfree_flow;
+ goto err_kfree_key;
ovs_flow_mask_key(&new_flow->key, key, true, &mask);
@@ -1019,14 +1019,14 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
key, log);
if (error)
- goto err_kfree_flow;
+ goto err_kfree_key;
/* Validate actions. */
error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
&new_flow->key, &acts, log);
if (error) {
OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
- goto err_kfree_flow;
+ goto err_kfree_key;
}
reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
@@ -1126,10 +1126,10 @@ err_unlock_ovs:
kfree_skb(reply);
err_kfree_acts:
ovs_nla_free_flow_actions(acts);
-err_kfree_flow:
- ovs_flow_free(new_flow, false);
err_kfree_key:
kfree(key);
+err_kfree_flow:
+ ovs_flow_free(new_flow, false);
error:
return error;
}
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 6e38f68f88c2..f2698d2316df 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -449,7 +449,7 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
err = attach_meter(meter_tbl, meter);
if (err)
- goto exit_unlock;
+ goto exit_free_old_meter;
ovs_unlock();
@@ -472,6 +472,8 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
genlmsg_end(reply, ovs_reply_header);
return genlmsg_reply(reply, info);
+exit_free_old_meter:
+ ovs_meter_free(old_meter);
exit_unlock:
ovs_unlock();
nlmsg_free(reply);
diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c
index 1990d496fcfc..e595079c2caf 100644
--- a/net/qrtr/ns.c
+++ b/net/qrtr/ns.c
@@ -83,7 +83,10 @@ static struct qrtr_node *node_get(unsigned int node_id)
node->id = node_id;
- radix_tree_insert(&nodes, node_id, node);
+ if (radix_tree_insert(&nodes, node_id, node)) {
+ kfree(node);
+ return NULL;
+ }
return node;
}
diff --git a/net/rds/message.c b/net/rds/message.c
index b47e4f0a1639..c19c93561227 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -104,9 +104,9 @@ static void rds_rm_zerocopy_callback(struct rds_sock *rs,
spin_lock_irqsave(&q->lock, flags);
head = &q->zcookie_head;
if (!list_empty(head)) {
- info = list_entry(head, struct rds_msg_zcopy_info,
- rs_zcookie_next);
- if (info && rds_zcookie_add(info, cookie)) {
+ info = list_first_entry(head, struct rds_msg_zcopy_info,
+ rs_zcookie_next);
+ if (rds_zcookie_add(info, cookie)) {
spin_unlock_irqrestore(&q->lock, flags);
kfree(rds_info_from_znotifier(znotif));
/* caller invokes rds_wake_sk_sleep() */
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 36fefc3957d7..ca2b17f32670 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -488,6 +488,12 @@ static int rose_listen(struct socket *sock, int backlog)
{
struct sock *sk = sock->sk;
+ lock_sock(sk);
+ if (sock->state != SS_UNCONNECTED) {
+ release_sock(sk);
+ return -EINVAL;
+ }
+
if (sk->sk_state != TCP_LISTEN) {
struct rose_sock *rose = rose_sk(sk);
@@ -497,8 +503,10 @@ static int rose_listen(struct socket *sock, int backlog)
memset(rose->dest_digis, 0, AX25_ADDR_LEN * ROSE_MAX_DIGIS);
sk->sk_max_ack_backlog = backlog;
sk->sk_state = TCP_LISTEN;
+ release_sock(sk);
return 0;
}
+ release_sock(sk);
return -EOPNOTSUPP;
}
diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c
index 4b1b59da5c0b..4d15b6a6169c 100644
--- a/net/sched/act_ctinfo.c
+++ b/net/sched/act_ctinfo.c
@@ -93,7 +93,7 @@ TC_INDIRECT_SCOPE int tcf_ctinfo_act(struct sk_buff *skb,
cp = rcu_dereference_bh(ca->params);
tcf_lastuse_update(&ca->tcf_tm);
- bstats_update(&ca->tcf_bstats, skb);
+ tcf_action_update_bstats(&ca->common, skb);
action = READ_ONCE(ca->tcf_action);
wlen = skb_network_offset(skb);
@@ -212,8 +212,8 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
index = actparm->index;
err = tcf_idr_check_alloc(tn, &index, a, bind);
if (!err) {
- ret = tcf_idr_create(tn, index, est, a,
- &act_ctinfo_ops, bind, false, flags);
+ ret = tcf_idr_create_from_flags(tn, index, est, a,
+ &act_ctinfo_ops, bind, flags);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index ee2a050c887b..6640e75eaa02 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -12,6 +12,7 @@
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/refcount.h>
+#include <linux/rcupdate.h>
#include <net/act_api.h>
#include <net/netlink.h>
#include <net/pkt_cls.h>
@@ -339,6 +340,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
struct tcf_result cr = {};
int err, balloc = 0;
struct tcf_exts e;
+ bool update_h = false;
err = tcf_exts_init(&e, net, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
if (err < 0)
@@ -456,10 +458,13 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
}
}
- if (cp->perfect)
+ if (cp->perfect) {
r = cp->perfect + handle;
- else
- r = tcindex_lookup(cp, handle) ? : &new_filter_result;
+ } else {
+ /* imperfect area is updated in-place using rcu */
+ update_h = !!tcindex_lookup(cp, handle);
+ r = &new_filter_result;
+ }
if (r == &new_filter_result) {
f = kzalloc(sizeof(*f), GFP_KERNEL);
@@ -485,7 +490,28 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
rcu_assign_pointer(tp->root, cp);
- if (r == &new_filter_result) {
+ if (update_h) {
+ struct tcindex_filter __rcu **fp;
+ struct tcindex_filter *cf;
+
+ f->result.res = r->res;
+ tcf_exts_change(&f->result.exts, &r->exts);
+
+ /* imperfect area bucket */
+ fp = cp->h + (handle % cp->hash);
+
+ /* lookup the filter, guaranteed to exist */
+ for (cf = rcu_dereference_bh_rtnl(*fp); cf;
+ fp = &cf->next, cf = rcu_dereference_bh_rtnl(*fp))
+ if (cf->key == (u16)handle)
+ break;
+
+ f->next = cf->next;
+
+ cf = rcu_replace_pointer(*fp, f, 1);
+ tcf_exts_get_net(&cf->result.exts);
+ tcf_queue_work(&cf->rwork, tcindex_destroy_fexts_work);
+ } else if (r == &new_filter_result) {
struct tcindex_filter *nfp;
struct tcindex_filter __rcu **fp;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index f46643850df8..92f2975b6a82 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -431,7 +431,10 @@ static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
while (cl->cmode == HTB_MAY_BORROW && p && mask) {
m = mask;
while (m) {
- int prio = ffz(~m);
+ unsigned int prio = ffz(~m);
+
+ if (WARN_ON_ONCE(prio >= ARRAY_SIZE(p->inner.clprio)))
+ break;
m &= ~(1 << prio);
if (p->inner.clprio[prio].feed.rb_node)
diff --git a/net/sctp/diag.c b/net/sctp/diag.c
index a557009e9832..c3d6b92dd386 100644
--- a/net/sctp/diag.c
+++ b/net/sctp/diag.c
@@ -343,11 +343,9 @@ static int sctp_sock_filter(struct sctp_endpoint *ep, struct sctp_transport *tsp
struct sctp_comm_param *commp = p;
struct sock *sk = ep->base.sk;
const struct inet_diag_req_v2 *r = commp->r;
- struct sctp_association *assoc =
- list_entry(ep->asocs.next, struct sctp_association, asocs);
/* find the ep only once through the transports by this condition */
- if (tsp->asoc != assoc)
+ if (!list_is_first(&tsp->asoc->asocs, &ep->asocs))
return 0;
if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family)
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index ca1eba95c293..2f66a2006517 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -196,9 +196,7 @@ void sctp_transport_reset_hb_timer(struct sctp_transport *transport)
/* When a data chunk is sent, reset the heartbeat interval. */
expires = jiffies + sctp_transport_timeout(transport);
- if ((time_before(transport->hb_timer.expires, expires) ||
- !timer_pending(&transport->hb_timer)) &&
- !mod_timer(&transport->hb_timer,
+ if (!mod_timer(&transport->hb_timer,
expires + get_random_u32_below(transport->rto)))
sctp_transport_hold(transport);
}
diff --git a/net/socket.c b/net/socket.c
index 888cd618a968..c6c44e26e954 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -385,7 +385,7 @@ static const struct xattr_handler sockfs_xattr_handler = {
};
static int sockfs_security_xattr_set(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *dentry, struct inode *inode,
const char *suffix, const void *value,
size_t size, int flags)
@@ -589,10 +589,10 @@ static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
return used;
}
-static int sockfs_setattr(struct user_namespace *mnt_userns,
+static int sockfs_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *iattr)
{
- int err = simple_setattr(&init_user_ns, dentry, iattr);
+ int err = simple_setattr(&nop_mnt_idmap, dentry, iattr);
if (!err && (iattr->ia_valid & ATTR_UID)) {
struct socket *sock = SOCKET_I(d_inode(dentry));
@@ -971,9 +971,12 @@ static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
static void sock_recv_mark(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb)
{
- if (sock_flag(sk, SOCK_RCVMARK) && skb)
- put_cmsg(msg, SOL_SOCKET, SO_MARK, sizeof(__u32),
- &skb->mark);
+ if (sock_flag(sk, SOCK_RCVMARK) && skb) {
+ /* We must use a bounce buffer for CONFIG_HARDENED_USERCOPY=y */
+ __u32 mark = skb->mark;
+
+ put_cmsg(msg, SOL_SOCKET, SO_MARK, sizeof(__u32), &mark);
+ }
}
void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index b35c8701876a..a38733f2197a 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2614,6 +2614,7 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest,
/* Send a 'SYN-' to destination */
m.msg_name = dest;
m.msg_namelen = destlen;
+ iov_iter_kvec(&m.msg_iter, ITER_SOURCE, NULL, 0, 0);
/* If connect is in non-blocking case, set MSG_DONTWAIT to
* indicate send_msg() is never blocked.
@@ -2776,6 +2777,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
__skb_queue_head(&new_sk->sk_receive_queue, buf);
skb_set_owner_r(buf, new_sk);
}
+ iov_iter_kvec(&m.msg_iter, ITER_SOURCE, NULL, 0, 0);
__tipc_sendstream(new_sock, &m, 0);
release_sock(new_sk);
exit:
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 9ed978634125..a83d2b4275fa 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -2427,7 +2427,7 @@ static bool tls_is_tx_ready(struct tls_sw_context_tx *ctx)
{
struct tls_rec *rec;
- rec = list_first_entry(&ctx->tx_list, struct tls_rec, list);
+ rec = list_first_entry_or_null(&ctx->tx_list, struct tls_rec, list);
if (!rec)
return false;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index f0c2293f1d3b..81ff98298996 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1190,7 +1190,7 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
unsigned int new_hash, old_hash = sk->sk_hash;
struct unix_sock *u = unix_sk(sk);
struct net *net = sock_net(sk);
- struct user_namespace *ns; // barf...
+ struct mnt_idmap *idmap;
struct unix_address *addr;
struct dentry *dentry;
struct path parent;
@@ -1217,10 +1217,10 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
/*
* All right, let's create it.
*/
- ns = mnt_user_ns(parent.mnt);
+ idmap = mnt_idmap(parent.mnt);
err = security_path_mknod(&parent, dentry, mode, 0);
if (!err)
- err = vfs_mknod(ns, d_inode(parent.dentry), dentry, mode, 0);
+ err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0);
if (err)
goto out_path;
err = mutex_lock_interruptible(&u->bindlock);
@@ -1245,7 +1245,7 @@ out_unlock:
err = -EINVAL;
out_unlink:
/* failed after successful mknod? unlink what we'd created... */
- vfs_unlink(ns, d_inode(parent.dentry), dentry, NULL);
+ vfs_unlink(idmap, d_inode(parent.dentry), dentry, NULL);
out_path:
done_path_create(&parent, dentry);
out:
diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c
index a0f62fa02e06..8cbf45a8bcdc 100644
--- a/net/xfrm/xfrm_compat.c
+++ b/net/xfrm/xfrm_compat.c
@@ -5,6 +5,7 @@
* Based on code and translator idea by: Florian Westphal <fw@strlen.de>
*/
#include <linux/compat.h>
+#include <linux/nospec.h>
#include <linux/xfrm.h>
#include <net/xfrm.h>
@@ -302,7 +303,7 @@ static int xfrm_xlate64(struct sk_buff *dst, const struct nlmsghdr *nlh_src)
nla_for_each_attr(nla, attrs, len, remaining) {
int err;
- switch (type) {
+ switch (nlh_src->nlmsg_type) {
case XFRM_MSG_NEWSPDINFO:
err = xfrm_nla_cpy(dst, nla, nla_len(nla));
break;
@@ -437,6 +438,7 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla,
NL_SET_ERR_MSG(extack, "Bad attribute");
return -EOPNOTSUPP;
}
+ type = array_index_nospec(type, XFRMA_MAX + 1);
if (nla_len(nla) < compat_policy[type].len) {
NL_SET_ERR_MSG(extack, "Attribute bad length");
return -EOPNOTSUPP;
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index c06e54a10540..436d29640ac2 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -279,8 +279,7 @@ static int xfrm6_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb)
goto out;
if (x->props.flags & XFRM_STATE_DECAP_DSCP)
- ipv6_copy_dscp(ipv6_get_dsfield(ipv6_hdr(skb)),
- ipipv6_hdr(skb));
+ ipv6_copy_dscp(XFRM_MODE_SKB_CB(skb)->tos, ipipv6_hdr(skb));
if (!(x->props.flags & XFRM_STATE_NOECN))
ipip6_ecn_decapsulate(skb);
diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c
index 1f99dc469027..35279c220bd7 100644
--- a/net/xfrm/xfrm_interface_core.c
+++ b/net/xfrm/xfrm_interface_core.c
@@ -310,6 +310,52 @@ static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet)
skb->mark = 0;
}
+static int xfrmi_input(struct sk_buff *skb, int nexthdr, __be32 spi,
+ int encap_type, unsigned short family)
+{
+ struct sec_path *sp;
+
+ sp = skb_sec_path(skb);
+ if (sp && (sp->len || sp->olen) &&
+ !xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family))
+ goto discard;
+
+ XFRM_SPI_SKB_CB(skb)->family = family;
+ if (family == AF_INET) {
+ XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
+ } else {
+ XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+ }
+
+ return xfrm_input(skb, nexthdr, spi, encap_type);
+discard:
+ kfree_skb(skb);
+ return 0;
+}
+
+static int xfrmi4_rcv(struct sk_buff *skb)
+{
+ return xfrmi_input(skb, ip_hdr(skb)->protocol, 0, 0, AF_INET);
+}
+
+static int xfrmi6_rcv(struct sk_buff *skb)
+{
+ return xfrmi_input(skb, skb_network_header(skb)[IP6CB(skb)->nhoff],
+ 0, 0, AF_INET6);
+}
+
+static int xfrmi4_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
+{
+ return xfrmi_input(skb, nexthdr, spi, encap_type, AF_INET);
+}
+
+static int xfrmi6_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
+{
+ return xfrmi_input(skb, nexthdr, spi, encap_type, AF_INET6);
+}
+
static int xfrmi_rcv_cb(struct sk_buff *skb, int err)
{
const struct xfrm_mode *inner_mode;
@@ -945,8 +991,8 @@ static struct pernet_operations xfrmi_net_ops = {
};
static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = {
- .handler = xfrm6_rcv,
- .input_handler = xfrm_input,
+ .handler = xfrmi6_rcv,
+ .input_handler = xfrmi6_input,
.cb_handler = xfrmi_rcv_cb,
.err_handler = xfrmi6_err,
.priority = 10,
@@ -996,8 +1042,8 @@ static struct xfrm6_tunnel xfrmi_ip6ip_handler __read_mostly = {
#endif
static struct xfrm4_protocol xfrmi_esp4_protocol __read_mostly = {
- .handler = xfrm4_rcv,
- .input_handler = xfrm_input,
+ .handler = xfrmi4_rcv,
+ .input_handler = xfrmi4_input,
.cb_handler = xfrmi_rcv_cb,
.err_handler = xfrmi4_err,
.priority = 10,
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index e9eb82c5457d..5c61ec04b839 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -336,7 +336,7 @@ static void xfrm_policy_timer(struct timer_list *t)
}
if (xp->lft.hard_use_expires_seconds) {
time64_t tmo = xp->lft.hard_use_expires_seconds +
- (xp->curlft.use_time ? : xp->curlft.add_time) - now;
+ (READ_ONCE(xp->curlft.use_time) ? : xp->curlft.add_time) - now;
if (tmo <= 0)
goto expired;
if (tmo < next)
@@ -354,7 +354,7 @@ static void xfrm_policy_timer(struct timer_list *t)
}
if (xp->lft.soft_use_expires_seconds) {
time64_t tmo = xp->lft.soft_use_expires_seconds +
- (xp->curlft.use_time ? : xp->curlft.add_time) - now;
+ (READ_ONCE(xp->curlft.use_time) ? : xp->curlft.add_time) - now;
if (tmo <= 0) {
warn = 1;
tmo = XFRM_KM_TIMEOUT;
@@ -3661,7 +3661,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
return 1;
}
- pol->curlft.use_time = ktime_get_real_seconds();
+ /* This lockless write can happen from different cpus. */
+ WRITE_ONCE(pol->curlft.use_time, ktime_get_real_seconds());
pols[0] = pol;
npols++;
@@ -3676,7 +3677,9 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
xfrm_pol_put(pols[0]);
return 0;
}
- pols[1]->curlft.use_time = ktime_get_real_seconds();
+ /* This write can happen from different cpus. */
+ WRITE_ONCE(pols[1]->curlft.use_time,
+ ktime_get_real_seconds());
npols++;
}
}
@@ -3742,6 +3745,9 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
goto reject;
}
+ if (if_id)
+ secpath_reset(skb);
+
xfrm_pols_put(pols, npols);
return 1;
}
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 89c731f4f0c7..00afe831c71c 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -577,7 +577,7 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
if (x->km.state == XFRM_STATE_EXPIRED)
goto expired;
if (x->lft.hard_add_expires_seconds) {
- long tmo = x->lft.hard_add_expires_seconds +
+ time64_t tmo = x->lft.hard_add_expires_seconds +
x->curlft.add_time - now;
if (tmo <= 0) {
if (x->xflags & XFRM_SOFT_EXPIRE) {
@@ -594,8 +594,8 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
next = tmo;
}
if (x->lft.hard_use_expires_seconds) {
- long tmo = x->lft.hard_use_expires_seconds +
- (x->curlft.use_time ? : now) - now;
+ time64_t tmo = x->lft.hard_use_expires_seconds +
+ (READ_ONCE(x->curlft.use_time) ? : now) - now;
if (tmo <= 0)
goto expired;
if (tmo < next)
@@ -604,7 +604,7 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
if (x->km.dying)
goto resched;
if (x->lft.soft_add_expires_seconds) {
- long tmo = x->lft.soft_add_expires_seconds +
+ time64_t tmo = x->lft.soft_add_expires_seconds +
x->curlft.add_time - now;
if (tmo <= 0) {
warn = 1;
@@ -616,8 +616,8 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
}
}
if (x->lft.soft_use_expires_seconds) {
- long tmo = x->lft.soft_use_expires_seconds +
- (x->curlft.use_time ? : now) - now;
+ time64_t tmo = x->lft.soft_use_expires_seconds +
+ (READ_ONCE(x->curlft.use_time) ? : now) - now;
if (tmo <= 0)
warn = 1;
else if (tmo < next)
@@ -1906,7 +1906,7 @@ out:
hrtimer_start(&x1->mtimer, ktime_set(1, 0),
HRTIMER_MODE_REL_SOFT);
- if (x1->curlft.use_time)
+ if (READ_ONCE(x1->curlft.use_time))
xfrm_state_check_expire(x1);
if (x->props.smark.m || x->props.smark.v || x->if_id) {
@@ -1940,8 +1940,8 @@ int xfrm_state_check_expire(struct xfrm_state *x)
{
xfrm_dev_state_update_curlft(x);
- if (!x->curlft.use_time)
- x->curlft.use_time = ktime_get_real_seconds();
+ if (!READ_ONCE(x->curlft.use_time))
+ WRITE_ONCE(x->curlft.use_time, ktime_get_real_seconds());
if (x->curlft.bytes >= x->lft.hard_byte_limit ||
x->curlft.packets >= x->lft.hard_packet_limit) {