summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan_dev.c3
-rw-r--r--net/9p/client.c2
-rw-r--r--net/batman-adv/bat_iv_ogm.c132
-rw-r--r--net/batman-adv/bitarray.c6
-rw-r--r--net/batman-adv/bitarray.h10
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c78
-rw-r--r--net/batman-adv/bridge_loop_avoidance.h8
-rw-r--r--net/batman-adv/debugfs.h1
-rw-r--r--net/batman-adv/distributed-arp-table.c62
-rw-r--r--net/batman-adv/distributed-arp-table.h8
-rw-r--r--net/batman-adv/fragmentation.c13
-rw-r--r--net/batman-adv/gateway_client.c77
-rw-r--r--net/batman-adv/gateway_client.h4
-rw-r--r--net/batman-adv/gateway_common.c67
-rw-r--r--net/batman-adv/gateway_common.h1
-rw-r--r--net/batman-adv/hard-interface.c44
-rw-r--r--net/batman-adv/hash.c6
-rw-r--r--net/batman-adv/hash.h12
-rw-r--r--net/batman-adv/icmp_socket.c6
-rw-r--r--net/batman-adv/icmp_socket.h1
-rw-r--r--net/batman-adv/main.c86
-rw-r--r--net/batman-adv/main.h50
-rw-r--r--net/batman-adv/multicast.c35
-rw-r--r--net/batman-adv/multicast.h2
-rw-r--r--net/batman-adv/network-coding.c55
-rw-r--r--net/batman-adv/network-coding.h4
-rw-r--r--net/batman-adv/originator.c110
-rw-r--r--net/batman-adv/originator.h16
-rw-r--r--net/batman-adv/packet.h204
-rw-r--r--net/batman-adv/routing.c24
-rw-r--r--net/batman-adv/routing.h6
-rw-r--r--net/batman-adv/send.c8
-rw-r--r--net/batman-adv/send.h11
-rw-r--r--net/batman-adv/soft-interface.c30
-rw-r--r--net/batman-adv/soft-interface.h4
-rw-r--r--net/batman-adv/sysfs.c4
-rw-r--r--net/batman-adv/sysfs.h2
-rw-r--r--net/batman-adv/translation-table.c313
-rw-r--r--net/batman-adv/translation-table.h31
-rw-r--r--net/batman-adv/types.h115
-rw-r--r--net/bridge/br_device.c3
-rw-r--r--net/bridge/br_multicast.c4
-rw-r--r--net/bridge/br_netlink.c36
-rw-r--r--net/bridge/br_private.h5
-rw-r--r--net/bridge/br_vlan.c35
-rw-r--r--net/bridge/netfilter/ebtables.c2
-rw-r--r--net/caif/caif_dev.c2
-rw-r--r--net/core/dev.c21
-rw-r--r--net/core/dst.c4
-rw-r--r--net/core/filter.c11
-rw-r--r--net/core/lwtunnel.c60
-rw-r--r--net/core/net-traces.c1
-rw-r--r--net/core/netpoll.c2
-rw-r--r--net/core/skbuff.c43
-rw-r--r--net/core/sock.c2
-rw-r--r--net/core/utils.c17
-rw-r--r--net/dsa/dsa.c40
-rw-r--r--net/dsa/slave.c2
-rw-r--r--net/hsr/hsr_device.c2
-rw-r--r--net/ieee802154/6lowpan/core.c2
-rw-r--r--net/ipv4/Kconfig14
-rw-r--r--net/ipv4/Makefile1
-rw-r--r--net/ipv4/af_inet.c10
-rw-r--r--net/ipv4/ah4.c4
-rw-r--r--net/ipv4/fib_frontend.c3
-rw-r--r--net/ipv4/fib_semantics.c40
-rw-r--r--net/ipv4/fib_trie.c7
-rw-r--r--net/ipv4/fou.c32
-rw-r--r--net/ipv4/geneve_core.c447
-rw-r--r--net/ipv4/icmp.c7
-rw-r--r--net/ipv4/igmp.c59
-rw-r--r--net/ipv4/inet_connection_sock.c2
-rw-r--r--net/ipv4/inetpeer.c20
-rw-r--r--net/ipv4/ip_fragment.c3
-rw-r--r--net/ipv4/ip_gre.c36
-rw-r--r--net/ipv4/ip_tunnel_core.c127
-rw-r--r--net/ipv4/netfilter/Kconfig12
-rw-r--r--net/ipv4/netfilter/Makefile3
-rw-r--r--net/ipv4/netfilter/arp_tables.c19
-rw-r--r--net/ipv4/netfilter/ip_tables.c28
-rw-r--r--net/ipv4/netfilter/ipt_ECN.c2
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c2
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c4
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c17
-rw-r--r--net/ipv4/netfilter/nf_dup_ipv4.c120
-rw-r--r--net/ipv4/netfilter/nf_nat_l3proto_ipv4.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_icmp.c2
-rw-r--r--net/ipv4/netfilter/nft_dup_ipv4.c110
-rw-r--r--net/ipv4/route.c29
-rw-r--r--net/ipv4/sysctl_net_ipv4.c36
-rw-r--r--net/ipv4/tcp.c2
-rw-r--r--net/ipv4/tcp_input.c21
-rw-r--r--net/ipv4/tcp_metrics.c81
-rw-r--r--net/ipv4/tcp_output.c12
-rw-r--r--net/ipv4/udp_tunnel.c25
-rw-r--r--net/ipv4/xfrm4_policy.c7
-rw-r--r--net/ipv6/Kconfig19
-rw-r--r--net/ipv6/Makefile1
-rw-r--r--net/ipv6/addrconf.c2
-rw-r--r--net/ipv6/ah6.c4
-rw-r--r--net/ipv6/ila.c229
-rw-r--r--net/ipv6/ip6_fib.c3
-rw-r--r--net/ipv6/ip6_gre.c1
-rw-r--r--net/ipv6/ip6_udp_tunnel.c9
-rw-r--r--net/ipv6/mcast_snoop.c33
-rw-r--r--net/ipv6/ndisc.c10
-rw-r--r--net/ipv6/netfilter/Kconfig12
-rw-r--r--net/ipv6/netfilter/Makefile3
-rw-r--r--net/ipv6/netfilter/ip6_tables.c23
-rw-r--r--net/ipv6/netfilter/ip6t_REJECT.c6
-rw-r--r--net/ipv6/netfilter/ip6t_SYNPROXY.c2
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c2
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c5
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c1
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c18
-rw-r--r--net/ipv6/netfilter/nf_dup_ipv6.c96
-rw-r--r--net/ipv6/netfilter/nf_nat_l3proto_ipv6.c4
-rw-r--r--net/ipv6/netfilter/nf_nat_proto_icmpv6.c2
-rw-r--r--net/ipv6/netfilter/nft_dup_ipv6.c108
-rw-r--r--net/ipv6/route.c109
-rw-r--r--net/ipv6/xfrm6_policy.c7
-rw-r--r--net/key/af_key.c46
-rw-r--r--net/mac80211/rc80211_minstrel.c11
-rw-r--r--net/mpls/mpls_iptunnel.c8
-rw-r--r--net/netfilter/Kconfig2
-rw-r--r--net/netfilter/core.c3
-rw-r--r--net/netfilter/ipvs/Kconfig11
-rw-r--r--net/netfilter/ipvs/Makefile1
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c143
-rw-r--r--net/netfilter/ipvs/ip_vs_nfct.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_ovf.c86
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c269
-rw-r--r--net/netfilter/nf_conntrack_core.c134
-rw-r--r--net/netfilter/nf_conntrack_expect.c21
-rw-r--r--net/netfilter/nf_conntrack_labels.c34
-rw-r--r--net/netfilter/nf_conntrack_netlink.c228
-rw-r--r--net/netfilter/nf_conntrack_pptp.c3
-rw-r--r--net/netfilter/nf_conntrack_seqadj.c9
-rw-r--r--net/netfilter/nf_conntrack_standalone.c39
-rw-r--r--net/netfilter/nf_nat_core.c24
-rw-r--r--net/netfilter/nf_nat_proto_dccp.c2
-rw-r--r--net/netfilter/nf_nat_proto_tcp.c2
-rw-r--r--net/netfilter/nf_nat_proto_udp.c2
-rw-r--r--net/netfilter/nf_nat_proto_udplite.c2
-rw-r--r--net/netfilter/nf_synproxy_core.c12
-rw-r--r--net/netfilter/nfnetlink_acct.c71
-rw-r--r--net/netfilter/nft_counter.c97
-rw-r--r--net/netfilter/nft_limit.c188
-rw-r--r--net/netfilter/nft_payload.c57
-rw-r--r--net/netfilter/xt_CT.c26
-rw-r--r--net/netfilter/xt_TCPMSS.c8
-rw-r--r--net/netfilter/xt_TCPOPTSTRIP.c2
-rw-r--r--net/netfilter/xt_TEE.c164
-rw-r--r--net/netfilter/xt_connlabel.c16
-rw-r--r--net/netfilter/xt_connlimit.c9
-rw-r--r--net/netfilter/xt_nfacct.c2
-rw-r--r--net/netlink/af_netlink.c2
-rw-r--r--net/nfc/nci/core.c18
-rw-r--r--net/nfc/nci/hci.c2
-rw-r--r--net/nfc/netlink.c91
-rw-r--r--net/openvswitch/Kconfig13
-rw-r--r--net/openvswitch/Makefile2
-rw-r--r--net/openvswitch/actions.c242
-rw-r--r--net/openvswitch/conntrack.c755
-rw-r--r--net/openvswitch/conntrack.h86
-rw-r--r--net/openvswitch/datapath.c86
-rw-r--r--net/openvswitch/datapath.h13
-rw-r--r--net/openvswitch/flow.c8
-rw-r--r--net/openvswitch/flow.h11
-rw-r--r--net/openvswitch/flow_netlink.c147
-rw-r--r--net/openvswitch/flow_netlink.h13
-rw-r--r--net/openvswitch/flow_table.c2
-rw-r--r--net/openvswitch/vport-geneve.c179
-rw-r--r--net/openvswitch/vport-internal_dev.c2
-rw-r--r--net/openvswitch/vport-netdev.c2
-rw-r--r--net/openvswitch/vport.c9
-rw-r--r--net/openvswitch/vport.h6
-rw-r--r--net/rds/af_rds.c9
-rw-r--r--net/rds/connection.c2
-rw-r--r--net/rds/ib.c9
-rw-r--r--net/rds/ib.h4
-rw-r--r--net/rds/ib_cm.c17
-rw-r--r--net/rds/ib_rdma.c57
-rw-r--r--net/rds/ib_recv.c76
-rw-r--r--net/rds/ib_send.c5
-rw-r--r--net/rds/rdma.c9
-rw-r--r--net/rds/rdma_transport.c15
-rw-r--r--net/rds/rds.h1
-rw-r--r--net/rds/send.c54
-rw-r--r--net/sched/act_api.c3
-rw-r--r--net/sched/act_bpf.c38
-rw-r--r--net/sched/act_connmark.c6
-rw-r--r--net/sched/act_nat.c7
-rw-r--r--net/sched/cls_rsvp.h18
-rw-r--r--net/sched/cls_tcindex.c29
-rw-r--r--net/sched/cls_u32.c13
-rw-r--r--net/sched/sch_api.c55
-rw-r--r--net/sched/sch_atm.c2
-rw-r--r--net/sched/sch_cbq.c2
-rw-r--r--net/sched/sch_choke.c2
-rw-r--r--net/sched/sch_drr.c2
-rw-r--r--net/sched/sch_dsmark.c2
-rw-r--r--net/sched/sch_fifo.c2
-rw-r--r--net/sched/sch_fq_codel.c2
-rw-r--r--net/sched/sch_generic.c54
-rw-r--r--net/sched/sch_gred.c8
-rw-r--r--net/sched/sch_hfsc.c2
-rw-r--r--net/sched/sch_htb.c8
-rw-r--r--net/sched/sch_multiq.c2
-rw-r--r--net/sched/sch_plug.c8
-rw-r--r--net/sched/sch_prio.c2
-rw-r--r--net/sched/sch_qfq.c2
-rw-r--r--net/sched/sch_sfb.c4
-rw-r--r--net/sched/sch_sfq.c2
-rw-r--r--net/sctp/sm_make_chunk.c7
-rw-r--r--net/sctp/sm_sideeffect.c2
-rw-r--r--net/tipc/link.c5
-rw-r--r--net/tipc/node.c27
218 files changed, 5256 insertions, 2743 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 01d7ba840df8..fded86508117 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -791,10 +791,9 @@ void vlan_setup(struct net_device *dev)
{
ether_setup(dev);
- dev->priv_flags |= IFF_802_1Q_VLAN;
+ dev->priv_flags |= IFF_802_1Q_VLAN | IFF_NO_QUEUE;
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
netif_keep_dst(dev);
- dev->tx_queue_len = 0;
dev->netdev_ops = &vlan_netdev_ops;
dev->destructor = vlan_dev_free;
diff --git a/net/9p/client.c b/net/9p/client.c
index 498454b3c06c..ea79ee9a7348 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1541,6 +1541,7 @@ p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err)
struct p9_client *clnt = fid->clnt;
struct p9_req_t *req;
int total = 0;
+ *err = 0;
p9_debug(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n",
fid->fid, (unsigned long long) offset, (int)iov_iter_count(to));
@@ -1620,6 +1621,7 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
struct p9_client *clnt = fid->clnt;
struct p9_req_t *req;
int total = 0;
+ *err = 0;
p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %zd\n",
fid->fid, (unsigned long long) offset,
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 753383c2215c..912d9c36fb1c 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -77,8 +77,7 @@ enum batadv_dup_status {
* @lq_index: index to store the value at
* @value: value to store in the ring buffer
*/
-static void batadv_ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index,
- uint8_t value)
+static void batadv_ring_buffer_set(u8 lq_recv[], u8 *lq_index, u8 value)
{
lq_recv[*lq_index] = value;
*lq_index = (*lq_index + 1) % BATADV_TQ_GLOBAL_WINDOW_SIZE;
@@ -91,12 +90,12 @@ static void batadv_ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index,
*
* Returns computed average value.
*/
-static uint8_t batadv_ring_buffer_avg(const uint8_t lq_recv[])
+static u8 batadv_ring_buffer_avg(const u8 lq_recv[])
{
- const uint8_t *ptr;
- uint16_t count = 0;
- uint16_t i = 0;
- uint16_t sum = 0;
+ const u8 *ptr;
+ u16 count = 0;
+ u16 i = 0;
+ u16 sum = 0;
ptr = lq_recv;
@@ -113,7 +112,7 @@ static uint8_t batadv_ring_buffer_avg(const uint8_t lq_recv[])
if (count == 0)
return 0;
- return (uint8_t)(sum / count);
+ return (u8)(sum / count);
}
/**
@@ -155,14 +154,14 @@ static int batadv_iv_ogm_orig_add_if(struct batadv_orig_node *orig_node,
kfree(orig_node->bat_iv.bcast_own);
orig_node->bat_iv.bcast_own = data_ptr;
- data_ptr = kmalloc_array(max_if_num, sizeof(uint8_t), GFP_ATOMIC);
+ data_ptr = kmalloc_array(max_if_num, sizeof(u8), GFP_ATOMIC);
if (!data_ptr) {
kfree(orig_node->bat_iv.bcast_own);
goto unlock;
}
memcpy(data_ptr, orig_node->bat_iv.bcast_own_sum,
- (max_if_num - 1) * sizeof(uint8_t));
+ (max_if_num - 1) * sizeof(u8));
kfree(orig_node->bat_iv.bcast_own_sum);
orig_node->bat_iv.bcast_own_sum = data_ptr;
@@ -215,19 +214,19 @@ free_bcast_own:
if (max_if_num == 0)
goto free_own_sum;
- data_ptr = kmalloc_array(max_if_num, sizeof(uint8_t), GFP_ATOMIC);
+ data_ptr = kmalloc_array(max_if_num, sizeof(u8), GFP_ATOMIC);
if (!data_ptr) {
kfree(orig_node->bat_iv.bcast_own);
goto unlock;
}
memcpy(data_ptr, orig_node->bat_iv.bcast_own_sum,
- del_if_num * sizeof(uint8_t));
+ del_if_num * sizeof(u8));
- if_offset = (del_if_num + 1) * sizeof(uint8_t);
- memcpy((char *)data_ptr + del_if_num * sizeof(uint8_t),
+ if_offset = (del_if_num + 1) * sizeof(u8);
+ memcpy((char *)data_ptr + del_if_num * sizeof(u8),
orig_node->bat_iv.bcast_own_sum + if_offset,
- (max_if_num - del_if_num) * sizeof(uint8_t));
+ (max_if_num - del_if_num) * sizeof(u8));
free_own_sum:
kfree(orig_node->bat_iv.bcast_own_sum);
@@ -250,7 +249,7 @@ unlock:
* If the object does not exists it is created an initialised.
*/
static struct batadv_orig_node *
-batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const uint8_t *addr)
+batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const u8 *addr)
{
struct batadv_orig_node *orig_node;
int size, hash_added;
@@ -270,7 +269,7 @@ batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const uint8_t *addr)
if (!orig_node->bat_iv.bcast_own)
goto free_orig_node;
- size = bat_priv->num_ifaces * sizeof(uint8_t);
+ size = bat_priv->num_ifaces * sizeof(u8);
orig_node->bat_iv.bcast_own_sum = kzalloc(size, GFP_ATOMIC);
if (!orig_node->bat_iv.bcast_own_sum)
goto free_orig_node;
@@ -293,43 +292,17 @@ free_orig_node:
static struct batadv_neigh_node *
batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface,
- const uint8_t *neigh_addr,
+ const u8 *neigh_addr,
struct batadv_orig_node *orig_node,
struct batadv_orig_node *orig_neigh)
{
- struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
- struct batadv_neigh_node *neigh_node, *tmp_neigh_node;
+ struct batadv_neigh_node *neigh_node;
- neigh_node = batadv_neigh_node_new(hard_iface, neigh_addr, orig_node);
+ neigh_node = batadv_neigh_node_new(orig_node, hard_iface, neigh_addr);
if (!neigh_node)
goto out;
- if (!atomic_inc_not_zero(&hard_iface->refcount)) {
- kfree(neigh_node);
- neigh_node = NULL;
- goto out;
- }
-
neigh_node->orig_node = orig_neigh;
- neigh_node->if_incoming = hard_iface;
-
- spin_lock_bh(&orig_node->neigh_list_lock);
- tmp_neigh_node = batadv_neigh_node_get(orig_node, hard_iface,
- neigh_addr);
- if (!tmp_neigh_node) {
- hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list);
- } else {
- kfree(neigh_node);
- batadv_hardif_free_ref(hard_iface);
- neigh_node = tmp_neigh_node;
- }
- spin_unlock_bh(&orig_node->neigh_list_lock);
-
- if (!tmp_neigh_node)
- batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
- "Creating new neighbor %pM for orig_node %pM on interface %s\n",
- neigh_addr, orig_node->orig,
- hard_iface->net_dev->name);
out:
return neigh_node;
@@ -339,7 +312,7 @@ static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface)
{
struct batadv_ogm_packet *batadv_ogm_packet;
unsigned char *ogm_buff;
- uint32_t random_seqno;
+ u32 random_seqno;
/* randomize initial seqno to avoid collision */
get_random_bytes(&random_seqno, sizeof(random_seqno));
@@ -411,8 +384,7 @@ static unsigned long batadv_iv_ogm_fwd_send_time(void)
}
/* apply hop penalty for a normal link */
-static uint8_t batadv_hop_penalty(uint8_t tq,
- const struct batadv_priv *bat_priv)
+static u8 batadv_hop_penalty(u8 tq, const struct batadv_priv *bat_priv)
{
int hop_penalty = atomic_read(&bat_priv->hop_penalty);
int new_tq;
@@ -442,11 +414,11 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet,
{
struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
const char *fwd_str;
- uint8_t packet_num;
- int16_t buff_pos;
+ u8 packet_num;
+ s16 buff_pos;
struct batadv_ogm_packet *batadv_ogm_packet;
struct sk_buff *skb;
- uint8_t *packet_pos;
+ u8 *packet_pos;
if (hard_iface->if_status != BATADV_IF_ACTIVE)
return;
@@ -837,7 +809,7 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node,
struct batadv_hard_iface *if_outgoing)
{
struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
- uint16_t tvlv_len;
+ u16 tvlv_len;
if (batadv_ogm_packet->ttl <= 1) {
batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "ttl exceeded\n");
@@ -896,9 +868,9 @@ batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface)
struct hlist_head *head;
struct batadv_orig_node *orig_node;
unsigned long *word;
- uint32_t i;
+ u32 i;
size_t word_index;
- uint8_t *w;
+ u8 *w;
int if_num;
for (i = 0; i < hash->size; i++) {
@@ -927,8 +899,8 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
struct batadv_ogm_packet *batadv_ogm_packet;
struct batadv_hard_iface *primary_if, *tmp_hard_iface;
int *ogm_buff_len = &hard_iface->bat_iv.ogm_buff_len;
- uint32_t seqno;
- uint16_t tvlv_len = 0;
+ u32 seqno;
+ u16 tvlv_len = 0;
unsigned long send_time;
primary_if = batadv_primary_if_get_selected(bat_priv);
@@ -947,7 +919,7 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
batadv_ogm_packet->tvlv_len = htons(tvlv_len);
/* change sequence number to network order */
- seqno = (uint32_t)atomic_read(&hard_iface->bat_iv.ogm_seqno);
+ seqno = (u32)atomic_read(&hard_iface->bat_iv.ogm_seqno);
batadv_ogm_packet->seqno = htonl(seqno);
atomic_inc(&hard_iface->bat_iv.ogm_seqno);
@@ -970,7 +942,7 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
rcu_read_lock();
list_for_each_entry_rcu(tmp_hard_iface, &batadv_hardif_list, list) {
if (tmp_hard_iface->soft_iface != hard_iface->soft_iface)
- continue;
+ continue;
batadv_iv_ogm_queue_add(bat_priv, *ogm_buff,
*ogm_buff_len, hard_iface,
tmp_hard_iface, 1, send_time);
@@ -1006,13 +978,14 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
{
struct batadv_neigh_ifinfo *neigh_ifinfo = NULL;
struct batadv_neigh_ifinfo *router_ifinfo = NULL;
- struct batadv_neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL;
+ struct batadv_neigh_node *neigh_node = NULL;
+ struct batadv_neigh_node *tmp_neigh_node = NULL;
struct batadv_neigh_node *router = NULL;
struct batadv_orig_node *orig_node_tmp;
int if_num;
- uint8_t sum_orig, sum_neigh;
- uint8_t *neigh_addr;
- uint8_t tq_avg;
+ u8 sum_orig, sum_neigh;
+ u8 *neigh_addr;
+ u8 tq_avg;
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"update_originator(): Searching and updating originator entry of received packet\n");
@@ -1164,8 +1137,8 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
struct batadv_neigh_node *neigh_node = NULL, *tmp_neigh_node;
struct batadv_neigh_ifinfo *neigh_ifinfo;
- uint8_t total_count;
- uint8_t orig_eq_count, neigh_rq_count, neigh_rq_inv, tq_own;
+ u8 total_count;
+ u8 orig_eq_count, neigh_rq_count, neigh_rq_inv, tq_own;
unsigned int neigh_rq_inv_cube, neigh_rq_max_cube;
int tq_asym_penalty, inv_asym_penalty, if_num, ret = 0;
unsigned int combined_tq;
@@ -1311,13 +1284,13 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
struct batadv_neigh_node *neigh_node;
struct batadv_neigh_ifinfo *neigh_ifinfo;
int is_dup;
- int32_t seq_diff;
+ s32 seq_diff;
int need_update = 0;
int set_mark;
enum batadv_dup_status ret = BATADV_NO_DUP;
- uint32_t seqno = ntohl(batadv_ogm_packet->seqno);
- uint8_t *neigh_addr;
- uint8_t packet_count;
+ u32 seqno = ntohl(batadv_ogm_packet->seqno);
+ u8 *neigh_addr;
+ u8 packet_count;
unsigned long *bitmap;
orig_node = batadv_iv_ogm_orig_get(bat_priv, batadv_ogm_packet->orig);
@@ -1406,7 +1379,8 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
struct batadv_hard_iface *if_outgoing)
{
struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
- struct batadv_neigh_node *router = NULL, *router_router = NULL;
+ struct batadv_neigh_node *router = NULL;
+ struct batadv_neigh_node *router_router = NULL;
struct batadv_orig_node *orig_neigh_node;
struct batadv_orig_ifinfo *orig_ifinfo;
struct batadv_neigh_node *orig_neigh_router = NULL;
@@ -1418,7 +1392,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
bool sameseq, similar_ttl;
struct sk_buff *skb_priv;
struct ethhdr *ethhdr;
- uint8_t *prev_sender;
+ u8 *prev_sender;
int is_bidirect;
/* create a private copy of the skb, as some functions change tq value
@@ -1600,7 +1574,7 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset,
struct batadv_orig_node *orig_neigh_node, *orig_node;
struct batadv_hard_iface *hard_iface;
struct batadv_ogm_packet *ogm_packet;
- uint32_t if_incoming_seqno;
+ u32 if_incoming_seqno;
bool has_directlink_flag;
struct ethhdr *ethhdr;
bool is_my_oldorig = false;
@@ -1673,9 +1647,9 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset,
if (is_my_orig) {
unsigned long *word;
int offset;
- int32_t bit_pos;
- int16_t if_num;
- uint8_t *weight;
+ s32 bit_pos;
+ s16 if_num;
+ u8 *weight;
orig_neigh_node = batadv_iv_ogm_orig_get(bat_priv,
ethhdr->h_source);
@@ -1751,7 +1725,7 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb,
{
struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
struct batadv_ogm_packet *ogm_packet;
- uint8_t *packet_pos;
+ u8 *packet_pos;
int ogm_offset;
bool ret;
@@ -1835,7 +1809,7 @@ static void batadv_iv_ogm_orig_print(struct batadv_priv *bat_priv,
unsigned long last_seen_jiffies;
struct hlist_head *head;
int batman_count = 0;
- uint32_t i;
+ u32 i;
seq_printf(seq, " %-15s %s (%s/%i) %17s [%10s]: %20s ...\n",
"Originator", "last-seen", "#", BATADV_TQ_MAX_VALUE,
@@ -1903,7 +1877,7 @@ static int batadv_iv_ogm_neigh_cmp(struct batadv_neigh_node *neigh1,
struct batadv_hard_iface *if_outgoing2)
{
struct batadv_neigh_ifinfo *neigh1_ifinfo, *neigh2_ifinfo;
- uint8_t tq1, tq2;
+ u8 tq1, tq2;
int diff;
neigh1_ifinfo = batadv_neigh_ifinfo_get(neigh1, if_outgoing1);
@@ -1945,7 +1919,7 @@ batadv_iv_ogm_neigh_is_eob(struct batadv_neigh_node *neigh1,
struct batadv_hard_iface *if_outgoing2)
{
struct batadv_neigh_ifinfo *neigh1_ifinfo, *neigh2_ifinfo;
- uint8_t tq1, tq2;
+ u8 tq1, tq2;
bool ret;
neigh1_ifinfo = batadv_neigh_ifinfo_get(neigh1, if_outgoing1);
diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c
index cf68c328345e..25cbc36e997a 100644
--- a/net/batman-adv/bitarray.c
+++ b/net/batman-adv/bitarray.c
@@ -21,7 +21,7 @@
#include <linux/bitmap.h>
/* shift the packet array by n places. */
-static void batadv_bitmap_shift_left(unsigned long *seq_bits, int32_t n)
+static void batadv_bitmap_shift_left(unsigned long *seq_bits, s32 n)
{
if (n <= 0 || n >= BATADV_TQ_LOCAL_WINDOW_SIZE)
return;
@@ -35,8 +35,8 @@ static void batadv_bitmap_shift_left(unsigned long *seq_bits, int32_t n)
* 1 if the window was moved (either new or very old)
* 0 if the window was not moved/shifted.
*/
-int batadv_bit_get_packet(void *priv, unsigned long *seq_bits,
- int32_t seq_num_diff, int set_mark)
+int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, s32 seq_num_diff,
+ int set_mark)
{
struct batadv_priv *bat_priv = priv;
diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h
index 0c2456225fae..0226b220fe5b 100644
--- a/net/batman-adv/bitarray.h
+++ b/net/batman-adv/bitarray.h
@@ -28,9 +28,9 @@
* and curr_seqno is within range of last_seqno. Otherwise returns 0.
*/
static inline int batadv_test_bit(const unsigned long *seq_bits,
- uint32_t last_seqno, uint32_t curr_seqno)
+ u32 last_seqno, u32 curr_seqno)
{
- int32_t diff;
+ s32 diff;
diff = last_seqno - curr_seqno;
if (diff < 0 || diff >= BATADV_TQ_LOCAL_WINDOW_SIZE)
@@ -39,7 +39,7 @@ static inline int batadv_test_bit(const unsigned long *seq_bits,
}
/* turn corresponding bit on, so we can remember that we got the packet */
-static inline void batadv_set_bit(unsigned long *seq_bits, int32_t n)
+static inline void batadv_set_bit(unsigned long *seq_bits, s32 n)
{
/* if too old, just drop it */
if (n < 0 || n >= BATADV_TQ_LOCAL_WINDOW_SIZE)
@@ -51,7 +51,7 @@ static inline void batadv_set_bit(unsigned long *seq_bits, int32_t n)
/* receive and process one packet, returns 1 if received seq_num is considered
* new, 0 if old
*/
-int batadv_bit_get_packet(void *priv, unsigned long *seq_bits,
- int32_t seq_num_diff, int set_mark);
+int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, s32 seq_num_diff,
+ int set_mark);
#endif /* _NET_BATMAN_ADV_BITARRAY_H_ */
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index ba0609292ae7..191a70290dca 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -51,7 +51,7 @@
#include "packet.h"
#include "translation-table.h"
-static const uint8_t batadv_announce_mac[4] = {0x43, 0x05, 0x43, 0x05};
+static const u8 batadv_announce_mac[4] = {0x43, 0x05, 0x43, 0x05};
static void batadv_bla_periodic_work(struct work_struct *work);
static void
@@ -59,10 +59,10 @@ batadv_bla_send_announce(struct batadv_priv *bat_priv,
struct batadv_bla_backbone_gw *backbone_gw);
/* return the index of the claim */
-static inline uint32_t batadv_choose_claim(const void *data, uint32_t size)
+static inline u32 batadv_choose_claim(const void *data, u32 size)
{
struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data;
- uint32_t hash = 0;
+ u32 hash = 0;
hash = jhash(&claim->addr, sizeof(claim->addr), hash);
hash = jhash(&claim->vid, sizeof(claim->vid), hash);
@@ -71,11 +71,10 @@ static inline uint32_t batadv_choose_claim(const void *data, uint32_t size)
}
/* return the index of the backbone gateway */
-static inline uint32_t batadv_choose_backbone_gw(const void *data,
- uint32_t size)
+static inline u32 batadv_choose_backbone_gw(const void *data, u32 size)
{
const struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data;
- uint32_t hash = 0;
+ u32 hash = 0;
hash = jhash(&claim->addr, sizeof(claim->addr), hash);
hash = jhash(&claim->vid, sizeof(claim->vid), hash);
@@ -89,7 +88,8 @@ static int batadv_compare_backbone_gw(const struct hlist_node *node,
{
const void *data1 = container_of(node, struct batadv_bla_backbone_gw,
hash_entry);
- const struct batadv_bla_backbone_gw *gw1 = data1, *gw2 = data2;
+ const struct batadv_bla_backbone_gw *gw1 = data1;
+ const struct batadv_bla_backbone_gw *gw2 = data2;
if (!batadv_compare_eth(gw1->orig, gw2->orig))
return 0;
@@ -106,7 +106,8 @@ static int batadv_compare_claim(const struct hlist_node *node,
{
const void *data1 = container_of(node, struct batadv_bla_claim,
hash_entry);
- const struct batadv_bla_claim *cl1 = data1, *cl2 = data2;
+ const struct batadv_bla_claim *cl1 = data1;
+ const struct batadv_bla_claim *cl2 = data2;
if (!batadv_compare_eth(cl1->addr, cl2->addr))
return 0;
@@ -192,8 +193,8 @@ static struct batadv_bla_claim
* Returns claim if found or NULL otherwise.
*/
static struct batadv_bla_backbone_gw *
-batadv_backbone_hash_find(struct batadv_priv *bat_priv,
- uint8_t *addr, unsigned short vid)
+batadv_backbone_hash_find(struct batadv_priv *bat_priv, u8 *addr,
+ unsigned short vid)
{
struct batadv_hashtable *hash = bat_priv->bla.backbone_hash;
struct hlist_head *head;
@@ -269,14 +270,14 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw)
* @vid: the VLAN ID
* @claimtype: the type of the claim (CLAIM, UNCLAIM, ANNOUNCE, ...)
*/
-static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac,
+static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
unsigned short vid, int claimtype)
{
struct sk_buff *skb;
struct ethhdr *ethhdr;
struct batadv_hard_iface *primary_if;
struct net_device *soft_iface;
- uint8_t *hw_src;
+ u8 *hw_src;
struct batadv_bla_claim_dst local_claim_dest;
__be32 zeroip = 0;
@@ -304,13 +305,13 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac,
* with XX = claim type
* and YY:YY = group id
*/
- (uint8_t *)&local_claim_dest);
+ (u8 *)&local_claim_dest);
if (!skb)
goto out;
ethhdr = (struct ethhdr *)skb->data;
- hw_src = (uint8_t *)ethhdr + ETH_HLEN + sizeof(struct arphdr);
+ hw_src = (u8 *)ethhdr + ETH_HLEN + sizeof(struct arphdr);
/* now we pretend that the client would have sent this ... */
switch (claimtype) {
@@ -383,7 +384,7 @@ out:
* be found.
*/
static struct batadv_bla_backbone_gw *
-batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig,
+batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig,
unsigned short vid, bool own_backbone)
{
struct batadv_bla_backbone_gw *entry;
@@ -552,7 +553,7 @@ static void batadv_bla_send_request(struct batadv_bla_backbone_gw *backbone_gw)
static void batadv_bla_send_announce(struct batadv_priv *bat_priv,
struct batadv_bla_backbone_gw *backbone_gw)
{
- uint8_t mac[ETH_ALEN];
+ u8 mac[ETH_ALEN];
__be16 crc;
memcpy(mac, batadv_announce_mac, 4);
@@ -571,7 +572,7 @@ static void batadv_bla_send_announce(struct batadv_priv *bat_priv,
* @backbone_gw: the backbone gateway which claims it
*/
static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
- const uint8_t *mac, const unsigned short vid,
+ const u8 *mac, const unsigned short vid,
struct batadv_bla_backbone_gw *backbone_gw)
{
struct batadv_bla_claim *claim;
@@ -635,7 +636,7 @@ claim_free_ref:
* given mac address and vid.
*/
static void batadv_bla_del_claim(struct batadv_priv *bat_priv,
- const uint8_t *mac, const unsigned short vid)
+ const u8 *mac, const unsigned short vid)
{
struct batadv_bla_claim search_claim, *claim;
@@ -659,12 +660,11 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv,
}
/* check for ANNOUNCE frame, return 1 if handled */
-static int batadv_handle_announce(struct batadv_priv *bat_priv,
- uint8_t *an_addr, uint8_t *backbone_addr,
- unsigned short vid)
+static int batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr,
+ u8 *backbone_addr, unsigned short vid)
{
struct batadv_bla_backbone_gw *backbone_gw;
- uint16_t crc;
+ u16 crc;
if (memcmp(an_addr, batadv_announce_mac, 4) != 0)
return 0;
@@ -708,8 +708,8 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv,
/* check for REQUEST frame, return 1 if handled */
static int batadv_handle_request(struct batadv_priv *bat_priv,
struct batadv_hard_iface *primary_if,
- uint8_t *backbone_addr,
- struct ethhdr *ethhdr, unsigned short vid)
+ u8 *backbone_addr, struct ethhdr *ethhdr,
+ unsigned short vid)
{
/* check for REQUEST frame */
if (!batadv_compare_eth(backbone_addr, ethhdr->h_dest))
@@ -732,8 +732,8 @@ static int batadv_handle_request(struct batadv_priv *bat_priv,
/* check for UNCLAIM frame, return 1 if handled */
static int batadv_handle_unclaim(struct batadv_priv *bat_priv,
struct batadv_hard_iface *primary_if,
- uint8_t *backbone_addr,
- uint8_t *claim_addr, unsigned short vid)
+ u8 *backbone_addr, u8 *claim_addr,
+ unsigned short vid)
{
struct batadv_bla_backbone_gw *backbone_gw;
@@ -761,7 +761,7 @@ static int batadv_handle_unclaim(struct batadv_priv *bat_priv,
/* check for CLAIM frame, return 1 if handled */
static int batadv_handle_claim(struct batadv_priv *bat_priv,
struct batadv_hard_iface *primary_if,
- uint8_t *backbone_addr, uint8_t *claim_addr,
+ u8 *backbone_addr, u8 *claim_addr,
unsigned short vid)
{
struct batadv_bla_backbone_gw *backbone_gw;
@@ -805,10 +805,10 @@ static int batadv_handle_claim(struct batadv_priv *bat_priv,
*/
static int batadv_check_claim_group(struct batadv_priv *bat_priv,
struct batadv_hard_iface *primary_if,
- uint8_t *hw_src, uint8_t *hw_dst,
+ u8 *hw_src, u8 *hw_dst,
struct ethhdr *ethhdr)
{
- uint8_t *backbone_addr;
+ u8 *backbone_addr;
struct batadv_orig_node *orig_node;
struct batadv_bla_claim_dst *bla_dst, *bla_dst_own;
@@ -877,7 +877,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
struct sk_buff *skb)
{
struct batadv_bla_claim_dst *bla_dst, *bla_dst_own;
- uint8_t *hw_src, *hw_dst;
+ u8 *hw_src, *hw_dst;
struct vlan_hdr *vhdr, vhdr_buf;
struct ethhdr *ethhdr;
struct arphdr *arphdr;
@@ -923,7 +923,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
/* pskb_may_pull() may have modified the pointers, get ethhdr again */
ethhdr = eth_hdr(skb);
- arphdr = (struct arphdr *)((uint8_t *)ethhdr + headlen);
+ arphdr = (struct arphdr *)((u8 *)ethhdr + headlen);
/* Check whether the ARP frame carries a valid
* IP information
@@ -937,7 +937,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
if (arphdr->ar_pln != 4)
return 0;
- hw_src = (uint8_t *)arphdr + sizeof(struct arphdr);
+ hw_src = (u8 *)arphdr + sizeof(struct arphdr);
hw_dst = hw_src + ETH_ALEN + 4;
bla_dst = (struct batadv_bla_claim_dst *)hw_dst;
bla_dst_own = &bat_priv->bla.claim_dest;
@@ -1238,9 +1238,9 @@ static struct lock_class_key batadv_backbone_hash_lock_class_key;
int batadv_bla_init(struct batadv_priv *bat_priv)
{
int i;
- uint8_t claim_dest[ETH_ALEN] = {0xff, 0x43, 0x05, 0x00, 0x00, 0x00};
+ u8 claim_dest[ETH_ALEN] = {0xff, 0x43, 0x05, 0x00, 0x00, 0x00};
struct batadv_hard_iface *primary_if;
- uint16_t crc;
+ u16 crc;
unsigned long entrytime;
spin_lock_init(&bat_priv->bla.bcast_duplist_lock);
@@ -1368,7 +1368,7 @@ out:
*
* Returns true if orig is a backbone for this vid, false otherwise.
*/
-bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig,
+bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig,
unsigned short vid)
{
struct batadv_hashtable *hash = bat_priv->bla.backbone_hash;
@@ -1647,9 +1647,9 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset)
struct batadv_bla_claim *claim;
struct batadv_hard_iface *primary_if;
struct hlist_head *head;
- uint32_t i;
+ u32 i;
bool is_own;
- uint8_t *primary_addr;
+ u8 *primary_addr;
primary_if = batadv_seq_print_text_primary_if_get(seq);
if (!primary_if)
@@ -1692,9 +1692,9 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset)
struct batadv_hard_iface *primary_if;
struct hlist_head *head;
int secs, msecs;
- uint32_t i;
+ u32 i;
bool is_own;
- uint8_t *primary_addr;
+ u8 *primary_addr;
primary_if = batadv_seq_print_text_primary_if_get(seq);
if (!primary_if)
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index 0282690389ac..025152b34282 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -22,9 +22,6 @@
#include <linux/types.h>
-struct batadv_hard_iface;
-struct batadv_orig_node;
-struct batadv_priv;
struct seq_file;
struct sk_buff;
@@ -38,7 +35,7 @@ int batadv_bla_is_backbone_gw(struct sk_buff *skb,
int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset);
int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq,
void *offset);
-bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig,
+bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig,
unsigned short vid);
int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
struct sk_buff *skb);
@@ -84,8 +81,7 @@ static inline int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq,
}
static inline bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv,
- uint8_t *orig,
- unsigned short vid)
+ u8 *orig, unsigned short vid)
{
return false;
}
diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h
index 187acdc85dfa..80ab8d6f0ab3 100644
--- a/net/batman-adv/debugfs.h
+++ b/net/batman-adv/debugfs.h
@@ -22,7 +22,6 @@
#include <linux/kconfig.h>
-struct batadv_hard_iface;
struct net_device;
#define BATADV_DEBUGFS_SUBDIR "batman_adv"
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index cc7d87d64987..83bc1aaf5800 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -102,7 +102,7 @@ static void __batadv_dat_purge(struct batadv_priv *bat_priv,
struct batadv_dat_entry *dat_entry;
struct hlist_node *node_tmp;
struct hlist_head *head;
- uint32_t i;
+ u32 i;
if (!bat_priv->dat.hash)
return;
@@ -168,11 +168,11 @@ static int batadv_compare_dat(const struct hlist_node *node, const void *data2)
*
* Returns the value of the hw_src field in the ARP packet.
*/
-static uint8_t *batadv_arp_hw_src(struct sk_buff *skb, int hdr_size)
+static u8 *batadv_arp_hw_src(struct sk_buff *skb, int hdr_size)
{
- uint8_t *addr;
+ u8 *addr;
- addr = (uint8_t *)(skb->data + hdr_size);
+ addr = (u8 *)(skb->data + hdr_size);
addr += ETH_HLEN + sizeof(struct arphdr);
return addr;
@@ -197,7 +197,7 @@ static __be32 batadv_arp_ip_src(struct sk_buff *skb, int hdr_size)
*
* Returns the value of the hw_dst field in the ARP packet.
*/
-static uint8_t *batadv_arp_hw_dst(struct sk_buff *skb, int hdr_size)
+static u8 *batadv_arp_hw_dst(struct sk_buff *skb, int hdr_size)
{
return batadv_arp_hw_src(skb, hdr_size) + ETH_ALEN + 4;
}
@@ -221,12 +221,12 @@ static __be32 batadv_arp_ip_dst(struct sk_buff *skb, int hdr_size)
*
* Returns the selected index in the hash table for the given data.
*/
-static uint32_t batadv_hash_dat(const void *data, uint32_t size)
+static u32 batadv_hash_dat(const void *data, u32 size)
{
- uint32_t hash = 0;
+ u32 hash = 0;
const struct batadv_dat_entry *dat = data;
const unsigned char *key;
- uint32_t i;
+ u32 i;
key = (const unsigned char *)&dat->ip;
for (i = 0; i < sizeof(dat->ip); i++) {
@@ -265,7 +265,7 @@ batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip,
struct hlist_head *head;
struct batadv_dat_entry to_find, *dat_entry, *dat_entry_tmp = NULL;
struct batadv_hashtable *hash = bat_priv->dat.hash;
- uint32_t index;
+ u32 index;
if (!hash)
return NULL;
@@ -300,7 +300,7 @@ batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip,
* @vid: VLAN identifier
*/
static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip,
- uint8_t *mac_addr, unsigned short vid)
+ u8 *mac_addr, unsigned short vid)
{
struct batadv_dat_entry *dat_entry;
int hash_added;
@@ -357,11 +357,11 @@ out:
* @msg: message to print together with the debugging information
*/
static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb,
- uint16_t type, int hdr_size, char *msg)
+ u16 type, int hdr_size, char *msg)
{
struct batadv_unicast_4addr_packet *unicast_4addr_packet;
struct batadv_bcast_packet *bcast_pkt;
- uint8_t *orig_addr;
+ u8 *orig_addr;
__be32 ip_src, ip_dst;
if (msg)
@@ -424,7 +424,7 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb,
#else
static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb,
- uint16_t type, int hdr_size, char *msg)
+ u16 type, int hdr_size, char *msg)
{
}
@@ -497,7 +497,8 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv,
int select, batadv_dat_addr_t ip_key,
batadv_dat_addr_t *last_max)
{
- batadv_dat_addr_t max = 0, tmp_max = 0;
+ batadv_dat_addr_t max = 0;
+ batadv_dat_addr_t tmp_max = 0;
struct batadv_orig_node *orig_node, *max_orig_node = NULL;
struct batadv_hashtable *hash = bat_priv->orig_hash;
struct hlist_head *head;
@@ -709,9 +710,8 @@ void batadv_dat_status_update(struct net_device *net_dev)
*/
static void batadv_dat_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig,
- uint8_t flags,
- void *tvlv_value,
- uint16_t tvlv_value_len)
+ u8 flags,
+ void *tvlv_value, u16 tvlv_value_len)
{
if (flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND)
clear_bit(BATADV_ORIG_CAPA_HAS_DAT, &orig->capabilities);
@@ -787,7 +787,7 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset)
struct hlist_head *head;
unsigned long last_seen_jiffies;
int last_seen_msecs, last_seen_secs, last_seen_mins;
- uint32_t i;
+ u32 i;
primary_if = batadv_seq_print_text_primary_if_get(seq);
if (!primary_if)
@@ -830,14 +830,14 @@ out:
*
* Returns the ARP type if the skb contains a valid ARP packet, 0 otherwise.
*/
-static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv,
- struct sk_buff *skb, int hdr_size)
+static u16 batadv_arp_get_type(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, int hdr_size)
{
struct arphdr *arphdr;
struct ethhdr *ethhdr;
__be32 ip_src, ip_dst;
- uint8_t *hw_src, *hw_dst;
- uint16_t type = 0;
+ u8 *hw_src, *hw_dst;
+ u16 type = 0;
/* pull the ethernet header */
if (unlikely(!pskb_may_pull(skb, hdr_size + ETH_HLEN)))
@@ -934,9 +934,9 @@ static unsigned short batadv_dat_get_vid(struct sk_buff *skb, int *hdr_size)
bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
struct sk_buff *skb)
{
- uint16_t type = 0;
+ u16 type = 0;
__be32 ip_dst, ip_src;
- uint8_t *hw_src;
+ u8 *hw_src;
bool ret = false;
struct batadv_dat_entry *dat_entry = NULL;
struct sk_buff *skb_new;
@@ -1022,9 +1022,9 @@ out:
bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
struct sk_buff *skb, int hdr_size)
{
- uint16_t type;
+ u16 type;
__be32 ip_src, ip_dst;
- uint8_t *hw_src;
+ u8 *hw_src;
struct sk_buff *skb_new;
struct batadv_dat_entry *dat_entry = NULL;
bool ret = false;
@@ -1100,9 +1100,9 @@ out:
void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv,
struct sk_buff *skb)
{
- uint16_t type;
+ u16 type;
__be32 ip_src, ip_dst;
- uint8_t *hw_src, *hw_dst;
+ u8 *hw_src, *hw_dst;
int hdr_size = 0;
unsigned short vid;
@@ -1146,9 +1146,9 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv,
bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
struct sk_buff *skb, int hdr_size)
{
- uint16_t type;
+ u16 type;
__be32 ip_src, ip_dst;
- uint8_t *hw_src, *hw_dst;
+ u8 *hw_src, *hw_dst;
bool dropped = false;
unsigned short vid;
@@ -1202,7 +1202,7 @@ out:
bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv,
struct batadv_forw_packet *forw_packet)
{
- uint16_t type;
+ u16 type;
__be32 ip_dst;
struct batadv_dat_entry *dat_entry = NULL;
bool ret = false;
diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h
index 3181507ebc14..26d4a525a798 100644
--- a/net/batman-adv/distributed-arp-table.h
+++ b/net/batman-adv/distributed-arp-table.h
@@ -54,7 +54,7 @@ bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv,
static inline void
batadv_dat_init_orig_node_addr(struct batadv_orig_node *orig_node)
{
- uint32_t addr;
+ u32 addr;
addr = batadv_choose_orig(orig_node->orig, BATADV_DAT_ADDR_MAX);
orig_node->dat_addr = (batadv_dat_addr_t)addr;
@@ -69,7 +69,7 @@ static inline void
batadv_dat_init_own_addr(struct batadv_priv *bat_priv,
struct batadv_hard_iface *primary_if)
{
- uint32_t addr;
+ u32 addr;
addr = batadv_choose_orig(primary_if->net_dev->dev_addr,
BATADV_DAT_ADDR_MAX);
@@ -89,7 +89,7 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset);
* Updates the ethtool statistics for the received packet if it is a DAT subtype
*/
static inline void batadv_dat_inc_counter(struct batadv_priv *bat_priv,
- uint8_t subtype)
+ u8 subtype)
{
switch (subtype) {
case BATADV_P_DAT_DHT_GET:
@@ -169,7 +169,7 @@ static inline void batadv_dat_free(struct batadv_priv *bat_priv)
}
static inline void batadv_dat_inc_counter(struct batadv_priv *bat_priv,
- uint8_t subtype)
+ u8 subtype)
{
}
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index c0f0d01ab244..700c96c82a15 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -25,6 +25,7 @@
#include <linux/if_ether.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
+#include <linux/lockdep.h>
#include <linux/netdevice.h>
#include <linux/pkt_sched.h>
#include <linux/skbuff.h>
@@ -66,7 +67,7 @@ void batadv_frag_purge_orig(struct batadv_orig_node *orig_node,
bool (*check_cb)(struct batadv_frag_table_entry *))
{
struct batadv_frag_table_entry *chain;
- uint8_t i;
+ u8 i;
for (i = 0; i < BATADV_FRAG_BUFFER_COUNT; i++) {
chain = &orig_node->fragments[i];
@@ -110,8 +111,10 @@ static int batadv_frag_size_limit(void)
* without searching for the right position.
*/
static bool batadv_frag_init_chain(struct batadv_frag_table_entry *chain,
- uint16_t seqno)
+ u16 seqno)
{
+ lockdep_assert_held(&chain->lock);
+
if (chain->seqno == seqno)
return false;
@@ -145,8 +148,8 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
struct batadv_frag_list_entry *frag_entry_new = NULL, *frag_entry_curr;
struct batadv_frag_list_entry *frag_entry_last = NULL;
struct batadv_frag_packet *frag_packet;
- uint8_t bucket;
- uint16_t seqno, hdr_size = sizeof(struct batadv_frag_packet);
+ u8 bucket;
+ u16 seqno, hdr_size = sizeof(struct batadv_frag_packet);
bool ret = false;
/* Linearize packet to avoid linearizing 16 packets in a row when doing
@@ -351,7 +354,7 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb,
struct batadv_orig_node *orig_node_dst = NULL;
struct batadv_neigh_node *neigh_node = NULL;
struct batadv_frag_packet *packet;
- uint16_t total_size;
+ u16 total_size;
bool ret = false;
packet = (struct batadv_frag_packet *)skb->data;
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 6012e2b4af4f..e6c8382c79ba 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -27,7 +27,6 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
-#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/netdevice.h>
@@ -153,16 +152,14 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
struct batadv_neigh_node *router;
struct batadv_neigh_ifinfo *router_ifinfo;
struct batadv_gw_node *gw_node, *curr_gw = NULL;
- uint64_t max_gw_factor = 0, tmp_gw_factor = 0;
- uint8_t max_tq = 0;
- uint8_t tq_avg;
+ u64 max_gw_factor = 0;
+ u64 tmp_gw_factor = 0;
+ u8 max_tq = 0;
+ u8 tq_avg;
struct batadv_orig_node *orig_node;
rcu_read_lock();
hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
- if (gw_node->deleted)
- continue;
-
orig_node = gw_node->orig_node;
router = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT);
if (!router)
@@ -263,7 +260,8 @@ void batadv_gw_check_client_stop(struct batadv_priv *bat_priv)
void batadv_gw_election(struct batadv_priv *bat_priv)
{
- struct batadv_gw_node *curr_gw = NULL, *next_gw = NULL;
+ struct batadv_gw_node *curr_gw = NULL;
+ struct batadv_gw_node *next_gw = NULL;
struct batadv_neigh_node *router = NULL;
struct batadv_neigh_ifinfo *router_ifinfo = NULL;
char gw_addr[18] = { '\0' };
@@ -347,8 +345,9 @@ void batadv_gw_check_election(struct batadv_priv *bat_priv,
struct batadv_neigh_ifinfo *router_orig_tq = NULL;
struct batadv_neigh_ifinfo *router_gw_tq = NULL;
struct batadv_orig_node *curr_gw_orig;
- struct batadv_neigh_node *router_gw = NULL, *router_orig = NULL;
- uint8_t gw_tq_avg, orig_tq_avg;
+ struct batadv_neigh_node *router_gw = NULL;
+ struct batadv_neigh_node *router_orig = NULL;
+ u8 gw_tq_avg, orig_tq_avg;
curr_gw_orig = batadv_gw_get_selected_orig(bat_priv);
if (!curr_gw_orig)
@@ -470,9 +469,6 @@ batadv_gw_node_get(struct batadv_priv *bat_priv,
if (gw_node_tmp->orig_node != orig_node)
continue;
- if (gw_node_tmp->deleted)
- continue;
-
if (!atomic_inc_not_zero(&gw_node_tmp->refcount))
continue;
@@ -522,9 +518,7 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv,
gw_node->bandwidth_down = ntohl(gateway->bandwidth_down);
gw_node->bandwidth_up = ntohl(gateway->bandwidth_up);
- gw_node->deleted = 0;
if (ntohl(gateway->bandwidth_down) == 0) {
- gw_node->deleted = jiffies;
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"Gateway %pM removed from gateway list\n",
orig_node->orig);
@@ -532,14 +526,21 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv,
/* Note: We don't need a NULL check here, since curr_gw never
* gets dereferenced.
*/
+ spin_lock_bh(&bat_priv->gw.list_lock);
+ hlist_del_init_rcu(&gw_node->list);
+ spin_unlock_bh(&bat_priv->gw.list_lock);
+
+ batadv_gw_node_free_ref(gw_node);
+
curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
if (gw_node == curr_gw)
batadv_gw_reselect(bat_priv);
+
+ if (curr_gw)
+ batadv_gw_node_free_ref(curr_gw);
}
out:
- if (curr_gw)
- batadv_gw_node_free_ref(curr_gw);
if (gw_node)
batadv_gw_node_free_ref(gw_node);
}
@@ -555,39 +556,18 @@ void batadv_gw_node_delete(struct batadv_priv *bat_priv,
batadv_gw_node_update(bat_priv, orig_node, &gateway);
}
-void batadv_gw_node_purge(struct batadv_priv *bat_priv)
+void batadv_gw_node_free(struct batadv_priv *bat_priv)
{
- struct batadv_gw_node *gw_node, *curr_gw;
+ struct batadv_gw_node *gw_node;
struct hlist_node *node_tmp;
- unsigned long timeout = msecs_to_jiffies(2 * BATADV_PURGE_TIMEOUT);
- int do_reselect = 0;
-
- curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
spin_lock_bh(&bat_priv->gw.list_lock);
-
hlist_for_each_entry_safe(gw_node, node_tmp,
&bat_priv->gw.list, list) {
- if (((!gw_node->deleted) ||
- (time_before(jiffies, gw_node->deleted + timeout))) &&
- atomic_read(&bat_priv->mesh_state) == BATADV_MESH_ACTIVE)
- continue;
-
- if (curr_gw == gw_node)
- do_reselect = 1;
-
- hlist_del_rcu(&gw_node->list);
+ hlist_del_init_rcu(&gw_node->list);
batadv_gw_node_free_ref(gw_node);
}
-
spin_unlock_bh(&bat_priv->gw.list_lock);
-
- /* gw_reselect() needs to acquire the gw_list_lock */
- if (do_reselect)
- batadv_gw_reselect(bat_priv);
-
- if (curr_gw)
- batadv_gw_node_free_ref(curr_gw);
}
/* fails if orig_node has no router */
@@ -651,9 +631,6 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset)
rcu_read_lock();
hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
- if (gw_node->deleted)
- continue;
-
/* fails if orig_node has no router */
if (batadv_write_buffer_text(bat_priv, seq, gw_node) < 0)
continue;
@@ -688,7 +665,7 @@ out:
*/
enum batadv_dhcp_recipient
batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len,
- uint8_t *chaddr)
+ u8 *chaddr)
{
enum batadv_dhcp_recipient ret = BATADV_DHCP_NO;
struct ethhdr *ethhdr;
@@ -698,7 +675,7 @@ batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len,
struct vlan_ethhdr *vhdr;
int chaddr_offset;
__be16 proto;
- uint8_t *p;
+ u8 *p;
/* check for ethernet header */
if (!pskb_may_pull(skb, *header_len + ETH_HLEN))
@@ -808,13 +785,15 @@ batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len,
bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
struct sk_buff *skb)
{
- struct batadv_neigh_node *neigh_curr = NULL, *neigh_old = NULL;
+ struct batadv_neigh_node *neigh_curr = NULL;
+ struct batadv_neigh_node *neigh_old = NULL;
struct batadv_orig_node *orig_dst_node = NULL;
- struct batadv_gw_node *gw_node = NULL, *curr_gw = NULL;
+ struct batadv_gw_node *gw_node = NULL;
+ struct batadv_gw_node *curr_gw = NULL;
struct batadv_neigh_ifinfo *curr_ifinfo, *old_ifinfo;
struct ethhdr *ethhdr = (struct ethhdr *)skb->data;
bool out_of_range = false;
- uint8_t curr_tq_avg;
+ u8 curr_tq_avg;
unsigned short vid;
vid = batadv_get_vid(skb, 0);
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index 89565b451c18..fa9527785ed3 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -38,11 +38,11 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv,
struct batadv_tvlv_gateway_data *gateway);
void batadv_gw_node_delete(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node);
-void batadv_gw_node_purge(struct batadv_priv *bat_priv);
+void batadv_gw_node_free(struct batadv_priv *bat_priv);
int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset);
bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, struct sk_buff *skb);
enum batadv_dhcp_recipient
batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len,
- uint8_t *chaddr);
+ u8 *chaddr);
#endif /* _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ */
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index 39cf44ccebd4..0cb5e6b6f6d4 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -19,8 +19,10 @@
#include "main.h"
#include <linux/atomic.h>
+#include <linux/errno.h>
#include <linux/byteorder/generic.h>
#include <linux/kernel.h>
+#include <linux/math64.h>
#include <linux/netdevice.h>
#include <linux/stddef.h>
#include <linux/string.h>
@@ -39,11 +41,11 @@
* Returns false on parse error and true otherwise.
*/
static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff,
- uint32_t *down, uint32_t *up)
+ u32 *down, u32 *up)
{
enum batadv_bandwidth_units bw_unit_type = BATADV_BW_UNIT_KBIT;
char *slash_ptr, *tmp_ptr;
- long ldown, lup;
+ u64 ldown, lup;
int ret;
slash_ptr = strchr(buff, '/');
@@ -61,7 +63,7 @@ static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff,
*tmp_ptr = '\0';
}
- ret = kstrtol(buff, 10, &ldown);
+ ret = kstrtou64(buff, 10, &ldown);
if (ret) {
batadv_err(net_dev,
"Download speed of gateway mode invalid: %s\n",
@@ -71,14 +73,31 @@ static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff,
switch (bw_unit_type) {
case BATADV_BW_UNIT_MBIT:
- *down = ldown * 10;
+ /* prevent overflow */
+ if (U64_MAX / 10 < ldown) {
+ batadv_err(net_dev,
+ "Download speed of gateway mode too large: %s\n",
+ buff);
+ return false;
+ }
+
+ ldown *= 10;
break;
case BATADV_BW_UNIT_KBIT:
default:
- *down = ldown / 100;
+ ldown = div_u64(ldown, 100);
break;
}
+ if (U32_MAX < ldown) {
+ batadv_err(net_dev,
+ "Download speed of gateway mode too large: %s\n",
+ buff);
+ return false;
+ }
+
+ *down = ldown;
+
/* we also got some upload info */
if (slash_ptr) {
bw_unit_type = BATADV_BW_UNIT_KBIT;
@@ -94,7 +113,7 @@ static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff,
*tmp_ptr = '\0';
}
- ret = kstrtol(slash_ptr + 1, 10, &lup);
+ ret = kstrtou64(slash_ptr + 1, 10, &lup);
if (ret) {
batadv_err(net_dev,
"Upload speed of gateway mode invalid: %s\n",
@@ -104,13 +123,30 @@ static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff,
switch (bw_unit_type) {
case BATADV_BW_UNIT_MBIT:
- *up = lup * 10;
+ /* prevent overflow */
+ if (U64_MAX / 10 < lup) {
+ batadv_err(net_dev,
+ "Upload speed of gateway mode too large: %s\n",
+ slash_ptr + 1);
+ return false;
+ }
+
+ lup *= 10;
break;
case BATADV_BW_UNIT_KBIT:
default:
- *up = lup / 100;
+ lup = div_u64(lup, 100);
break;
}
+
+ if (U32_MAX < lup) {
+ batadv_err(net_dev,
+ "Upload speed of gateway mode too large: %s\n",
+ slash_ptr + 1);
+ return false;
+ }
+
+ *up = lup;
}
return true;
@@ -124,7 +160,7 @@ static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff,
void batadv_gw_tvlv_container_update(struct batadv_priv *bat_priv)
{
struct batadv_tvlv_gateway_data gw;
- uint32_t down, up;
+ u32 down, up;
char gw_mode;
gw_mode = atomic_read(&bat_priv->gw_mode);
@@ -149,7 +185,10 @@ ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff,
size_t count)
{
struct batadv_priv *bat_priv = netdev_priv(net_dev);
- uint32_t down_curr, up_curr, down_new = 0, up_new = 0;
+ u32 down_curr;
+ u32 up_curr;
+ u32 down_new = 0;
+ u32 up_new = 0;
bool ret;
down_curr = (unsigned int)atomic_read(&bat_priv->gw.bandwidth_down);
@@ -157,7 +196,7 @@ ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff,
ret = batadv_parse_gw_bandwidth(net_dev, buff, &down_new, &up_new);
if (!ret)
- goto end;
+ return -EINVAL;
if (!down_new)
down_new = 1;
@@ -181,7 +220,6 @@ ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff,
atomic_set(&bat_priv->gw.bandwidth_up, up_new);
batadv_gw_tvlv_container_update(bat_priv);
-end:
return count;
}
@@ -195,9 +233,8 @@ end:
*/
static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig,
- uint8_t flags,
- void *tvlv_value,
- uint16_t tvlv_value_len)
+ u8 flags,
+ void *tvlv_value, u16 tvlv_value_len)
{
struct batadv_tvlv_gateway_data gateway, *gateway_ptr;
diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h
index bd5c812cebf4..ab893e318229 100644
--- a/net/batman-adv/gateway_common.h
+++ b/net/batman-adv/gateway_common.h
@@ -22,7 +22,6 @@
#include <linux/types.h>
-struct batadv_priv;
struct net_device;
enum batadv_gw_modes {
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index f4a15d2e5eaf..f11345e163d7 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -252,6 +252,44 @@ static void batadv_check_known_mac_addr(const struct net_device *net_dev)
rcu_read_unlock();
}
+/**
+ * batadv_hardif_recalc_extra_skbroom() - Recalculate skbuff extra head/tailroom
+ * @soft_iface: netdev struct of the mesh interface
+ *
+ * Walks batadv_hardif_list under rcu_read_lock() and, for every hard interface
+ * that is attached to @soft_iface and whose if_status is not
+ * BATADV_IF_NOT_IN_USE, tracks the largest hard_header_len (starting from
+ * ETH_HLEN, so never less than that), the largest needed_headroom and the
+ * largest needed_tailroom of the underlying net_device.
+ *
+ * Afterwards soft_iface->needed_headroom is set to the largest lower headroom
+ * plus the part of the largest lower header that exceeds ETH_HLEN plus
+ * batadv_max_header_len(), and soft_iface->needed_tailroom is set to the
+ * largest lower tailroom. With no matching hard interface this reduces to
+ * batadv_max_header_len() of headroom and zero tailroom.
+ */
+static void batadv_hardif_recalc_extra_skbroom(struct net_device *soft_iface)
+{
+ const struct batadv_hard_iface *hard_iface;
+ unsigned short lower_header_len = ETH_HLEN;
+ unsigned short lower_headroom = 0;
+ unsigned short lower_tailroom = 0;
+ unsigned short needed_headroom;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
+ if (hard_iface->if_status == BATADV_IF_NOT_IN_USE)
+ continue;
+
+ if (hard_iface->soft_iface != soft_iface)
+ continue;
+
+ lower_header_len = max_t(unsigned short, lower_header_len,
+ hard_iface->net_dev->hard_header_len);
+
+ lower_headroom = max_t(unsigned short, lower_headroom,
+ hard_iface->net_dev->needed_headroom);
+
+ lower_tailroom = max_t(unsigned short, lower_tailroom,
+ hard_iface->net_dev->needed_tailroom);
+ }
+ rcu_read_unlock();
+
+ needed_headroom = lower_headroom + (lower_header_len - ETH_HLEN);
+ needed_headroom += batadv_max_header_len();
+
+ soft_iface->needed_headroom = needed_headroom;
+ soft_iface->needed_tailroom = lower_tailroom;
+}
+
int batadv_hardif_min_mtu(struct net_device *soft_iface)
{
struct batadv_priv *bat_priv = netdev_priv(soft_iface);
@@ -474,6 +512,8 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
"Not using interface %s (retrying later): interface not active\n",
hard_iface->net_dev->name);
+ batadv_hardif_recalc_extra_skbroom(soft_iface);
+
/* begin scheduling originator messages on that interface */
batadv_schedule_bat_ogm(hard_iface);
@@ -528,6 +568,9 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
batadv_purge_outstanding_packets(bat_priv, hard_iface);
dev_put(hard_iface->soft_iface);
+ netdev_upper_dev_unlink(hard_iface->net_dev, hard_iface->soft_iface);
+ batadv_hardif_recalc_extra_skbroom(hard_iface->soft_iface);
+
/* nobody uses this interface anymore */
if (!bat_priv->num_ifaces) {
batadv_gw_check_client_stop(bat_priv);
@@ -536,7 +579,6 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
batadv_softif_destroy_sysfs(hard_iface->soft_iface);
}
- netdev_upper_dev_unlink(hard_iface->net_dev, hard_iface->soft_iface);
hard_iface->soft_iface = NULL;
batadv_hardif_free_ref(hard_iface);
diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c
index e89f3146b092..2ea6a18d793f 100644
--- a/net/batman-adv/hash.c
+++ b/net/batman-adv/hash.c
@@ -25,7 +25,7 @@
/* clears the hash */
static void batadv_hash_init(struct batadv_hashtable *hash)
{
- uint32_t i;
+ u32 i;
for (i = 0; i < hash->size; i++) {
INIT_HLIST_HEAD(&hash->table[i]);
@@ -42,7 +42,7 @@ void batadv_hash_destroy(struct batadv_hashtable *hash)
}
/* allocates and clears the hash */
-struct batadv_hashtable *batadv_hash_new(uint32_t size)
+struct batadv_hashtable *batadv_hash_new(u32 size)
{
struct batadv_hashtable *hash;
@@ -73,7 +73,7 @@ free_hash:
void batadv_hash_set_lock_class(struct batadv_hashtable *hash,
struct lock_class_key *key)
{
- uint32_t i;
+ u32 i;
for (i = 0; i < hash->size; i++)
lockdep_set_class(&hash->list_locks[i], key);
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
index 5065f50c9c3c..377626250ac7 100644
--- a/net/batman-adv/hash.h
+++ b/net/batman-adv/hash.h
@@ -39,17 +39,17 @@ typedef int (*batadv_hashdata_compare_cb)(const struct hlist_node *,
* based on the key in the data of the first
* argument and the size the second
*/
-typedef uint32_t (*batadv_hashdata_choose_cb)(const void *, uint32_t);
+typedef u32 (*batadv_hashdata_choose_cb)(const void *, u32);
typedef void (*batadv_hashdata_free_cb)(struct hlist_node *, void *);
struct batadv_hashtable {
struct hlist_head *table; /* the hashtable itself with the buckets */
spinlock_t *list_locks; /* spinlock for each hash list entry */
- uint32_t size; /* size of hashtable */
+ u32 size; /* size of hashtable */
};
/* allocates and clears the hash */
-struct batadv_hashtable *batadv_hash_new(uint32_t size);
+struct batadv_hashtable *batadv_hash_new(u32 size);
/* set class key for all locks */
void batadv_hash_set_lock_class(struct batadv_hashtable *hash,
@@ -69,7 +69,7 @@ static inline void batadv_hash_delete(struct batadv_hashtable *hash,
struct hlist_head *head;
struct hlist_node *node, *node_tmp;
spinlock_t *list_lock; /* spinlock to protect write access */
- uint32_t i;
+ u32 i;
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
@@ -105,7 +105,7 @@ static inline int batadv_hash_add(struct batadv_hashtable *hash,
const void *data,
struct hlist_node *data_node)
{
- uint32_t index;
+ u32 index;
int ret = -1;
struct hlist_head *head;
struct hlist_node *node;
@@ -149,7 +149,7 @@ static inline void *batadv_hash_remove(struct batadv_hashtable *hash,
batadv_hashdata_choose_cb choose,
void *data)
{
- uint32_t index;
+ u32 index;
struct hlist_node *node;
struct hlist_head *head;
void *data_save = NULL;
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 07061bcbaa04..bcabb5e3f4d3 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -183,7 +183,7 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff,
struct batadv_orig_node *orig_node = NULL;
struct batadv_neigh_node *neigh_node = NULL;
size_t packet_len = sizeof(struct batadv_icmp_packet);
- uint8_t *addr;
+ u8 *addr;
if (len < sizeof(struct batadv_icmp_header)) {
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
@@ -337,8 +337,8 @@ err:
}
/**
- * batadv_socket_receive_packet - schedule an icmp packet to be sent to userspace
- * on an icmp socket.
+ * batadv_socket_receive_packet - schedule an icmp packet to be sent to
+ * userspace on an icmp socket.
* @socket_client: the socket this packet belongs to
* @icmph: pointer to the header of the icmp packet
* @icmp_len: total length of the icmp packet
diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h
index 7de7fce4b48c..e937143f0b10 100644
--- a/net/batman-adv/icmp_socket.h
+++ b/net/batman-adv/icmp_socket.h
@@ -23,7 +23,6 @@
#include <linux/types.h>
struct batadv_icmp_header;
-struct batadv_priv;
#define BATADV_ICMP_SOCKET "socket"
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 8457097f1643..d7f17c1aa4a4 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -30,6 +30,7 @@
#include <linux/ipv6.h>
#include <linux/kernel.h>
#include <linux/list.h>
+#include <linux/lockdep.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/netdevice.h>
@@ -148,7 +149,7 @@ int batadv_mesh_init(struct net_device *soft_iface)
INIT_HLIST_HEAD(&bat_priv->mcast.want_all_ipv6_list);
#endif
INIT_LIST_HEAD(&bat_priv->tt.changes_list);
- INIT_LIST_HEAD(&bat_priv->tt.req_list);
+ INIT_HLIST_HEAD(&bat_priv->tt.req_list);
INIT_LIST_HEAD(&bat_priv->tt.roam_list);
#ifdef CONFIG_BATMAN_ADV_MCAST
INIT_HLIST_HEAD(&bat_priv->mcast.mla_list);
@@ -198,7 +199,7 @@ void batadv_mesh_free(struct net_device *soft_iface)
batadv_purge_outstanding_packets(bat_priv, NULL);
- batadv_gw_node_purge(bat_priv);
+ batadv_gw_node_free(bat_priv);
batadv_nc_mesh_free(bat_priv);
batadv_dat_free(bat_priv);
batadv_bla_free(bat_priv);
@@ -234,7 +235,7 @@ void batadv_mesh_free(struct net_device *soft_iface)
*
* Returns 'true' if the mac address was found, false otherwise.
*/
-bool batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr)
+bool batadv_is_my_mac(struct batadv_priv *bat_priv, const u8 *addr)
{
const struct batadv_hard_iface *hard_iface;
bool is_my_mac = false;
@@ -387,7 +388,7 @@ int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
struct batadv_priv *bat_priv;
struct batadv_ogm_packet *batadv_ogm_packet;
struct batadv_hard_iface *hard_iface;
- uint8_t idx;
+ u8 idx;
int ret;
hard_iface = container_of(ptype, struct batadv_hard_iface,
@@ -496,7 +497,7 @@ static void batadv_recv_handler_init(void)
}
int
-batadv_recv_handler_register(uint8_t packet_type,
+batadv_recv_handler_register(u8 packet_type,
int (*recv_handler)(struct sk_buff *,
struct batadv_hard_iface *))
{
@@ -512,7 +513,7 @@ batadv_recv_handler_register(uint8_t packet_type,
return 0;
}
-void batadv_recv_handler_unregister(uint8_t packet_type)
+void batadv_recv_handler_unregister(u8 packet_type)
{
batadv_rx_handler[packet_type] = batadv_recv_unhandled_packet;
}
@@ -583,7 +584,7 @@ int batadv_algo_seq_print_text(struct seq_file *seq, void *offset)
seq_puts(seq, "Available routing algorithms:\n");
hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) {
- seq_printf(seq, "%s\n", bat_algo_ops->name);
+ seq_printf(seq, " * %s\n", bat_algo_ops->name);
}
return 0;
@@ -642,8 +643,7 @@ batadv_tvlv_handler_free_ref(struct batadv_tvlv_handler *tvlv_handler)
* Returns tvlv handler if found or NULL otherwise.
*/
static struct batadv_tvlv_handler
-*batadv_tvlv_handler_get(struct batadv_priv *bat_priv,
- uint8_t type, uint8_t version)
+*batadv_tvlv_handler_get(struct batadv_priv *bat_priv, u8 type, u8 version)
{
struct batadv_tvlv_handler *tvlv_handler_tmp, *tvlv_handler = NULL;
@@ -691,8 +691,7 @@ static void batadv_tvlv_container_free_ref(struct batadv_tvlv_container *tvlv)
* Returns tvlv container if found or NULL otherwise.
*/
static struct batadv_tvlv_container
-*batadv_tvlv_container_get(struct batadv_priv *bat_priv,
- uint8_t type, uint8_t version)
+*batadv_tvlv_container_get(struct batadv_priv *bat_priv, u8 type, u8 version)
{
struct batadv_tvlv_container *tvlv_tmp, *tvlv = NULL;
@@ -723,10 +722,10 @@ static struct batadv_tvlv_container
*
* Returns size of all currently registered tvlv containers in bytes.
*/
-static uint16_t batadv_tvlv_container_list_size(struct batadv_priv *bat_priv)
+static u16 batadv_tvlv_container_list_size(struct batadv_priv *bat_priv)
{
struct batadv_tvlv_container *tvlv;
- uint16_t tvlv_len = 0;
+ u16 tvlv_len = 0;
hlist_for_each_entry(tvlv, &bat_priv->tvlv.container_list, list) {
tvlv_len += sizeof(struct batadv_tvlv_hdr);
@@ -739,13 +738,17 @@ static uint16_t batadv_tvlv_container_list_size(struct batadv_priv *bat_priv)
/**
* batadv_tvlv_container_remove - remove tvlv container from the tvlv container
* list
+ * @bat_priv: the bat priv with all the soft interface information
* @tvlv: the to be removed tvlv container
*
* Has to be called with the appropriate locks being acquired
* (tvlv.container_list_lock).
*/
-static void batadv_tvlv_container_remove(struct batadv_tvlv_container *tvlv)
+static void batadv_tvlv_container_remove(struct batadv_priv *bat_priv,
+ struct batadv_tvlv_container *tvlv)
{
+ lockdep_assert_held(&bat_priv->tvlv.container_list_lock);
+
if (!tvlv)
return;
@@ -764,13 +767,13 @@ static void batadv_tvlv_container_remove(struct batadv_tvlv_container *tvlv)
* @version: tvlv container type to unregister
*/
void batadv_tvlv_container_unregister(struct batadv_priv *bat_priv,
- uint8_t type, uint8_t version)
+ u8 type, u8 version)
{
struct batadv_tvlv_container *tvlv;
spin_lock_bh(&bat_priv->tvlv.container_list_lock);
tvlv = batadv_tvlv_container_get(bat_priv, type, version);
- batadv_tvlv_container_remove(tvlv);
+ batadv_tvlv_container_remove(bat_priv, tvlv);
spin_unlock_bh(&bat_priv->tvlv.container_list_lock);
}
@@ -787,8 +790,8 @@ void batadv_tvlv_container_unregister(struct batadv_priv *bat_priv,
* content is going to replace the old one.
*/
void batadv_tvlv_container_register(struct batadv_priv *bat_priv,
- uint8_t type, uint8_t version,
- void *tvlv_value, uint16_t tvlv_value_len)
+ u8 type, u8 version,
+ void *tvlv_value, u16 tvlv_value_len)
{
struct batadv_tvlv_container *tvlv_old, *tvlv_new;
@@ -809,7 +812,7 @@ void batadv_tvlv_container_register(struct batadv_priv *bat_priv,
spin_lock_bh(&bat_priv->tvlv.container_list_lock);
tvlv_old = batadv_tvlv_container_get(bat_priv, type, version);
- batadv_tvlv_container_remove(tvlv_old);
+ batadv_tvlv_container_remove(bat_priv, tvlv_old);
hlist_add_head(&tvlv_new->list, &bat_priv->tvlv.container_list);
spin_unlock_bh(&bat_priv->tvlv.container_list_lock);
}
@@ -861,14 +864,13 @@ static bool batadv_tvlv_realloc_packet_buff(unsigned char **packet_buff,
*
* Returns size of all appended tvlv containers in bytes.
*/
-uint16_t batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv,
- unsigned char **packet_buff,
- int *packet_buff_len,
- int packet_min_len)
+u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv,
+ unsigned char **packet_buff,
+ int *packet_buff_len, int packet_min_len)
{
struct batadv_tvlv_container *tvlv;
struct batadv_tvlv_hdr *tvlv_hdr;
- uint16_t tvlv_value_len;
+ u16 tvlv_value_len;
void *tvlv_value;
bool ret;
@@ -893,7 +895,7 @@ uint16_t batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv,
tvlv_hdr->len = tvlv->tvlv_hdr.len;
tvlv_value = tvlv_hdr + 1;
memcpy(tvlv_value, tvlv + 1, ntohs(tvlv->tvlv_hdr.len));
- tvlv_value = (uint8_t *)tvlv_value + ntohs(tvlv->tvlv_hdr.len);
+ tvlv_value = (u8 *)tvlv_value + ntohs(tvlv->tvlv_hdr.len);
}
end:
@@ -920,8 +922,8 @@ static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv,
struct batadv_tvlv_handler *tvlv_handler,
bool ogm_source,
struct batadv_orig_node *orig_node,
- uint8_t *src, uint8_t *dst,
- void *tvlv_value, uint16_t tvlv_value_len)
+ u8 *src, u8 *dst,
+ void *tvlv_value, u16 tvlv_value_len)
{
if (!tvlv_handler)
return NET_RX_SUCCESS;
@@ -972,13 +974,13 @@ static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv,
int batadv_tvlv_containers_process(struct batadv_priv *bat_priv,
bool ogm_source,
struct batadv_orig_node *orig_node,
- uint8_t *src, uint8_t *dst,
- void *tvlv_value, uint16_t tvlv_value_len)
+ u8 *src, u8 *dst,
+ void *tvlv_value, u16 tvlv_value_len)
{
struct batadv_tvlv_handler *tvlv_handler;
struct batadv_tvlv_hdr *tvlv_hdr;
- uint16_t tvlv_value_cont_len;
- uint8_t cifnotfound = BATADV_TVLV_HANDLER_OGM_CIFNOTFND;
+ u16 tvlv_value_cont_len;
+ u8 cifnotfound = BATADV_TVLV_HANDLER_OGM_CIFNOTFND;
int ret = NET_RX_SUCCESS;
while (tvlv_value_len >= sizeof(*tvlv_hdr)) {
@@ -1000,7 +1002,7 @@ int batadv_tvlv_containers_process(struct batadv_priv *bat_priv,
tvlv_value_cont_len);
if (tvlv_handler)
batadv_tvlv_handler_free_ref(tvlv_handler);
- tvlv_value = (uint8_t *)tvlv_value + tvlv_value_cont_len;
+ tvlv_value = (u8 *)tvlv_value + tvlv_value_cont_len;
tvlv_value_len -= tvlv_value_cont_len;
}
@@ -1034,7 +1036,7 @@ void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node)
{
void *tvlv_value;
- uint16_t tvlv_value_len;
+ u16 tvlv_value_len;
if (!batadv_ogm_packet)
return;
@@ -1066,14 +1068,14 @@ void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv,
void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
void (*optr)(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig,
- uint8_t flags,
+ u8 flags,
void *tvlv_value,
- uint16_t tvlv_value_len),
+ u16 tvlv_value_len),
int (*uptr)(struct batadv_priv *bat_priv,
- uint8_t *src, uint8_t *dst,
+ u8 *src, u8 *dst,
void *tvlv_value,
- uint16_t tvlv_value_len),
- uint8_t type, uint8_t version, uint8_t flags)
+ u16 tvlv_value_len),
+ u8 type, u8 version, u8 flags)
{
struct batadv_tvlv_handler *tvlv_handler;
@@ -1108,7 +1110,7 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
* @version: tvlv handler version to be unregistered
*/
void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv,
- uint8_t type, uint8_t version)
+ u8 type, u8 version)
{
struct batadv_tvlv_handler *tvlv_handler;
@@ -1134,9 +1136,9 @@ void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv,
* @tvlv_value: tvlv content
* @tvlv_value_len: tvlv content length
*/
-void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, uint8_t *src,
- uint8_t *dst, uint8_t type, uint8_t version,
- void *tvlv_value, uint16_t tvlv_value_len)
+void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src,
+ u8 *dst, u8 type, u8 version,
+ void *tvlv_value, u16 tvlv_value_len)
{
struct batadv_unicast_tvlv_packet *unicast_tvlv_packet;
struct batadv_tvlv_hdr *tvlv_hdr;
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 41d27c7872b9..ebd8af0a1eb0 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -24,7 +24,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2015.1"
+#define BATADV_SOURCE_VERSION "2015.2"
#endif
/* B.A.T.M.A.N. parameters */
@@ -193,7 +193,7 @@ extern struct workqueue_struct *batadv_event_workqueue;
int batadv_mesh_init(struct net_device *soft_iface);
void batadv_mesh_free(struct net_device *soft_iface);
-bool batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr);
+bool batadv_is_my_mac(struct batadv_priv *bat_priv, const u8 *addr);
struct batadv_hard_iface *
batadv_seq_print_text_primary_if_get(struct seq_file *seq);
int batadv_max_header_len(void);
@@ -202,10 +202,10 @@ int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *ptype,
struct net_device *orig_dev);
int
-batadv_recv_handler_register(uint8_t packet_type,
+batadv_recv_handler_register(u8 packet_type,
int (*recv_handler)(struct sk_buff *,
struct batadv_hard_iface *));
-void batadv_recv_handler_unregister(uint8_t packet_type);
+void batadv_recv_handler_unregister(u8 packet_type);
int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops);
int batadv_algo_select(struct batadv_priv *bat_priv, char *name);
int batadv_algo_seq_print_text(struct seq_file *seq, void *offset);
@@ -304,7 +304,7 @@ static inline bool batadv_has_timed_out(unsigned long timestamp,
* they handle overflows/underflows and can correctly check for a
* predecessor/successor unless the variable sequence number has grown by
* more then 2**(bitwidth(x)-1)-1.
- * This means that for a uint8_t with the maximum value 255, it would think:
+ * This means that for a u8 with the maximum value 255, it would think:
* - when adding nothing - it is neither a predecessor nor a successor
* - before adding more than 127 to the starting value - it is a predecessor,
* - when adding 128 - it is neither a predecessor nor a successor,
@@ -327,10 +327,9 @@ static inline void batadv_add_counter(struct batadv_priv *bat_priv, size_t idx,
#define batadv_inc_counter(b, i) batadv_add_counter(b, i, 1)
/* Sum and return the cpu-local counters for index 'idx' */
-static inline uint64_t batadv_sum_counter(struct batadv_priv *bat_priv,
- size_t idx)
+static inline u64 batadv_sum_counter(struct batadv_priv *bat_priv, size_t idx)
{
- uint64_t *counters, sum = 0;
+ u64 *counters, sum = 0;
int cpu;
for_each_possible_cpu(cpu) {
@@ -348,39 +347,38 @@ static inline uint64_t batadv_sum_counter(struct batadv_priv *bat_priv,
#define BATADV_SKB_CB(__skb) ((struct batadv_skb_cb *)&((__skb)->cb[0]))
void batadv_tvlv_container_register(struct batadv_priv *bat_priv,
- uint8_t type, uint8_t version,
- void *tvlv_value, uint16_t tvlv_value_len);
-uint16_t batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv,
- unsigned char **packet_buff,
- int *packet_buff_len,
- int packet_min_len);
+ u8 type, u8 version,
+ void *tvlv_value, u16 tvlv_value_len);
+u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv,
+ unsigned char **packet_buff,
+ int *packet_buff_len, int packet_min_len);
void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv,
struct batadv_ogm_packet *batadv_ogm_packet,
struct batadv_orig_node *orig_node);
void batadv_tvlv_container_unregister(struct batadv_priv *bat_priv,
- uint8_t type, uint8_t version);
+ u8 type, u8 version);
void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
void (*optr)(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig,
- uint8_t flags,
+ u8 flags,
void *tvlv_value,
- uint16_t tvlv_value_len),
+ u16 tvlv_value_len),
int (*uptr)(struct batadv_priv *bat_priv,
- uint8_t *src, uint8_t *dst,
+ u8 *src, u8 *dst,
void *tvlv_value,
- uint16_t tvlv_value_len),
- uint8_t type, uint8_t version, uint8_t flags);
+ u16 tvlv_value_len),
+ u8 type, u8 version, u8 flags);
void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv,
- uint8_t type, uint8_t version);
+ u8 type, u8 version);
int batadv_tvlv_containers_process(struct batadv_priv *bat_priv,
bool ogm_source,
struct batadv_orig_node *orig_node,
- uint8_t *src, uint8_t *dst,
- void *tvlv_buff, uint16_t tvlv_buff_len);
-void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, uint8_t *src,
- uint8_t *dst, uint8_t type, uint8_t version,
- void *tvlv_value, uint16_t tvlv_value_len);
+ u8 *src, u8 *dst,
+ void *tvlv_buff, u16 tvlv_buff_len);
+void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src,
+ u8 *dst, u8 type, u8 version,
+ void *tvlv_value, u16 tvlv_value_len);
unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len);
bool batadv_vlan_ap_isola_get(struct batadv_priv *bat_priv, unsigned short vid);
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 68a9554961eb..eb76386f8d4b 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -31,6 +31,7 @@
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/list.h>
+#include <linux/lockdep.h>
#include <linux/netdevice.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
@@ -89,7 +90,7 @@ static int batadv_mcast_mla_softif_get(struct net_device *dev,
* Returns true if the given address is already in the given list.
* Otherwise returns false.
*/
-static bool batadv_mcast_mla_is_duplicate(uint8_t *mcast_addr,
+static bool batadv_mcast_mla_is_duplicate(u8 *mcast_addr,
struct hlist_head *mcast_list)
{
struct batadv_hw_addr *mcast_entry;
@@ -103,15 +104,19 @@ static bool batadv_mcast_mla_is_duplicate(uint8_t *mcast_addr,
/**
* batadv_mcast_mla_list_free - free a list of multicast addresses
+ * @bat_priv: the bat priv with all the soft interface information
* @mcast_list: the list to free
*
* Removes and frees all items in the given mcast_list.
*/
-static void batadv_mcast_mla_list_free(struct hlist_head *mcast_list)
+static void batadv_mcast_mla_list_free(struct batadv_priv *bat_priv,
+ struct hlist_head *mcast_list)
{
struct batadv_hw_addr *mcast_entry;
struct hlist_node *tmp;
+ lockdep_assert_held(&bat_priv->tt.commit_lock);
+
hlist_for_each_entry_safe(mcast_entry, tmp, mcast_list, list) {
hlist_del(&mcast_entry->list);
kfree(mcast_entry);
@@ -134,6 +139,8 @@ static void batadv_mcast_mla_tt_retract(struct batadv_priv *bat_priv,
struct batadv_hw_addr *mcast_entry;
struct hlist_node *tmp;
+ lockdep_assert_held(&bat_priv->tt.commit_lock);
+
hlist_for_each_entry_safe(mcast_entry, tmp, &bat_priv->mcast.mla_list,
list) {
if (mcast_list &&
@@ -164,6 +171,8 @@ static void batadv_mcast_mla_tt_add(struct batadv_priv *bat_priv,
struct batadv_hw_addr *mcast_entry;
struct hlist_node *tmp;
+ lockdep_assert_held(&bat_priv->tt.commit_lock);
+
if (!mcast_list)
return;
@@ -268,7 +277,7 @@ update:
batadv_mcast_mla_tt_add(bat_priv, &mcast_list);
out:
- batadv_mcast_mla_list_free(&mcast_list);
+ batadv_mcast_mla_list_free(bat_priv, &mcast_list);
}
/**
@@ -595,11 +604,13 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
*/
static void batadv_mcast_want_unsnoop_update(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig,
- uint8_t mcast_flags)
+ u8 mcast_flags)
{
struct hlist_node *node = &orig->mcast_want_all_unsnoopables_node;
struct hlist_head *head = &bat_priv->mcast.want_all_unsnoopables_list;
+ lockdep_assert_held(&orig->mcast_handler_lock);
+
/* switched from flag unset to set */
if (mcast_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES &&
!(orig->mcast_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES)) {
@@ -638,11 +649,13 @@ static void batadv_mcast_want_unsnoop_update(struct batadv_priv *bat_priv,
*/
static void batadv_mcast_want_ipv4_update(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig,
- uint8_t mcast_flags)
+ u8 mcast_flags)
{
struct hlist_node *node = &orig->mcast_want_all_ipv4_node;
struct hlist_head *head = &bat_priv->mcast.want_all_ipv4_list;
+ lockdep_assert_held(&orig->mcast_handler_lock);
+
/* switched from flag unset to set */
if (mcast_flags & BATADV_MCAST_WANT_ALL_IPV4 &&
!(orig->mcast_flags & BATADV_MCAST_WANT_ALL_IPV4)) {
@@ -681,11 +694,13 @@ static void batadv_mcast_want_ipv4_update(struct batadv_priv *bat_priv,
*/
static void batadv_mcast_want_ipv6_update(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig,
- uint8_t mcast_flags)
+ u8 mcast_flags)
{
struct hlist_node *node = &orig->mcast_want_all_ipv6_node;
struct hlist_head *head = &bat_priv->mcast.want_all_ipv6_list;
+ lockdep_assert_held(&orig->mcast_handler_lock);
+
/* switched from flag unset to set */
if (mcast_flags & BATADV_MCAST_WANT_ALL_IPV6 &&
!(orig->mcast_flags & BATADV_MCAST_WANT_ALL_IPV6)) {
@@ -721,17 +736,17 @@ static void batadv_mcast_want_ipv6_update(struct batadv_priv *bat_priv,
*/
static void batadv_mcast_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig,
- uint8_t flags,
+ u8 flags,
void *tvlv_value,
- uint16_t tvlv_value_len)
+ u16 tvlv_value_len)
{
bool orig_mcast_enabled = !(flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
- uint8_t mcast_flags = BATADV_NO_FLAGS;
+ u8 mcast_flags = BATADV_NO_FLAGS;
bool orig_initialized;
if (orig_mcast_enabled && tvlv_value &&
(tvlv_value_len >= sizeof(mcast_flags)))
- mcast_flags = *(uint8_t *)tvlv_value;
+ mcast_flags = *(u8 *)tvlv_value;
spin_lock_bh(&orig->mcast_handler_lock);
orig_initialized = test_bit(BATADV_ORIG_CAPA_HAS_MCAST,
diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
index beb6e56c624a..8f3cb04b9f13 100644
--- a/net/batman-adv/multicast.h
+++ b/net/batman-adv/multicast.h
@@ -20,8 +20,6 @@
#include "main.h"
-struct batadv_orig_node;
-struct batadv_priv;
struct sk_buff;
/**
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 46604010dcd4..f5276be2c77c 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -130,9 +130,8 @@ void batadv_nc_status_update(struct net_device *net_dev)
*/
static void batadv_nc_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig,
- uint8_t flags,
- void *tvlv_value,
- uint16_t tvlv_value_len)
+ u8 flags,
+ void *tvlv_value, u16 tvlv_value_len)
{
if (flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND)
clear_bit(BATADV_ORIG_CAPA_HAS_NC, &orig->capabilities);
@@ -382,7 +381,7 @@ static void batadv_nc_purge_orig_hash(struct batadv_priv *bat_priv)
struct batadv_hashtable *hash = bat_priv->orig_hash;
struct hlist_head *head;
struct batadv_orig_node *orig_node;
- uint32_t i;
+ u32 i;
if (!hash)
return;
@@ -418,7 +417,7 @@ static void batadv_nc_purge_paths(struct batadv_priv *bat_priv,
struct hlist_node *node_tmp;
struct batadv_nc_path *nc_path;
spinlock_t *lock; /* Protects lists in hash */
- uint32_t i;
+ u32 i;
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
@@ -478,10 +477,10 @@ static void batadv_nc_hash_key_gen(struct batadv_nc_path *key, const char *src,
*
* Returns the selected index in the hash table for the given data.
*/
-static uint32_t batadv_nc_hash_choose(const void *data, uint32_t size)
+static u32 batadv_nc_hash_choose(const void *data, u32 size)
{
const struct batadv_nc_path *nc_path = data;
- uint32_t hash = 0;
+ u32 hash = 0;
hash = jhash(&nc_path->prev_hop, sizeof(nc_path->prev_hop), hash);
hash = jhash(&nc_path->next_hop, sizeof(nc_path->next_hop), hash);
@@ -587,6 +586,8 @@ static bool batadv_nc_sniffed_purge(struct batadv_priv *bat_priv,
unsigned long timeout = bat_priv->nc.max_buffer_time;
bool res = false;
+ lockdep_assert_held(&nc_path->packet_list_lock);
+
/* Packets are added to tail, so the remaining packets did not time
* out and we can stop processing the current queue
*/
@@ -623,6 +624,8 @@ static bool batadv_nc_fwd_flush(struct batadv_priv *bat_priv,
{
unsigned long timeout = bat_priv->nc.max_fwd_delay;
+ lockdep_assert_held(&nc_path->packet_list_lock);
+
/* Packets are added to tail, so the remaining packets did not time
* out and we can stop processing the current queue
*/
@@ -744,8 +747,8 @@ static bool batadv_can_nc_with_orig(struct batadv_priv *bat_priv,
struct batadv_ogm_packet *ogm_packet)
{
struct batadv_orig_ifinfo *orig_ifinfo;
- uint32_t last_real_seqno;
- uint8_t last_ttl;
+ u32 last_real_seqno;
+ u8 last_ttl;
orig_ifinfo = batadv_orig_ifinfo_get(orig_node, BATADV_IF_DEFAULT);
if (!orig_ifinfo)
@@ -873,8 +876,8 @@ free:
}
/**
- * batadv_nc_update_nc_node - updates stored incoming and outgoing nc node structs
- * (best called on incoming OGMs)
+ * batadv_nc_update_nc_node - updates stored incoming and outgoing nc node
+ * structs (best called on incoming OGMs)
* @bat_priv: the bat priv with all the soft interface information
* @orig_node: orig node originating the ogm packet
* @orig_neigh_node: neighboring orig node from which we received the ogm packet
@@ -888,7 +891,8 @@ void batadv_nc_update_nc_node(struct batadv_priv *bat_priv,
struct batadv_ogm_packet *ogm_packet,
int is_single_hop_neigh)
{
- struct batadv_nc_node *in_nc_node = NULL, *out_nc_node = NULL;
+ struct batadv_nc_node *in_nc_node = NULL;
+ struct batadv_nc_node *out_nc_node = NULL;
/* Check if network coding is enabled */
if (!atomic_read(&bat_priv->network_coding))
@@ -938,8 +942,8 @@ out:
*/
static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv,
struct batadv_hashtable *hash,
- uint8_t *src,
- uint8_t *dst)
+ u8 *src,
+ u8 *dst)
{
int hash_added;
struct batadv_nc_path *nc_path, nc_path_key;
@@ -991,9 +995,9 @@ static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv,
* selection of a receiver with slightly lower TQ than the other
* @tq: to be weighted tq value
*/
-static uint8_t batadv_nc_random_weight_tq(uint8_t tq)
+static u8 batadv_nc_random_weight_tq(u8 tq)
{
- uint8_t rand_val, rand_tq;
+ u8 rand_val, rand_tq;
get_random_bytes(&rand_val, sizeof(rand_val));
@@ -1038,7 +1042,7 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv,
struct batadv_nc_packet *nc_packet,
struct batadv_neigh_node *neigh_node)
{
- uint8_t tq_weighted_neigh, tq_weighted_coding, tq_tmp;
+ u8 tq_weighted_neigh, tq_weighted_coding, tq_tmp;
struct sk_buff *skb_dest, *skb_src;
struct batadv_unicast_packet *packet1;
struct batadv_unicast_packet *packet2;
@@ -1047,7 +1051,7 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv,
struct batadv_neigh_node *router_coding = NULL;
struct batadv_neigh_ifinfo *router_neigh_ifinfo = NULL;
struct batadv_neigh_ifinfo *router_coding_ifinfo = NULL;
- uint8_t *first_source, *first_dest, *second_source, *second_dest;
+ u8 *first_source, *first_dest, *second_source, *second_dest;
__be32 packet_id1, packet_id2;
size_t count;
bool res = false;
@@ -1231,8 +1235,7 @@ out:
*
* Returns true if coding of a decoded packet is allowed.
*/
-static bool batadv_nc_skb_coding_possible(struct sk_buff *skb,
- uint8_t *dst, uint8_t *src)
+static bool batadv_nc_skb_coding_possible(struct sk_buff *skb, u8 *dst, u8 *src)
{
if (BATADV_SKB_CB(skb)->decoded && !batadv_compare_eth(dst, src))
return false;
@@ -1255,7 +1258,7 @@ batadv_nc_path_search(struct batadv_priv *bat_priv,
struct batadv_nc_node *in_nc_node,
struct batadv_nc_node *out_nc_node,
struct sk_buff *skb,
- uint8_t *eth_dst)
+ u8 *eth_dst)
{
struct batadv_nc_path *nc_path, nc_path_key;
struct batadv_nc_packet *nc_packet_out = NULL;
@@ -1321,8 +1324,8 @@ batadv_nc_path_search(struct batadv_priv *bat_priv,
static struct batadv_nc_packet *
batadv_nc_skb_src_search(struct batadv_priv *bat_priv,
struct sk_buff *skb,
- uint8_t *eth_dst,
- uint8_t *eth_src,
+ u8 *eth_dst,
+ u8 *eth_src,
struct batadv_nc_node *in_nc_node)
{
struct batadv_orig_node *orig_node;
@@ -1362,7 +1365,7 @@ batadv_nc_skb_src_search(struct batadv_priv *bat_priv,
*/
static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv,
struct sk_buff *skb,
- uint8_t *eth_dst_new)
+ u8 *eth_dst_new)
{
struct ethhdr *ethhdr;
@@ -1638,7 +1641,7 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb,
struct batadv_unicast_packet *unicast_packet;
struct batadv_coded_packet coded_packet_tmp;
struct ethhdr *ethhdr, ethhdr_tmp;
- uint8_t *orig_dest, ttl, ttvn;
+ u8 *orig_dest, ttl, ttvn;
unsigned int coding_len;
int err;
@@ -1730,7 +1733,7 @@ batadv_nc_find_decoding_packet(struct batadv_priv *bat_priv,
struct batadv_hashtable *hash = bat_priv->nc.decoding_hash;
struct batadv_nc_packet *tmp_nc_packet, *nc_packet = NULL;
struct batadv_nc_path *nc_path, nc_path_key;
- uint8_t *dest, *source;
+ u8 *dest, *source;
__be32 packet_id;
int index;
diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h
index 5b79aa8c64c1..8f6d4ad8778a 100644
--- a/net/batman-adv/network-coding.h
+++ b/net/batman-adv/network-coding.h
@@ -22,11 +22,7 @@
#include <linux/types.h>
-struct batadv_nc_node;
-struct batadv_neigh_node;
struct batadv_ogm_packet;
-struct batadv_orig_node;
-struct batadv_priv;
struct net_device;
struct seq_file;
struct sk_buff;
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 32a0fcfab36d..7486df9ed48d 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -26,6 +26,7 @@
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/netdevice.h>
+#include <linux/rculist.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
@@ -70,7 +71,7 @@ batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node,
struct batadv_orig_node_vlan *vlan = NULL, *tmp;
rcu_read_lock();
- list_for_each_entry_rcu(tmp, &orig_node->vlan_list, list) {
+ hlist_for_each_entry_rcu(tmp, &orig_node->vlan_list, list) {
if (tmp->vid != vid)
continue;
@@ -118,7 +119,7 @@ batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node,
atomic_set(&vlan->refcount, 2);
vlan->vid = vid;
- list_add_rcu(&vlan->list, &orig_node->vlan_list);
+ hlist_add_head_rcu(&vlan->list, &orig_node->vlan_list);
out:
spin_unlock_bh(&orig_node->vlan_list_lock);
@@ -442,41 +443,6 @@ out:
}
/**
- * batadv_neigh_node_new - create and init a new neigh_node object
- * @hard_iface: the interface where the neighbour is connected to
- * @neigh_addr: the mac address of the neighbour interface
- * @orig_node: originator object representing the neighbour
- *
- * Allocates a new neigh_node object and initialises all the generic fields.
- * Returns the new object or NULL on failure.
- */
-struct batadv_neigh_node *
-batadv_neigh_node_new(struct batadv_hard_iface *hard_iface,
- const uint8_t *neigh_addr,
- struct batadv_orig_node *orig_node)
-{
- struct batadv_neigh_node *neigh_node;
-
- neigh_node = kzalloc(sizeof(*neigh_node), GFP_ATOMIC);
- if (!neigh_node)
- goto out;
-
- INIT_HLIST_NODE(&neigh_node->list);
- INIT_HLIST_HEAD(&neigh_node->ifinfo_list);
- spin_lock_init(&neigh_node->ifinfo_lock);
-
- ether_addr_copy(neigh_node->addr, neigh_addr);
- neigh_node->if_incoming = hard_iface;
- neigh_node->orig_node = orig_node;
-
- /* extra reference for return */
- atomic_set(&neigh_node->refcount, 2);
-
-out:
- return neigh_node;
-}
-
-/**
* batadv_neigh_node_get - retrieve a neighbour from the list
* @orig_node: originator which the neighbour belongs to
* @hard_iface: the interface where this neighbour is connected to
@@ -486,10 +452,10 @@ out:
* which is connected through the provided hard interface.
* Returns NULL if the neighbour is not found.
*/
-struct batadv_neigh_node *
+static struct batadv_neigh_node *
batadv_neigh_node_get(const struct batadv_orig_node *orig_node,
const struct batadv_hard_iface *hard_iface,
- const uint8_t *addr)
+ const u8 *addr)
{
struct batadv_neigh_node *tmp_neigh_node, *res = NULL;
@@ -513,6 +479,59 @@ batadv_neigh_node_get(const struct batadv_orig_node *orig_node,
}
/**
+ * batadv_neigh_node_new - create and init a new neigh_node object
+ * @orig_node: originator object representing the neighbour
+ * @hard_iface: the interface where the neighbour is connected to
+ * @neigh_addr: the mac address of the neighbour interface
+ *
+ * Returns the matching neigh_node if one is already known, otherwise allocates
+ * one, adds it to the neigh_list of orig_node and returns it (NULL on failure).
+ */
+struct batadv_neigh_node *
+batadv_neigh_node_new(struct batadv_orig_node *orig_node,
+ struct batadv_hard_iface *hard_iface,
+ const u8 *neigh_addr)
+{
+ struct batadv_neigh_node *neigh_node;
+
+ neigh_node = batadv_neigh_node_get(orig_node, hard_iface, neigh_addr);
+ if (neigh_node)
+ goto out;
+
+ neigh_node = kzalloc(sizeof(*neigh_node), GFP_ATOMIC);
+ if (!neigh_node)
+ goto out;
+
+ if (!atomic_inc_not_zero(&hard_iface->refcount)) {
+ kfree(neigh_node);
+ neigh_node = NULL;
+ goto out;
+ }
+
+ INIT_HLIST_NODE(&neigh_node->list);
+ INIT_HLIST_HEAD(&neigh_node->ifinfo_list);
+ spin_lock_init(&neigh_node->ifinfo_lock);
+
+ ether_addr_copy(neigh_node->addr, neigh_addr);
+ neigh_node->if_incoming = hard_iface;
+ neigh_node->orig_node = orig_node;
+
+ /* extra reference for return */
+ atomic_set(&neigh_node->refcount, 2);
+
+ spin_lock_bh(&orig_node->neigh_list_lock);
+ hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list);
+ spin_unlock_bh(&orig_node->neigh_list_lock);
+
+ batadv_dbg(BATADV_DBG_BATMAN, orig_node->bat_priv,
+ "Creating new neighbor %pM for orig_node %pM on interface %s\n",
+ neigh_addr, orig_node->orig, hard_iface->net_dev->name);
+
+out:
+ return neigh_node;
+}
+
+/**
* batadv_orig_ifinfo_free_rcu - free the orig_ifinfo object
* @rcu: rcu pointer of the orig_ifinfo object
*/
@@ -624,7 +643,7 @@ void batadv_originator_free(struct batadv_priv *bat_priv)
struct hlist_head *head;
spinlock_t *list_lock; /* spinlock to protect write access */
struct batadv_orig_node *orig_node;
- uint32_t i;
+ u32 i;
if (!hash)
return;
@@ -659,7 +678,7 @@ void batadv_originator_free(struct batadv_priv *bat_priv)
* Returns the newly created object or NULL on failure.
*/
struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
- const uint8_t *addr)
+ const u8 *addr)
{
struct batadv_orig_node *orig_node;
struct batadv_orig_node_vlan *vlan;
@@ -674,7 +693,7 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
return NULL;
INIT_HLIST_HEAD(&orig_node->neigh_list);
- INIT_LIST_HEAD(&orig_node->vlan_list);
+ INIT_HLIST_HEAD(&orig_node->vlan_list);
INIT_HLIST_HEAD(&orig_node->ifinfo_list);
spin_lock_init(&orig_node->bcast_seqno_lock);
spin_lock_init(&orig_node->neigh_list_lock);
@@ -981,7 +1000,7 @@ static void _batadv_purge_orig(struct batadv_priv *bat_priv)
struct hlist_head *head;
spinlock_t *list_lock; /* spinlock to protect write access */
struct batadv_orig_node *orig_node;
- uint32_t i;
+ u32 i;
if (!hash)
return;
@@ -1010,7 +1029,6 @@ static void _batadv_purge_orig(struct batadv_priv *bat_priv)
spin_unlock_bh(list_lock);
}
- batadv_gw_node_purge(bat_priv);
batadv_gw_election(bat_priv);
}
@@ -1115,7 +1133,7 @@ int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
struct batadv_hashtable *hash = bat_priv->orig_hash;
struct hlist_head *head;
struct batadv_orig_node *orig_node;
- uint32_t i;
+ u32 i;
int ret;
/* resize all orig nodes because orig_node->bcast_own(_sum) depend on
@@ -1152,7 +1170,7 @@ int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface,
struct batadv_hard_iface *hard_iface_tmp;
struct batadv_orig_node *orig_node;
struct batadv_algo_ops *bao = bat_priv->bat_algo_ops;
- uint32_t i;
+ u32 i;
int ret;
/* resize all orig nodes because orig_node->bcast_own(_sum) depend on
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index 79734d302010..fa18f9bf266b 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -40,15 +40,11 @@ void batadv_purge_orig_ref(struct batadv_priv *bat_priv);
void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node);
void batadv_orig_node_free_ref_now(struct batadv_orig_node *orig_node);
struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
- const uint8_t *addr);
+ const u8 *addr);
struct batadv_neigh_node *
-batadv_neigh_node_get(const struct batadv_orig_node *orig_node,
- const struct batadv_hard_iface *hard_iface,
- const uint8_t *addr);
-struct batadv_neigh_node *
-batadv_neigh_node_new(struct batadv_hard_iface *hard_iface,
- const uint8_t *neigh_addr,
- struct batadv_orig_node *orig_node);
+batadv_neigh_node_new(struct batadv_orig_node *orig_node,
+ struct batadv_hard_iface *hard_iface,
+ const u8 *neigh_addr);
void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node);
struct batadv_neigh_node *
batadv_orig_router_get(struct batadv_orig_node *orig_node,
@@ -86,9 +82,9 @@ void batadv_orig_node_vlan_free_ref(struct batadv_orig_node_vlan *orig_vlan);
/* hashfunction to choose an entry in a hash table of given size
* hash algorithm from http://en.wikipedia.org/wiki/Hash_table
*/
-static inline uint32_t batadv_choose_orig(const void *data, uint32_t size)
+static inline u32 batadv_choose_orig(const void *data, u32 size)
{
- uint32_t hash = 0;
+ u32 hash = 0;
hash = jhash(data, ETH_ALEN, hash);
return hash % size;
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index 9e747c08d0bc..11f996b39fef 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -197,8 +197,8 @@ enum batadv_tvlv_type {
* transport the claim type and the group id
*/
struct batadv_bla_claim_dst {
- uint8_t magic[3]; /* FF:43:05 */
- uint8_t type; /* bla_claimframe */
+ u8 magic[3]; /* FF:43:05 */
+ u8 type; /* bla_claimframe */
__be16 group; /* group id */
};
@@ -213,16 +213,16 @@ struct batadv_bla_claim_dst {
* @tvlv_len: length of tvlv data following the ogm header
*/
struct batadv_ogm_packet {
- uint8_t packet_type;
- uint8_t version;
- uint8_t ttl;
- uint8_t flags;
- __be32 seqno;
- uint8_t orig[ETH_ALEN];
- uint8_t prev_sender[ETH_ALEN];
- uint8_t reserved;
- uint8_t tq;
- __be16 tvlv_len;
+ u8 packet_type;
+ u8 version;
+ u8 ttl;
+ u8 flags;
+ __be32 seqno;
+ u8 orig[ETH_ALEN];
+ u8 prev_sender[ETH_ALEN];
+ u8 reserved;
+ u8 tq;
+ __be16 tvlv_len;
/* __packed is not needed as the struct size is divisible by 4,
* and the largest data type in this struct has a size of 4.
*/
@@ -246,14 +246,14 @@ struct batadv_ogm_packet {
* members are padded the same way as they are in real packets.
*/
struct batadv_icmp_header {
- uint8_t packet_type;
- uint8_t version;
- uint8_t ttl;
- uint8_t msg_type; /* see ICMP message types above */
- uint8_t dst[ETH_ALEN];
- uint8_t orig[ETH_ALEN];
- uint8_t uid;
- uint8_t align[3];
+ u8 packet_type;
+ u8 version;
+ u8 ttl;
+ u8 msg_type; /* see ICMP message types above */
+ u8 dst[ETH_ALEN];
+ u8 orig[ETH_ALEN];
+ u8 uid;
+ u8 align[3];
};
/**
@@ -269,15 +269,15 @@ struct batadv_icmp_header {
* @seqno: ICMP sequence number
*/
struct batadv_icmp_packet {
- uint8_t packet_type;
- uint8_t version;
- uint8_t ttl;
- uint8_t msg_type; /* see ICMP message types above */
- uint8_t dst[ETH_ALEN];
- uint8_t orig[ETH_ALEN];
- uint8_t uid;
- uint8_t reserved;
- __be16 seqno;
+ u8 packet_type;
+ u8 version;
+ u8 ttl;
+ u8 msg_type; /* see ICMP message types above */
+ u8 dst[ETH_ALEN];
+ u8 orig[ETH_ALEN];
+ u8 uid;
+ u8 reserved;
+ __be16 seqno;
};
#define BATADV_RR_LEN 16
@@ -296,16 +296,16 @@ struct batadv_icmp_packet {
* @rr: route record array
*/
struct batadv_icmp_packet_rr {
- uint8_t packet_type;
- uint8_t version;
- uint8_t ttl;
- uint8_t msg_type; /* see ICMP message types above */
- uint8_t dst[ETH_ALEN];
- uint8_t orig[ETH_ALEN];
- uint8_t uid;
- uint8_t rr_cur;
- __be16 seqno;
- uint8_t rr[BATADV_RR_LEN][ETH_ALEN];
+ u8 packet_type;
+ u8 version;
+ u8 ttl;
+ u8 msg_type; /* see ICMP message types above */
+ u8 dst[ETH_ALEN];
+ u8 orig[ETH_ALEN];
+ u8 uid;
+ u8 rr_cur;
+ __be16 seqno;
+ u8 rr[BATADV_RR_LEN][ETH_ALEN];
};
#define BATADV_ICMP_MAX_PACKET_SIZE sizeof(struct batadv_icmp_packet_rr)
@@ -331,11 +331,11 @@ struct batadv_icmp_packet_rr {
* @dest: originator destination of the unicast packet
*/
struct batadv_unicast_packet {
- uint8_t packet_type;
- uint8_t version;
- uint8_t ttl;
- uint8_t ttvn; /* destination translation table version number */
- uint8_t dest[ETH_ALEN];
+ u8 packet_type;
+ u8 version;
+ u8 ttl;
+ u8 ttvn; /* destination translation table version number */
+ u8 dest[ETH_ALEN];
/* "4 bytes boundary + 2 bytes" long to make the payload after the
* following ethernet header again 4 bytes boundary aligned
*/
@@ -349,9 +349,9 @@ struct batadv_unicast_packet {
*/
struct batadv_unicast_4addr_packet {
struct batadv_unicast_packet u;
- uint8_t src[ETH_ALEN];
- uint8_t subtype;
- uint8_t reserved;
+ u8 src[ETH_ALEN];
+ u8 subtype;
+ u8 reserved;
/* "4 bytes boundary + 2 bytes" long to make the payload after the
* following ethernet header again 4 bytes boundary aligned
*/
@@ -370,22 +370,22 @@ struct batadv_unicast_4addr_packet {
* @total_size: size of the merged packet
*/
struct batadv_frag_packet {
- uint8_t packet_type;
- uint8_t version; /* batman version field */
- uint8_t ttl;
+ u8 packet_type;
+ u8 version; /* batman version field */
+ u8 ttl;
#if defined(__BIG_ENDIAN_BITFIELD)
- uint8_t no:4;
- uint8_t reserved:4;
+ u8 no:4;
+ u8 reserved:4;
#elif defined(__LITTLE_ENDIAN_BITFIELD)
- uint8_t reserved:4;
- uint8_t no:4;
+ u8 reserved:4;
+ u8 no:4;
#else
#error "unknown bitfield endianness"
#endif
- uint8_t dest[ETH_ALEN];
- uint8_t orig[ETH_ALEN];
- __be16 seqno;
- __be16 total_size;
+ u8 dest[ETH_ALEN];
+ u8 orig[ETH_ALEN];
+ __be16 seqno;
+ __be16 total_size;
};
/**
@@ -398,12 +398,12 @@ struct batadv_frag_packet {
* @orig: originator of the broadcast packet
*/
struct batadv_bcast_packet {
- uint8_t packet_type;
- uint8_t version; /* batman version field */
- uint8_t ttl;
- uint8_t reserved;
- __be32 seqno;
- uint8_t orig[ETH_ALEN];
+ u8 packet_type;
+ u8 version; /* batman version field */
+ u8 ttl;
+ u8 reserved;
+ __be32 seqno;
+ u8 orig[ETH_ALEN];
/* "4 bytes boundary + 2 bytes" long to make the payload after the
* following ethernet header again 4 bytes boundary aligned
*/
@@ -428,21 +428,21 @@ struct batadv_bcast_packet {
* @coded_len: length of network coded part of the payload
*/
struct batadv_coded_packet {
- uint8_t packet_type;
- uint8_t version; /* batman version field */
- uint8_t ttl;
- uint8_t first_ttvn;
- /* uint8_t first_dest[ETH_ALEN]; - saved in mac header destination */
- uint8_t first_source[ETH_ALEN];
- uint8_t first_orig_dest[ETH_ALEN];
- __be32 first_crc;
- uint8_t second_ttl;
- uint8_t second_ttvn;
- uint8_t second_dest[ETH_ALEN];
- uint8_t second_source[ETH_ALEN];
- uint8_t second_orig_dest[ETH_ALEN];
- __be32 second_crc;
- __be16 coded_len;
+ u8 packet_type;
+ u8 version; /* batman version field */
+ u8 ttl;
+ u8 first_ttvn;
+ /* u8 first_dest[ETH_ALEN]; - saved in mac header destination */
+ u8 first_source[ETH_ALEN];
+ u8 first_orig_dest[ETH_ALEN];
+ __be32 first_crc;
+ u8 second_ttl;
+ u8 second_ttvn;
+ u8 second_dest[ETH_ALEN];
+ u8 second_source[ETH_ALEN];
+ u8 second_orig_dest[ETH_ALEN];
+ __be32 second_crc;
+ __be16 coded_len;
};
#pragma pack()
@@ -459,14 +459,14 @@ struct batadv_coded_packet {
* @align: 2 bytes to align the header to a 4 byte boundary
*/
struct batadv_unicast_tvlv_packet {
- uint8_t packet_type;
- uint8_t version; /* batman version field */
- uint8_t ttl;
- uint8_t reserved;
- uint8_t dst[ETH_ALEN];
- uint8_t src[ETH_ALEN];
- __be16 tvlv_len;
- uint16_t align;
+ u8 packet_type;
+ u8 version; /* batman version field */
+ u8 ttl;
+ u8 reserved;
+ u8 dst[ETH_ALEN];
+ u8 src[ETH_ALEN];
+ __be16 tvlv_len;
+ u16 align;
};
/**
@@ -476,9 +476,9 @@ struct batadv_unicast_tvlv_packet {
* @len: tvlv container length
*/
struct batadv_tvlv_hdr {
- uint8_t type;
- uint8_t version;
- __be16 len;
+ u8 type;
+ u8 version;
+ __be16 len;
};
/**
@@ -500,9 +500,9 @@ struct batadv_tvlv_gateway_data {
* one batadv_tvlv_tt_vlan_data object per announced vlan
*/
struct batadv_tvlv_tt_data {
- uint8_t flags;
- uint8_t ttvn;
- __be16 num_vlan;
+ u8 flags;
+ u8 ttvn;
+ __be16 num_vlan;
};
/**
@@ -513,9 +513,9 @@ struct batadv_tvlv_tt_data {
* @reserved: unused, useful for alignment purposes
*/
struct batadv_tvlv_tt_vlan_data {
- __be32 crc;
- __be16 vid;
- uint16_t reserved;
+ __be32 crc;
+ __be16 vid;
+ u16 reserved;
};
/**
@@ -527,9 +527,9 @@ struct batadv_tvlv_tt_vlan_data {
* @vid: VLAN identifier
*/
struct batadv_tvlv_tt_change {
- uint8_t flags;
- uint8_t reserved[3];
- uint8_t addr[ETH_ALEN];
+ u8 flags;
+ u8 reserved[3];
+ u8 addr[ETH_ALEN];
__be16 vid;
};
@@ -539,7 +539,7 @@ struct batadv_tvlv_tt_change {
* @vid: VLAN identifier
*/
struct batadv_tvlv_roam_adv {
- uint8_t client[ETH_ALEN];
+ u8 client[ETH_ALEN];
__be16 vid;
};
@@ -549,8 +549,8 @@ struct batadv_tvlv_roam_adv {
* @reserved: reserved field
*/
struct batadv_tvlv_mcast_data {
- uint8_t flags;
- uint8_t reserved[3];
+ u8 flags;
+ u8 reserved[3];
};
#endif /* _NET_BATMAN_ADV_PACKET_H_ */
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index c360c0cd19c2..8d990b070a2e 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -145,7 +145,7 @@ out:
* 0 if the packet is to be accepted
* 1 if the packet is to be ignored.
*/
-int batadv_window_protected(struct batadv_priv *bat_priv, int32_t seq_num_diff,
+int batadv_window_protected(struct batadv_priv *bat_priv, s32 seq_num_diff,
unsigned long *last_reset)
{
if (seq_num_diff <= -BATADV_TQ_LOCAL_WINDOW_SIZE ||
@@ -653,19 +653,19 @@ out:
static bool
batadv_reroute_unicast_packet(struct batadv_priv *bat_priv,
struct batadv_unicast_packet *unicast_packet,
- uint8_t *dst_addr, unsigned short vid)
+ u8 *dst_addr, unsigned short vid)
{
struct batadv_orig_node *orig_node = NULL;
struct batadv_hard_iface *primary_if = NULL;
bool ret = false;
- uint8_t *orig_addr, orig_ttvn;
+ u8 *orig_addr, orig_ttvn;
if (batadv_is_my_client(bat_priv, dst_addr, vid)) {
primary_if = batadv_primary_if_get_selected(bat_priv);
if (!primary_if)
goto out;
orig_addr = primary_if->net_dev->dev_addr;
- orig_ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn);
+ orig_ttvn = (u8)atomic_read(&bat_priv->tt.vn);
} else {
orig_node = batadv_transtable_search(bat_priv, NULL, dst_addr,
vid);
@@ -676,7 +676,7 @@ batadv_reroute_unicast_packet(struct batadv_priv *bat_priv,
goto out;
orig_addr = orig_node->orig;
- orig_ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn);
+ orig_ttvn = (u8)atomic_read(&orig_node->last_ttvn);
}
/* update the packet header */
@@ -698,7 +698,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
struct batadv_unicast_packet *unicast_packet;
struct batadv_hard_iface *primary_if;
struct batadv_orig_node *orig_node;
- uint8_t curr_ttvn, old_ttvn;
+ u8 curr_ttvn, old_ttvn;
struct ethhdr *ethhdr;
unsigned short vid;
int is_old_ttvn;
@@ -740,7 +740,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
* value is used later to check if the node which sent (or re-routed
* last time) the packet had an updated information or not
*/
- curr_ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn);
+ curr_ttvn = (u8)atomic_read(&bat_priv->tt.vn);
if (!batadv_is_my_mac(bat_priv, unicast_packet->dest)) {
orig_node = batadv_orig_hash_find(bat_priv,
unicast_packet->dest);
@@ -751,7 +751,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
if (!orig_node)
return 0;
- curr_ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn);
+ curr_ttvn = (u8)atomic_read(&orig_node->last_ttvn);
batadv_orig_node_free_ref(orig_node);
}
@@ -833,7 +833,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
struct batadv_unicast_packet *unicast_packet;
struct batadv_unicast_4addr_packet *unicast_4addr_packet;
- uint8_t *orig_addr;
+ u8 *orig_addr;
struct batadv_orig_node *orig_node = NULL;
int check, hdr_size = sizeof(*unicast_packet);
bool is4addr;
@@ -904,7 +904,7 @@ int batadv_recv_unicast_tvlv(struct sk_buff *skb,
struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
struct batadv_unicast_tvlv_packet *unicast_tvlv_packet;
unsigned char *tvlv_buff;
- uint16_t tvlv_buff_len;
+ u16 tvlv_buff_len;
int hdr_size = sizeof(*unicast_tvlv_packet);
int ret = NET_RX_DROP;
@@ -1007,8 +1007,8 @@ int batadv_recv_bcast_packet(struct sk_buff *skb,
struct ethhdr *ethhdr;
int hdr_size = sizeof(*bcast_packet);
int ret = NET_RX_DROP;
- int32_t seq_diff;
- uint32_t seqno;
+ s32 seq_diff;
+ u32 seqno;
/* drop packet if it has not necessary minimum size */
if (unlikely(!pskb_may_pull(skb, hdr_size)))
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index 6bc29d33abc1..204bbe4952a6 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -22,10 +22,6 @@
#include <linux/types.h>
-struct batadv_hard_iface;
-struct batadv_neigh_node;
-struct batadv_orig_node;
-struct batadv_priv;
struct sk_buff;
bool batadv_check_management_packet(struct sk_buff *skb,
@@ -55,7 +51,7 @@ struct batadv_neigh_node *
batadv_find_router(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
struct batadv_hard_iface *recv_if);
-int batadv_window_protected(struct batadv_priv *bat_priv, int32_t seq_num_diff,
+int batadv_window_protected(struct batadv_priv *bat_priv, s32 seq_num_diff,
unsigned long *last_reset);
#endif /* _NET_BATMAN_ADV_ROUTING_H_ */
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 191076ef1eca..f664324805eb 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -54,7 +54,7 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work);
*/
int batadv_send_skb_packet(struct sk_buff *skb,
struct batadv_hard_iface *hard_iface,
- const uint8_t *dst_addr)
+ const u8 *dst_addr)
{
struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
struct ethhdr *ethhdr;
@@ -172,7 +172,7 @@ batadv_send_skb_push_fill_unicast(struct sk_buff *skb, int hdr_size,
struct batadv_orig_node *orig_node)
{
struct batadv_unicast_packet *unicast_packet;
- uint8_t ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn);
+ u8 ttvn = (u8)atomic_read(&orig_node->last_ttvn);
if (batadv_skb_head_push(skb, hdr_size) < 0)
return false;
@@ -343,12 +343,12 @@ out:
*/
int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv,
struct sk_buff *skb, int packet_type,
- int packet_subtype, uint8_t *dst_hint,
+ int packet_subtype, u8 *dst_hint,
unsigned short vid)
{
struct ethhdr *ethhdr = (struct ethhdr *)skb->data;
struct batadv_orig_node *orig_node;
- uint8_t *src, *dst;
+ u8 *src, *dst;
src = ethhdr->h_source;
dst = ethhdr->h_dest;
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index 0536835fe503..82059f259e46 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -25,15 +25,12 @@
#include "packet.h"
-struct batadv_hard_iface;
-struct batadv_orig_node;
-struct batadv_priv;
struct sk_buff;
struct work_struct;
int batadv_send_skb_packet(struct sk_buff *skb,
struct batadv_hard_iface *hard_iface,
- const uint8_t *dst_addr);
+ const u8 *dst_addr);
int batadv_send_skb_to_orig(struct sk_buff *skb,
struct batadv_orig_node *orig_node,
struct batadv_hard_iface *recv_if);
@@ -56,7 +53,7 @@ int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
unsigned short vid);
int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv,
struct sk_buff *skb, int packet_type,
- int packet_subtype, uint8_t *dst_hint,
+ int packet_subtype, u8 *dst_hint,
unsigned short vid);
int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb,
unsigned short vid);
@@ -75,7 +72,7 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb,
* Returns NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
*/
static inline int batadv_send_skb_via_tt(struct batadv_priv *bat_priv,
- struct sk_buff *skb, uint8_t *dst_hint,
+ struct sk_buff *skb, u8 *dst_hint,
unsigned short vid)
{
return batadv_send_skb_via_tt_generic(bat_priv, skb, BATADV_UNICAST, 0,
@@ -100,7 +97,7 @@ static inline int batadv_send_skb_via_tt(struct batadv_priv *bat_priv,
static inline int batadv_send_skb_via_tt_4addr(struct batadv_priv *bat_priv,
struct sk_buff *skb,
int packet_subtype,
- uint8_t *dst_hint,
+ u8 *dst_hint,
unsigned short vid)
{
return batadv_send_skb_via_tt_generic(bat_priv, skb,
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 51cda3a7c51d..ac4d08de5df4 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -131,7 +131,7 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p)
struct batadv_priv *bat_priv = netdev_priv(dev);
struct batadv_softif_vlan *vlan;
struct sockaddr *addr = p;
- uint8_t old_addr[ETH_ALEN];
+ u8 old_addr[ETH_ALEN];
if (!is_valid_ether_addr(addr->sa_data))
return -EADDRNOTAVAIL;
@@ -186,19 +186,19 @@ static int batadv_interface_tx(struct sk_buff *skb,
struct batadv_hard_iface *primary_if = NULL;
struct batadv_bcast_packet *bcast_packet;
__be16 ethertype = htons(ETH_P_BATMAN);
- static const uint8_t stp_addr[ETH_ALEN] = {0x01, 0x80, 0xC2, 0x00,
- 0x00, 0x00};
- static const uint8_t ectp_addr[ETH_ALEN] = {0xCF, 0x00, 0x00, 0x00,
- 0x00, 0x00};
+ static const u8 stp_addr[ETH_ALEN] = {0x01, 0x80, 0xC2, 0x00,
+ 0x00, 0x00};
+ static const u8 ectp_addr[ETH_ALEN] = {0xCF, 0x00, 0x00, 0x00,
+ 0x00, 0x00};
enum batadv_dhcp_recipient dhcp_rcp = BATADV_DHCP_NO;
- uint8_t *dst_hint = NULL, chaddr[ETH_ALEN];
+ u8 *dst_hint = NULL, chaddr[ETH_ALEN];
struct vlan_ethhdr *vhdr;
unsigned int header_len = 0;
int data_len = skb->len, ret;
unsigned long brd_delay = 1;
bool do_bcast = false, client_added;
unsigned short vid;
- uint32_t seqno;
+ u32 seqno;
int gw_mode;
enum batadv_forw_mode forw_mode;
struct batadv_orig_node *mcast_single_orig = NULL;
@@ -750,9 +750,9 @@ static void batadv_softif_destroy_finish(struct work_struct *work)
static int batadv_softif_init_late(struct net_device *dev)
{
struct batadv_priv *bat_priv;
- uint32_t random_seqno;
+ u32 random_seqno;
int ret;
- size_t cnt_len = sizeof(uint64_t) * BATADV_CNT_NUM;
+ size_t cnt_len = sizeof(u64) * BATADV_CNT_NUM;
batadv_set_lockdep_class(dev);
@@ -763,7 +763,7 @@ static int batadv_softif_init_late(struct net_device *dev)
/* batadv_interface_stats() needs to be available as soon as
* register_netdevice() has been called
*/
- bat_priv->bat_counters = __alloc_percpu(cnt_len, __alignof__(uint64_t));
+ bat_priv->bat_counters = __alloc_percpu(cnt_len, __alignof__(u64));
if (!bat_priv->bat_counters)
return -ENOMEM;
@@ -941,14 +941,12 @@ static void batadv_softif_init_early(struct net_device *dev)
dev->netdev_ops = &batadv_netdev_ops;
dev->destructor = batadv_softif_free;
dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
- dev->tx_queue_len = 0;
+ dev->priv_flags |= IFF_NO_QUEUE;
/* can't call min_mtu, because the needed variables
* have not been initialized yet
*/
dev->mtu = ETH_DATA_LEN;
- /* reserve more space in the skbuff for our header */
- dev->hard_header_len = batadv_max_header_len();
/* generate random address */
eth_hw_addr_random(dev);
@@ -1117,8 +1115,7 @@ static const struct {
#endif
};
-static void batadv_get_strings(struct net_device *dev, uint32_t stringset,
- uint8_t *data)
+static void batadv_get_strings(struct net_device *dev, u32 stringset, u8 *data)
{
if (stringset == ETH_SS_STATS)
memcpy(data, batadv_counters_strings,
@@ -1126,8 +1123,7 @@ static void batadv_get_strings(struct net_device *dev, uint32_t stringset,
}
static void batadv_get_ethtool_stats(struct net_device *dev,
- struct ethtool_stats *stats,
- uint64_t *data)
+ struct ethtool_stats *stats, u64 *data)
{
struct batadv_priv *bat_priv = netdev_priv(dev);
int i;
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
index 578e8a663c30..8e82176f40b1 100644
--- a/net/batman-adv/soft-interface.h
+++ b/net/batman-adv/soft-interface.h
@@ -22,10 +22,6 @@
#include <net/rtnetlink.h>
-struct batadv_hard_iface;
-struct batadv_orig_node;
-struct batadv_priv;
-struct batadv_softif_vlan;
struct net_device;
struct sk_buff;
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index d6a312a82c03..9de3c8804ff4 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -457,7 +457,7 @@ static ssize_t batadv_show_gw_bwidth(struct kobject *kobj,
struct attribute *attr, char *buff)
{
struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
- uint32_t down, up;
+ u32 down, up;
down = atomic_read(&bat_priv->gw.bandwidth_down);
up = atomic_read(&bat_priv->gw.bandwidth_up);
@@ -512,7 +512,7 @@ static ssize_t batadv_store_isolation_mark(struct kobject *kobj,
{
struct net_device *net_dev = batadv_kobj_to_netdev(kobj);
struct batadv_priv *bat_priv = netdev_priv(net_dev);
- uint32_t mark, mask;
+ u32 mark, mask;
char *mask_ptr;
/* parse the mask if it has been specified, otherwise assume the mask is
diff --git a/net/batman-adv/sysfs.h b/net/batman-adv/sysfs.h
index 2294583f7cf9..61974428a7af 100644
--- a/net/batman-adv/sysfs.h
+++ b/net/batman-adv/sysfs.h
@@ -23,8 +23,6 @@
#include <linux/sysfs.h>
#include <linux/types.h>
-struct batadv_priv;
-struct batadv_softif_vlan;
struct kobject;
struct net_device;
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index db06de20d996..4228b10c47ea 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -56,7 +56,7 @@
static struct lock_class_key batadv_tt_local_hash_lock_class_key;
static struct lock_class_key batadv_tt_global_hash_lock_class_key;
-static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client,
+static void batadv_send_roam_adv(struct batadv_priv *bat_priv, u8 *client,
unsigned short vid,
struct batadv_orig_node *orig_node);
static void batadv_tt_purge(struct work_struct *work);
@@ -85,10 +85,10 @@ static int batadv_compare_tt(const struct hlist_node *node, const void *data2)
* Returns the hash index where the object represented by 'data' should be
* stored at.
*/
-static inline uint32_t batadv_choose_tt(const void *data, uint32_t size)
+static inline u32 batadv_choose_tt(const void *data, u32 size)
{
struct batadv_tt_common_entry *tt;
- uint32_t hash = 0;
+ u32 hash = 0;
tt = (struct batadv_tt_common_entry *)data;
hash = jhash(&tt->addr, ETH_ALEN, hash);
@@ -107,12 +107,12 @@ static inline uint32_t batadv_choose_tt(const void *data, uint32_t size)
* found, NULL otherwise.
*/
static struct batadv_tt_common_entry *
-batadv_tt_hash_find(struct batadv_hashtable *hash, const uint8_t *addr,
+batadv_tt_hash_find(struct batadv_hashtable *hash, const u8 *addr,
unsigned short vid)
{
struct hlist_head *head;
struct batadv_tt_common_entry to_search, *tt, *tt_tmp = NULL;
- uint32_t index;
+ u32 index;
if (!hash)
return NULL;
@@ -152,7 +152,7 @@ batadv_tt_hash_find(struct batadv_hashtable *hash, const uint8_t *addr,
* found, NULL otherwise.
*/
static struct batadv_tt_local_entry *
-batadv_tt_local_hash_find(struct batadv_priv *bat_priv, const uint8_t *addr,
+batadv_tt_local_hash_find(struct batadv_priv *bat_priv, const u8 *addr,
unsigned short vid)
{
struct batadv_tt_common_entry *tt_common_entry;
@@ -177,7 +177,7 @@ batadv_tt_local_hash_find(struct batadv_priv *bat_priv, const uint8_t *addr,
* is found, NULL otherwise.
*/
static struct batadv_tt_global_entry *
-batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const uint8_t *addr,
+batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const u8 *addr,
unsigned short vid)
{
struct batadv_tt_common_entry *tt_common_entry;
@@ -223,7 +223,7 @@ batadv_tt_global_entry_free_ref(struct batadv_tt_global_entry *tt_global_entry)
* (excluding ourself).
*/
int batadv_tt_global_hash_count(struct batadv_priv *bat_priv,
- const uint8_t *addr, unsigned short vid)
+ const u8 *addr, unsigned short vid)
{
struct batadv_tt_global_entry *tt_global_entry;
int count;
@@ -315,7 +315,7 @@ static void batadv_tt_global_size_mod(struct batadv_orig_node *orig_node,
if (atomic_add_return(v, &vlan->tt.num_entries) == 0) {
spin_lock_bh(&orig_node->vlan_list_lock);
- list_del_rcu(&vlan->list);
+ hlist_del_init_rcu(&vlan->list);
spin_unlock_bh(&orig_node->vlan_list_lock);
batadv_orig_node_vlan_free_ref(vlan);
}
@@ -364,11 +364,11 @@ batadv_tt_orig_list_entry_free_ref(struct batadv_tt_orig_list_entry *orig_entry)
*/
static void batadv_tt_local_event(struct batadv_priv *bat_priv,
struct batadv_tt_local_entry *tt_local_entry,
- uint8_t event_flags)
+ u8 event_flags)
{
struct batadv_tt_change_node *tt_change_node, *entry, *safe;
struct batadv_tt_common_entry *common = &tt_local_entry->common;
- uint8_t flags = common->flags | event_flags;
+ u8 flags = common->flags | event_flags;
bool event_removed = false;
bool del_op_requested, del_op_entry;
@@ -448,7 +448,7 @@ static int batadv_tt_len(int changes_num)
*
* Returns the number of entries.
*/
-static uint16_t batadv_tt_entries(uint16_t tt_len)
+static u16 batadv_tt_entries(u16 tt_len)
{
return tt_len / batadv_tt_len(1);
}
@@ -462,7 +462,8 @@ static uint16_t batadv_tt_entries(uint16_t tt_len)
*/
static int batadv_tt_local_table_transmit_size(struct batadv_priv *bat_priv)
{
- uint16_t num_vlan = 0, tt_local_entries = 0;
+ u16 num_vlan = 0;
+ u16 tt_local_entries = 0;
struct batadv_softif_vlan *vlan;
int hdr_size;
@@ -525,8 +526,8 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv,
*
* Returns true if the client was successfully added, false otherwise.
*/
-bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
- unsigned short vid, int ifindex, uint32_t mark)
+bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
+ unsigned short vid, int ifindex, u32 mark)
{
struct batadv_priv *bat_priv = netdev_priv(soft_iface);
struct batadv_tt_local_entry *tt_local;
@@ -536,9 +537,10 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
struct hlist_head *head;
struct batadv_tt_orig_list_entry *orig_entry;
int hash_added, table_size, packet_size_max;
- bool ret = false, roamed_back = false;
- uint8_t remote_flags;
- uint32_t match_mark;
+ bool ret = false;
+ bool roamed_back = false;
+ u8 remote_flags;
+ u32 match_mark;
if (ifindex != BATADV_NULL_IFINDEX)
in_dev = dev_get_by_index(&init_net, ifindex);
@@ -596,13 +598,16 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
/* increase the refcounter of the related vlan */
vlan = batadv_softif_vlan_get(bat_priv, vid);
if (WARN(!vlan, "adding TT local entry %pM to non-existent VLAN %d",
- addr, BATADV_PRINT_VID(vid)))
+ addr, BATADV_PRINT_VID(vid))) {
+ kfree(tt_local);
+ tt_local = NULL;
goto out;
+ }
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Creating new local tt entry: %pM (vid: %d, ttvn: %d)\n",
addr, BATADV_PRINT_VID(vid),
- (uint8_t)atomic_read(&bat_priv->tt.vn));
+ (u8)atomic_read(&bat_priv->tt.vn));
ether_addr_copy(tt_local->common.addr, addr);
/* The local entry has to be marked as NEW to avoid to send it in
@@ -721,19 +726,22 @@ out:
*
* Return the size of the allocated buffer or 0 in case of failure.
*/
-static uint16_t
+static u16
batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node,
struct batadv_tvlv_tt_data **tt_data,
struct batadv_tvlv_tt_change **tt_change,
- int32_t *tt_len)
+ s32 *tt_len)
{
- uint16_t num_vlan = 0, num_entries = 0, change_offset, tvlv_len;
+ u16 num_vlan = 0;
+ u16 num_entries = 0;
+ u16 change_offset;
+ u16 tvlv_len;
struct batadv_tvlv_tt_vlan_data *tt_vlan;
struct batadv_orig_node_vlan *vlan;
- uint8_t *tt_change_ptr;
+ u8 *tt_change_ptr;
rcu_read_lock();
- list_for_each_entry_rcu(vlan, &orig_node->vlan_list, list) {
+ hlist_for_each_entry_rcu(vlan, &orig_node->vlan_list, list) {
num_vlan++;
num_entries += atomic_read(&vlan->tt.num_entries);
}
@@ -759,14 +767,14 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node,
(*tt_data)->num_vlan = htons(num_vlan);
tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(*tt_data + 1);
- list_for_each_entry_rcu(vlan, &orig_node->vlan_list, list) {
+ hlist_for_each_entry_rcu(vlan, &orig_node->vlan_list, list) {
tt_vlan->vid = htons(vlan->vid);
tt_vlan->crc = htonl(vlan->tt.crc);
tt_vlan++;
}
- tt_change_ptr = (uint8_t *)*tt_data + change_offset;
+ tt_change_ptr = (u8 *)*tt_data + change_offset;
*tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr;
out:
@@ -792,16 +800,18 @@ out:
*
* Return the size of the allocated buffer or 0 in case of failure.
*/
-static uint16_t
+static u16
batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv,
struct batadv_tvlv_tt_data **tt_data,
struct batadv_tvlv_tt_change **tt_change,
- int32_t *tt_len)
+ s32 *tt_len)
{
struct batadv_tvlv_tt_vlan_data *tt_vlan;
struct batadv_softif_vlan *vlan;
- uint16_t num_vlan = 0, num_entries = 0, tvlv_len;
- uint8_t *tt_change_ptr;
+ u16 num_vlan = 0;
+ u16 num_entries = 0;
+ u16 tvlv_len;
+ u8 *tt_change_ptr;
int change_offset;
rcu_read_lock();
@@ -838,7 +848,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv,
tt_vlan++;
}
- tt_change_ptr = (uint8_t *)*tt_data + change_offset;
+ tt_change_ptr = (u8 *)*tt_data + change_offset;
*tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr;
out:
@@ -857,8 +867,9 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv)
struct batadv_tvlv_tt_data *tt_data;
struct batadv_tvlv_tt_change *tt_change;
int tt_diff_len, tt_change_len = 0;
- int tt_diff_entries_num = 0, tt_diff_entries_count = 0;
- uint16_t tvlv_len;
+ int tt_diff_entries_num = 0;
+ int tt_diff_entries_count = 0;
+ u16 tvlv_len;
tt_diff_entries_num = atomic_read(&bat_priv->tt.local_changes);
tt_diff_len = batadv_tt_len(tt_diff_entries_num);
@@ -932,12 +943,12 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
struct batadv_softif_vlan *vlan;
struct hlist_head *head;
unsigned short vid;
- uint32_t i;
+ u32 i;
int last_seen_secs;
int last_seen_msecs;
unsigned long last_seen_jiffies;
bool no_purge;
- uint16_t np_flag = BATADV_TT_CLIENT_NOPURGE;
+ u16 np_flag = BATADV_TT_CLIENT_NOPURGE;
primary_if = batadv_seq_print_text_primary_if_get(seq);
if (!primary_if)
@@ -945,7 +956,7 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
seq_printf(seq,
"Locally retrieved addresses (from %s) announced via TT (TTVN: %u):\n",
- net_dev->name, (uint8_t)atomic_read(&bat_priv->tt.vn));
+ net_dev->name, (u8)atomic_read(&bat_priv->tt.vn));
seq_printf(seq, " %-13s %s %-8s %-9s (%-10s)\n", "Client", "VID",
"Flags", "Last seen", "CRC");
@@ -1005,7 +1016,7 @@ out:
static void
batadv_tt_local_set_pending(struct batadv_priv *bat_priv,
struct batadv_tt_local_entry *tt_local_entry,
- uint16_t flags, const char *message)
+ u16 flags, const char *message)
{
batadv_tt_local_event(bat_priv, tt_local_entry, flags);
@@ -1031,12 +1042,12 @@ batadv_tt_local_set_pending(struct batadv_priv *bat_priv,
*
* Returns the flags assigned to the local entry before being deleted
*/
-uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv,
- const uint8_t *addr, unsigned short vid,
- const char *message, bool roaming)
+u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr,
+ unsigned short vid, const char *message,
+ bool roaming)
{
struct batadv_tt_local_entry *tt_local_entry;
- uint16_t flags, curr_flags = BATADV_NO_FLAGS;
+ u16 flags, curr_flags = BATADV_NO_FLAGS;
struct batadv_softif_vlan *vlan;
void *tt_entry_exists;
@@ -1139,7 +1150,7 @@ static void batadv_tt_local_purge(struct batadv_priv *bat_priv,
struct batadv_hashtable *hash = bat_priv->tt.local_hash;
struct hlist_head *head;
spinlock_t *list_lock; /* protects write access to the hash lists */
- uint32_t i;
+ u32 i;
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
@@ -1160,7 +1171,7 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv)
struct batadv_softif_vlan *vlan;
struct hlist_node *node_tmp;
struct hlist_head *head;
- uint32_t i;
+ u32 i;
if (!bat_priv->tt.local_hash)
return;
@@ -1335,15 +1346,14 @@ out:
static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
const unsigned char *tt_addr,
- unsigned short vid, uint16_t flags,
- uint8_t ttvn)
+ unsigned short vid, u16 flags, u8 ttvn)
{
struct batadv_tt_global_entry *tt_global_entry;
struct batadv_tt_local_entry *tt_local_entry;
bool ret = false;
int hash_added;
struct batadv_tt_common_entry *common;
- uint16_t local_flags;
+ u16 local_flags;
/* ignore global entries from backbone nodes */
if (batadv_bla_is_backbone_gw_orig(bat_priv, orig_node->orig, vid))
@@ -1540,8 +1550,8 @@ batadv_tt_global_print_entry(struct batadv_priv *bat_priv,
struct batadv_tt_common_entry *tt_common_entry;
struct batadv_orig_node_vlan *vlan;
struct hlist_head *head;
- uint8_t last_ttvn;
- uint16_t flags;
+ u8 last_ttvn;
+ u16 flags;
tt_common_entry = &tt_global_entry->common;
flags = tt_common_entry->flags;
@@ -1615,7 +1625,7 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset)
struct batadv_tt_global_entry *tt_global;
struct batadv_hard_iface *primary_if;
struct hlist_head *head;
- uint32_t i;
+ u32 i;
primary_if = batadv_seq_print_text_primary_if_get(seq);
if (!primary_if)
@@ -1648,20 +1658,28 @@ out:
}
/**
- * batadv_tt_global_del_orig_entry - remove and free an orig_entry
+ * _batadv_tt_global_del_orig_entry - remove and free an orig_entry
* @tt_global_entry: the global entry to remove the orig_entry from
* @orig_entry: the orig entry to remove and free
*
* Remove an orig_entry from its list in the given tt_global_entry and
* free this orig_entry afterwards.
+ *
+ * Caller must hold tt_global_entry->list_lock and ensure orig_entry->list is
+ * part of a list.
*/
static void
-batadv_tt_global_del_orig_entry(struct batadv_tt_global_entry *tt_global_entry,
- struct batadv_tt_orig_list_entry *orig_entry)
+_batadv_tt_global_del_orig_entry(struct batadv_tt_global_entry *tt_global_entry,
+ struct batadv_tt_orig_list_entry *orig_entry)
{
+ lockdep_assert_held(&tt_global_entry->list_lock);
+
batadv_tt_global_size_dec(orig_entry->orig_node,
tt_global_entry->common.vid);
atomic_dec(&tt_global_entry->orig_list_count);
+ /* requires holding tt_global_entry->list_lock and orig_entry->list
+ * being part of a list
+ */
hlist_del_rcu(&orig_entry->list);
batadv_tt_orig_list_entry_free_ref(orig_entry);
}
@@ -1677,7 +1695,7 @@ batadv_tt_global_del_orig_list(struct batadv_tt_global_entry *tt_global_entry)
spin_lock_bh(&tt_global_entry->list_lock);
head = &tt_global_entry->orig_list;
hlist_for_each_entry_safe(orig_entry, safe, head, list)
- batadv_tt_global_del_orig_entry(tt_global_entry, orig_entry);
+ _batadv_tt_global_del_orig_entry(tt_global_entry, orig_entry);
spin_unlock_bh(&tt_global_entry->list_lock);
}
@@ -1712,8 +1730,8 @@ batadv_tt_global_del_orig_node(struct batadv_priv *bat_priv,
orig_node->orig,
tt_global_entry->common.addr,
BATADV_PRINT_VID(vid), message);
- batadv_tt_global_del_orig_entry(tt_global_entry,
- orig_entry);
+ _batadv_tt_global_del_orig_entry(tt_global_entry,
+ orig_entry);
}
}
spin_unlock_bh(&tt_global_entry->list_lock);
@@ -1835,12 +1853,12 @@ out:
*/
void batadv_tt_global_del_orig(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
- int32_t match_vid,
+ s32 match_vid,
const char *message)
{
struct batadv_tt_global_entry *tt_global;
struct batadv_tt_common_entry *tt_common_entry;
- uint32_t i;
+ u32 i;
struct batadv_hashtable *hash = bat_priv->tt.global_hash;
struct hlist_node *safe;
struct hlist_head *head;
@@ -1911,7 +1929,7 @@ static void batadv_tt_global_purge(struct batadv_priv *bat_priv)
struct hlist_head *head;
struct hlist_node *node_tmp;
spinlock_t *list_lock; /* protects write access to the hash lists */
- uint32_t i;
+ u32 i;
char *msg = NULL;
struct batadv_tt_common_entry *tt_common;
struct batadv_tt_global_entry *tt_global;
@@ -1952,7 +1970,7 @@ static void batadv_tt_global_table_free(struct batadv_priv *bat_priv)
struct batadv_tt_global_entry *tt_global;
struct hlist_node *node_tmp;
struct hlist_head *head;
- uint32_t i;
+ u32 i;
if (!bat_priv->tt.global_hash)
return;
@@ -2013,8 +2031,8 @@ _batadv_is_ap_isolated(struct batadv_tt_local_entry *tt_local_entry,
* If the two clients are AP isolated the function returns NULL.
*/
struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv,
- const uint8_t *src,
- const uint8_t *addr,
+ const u8 *src,
+ const u8 *addr,
unsigned short vid)
{
struct batadv_tt_local_entry *tt_local_entry = NULL;
@@ -2082,16 +2100,16 @@ out:
*
* Returns the checksum of the global table of a given originator.
*/
-static uint32_t batadv_tt_global_crc(struct batadv_priv *bat_priv,
- struct batadv_orig_node *orig_node,
- unsigned short vid)
+static u32 batadv_tt_global_crc(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig_node,
+ unsigned short vid)
{
struct batadv_hashtable *hash = bat_priv->tt.global_hash;
struct batadv_tt_common_entry *tt_common;
struct batadv_tt_global_entry *tt_global;
struct hlist_head *head;
- uint32_t i, crc_tmp, crc = 0;
- uint8_t flags;
+ u32 i, crc_tmp, crc = 0;
+ u8 flags;
__be16 tmp_vid;
for (i = 0; i < hash->size; i++) {
@@ -2159,14 +2177,14 @@ static uint32_t batadv_tt_global_crc(struct batadv_priv *bat_priv,
*
* Returns the checksum of the local table
*/
-static uint32_t batadv_tt_local_crc(struct batadv_priv *bat_priv,
- unsigned short vid)
+static u32 batadv_tt_local_crc(struct batadv_priv *bat_priv,
+ unsigned short vid)
{
struct batadv_hashtable *hash = bat_priv->tt.local_hash;
struct batadv_tt_common_entry *tt_common;
struct hlist_head *head;
- uint32_t i, crc_tmp, crc = 0;
- uint8_t flags;
+ u32 i, crc_tmp, crc = 0;
+ u8 flags;
__be16 tmp_vid;
for (i = 0; i < hash->size; i++) {
@@ -2208,12 +2226,13 @@ static uint32_t batadv_tt_local_crc(struct batadv_priv *bat_priv,
static void batadv_tt_req_list_free(struct batadv_priv *bat_priv)
{
- struct batadv_tt_req_node *node, *safe;
+ struct batadv_tt_req_node *node;
+ struct hlist_node *safe;
spin_lock_bh(&bat_priv->tt.req_list_lock);
- list_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) {
- list_del_init(&node->list);
+ hlist_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) {
+ hlist_del_init(&node->list);
kfree(node);
}
@@ -2223,7 +2242,7 @@ static void batadv_tt_req_list_free(struct batadv_priv *bat_priv)
static void batadv_tt_save_orig_buffer(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
const void *tt_buff,
- uint16_t tt_buff_len)
+ u16 tt_buff_len)
{
/* Replace the old buffer only if I received something in the
* last OGM (the OGM could carry no changes)
@@ -2243,30 +2262,36 @@ static void batadv_tt_save_orig_buffer(struct batadv_priv *bat_priv,
static void batadv_tt_req_purge(struct batadv_priv *bat_priv)
{
- struct batadv_tt_req_node *node, *safe;
+ struct batadv_tt_req_node *node;
+ struct hlist_node *safe;
spin_lock_bh(&bat_priv->tt.req_list_lock);
- list_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) {
+ hlist_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) {
if (batadv_has_timed_out(node->issued_at,
BATADV_TT_REQUEST_TIMEOUT)) {
- list_del_init(&node->list);
+ hlist_del_init(&node->list);
kfree(node);
}
}
spin_unlock_bh(&bat_priv->tt.req_list_lock);
}
-/* returns the pointer to the new tt_req_node struct if no request
- * has already been issued for this orig_node, NULL otherwise
+/**
+ * batadv_tt_req_node_new - search and possibly create a tt_req_node object
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig_node: orig node this request is being issued for
+ *
+ * Returns the pointer to the new tt_req_node struct if no request
+ * has already been issued for this orig_node, NULL otherwise.
*/
static struct batadv_tt_req_node *
-batadv_new_tt_req_node(struct batadv_priv *bat_priv,
+batadv_tt_req_node_new(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node)
{
struct batadv_tt_req_node *tt_req_node_tmp, *tt_req_node = NULL;
spin_lock_bh(&bat_priv->tt.req_list_lock);
- list_for_each_entry(tt_req_node_tmp, &bat_priv->tt.req_list, list) {
+ hlist_for_each_entry(tt_req_node_tmp, &bat_priv->tt.req_list, list) {
if (batadv_compare_eth(tt_req_node_tmp, orig_node) &&
!batadv_has_timed_out(tt_req_node_tmp->issued_at,
BATADV_TT_REQUEST_TIMEOUT))
@@ -2280,7 +2305,7 @@ batadv_new_tt_req_node(struct batadv_priv *bat_priv,
ether_addr_copy(tt_req_node->addr, orig_node->orig);
tt_req_node->issued_at = jiffies;
- list_add(&tt_req_node->list, &bat_priv->tt.req_list);
+ hlist_add_head(&tt_req_node->list, &bat_priv->tt.req_list);
unlock:
spin_unlock_bh(&bat_priv->tt.req_list_lock);
return tt_req_node;
@@ -2332,15 +2357,15 @@ static int batadv_tt_global_valid(const void *entry_ptr,
*/
static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv,
struct batadv_hashtable *hash,
- void *tvlv_buff, uint16_t tt_len,
+ void *tvlv_buff, u16 tt_len,
int (*valid_cb)(const void *, const void *),
void *cb_data)
{
struct batadv_tt_common_entry *tt_common_entry;
struct batadv_tvlv_tt_change *tt_change;
struct hlist_head *head;
- uint16_t tt_tot, tt_num_entries = 0;
- uint32_t i;
+ u16 tt_tot, tt_num_entries = 0;
+ u32 i;
tt_tot = batadv_tt_entries(tt_len);
tt_change = (struct batadv_tvlv_tt_change *)tvlv_buff;
@@ -2382,11 +2407,11 @@ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv,
*/
static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node,
struct batadv_tvlv_tt_vlan_data *tt_vlan,
- uint16_t num_vlan)
+ u16 num_vlan)
{
struct batadv_tvlv_tt_vlan_data *tt_vlan_tmp;
struct batadv_orig_node_vlan *vlan;
- uint32_t crc;
+ u32 crc;
int i;
/* check if each received CRC matches the locally stored one */
@@ -2441,11 +2466,11 @@ static void batadv_tt_global_update_crc(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node)
{
struct batadv_orig_node_vlan *vlan;
- uint32_t crc;
+ u32 crc;
/* recompute the global CRC for each VLAN */
rcu_read_lock();
- list_for_each_entry_rcu(vlan, &orig_node->vlan_list, list) {
+ hlist_for_each_entry_rcu(vlan, &orig_node->vlan_list, list) {
/* if orig_node is a backbone node for this VLAN, don't compute
* the CRC as we ignore all the global entries over it
*/
@@ -2471,9 +2496,9 @@ static void batadv_tt_global_update_crc(struct batadv_priv *bat_priv,
*/
static int batadv_send_tt_request(struct batadv_priv *bat_priv,
struct batadv_orig_node *dst_orig_node,
- uint8_t ttvn,
+ u8 ttvn,
struct batadv_tvlv_tt_vlan_data *tt_vlan,
- uint16_t num_vlan, bool full_table)
+ u16 num_vlan, bool full_table)
{
struct batadv_tvlv_tt_data *tvlv_tt_data = NULL;
struct batadv_tt_req_node *tt_req_node = NULL;
@@ -2489,7 +2514,7 @@ static int batadv_send_tt_request(struct batadv_priv *bat_priv,
/* The new tt_req will be issued only if I'm not waiting for a
* reply from the same orig_node yet
*/
- tt_req_node = batadv_new_tt_req_node(bat_priv, dst_orig_node);
+ tt_req_node = batadv_tt_req_node_new(bat_priv, dst_orig_node);
if (!tt_req_node)
goto out;
@@ -2531,8 +2556,8 @@ out:
batadv_hardif_free_ref(primary_if);
if (ret && tt_req_node) {
spin_lock_bh(&bat_priv->tt.req_list_lock);
- /* list_del_init() verifies tt_req_node still is in the list */
- list_del_init(&tt_req_node->list);
+ /* hlist_del_init() verifies tt_req_node still is in the list */
+ hlist_del_init(&tt_req_node->list);
spin_unlock_bh(&bat_priv->tt.req_list_lock);
kfree(tt_req_node);
}
@@ -2552,7 +2577,7 @@ out:
*/
static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv,
struct batadv_tvlv_tt_data *tt_data,
- uint8_t *req_src, uint8_t *req_dst)
+ u8 *req_src, u8 *req_dst)
{
struct batadv_orig_node *req_dst_orig_node;
struct batadv_orig_node *res_dst_orig_node = NULL;
@@ -2560,9 +2585,9 @@ static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv,
struct batadv_tvlv_tt_data *tvlv_tt_data = NULL;
struct batadv_tvlv_tt_vlan_data *tt_vlan;
bool ret = false, full_table;
- uint8_t orig_ttvn, req_ttvn;
- uint16_t tvlv_len;
- int32_t tt_len;
+ u8 orig_ttvn, req_ttvn;
+ u16 tvlv_len;
+ s32 tt_len;
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Received TT_REQUEST from %pM for ttvn: %u (%pM) [%c]\n",
@@ -2578,7 +2603,7 @@ static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv,
if (!res_dst_orig_node)
goto out;
- orig_ttvn = (uint8_t)atomic_read(&req_dst_orig_node->last_ttvn);
+ orig_ttvn = (u8)atomic_read(&req_dst_orig_node->last_ttvn);
req_ttvn = tt_data->ttvn;
tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(tt_data + 1);
@@ -2684,16 +2709,16 @@ out:
*/
static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv,
struct batadv_tvlv_tt_data *tt_data,
- uint8_t *req_src)
+ u8 *req_src)
{
struct batadv_tvlv_tt_data *tvlv_tt_data = NULL;
struct batadv_hard_iface *primary_if = NULL;
struct batadv_tvlv_tt_change *tt_change;
struct batadv_orig_node *orig_node;
- uint8_t my_ttvn, req_ttvn;
- uint16_t tvlv_len;
+ u8 my_ttvn, req_ttvn;
+ u16 tvlv_len;
bool full_table;
- int32_t tt_len;
+ s32 tt_len;
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Received TT_REQUEST from %pM for ttvn: %u (me) [%c]\n",
@@ -2702,7 +2727,7 @@ static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv,
spin_lock_bh(&bat_priv->tt.commit_lock);
- my_ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn);
+ my_ttvn = (u8)atomic_read(&bat_priv->tt.vn);
req_ttvn = tt_data->ttvn;
orig_node = batadv_orig_hash_find(bat_priv, req_src);
@@ -2741,7 +2766,7 @@ static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv,
bat_priv->tt.last_changeset_len);
spin_unlock_bh(&bat_priv->tt.last_changeset_lock);
} else {
- req_ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn);
+ req_ttvn = (u8)atomic_read(&bat_priv->tt.vn);
/* allocate the tvlv, put the tt_data and all the tt_vlan_data
* in the initial part
@@ -2802,7 +2827,7 @@ out:
*/
static bool batadv_send_tt_response(struct batadv_priv *bat_priv,
struct batadv_tvlv_tt_data *tt_data,
- uint8_t *req_src, uint8_t *req_dst)
+ u8 *req_src, u8 *req_dst)
{
if (batadv_is_my_mac(bat_priv, req_dst))
return batadv_send_my_tt_response(bat_priv, tt_data, req_src);
@@ -2813,7 +2838,7 @@ static bool batadv_send_tt_response(struct batadv_priv *bat_priv,
static void _batadv_tt_update_changes(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
struct batadv_tvlv_tt_change *tt_change,
- uint16_t tt_num_changes, uint8_t ttvn)
+ u16 tt_num_changes, u8 ttvn)
{
int i;
int roams;
@@ -2845,8 +2870,8 @@ static void _batadv_tt_update_changes(struct batadv_priv *bat_priv,
static void batadv_tt_fill_gtable(struct batadv_priv *bat_priv,
struct batadv_tvlv_tt_change *tt_change,
- uint8_t ttvn, uint8_t *resp_src,
- uint16_t num_entries)
+ u8 ttvn, u8 *resp_src,
+ u16 num_entries)
{
struct batadv_orig_node *orig_node;
@@ -2876,7 +2901,7 @@ out:
static void batadv_tt_update_changes(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
- uint16_t tt_num_changes, uint8_t ttvn,
+ u16 tt_num_changes, u8 ttvn,
struct batadv_tvlv_tt_change *tt_change)
{
_batadv_tt_update_changes(bat_priv, orig_node, tt_change,
@@ -2895,7 +2920,7 @@ static void batadv_tt_update_changes(struct batadv_priv *bat_priv,
*
* Returns true if the client is served by this node, false otherwise.
*/
-bool batadv_is_my_client(struct batadv_priv *bat_priv, const uint8_t *addr,
+bool batadv_is_my_client(struct batadv_priv *bat_priv, const u8 *addr,
unsigned short vid)
{
struct batadv_tt_local_entry *tt_local_entry;
@@ -2926,13 +2951,14 @@ out:
*/
static void batadv_handle_tt_response(struct batadv_priv *bat_priv,
struct batadv_tvlv_tt_data *tt_data,
- uint8_t *resp_src, uint16_t num_entries)
+ u8 *resp_src, u16 num_entries)
{
- struct batadv_tt_req_node *node, *safe;
+ struct batadv_tt_req_node *node;
+ struct hlist_node *safe;
struct batadv_orig_node *orig_node = NULL;
struct batadv_tvlv_tt_change *tt_change;
- uint8_t *tvlv_ptr = (uint8_t *)tt_data;
- uint16_t change_offset;
+ u8 *tvlv_ptr = (u8 *)tt_data;
+ u16 change_offset;
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Received TT_RESPONSE from %pM for ttvn %d t_size: %d [%c]\n",
@@ -2966,10 +2992,10 @@ static void batadv_handle_tt_response(struct batadv_priv *bat_priv,
/* Delete the tt_req_node from pending tt_requests list */
spin_lock_bh(&bat_priv->tt.req_list_lock);
- list_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) {
+ hlist_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) {
if (!batadv_compare_eth(node->addr, resp_src))
continue;
- list_del_init(&node->list);
+ hlist_del_init(&node->list);
kfree(node);
}
@@ -3015,8 +3041,7 @@ static void batadv_tt_roam_purge(struct batadv_priv *bat_priv)
*
* returns true if the ROAMING_ADV can be sent, false otherwise
*/
-static bool batadv_tt_check_roam_count(struct batadv_priv *bat_priv,
- uint8_t *client)
+static bool batadv_tt_check_roam_count(struct batadv_priv *bat_priv, u8 *client)
{
struct batadv_tt_roam_node *tt_roam_node;
bool ret = false;
@@ -3071,7 +3096,7 @@ unlock:
* for this particular roamed client has to be forwarded to the sender of the
* roaming message.
*/
-static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client,
+static void batadv_send_roam_adv(struct batadv_priv *bat_priv, u8 *client,
unsigned short vid,
struct batadv_orig_node *orig_node)
{
@@ -3149,14 +3174,14 @@ void batadv_tt_free(struct batadv_priv *bat_priv)
* @enable: whether to set or unset the flag
* @count: whether to increase the TT size by the number of changed entries
*/
-static void batadv_tt_local_set_flags(struct batadv_priv *bat_priv,
- uint16_t flags, bool enable, bool count)
+static void batadv_tt_local_set_flags(struct batadv_priv *bat_priv, u16 flags,
+ bool enable, bool count)
{
struct batadv_hashtable *hash = bat_priv->tt.local_hash;
struct batadv_tt_common_entry *tt_common_entry;
- uint16_t changed_num = 0;
+ u16 changed_num = 0;
struct hlist_head *head;
- uint32_t i;
+ u32 i;
if (!hash)
return;
@@ -3198,7 +3223,7 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
struct hlist_node *node_tmp;
struct hlist_head *head;
spinlock_t *list_lock; /* protects write access to the hash lists */
- uint32_t i;
+ u32 i;
if (!hash)
return;
@@ -3246,6 +3271,8 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
*/
static void batadv_tt_local_commit_changes_nolock(struct batadv_priv *bat_priv)
{
+ lockdep_assert_held(&bat_priv->tt.commit_lock);
+
/* Update multicast addresses in local translation table */
batadv_mcast_mla_update(bat_priv);
@@ -3264,7 +3291,7 @@ static void batadv_tt_local_commit_changes_nolock(struct batadv_priv *bat_priv)
atomic_inc(&bat_priv->tt.vn);
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Local changes committed, updating to ttvn %u\n",
- (uint8_t)atomic_read(&bat_priv->tt.vn));
+ (u8)atomic_read(&bat_priv->tt.vn));
/* reset the sending counter */
atomic_set(&bat_priv->tt.ogm_append_cnt, BATADV_TT_OGM_APPEND_MAX);
@@ -3283,8 +3310,8 @@ void batadv_tt_local_commit_changes(struct batadv_priv *bat_priv)
spin_unlock_bh(&bat_priv->tt.commit_lock);
}
-bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, uint8_t *src,
- uint8_t *dst, unsigned short vid)
+bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, u8 *src, u8 *dst,
+ unsigned short vid)
{
struct batadv_tt_local_entry *tt_local_entry = NULL;
struct batadv_tt_global_entry *tt_global_entry = NULL;
@@ -3332,11 +3359,11 @@ out:
*/
static void batadv_tt_update_orig(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
- const void *tt_buff, uint16_t tt_num_vlan,
+ const void *tt_buff, u16 tt_num_vlan,
struct batadv_tvlv_tt_change *tt_change,
- uint16_t tt_num_changes, uint8_t ttvn)
+ u16 tt_num_changes, u8 ttvn)
{
- uint8_t orig_ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn);
+ u8 orig_ttvn = (u8)atomic_read(&orig_node->last_ttvn);
struct batadv_tvlv_tt_vlan_data *tt_vlan;
bool full_table = true;
bool has_tt_init;
@@ -3415,7 +3442,7 @@ request_table:
* deleted later by a DEL or because of timeout
*/
bool batadv_tt_global_client_is_roaming(struct batadv_priv *bat_priv,
- uint8_t *addr, unsigned short vid)
+ u8 *addr, unsigned short vid)
{
struct batadv_tt_global_entry *tt_global_entry;
bool ret = false;
@@ -3441,7 +3468,7 @@ out:
* to keep the latter consistent with the node TTVN
*/
bool batadv_tt_local_client_is_roaming(struct batadv_priv *bat_priv,
- uint8_t *addr, unsigned short vid)
+ u8 *addr, unsigned short vid)
{
struct batadv_tt_local_entry *tt_local_entry;
bool ret = false;
@@ -3527,13 +3554,13 @@ void batadv_tt_local_resize_to_mtu(struct net_device *soft_iface)
*/
static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig,
- uint8_t flags, void *tvlv_value,
- uint16_t tvlv_value_len)
+ u8 flags, void *tvlv_value,
+ u16 tvlv_value_len)
{
struct batadv_tvlv_tt_vlan_data *tt_vlan;
struct batadv_tvlv_tt_change *tt_change;
struct batadv_tvlv_tt_data *tt_data;
- uint16_t num_entries, num_vlan;
+ u16 num_entries, num_vlan;
if (tvlv_value_len < sizeof(*tt_data))
return;
@@ -3569,12 +3596,12 @@ static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
* otherwise.
*/
static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
- uint8_t *src, uint8_t *dst,
+ u8 *src, u8 *dst,
void *tvlv_value,
- uint16_t tvlv_value_len)
+ u16 tvlv_value_len)
{
struct batadv_tvlv_tt_data *tt_data;
- uint16_t tt_vlan_len, tt_num_entries;
+ u16 tt_vlan_len, tt_num_entries;
char tt_flag;
bool ret;
@@ -3650,9 +3677,9 @@ static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
* otherwise.
*/
static int batadv_roam_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
- uint8_t *src, uint8_t *dst,
+ u8 *src, u8 *dst,
void *tvlv_value,
- uint16_t tvlv_value_len)
+ u16 tvlv_value_len)
{
struct batadv_tvlv_roam_adv *roaming_adv;
struct batadv_orig_node *orig_node = NULL;
@@ -3734,7 +3761,7 @@ int batadv_tt_init(struct batadv_priv *bat_priv)
* otherwise
*/
bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv,
- const uint8_t *addr, unsigned short vid)
+ const u8 *addr, unsigned short vid)
{
struct batadv_tt_global_entry *tt;
bool ret;
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index 6acc25d3a925..abd8e116e5fb 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -22,44 +22,41 @@
#include <linux/types.h>
-struct batadv_orig_node;
-struct batadv_priv;
struct net_device;
struct seq_file;
int batadv_tt_init(struct batadv_priv *bat_priv);
-bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
- unsigned short vid, int ifindex, uint32_t mark);
-uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv,
- const uint8_t *addr, unsigned short vid,
- const char *message, bool roaming);
+bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
+ unsigned short vid, int ifindex, u32 mark);
+u16 batadv_tt_local_remove(struct batadv_priv *bat_priv,
+ const u8 *addr, unsigned short vid,
+ const char *message, bool roaming);
int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset);
int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset);
void batadv_tt_global_del_orig(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
- int32_t match_vid, const char *message);
+ s32 match_vid, const char *message);
int batadv_tt_global_hash_count(struct batadv_priv *bat_priv,
- const uint8_t *addr, unsigned short vid);
+ const u8 *addr, unsigned short vid);
struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv,
- const uint8_t *src,
- const uint8_t *addr,
+ const u8 *src, const u8 *addr,
unsigned short vid);
void batadv_tt_free(struct batadv_priv *bat_priv);
-bool batadv_is_my_client(struct batadv_priv *bat_priv, const uint8_t *addr,
+bool batadv_is_my_client(struct batadv_priv *bat_priv, const u8 *addr,
unsigned short vid);
-bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, uint8_t *src,
- uint8_t *dst, unsigned short vid);
+bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, u8 *src, u8 *dst,
+ unsigned short vid);
void batadv_tt_local_commit_changes(struct batadv_priv *bat_priv);
bool batadv_tt_global_client_is_roaming(struct batadv_priv *bat_priv,
- uint8_t *addr, unsigned short vid);
+ u8 *addr, unsigned short vid);
bool batadv_tt_local_client_is_roaming(struct batadv_priv *bat_priv,
- uint8_t *addr, unsigned short vid);
+ u8 *addr, unsigned short vid);
void batadv_tt_local_resize_to_mtu(struct net_device *soft_iface);
bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
const unsigned char *addr,
unsigned short vid);
bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv,
- const uint8_t *addr, unsigned short vid);
+ const u8 *addr, unsigned short vid);
#endif /* _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ */
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 55610a805b53..d260efd70499 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -44,7 +44,7 @@ struct seq_file;
*
* *Please be careful: batadv_dat_addr_t must be UNSIGNED*
*/
-#define batadv_dat_addr_t uint16_t
+#define batadv_dat_addr_t u16
#endif /* CONFIG_BATMAN_ADV_DAT */
@@ -103,10 +103,10 @@ struct batadv_hard_iface_bat_iv {
*/
struct batadv_hard_iface {
struct list_head list;
- int16_t if_num;
+ s16 if_num;
char if_status;
struct net_device *net_dev;
- uint8_t num_bcasts;
+ u8 num_bcasts;
struct kobject *hardif_obj;
atomic_t refcount;
struct packet_type batman_adv_ptype;
@@ -132,8 +132,8 @@ struct batadv_orig_ifinfo {
struct hlist_node list;
struct batadv_hard_iface *if_outgoing;
struct batadv_neigh_node __rcu *router; /* rcu protected pointer */
- uint32_t last_real_seqno;
- uint8_t last_ttl;
+ u32 last_real_seqno;
+ u8 last_ttl;
unsigned long batman_seqno_reset;
atomic_t refcount;
struct rcu_head rcu;
@@ -152,9 +152,9 @@ struct batadv_frag_table_entry {
struct hlist_head head;
spinlock_t lock; /* protects head */
unsigned long timestamp;
- uint16_t seqno;
- uint16_t size;
- uint16_t total_size;
+ u16 seqno;
+ u16 size;
+ u16 total_size;
};
/**
@@ -166,7 +166,7 @@ struct batadv_frag_table_entry {
struct batadv_frag_list_entry {
struct hlist_node list;
struct sk_buff *skb;
- uint8_t no;
+ u8 no;
};
/**
@@ -175,7 +175,7 @@ struct batadv_frag_list_entry {
* @num_entries: number of TT entries for this VLAN
*/
struct batadv_vlan_tt {
- uint32_t crc;
+ u32 crc;
atomic_t num_entries;
};
@@ -190,7 +190,7 @@ struct batadv_vlan_tt {
struct batadv_orig_node_vlan {
unsigned short vid;
struct batadv_vlan_tt tt;
- struct list_head list;
+ struct hlist_node list;
atomic_t refcount;
struct rcu_head rcu;
};
@@ -206,7 +206,7 @@ struct batadv_orig_node_vlan {
*/
struct batadv_orig_bat_iv {
unsigned long *bcast_own;
- uint8_t *bcast_own_sum;
+ u8 *bcast_own_sum;
/* ogm_cnt_lock protects: bcast_own, bcast_own_sum,
* neigh_node->bat_iv.real_bits & neigh_node->bat_iv.real_packet_count
*/
@@ -260,7 +260,7 @@ struct batadv_orig_bat_iv {
* @bat_iv: B.A.T.M.A.N. IV private structure
*/
struct batadv_orig_node {
- uint8_t orig[ETH_ALEN];
+ u8 orig[ETH_ALEN];
struct hlist_head ifinfo_list;
struct batadv_orig_ifinfo *last_bonding_candidate;
#ifdef CONFIG_BATMAN_ADV_DAT
@@ -271,7 +271,7 @@ struct batadv_orig_node {
#ifdef CONFIG_BATMAN_ADV_MCAST
/* synchronizes mcast tvlv specific orig changes */
spinlock_t mcast_handler_lock;
- uint8_t mcast_flags;
+ u8 mcast_flags;
struct hlist_node mcast_want_all_unsnoopables_node;
struct hlist_node mcast_want_all_ipv4_node;
struct hlist_node mcast_want_all_ipv6_node;
@@ -280,12 +280,12 @@ struct batadv_orig_node {
unsigned long capa_initialized;
atomic_t last_ttvn;
unsigned char *tt_buff;
- int16_t tt_buff_len;
+ s16 tt_buff_len;
spinlock_t tt_buff_lock; /* protects tt_buff & tt_buff_len */
/* prevents from changing the table while reading it */
spinlock_t tt_lock;
DECLARE_BITMAP(bcast_bits, BATADV_TQ_LOCAL_WINDOW_SIZE);
- uint32_t last_bcast_seqno;
+ u32 last_bcast_seqno;
struct hlist_head neigh_list;
/* neigh_list_lock protects: neigh_list and router */
spinlock_t neigh_list_lock;
@@ -302,7 +302,7 @@ struct batadv_orig_node {
spinlock_t out_coding_list_lock; /* Protects out_coding_list */
#endif
struct batadv_frag_table_entry fragments[BATADV_FRAG_BUFFER_COUNT];
- struct list_head vlan_list;
+ struct hlist_head vlan_list;
spinlock_t vlan_list_lock; /* protects vlan_list */
struct batadv_orig_bat_iv bat_iv;
};
@@ -328,16 +328,14 @@ enum batadv_orig_capabilities {
* @orig_node: pointer to corresponding orig node
* @bandwidth_down: advertised uplink download bandwidth
* @bandwidth_up: advertised uplink upload bandwidth
- * @deleted: this struct is scheduled for deletion
* @refcount: number of contexts the object is used
* @rcu: struct used for freeing in an RCU-safe manner
*/
struct batadv_gw_node {
struct hlist_node list;
struct batadv_orig_node *orig_node;
- uint32_t bandwidth_down;
- uint32_t bandwidth_up;
- unsigned long deleted;
+ u32 bandwidth_down;
+ u32 bandwidth_up;
atomic_t refcount;
struct rcu_head rcu;
};
@@ -358,7 +356,7 @@ struct batadv_gw_node {
struct batadv_neigh_node {
struct hlist_node list;
struct batadv_orig_node *orig_node;
- uint8_t addr[ETH_ALEN];
+ u8 addr[ETH_ALEN];
struct hlist_head ifinfo_list;
spinlock_t ifinfo_lock; /* protects ifinfo_list and its members */
struct batadv_hard_iface *if_incoming;
@@ -378,11 +376,11 @@ struct batadv_neigh_node {
* @real_packet_count: counted result of real_bits
*/
struct batadv_neigh_ifinfo_bat_iv {
- uint8_t tq_recv[BATADV_TQ_GLOBAL_WINDOW_SIZE];
- uint8_t tq_index;
- uint8_t tq_avg;
+ u8 tq_recv[BATADV_TQ_GLOBAL_WINDOW_SIZE];
+ u8 tq_index;
+ u8 tq_avg;
DECLARE_BITMAP(real_bits, BATADV_TQ_LOCAL_WINDOW_SIZE);
- uint8_t real_packet_count;
+ u8 real_packet_count;
};
/**
@@ -398,7 +396,7 @@ struct batadv_neigh_ifinfo {
struct hlist_node list;
struct batadv_hard_iface *if_outgoing;
struct batadv_neigh_ifinfo_bat_iv bat_iv;
- uint8_t last_ttl;
+ u8 last_ttl;
atomic_t refcount;
struct rcu_head rcu;
};
@@ -411,7 +409,7 @@ struct batadv_neigh_ifinfo {
*/
#ifdef CONFIG_BATMAN_ADV_BLA
struct batadv_bcast_duplist_entry {
- uint8_t orig[ETH_ALEN];
+ u8 orig[ETH_ALEN];
__be32 crc;
unsigned long entrytime;
};
@@ -537,13 +535,13 @@ struct batadv_priv_tt {
struct list_head changes_list;
struct batadv_hashtable *local_hash;
struct batadv_hashtable *global_hash;
- struct list_head req_list;
+ struct hlist_head req_list;
struct list_head roam_list;
spinlock_t changes_list_lock; /* protects changes */
spinlock_t req_list_lock; /* protects req_list */
spinlock_t roam_list_lock; /* protects roam_list */
unsigned char *last_changeset;
- int16_t last_changeset_len;
+ s16 last_changeset_len;
/* protects last_changeset & last_changeset_len */
spinlock_t last_changeset_lock;
/* prevents from executing a commit while reading the table */
@@ -663,7 +661,7 @@ struct batadv_priv_mcast {
struct hlist_head want_all_unsnoopables_list;
struct hlist_head want_all_ipv4_list;
struct hlist_head want_all_ipv6_list;
- uint8_t flags;
+ u8 flags;
bool enabled;
atomic_t num_disabled;
atomic_t num_want_all_unsnoopables;
@@ -781,7 +779,7 @@ struct batadv_priv {
atomic_t mesh_state;
struct net_device *soft_iface;
struct net_device_stats stats;
- uint64_t __percpu *bat_counters; /* Per cpu counters */
+ u64 __percpu *bat_counters; /* Per cpu counters */
atomic_t aggregated_ogms;
atomic_t bonding;
atomic_t fragmentation;
@@ -803,8 +801,8 @@ struct batadv_priv {
#ifdef CONFIG_BATMAN_ADV_DEBUG
atomic_t log_level;
#endif
- uint32_t isolation_mark;
- uint32_t isolation_mark_mask;
+ u32 isolation_mark;
+ u32 isolation_mark_mask;
atomic_t bcast_seqno;
atomic_t bcast_queue_left;
atomic_t batman_queue_left;
@@ -870,7 +868,7 @@ struct batadv_socket_client {
struct batadv_socket_packet {
struct list_head list;
size_t icmp_len;
- uint8_t icmp_packet[BATADV_ICMP_MAX_PACKET_SIZE];
+ u8 icmp_packet[BATADV_ICMP_MAX_PACKET_SIZE];
};
/**
@@ -891,14 +889,14 @@ struct batadv_socket_packet {
*/
#ifdef CONFIG_BATMAN_ADV_BLA
struct batadv_bla_backbone_gw {
- uint8_t orig[ETH_ALEN];
+ u8 orig[ETH_ALEN];
unsigned short vid;
struct hlist_node hash_entry;
struct batadv_priv *bat_priv;
unsigned long lasttime;
atomic_t wait_periods;
atomic_t request_sent;
- uint16_t crc;
+ u16 crc;
atomic_t refcount;
struct rcu_head rcu;
};
@@ -914,7 +912,7 @@ struct batadv_bla_backbone_gw {
* @rcu: struct used for freeing in an RCU-safe manner
*/
struct batadv_bla_claim {
- uint8_t addr[ETH_ALEN];
+ u8 addr[ETH_ALEN];
unsigned short vid;
struct batadv_bla_backbone_gw *backbone_gw;
unsigned long lasttime;
@@ -936,10 +934,10 @@ struct batadv_bla_claim {
* @rcu: struct used for freeing in an RCU-safe manner
*/
struct batadv_tt_common_entry {
- uint8_t addr[ETH_ALEN];
+ u8 addr[ETH_ALEN];
unsigned short vid;
struct hlist_node hash_entry;
- uint16_t flags;
+ u16 flags;
unsigned long added_at;
atomic_t refcount;
struct rcu_head rcu;
@@ -981,7 +979,7 @@ struct batadv_tt_global_entry {
*/
struct batadv_tt_orig_list_entry {
struct batadv_orig_node *orig_node;
- uint8_t ttvn;
+ u8 ttvn;
struct hlist_node list;
atomic_t refcount;
struct rcu_head rcu;
@@ -1004,9 +1002,9 @@ struct batadv_tt_change_node {
* @list: list node for batadv_priv_tt::req_list
*/
struct batadv_tt_req_node {
- uint8_t addr[ETH_ALEN];
+ u8 addr[ETH_ALEN];
unsigned long issued_at;
- struct list_head list;
+ struct hlist_node list;
};
/**
@@ -1018,7 +1016,7 @@ struct batadv_tt_req_node {
* @list: list node for batadv_priv_tt::roam_list
*/
struct batadv_tt_roam_node {
- uint8_t addr[ETH_ALEN];
+ u8 addr[ETH_ALEN];
atomic_t counter;
unsigned long first_time;
struct list_head list;
@@ -1035,7 +1033,7 @@ struct batadv_tt_roam_node {
*/
struct batadv_nc_node {
struct list_head list;
- uint8_t addr[ETH_ALEN];
+ u8 addr[ETH_ALEN];
atomic_t refcount;
struct rcu_head rcu;
struct batadv_orig_node *orig_node;
@@ -1059,8 +1057,8 @@ struct batadv_nc_path {
atomic_t refcount;
struct list_head packet_list;
spinlock_t packet_list_lock; /* Protects packet_list */
- uint8_t next_hop[ETH_ALEN];
- uint8_t prev_hop[ETH_ALEN];
+ u8 next_hop[ETH_ALEN];
+ u8 prev_hop[ETH_ALEN];
unsigned long last_valid;
};
@@ -1112,11 +1110,11 @@ struct batadv_skb_cb {
struct batadv_forw_packet {
struct hlist_node list;
unsigned long send_time;
- uint8_t own;
+ u8 own;
struct sk_buff *skb;
- uint16_t packet_len;
- uint32_t direct_link_flags;
- uint8_t num_packets;
+ u16 packet_len;
+ u32 direct_link_flags;
+ u8 num_packets;
struct delayed_work delayed_work;
struct batadv_hard_iface *if_incoming;
struct batadv_hard_iface *if_outgoing;
@@ -1191,7 +1189,7 @@ struct batadv_algo_ops {
*/
struct batadv_dat_entry {
__be32 ip;
- uint8_t mac_addr[ETH_ALEN];
+ u8 mac_addr[ETH_ALEN];
unsigned short vid;
unsigned long last_update;
struct hlist_node hash_entry;
@@ -1253,14 +1251,13 @@ struct batadv_tvlv_handler {
struct hlist_node list;
void (*ogm_handler)(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig,
- uint8_t flags,
- void *tvlv_value, uint16_t tvlv_value_len);
+ u8 flags, void *tvlv_value, u16 tvlv_value_len);
int (*unicast_handler)(struct batadv_priv *bat_priv,
- uint8_t *src, uint8_t *dst,
- void *tvlv_value, uint16_t tvlv_value_len);
- uint8_t type;
- uint8_t version;
- uint8_t flags;
+ u8 *src, u8 *dst,
+ void *tvlv_value, u16 tvlv_value_len);
+ u8 type;
+ u8 version;
+ u8 flags;
atomic_t refcount;
struct rcu_head rcu;
};
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 0aa8f5cf46a1..6ed2feb51e3c 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -365,8 +365,7 @@ void br_dev_setup(struct net_device *dev)
dev->destructor = br_dev_free;
dev->ethtool_ops = &br_ethtool_ops;
SET_NETDEV_DEVTYPE(dev, &br_type);
- dev->tx_queue_len = 0;
- dev->priv_flags = IFF_EBRIDGE;
+ dev->priv_flags = IFF_EBRIDGE | IFF_NO_QUEUE;
dev->features = COMMON_FEATURES | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL |
NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 0752796fe0ba..66efdc21f548 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1608,7 +1608,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
break;
}
- if (skb_trimmed)
+ if (skb_trimmed && skb_trimmed != skb)
kfree_skb(skb_trimmed);
return err;
@@ -1653,7 +1653,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
break;
}
- if (skb_trimmed)
+ if (skb_trimmed && skb_trimmed != skb)
kfree_skb(skb_trimmed);
return err;
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 0f2408f6cdfe..af5e187553fd 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -673,6 +673,21 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[])
return -EADDRNOTAVAIL;
}
+ if (!data)
+ return 0;
+
+#ifdef CONFIG_BRIDGE_VLAN_FILTERING
+ if (data[IFLA_BR_VLAN_PROTOCOL]) {
+ switch (nla_get_be16(data[IFLA_BR_VLAN_PROTOCOL])) {
+ case htons(ETH_P_8021Q):
+ case htons(ETH_P_8021AD):
+ break;
+ default:
+ return -EPROTONOSUPPORT;
+ }
+ }
+#endif
+
return 0;
}
@@ -729,6 +744,7 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = {
[IFLA_BR_STP_STATE] = { .type = NLA_U32 },
[IFLA_BR_PRIORITY] = { .type = NLA_U16 },
[IFLA_BR_VLAN_FILTERING] = { .type = NLA_U8 },
+ [IFLA_BR_VLAN_PROTOCOL] = { .type = NLA_U16 },
};
static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
@@ -784,6 +800,16 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
return err;
}
+#ifdef CONFIG_BRIDGE_VLAN_FILTERING
+ if (data[IFLA_BR_VLAN_PROTOCOL]) {
+ __be16 vlan_proto = nla_get_be16(data[IFLA_BR_VLAN_PROTOCOL]);
+
+ err = __br_vlan_set_proto(br, vlan_proto);
+ if (err)
+ return err;
+ }
+#endif
+
return 0;
}
@@ -796,6 +822,9 @@ static size_t br_get_size(const struct net_device *brdev)
nla_total_size(sizeof(u32)) + /* IFLA_BR_STP_STATE */
nla_total_size(sizeof(u16)) + /* IFLA_BR_PRIORITY */
nla_total_size(sizeof(u8)) + /* IFLA_BR_VLAN_FILTERING */
+#ifdef CONFIG_BRIDGE_VLAN_FILTERING
+ nla_total_size(sizeof(__be16)) + /* IFLA_BR_VLAN_PROTOCOL */
+#endif
0;
}
@@ -819,6 +848,11 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
nla_put_u8(skb, IFLA_BR_VLAN_FILTERING, vlan_enabled))
return -EMSGSIZE;
+#ifdef CONFIG_BRIDGE_VLAN_FILTERING
+ if (nla_put_be16(skb, IFLA_BR_VLAN_PROTOCOL, br->vlan_proto))
+ return -EMSGSIZE;
+#endif
+
return 0;
}
@@ -849,7 +883,7 @@ struct rtnl_link_ops br_link_ops __read_mostly = {
.kind = "bridge",
.priv_size = sizeof(struct net_bridge),
.setup = br_dev_setup,
- .maxtype = IFLA_BRPORT_MAX,
+ .maxtype = IFLA_BR_MAX,
.policy = br_policy,
.validate = br_validate,
.newlink = br_dev_newlink,
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 3d95647039d0..213baf7aaa93 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -95,15 +95,15 @@ struct net_bridge_fdb_entry
struct hlist_node hlist;
struct net_bridge_port *dst;
- struct rcu_head rcu;
unsigned long updated;
unsigned long used;
mac_addr addr;
+ __u16 vlan_id;
unsigned char is_local:1,
is_static:1,
added_by_user:1,
added_by_external_learn:1;
- __u16 vlan_id;
+ struct rcu_head rcu;
};
struct net_bridge_port_group {
@@ -616,6 +616,7 @@ bool br_vlan_find(struct net_bridge *br, u16 vid);
void br_recalculate_fwd_mask(struct net_bridge *br);
int __br_vlan_filter_toggle(struct net_bridge *br, unsigned long val);
int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val);
+int __br_vlan_set_proto(struct net_bridge *br, __be16 proto);
int br_vlan_set_proto(struct net_bridge *br, unsigned long val);
int br_vlan_init(struct net_bridge *br);
int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val);
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 3cef6892c0bb..3cd8cc9e804b 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -492,23 +492,16 @@ int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val)
return 0;
}
-int br_vlan_set_proto(struct net_bridge *br, unsigned long val)
+int __br_vlan_set_proto(struct net_bridge *br, __be16 proto)
{
int err = 0;
struct net_bridge_port *p;
struct net_port_vlans *pv;
- __be16 proto, oldproto;
+ __be16 oldproto;
u16 vid, errvid;
- if (val != ETH_P_8021Q && val != ETH_P_8021AD)
- return -EPROTONOSUPPORT;
-
- if (!rtnl_trylock())
- return restart_syscall();
-
- proto = htons(val);
if (br->vlan_proto == proto)
- goto unlock;
+ return 0;
/* Add VLANs for the new proto to the device filter. */
list_for_each_entry(p, &br->port_list, list) {
@@ -539,9 +532,7 @@ int br_vlan_set_proto(struct net_bridge *br, unsigned long val)
vlan_vid_del(p->dev, oldproto, vid);
}
-unlock:
- rtnl_unlock();
- return err;
+ return 0;
err_filt:
errvid = vid;
@@ -557,7 +548,23 @@ err_filt:
vlan_vid_del(p->dev, proto, vid);
}
- goto unlock;
+ return err;
+}
+
+int br_vlan_set_proto(struct net_bridge *br, unsigned long val)
+{
+ int err;
+
+ if (val != ETH_P_8021Q && val != ETH_P_8021AD)
+ return -EPROTONOSUPPORT;
+
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+ err = __br_vlan_set_proto(br, htons(val));
+ rtnl_unlock();
+
+ return err;
}
static bool vlan_default_pvid(struct net_port_vlans *pv, u16 vid)
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 18ca4b24c418..48b6b01295de 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -176,7 +176,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
return 0;
}
-static inline __pure
+static inline
struct ebt_entry *ebt_next_entry(const struct ebt_entry *entry)
{
return (void *)entry + entry->next_offset;
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index edbca468fa73..d730a0f68f46 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -177,7 +177,7 @@ static int transmit(struct cflayer *layer, struct cfpkt *pkt)
skb->protocol = htons(ETH_P_CAIF);
/* Check if we need to handle xoff */
- if (likely(caifd->netdev->tx_queue_len == 0))
+ if (likely(caifd->netdev->priv_flags & IFF_NO_QUEUE))
goto noxoff;
if (unlikely(caifd->xoff))
diff --git a/net/core/dev.c b/net/core/dev.c
index 4870c3556a5a..877c84834d81 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3657,7 +3657,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
qdisc_bstats_cpu_update(cl->q, skb);
- switch (tc_classify(skb, cl, &cl_res)) {
+ switch (tc_classify(skb, cl, &cl_res, false)) {
case TC_ACT_OK:
case TC_ACT_RECLASSIFY:
skb->tc_index = TC_H_MIN(cl_res.classid);
@@ -5311,6 +5311,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev, bool master,
void *private)
{
+ struct netdev_notifier_changeupper_info changeupper_info;
struct netdev_adjacent *i, *j, *to_i, *to_j;
int ret = 0;
@@ -5329,6 +5330,10 @@ static int __netdev_upper_dev_link(struct net_device *dev,
if (master && netdev_master_upper_dev_get(dev))
return -EBUSY;
+ changeupper_info.upper_dev = upper_dev;
+ changeupper_info.master = master;
+ changeupper_info.linking = true;
+
ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private,
master);
if (ret)
@@ -5367,7 +5372,8 @@ static int __netdev_upper_dev_link(struct net_device *dev,
goto rollback_lower_mesh;
}
- call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
+ call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
+ &changeupper_info.info);
return 0;
rollback_lower_mesh:
@@ -5462,9 +5468,14 @@ EXPORT_SYMBOL(netdev_master_upper_dev_link_private);
void netdev_upper_dev_unlink(struct net_device *dev,
struct net_device *upper_dev)
{
+ struct netdev_notifier_changeupper_info changeupper_info;
struct netdev_adjacent *i, *j;
ASSERT_RTNL();
+ changeupper_info.upper_dev = upper_dev;
+ changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev;
+ changeupper_info.linking = false;
+
__netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
/* Here is the tricky part. We must remove all dev's lower
@@ -5484,7 +5495,8 @@ void netdev_upper_dev_unlink(struct net_device *dev,
list_for_each_entry(i, &upper_dev->all_adj_list.upper, list)
__netdev_adjacent_dev_unlink(dev, i->dev);
- call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
+ call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
+ &changeupper_info.info);
}
EXPORT_SYMBOL(netdev_upper_dev_unlink);
@@ -6997,6 +7009,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
setup(dev);
+ if (!dev->tx_queue_len)
+ dev->priv_flags |= IFF_NO_QUEUE;
+
dev->num_tx_queues = txqs;
dev->real_num_tx_queues = txqs;
if (netif_alloc_netdev_queues(dev))
diff --git a/net/core/dst.c b/net/core/dst.c
index f8694d1b8702..477035ed7903 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -20,6 +20,7 @@
#include <net/net_namespace.h>
#include <linux/sched.h>
#include <linux/prefetch.h>
+#include <net/lwtunnel.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
@@ -184,6 +185,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
#ifdef CONFIG_IP_ROUTE_CLASSID
dst->tclassid = 0;
#endif
+ dst->lwtstate = NULL;
atomic_set(&dst->__refcnt, initial_ref);
dst->__use = 0;
dst->lastuse = jiffies;
@@ -260,6 +262,8 @@ again:
if (dst->dev)
dev_put(dst->dev);
+ lwtstate_put(dst->lwtstate);
+
if (dst->flags & DST_METADATA)
kfree(dst);
else
diff --git a/net/core/filter.c b/net/core/filter.c
index f8184222465e..13079f03902e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1349,7 +1349,7 @@ const struct bpf_func_proto bpf_l3_csum_replace_proto = {
static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
{
struct sk_buff *skb = (struct sk_buff *) (long) r1;
- u32 is_pseudo = BPF_IS_PSEUDO_HEADER(flags);
+ bool is_pseudo = !!BPF_IS_PSEUDO_HEADER(flags);
int offset = (int) r2;
__sum16 sum, *ptr;
@@ -1489,13 +1489,15 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
{
struct sk_buff *skb = (struct sk_buff *) (long) r1;
struct bpf_tunnel_key *to = (struct bpf_tunnel_key *) (long) r2;
- struct ip_tunnel_info *info = skb_tunnel_info(skb, AF_INET);
+ struct ip_tunnel_info *info = skb_tunnel_info(skb);
if (unlikely(size != sizeof(struct bpf_tunnel_key) || flags || !info))
return -EINVAL;
+ if (ip_tunnel_info_af(info) != AF_INET)
+ return -EINVAL;
to->tunnel_id = be64_to_cpu(info->key.tun_id);
- to->remote_ipv4 = be32_to_cpu(info->key.ipv4_src);
+ to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src);
return 0;
}
@@ -1528,8 +1530,9 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
info = &md->u.tun_info;
info->mode = IP_TUNNEL_INFO_TX;
+ info->key.tun_flags = TUNNEL_KEY;
info->key.tun_id = cpu_to_be64(from->tunnel_id);
- info->key.ipv4_dst = cpu_to_be32(from->remote_ipv4);
+ info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
return 0;
}
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index 5d6d8e3d450a..dfb1a9ca0835 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -72,7 +72,8 @@ int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops,
EXPORT_SYMBOL(lwtunnel_encap_del_ops);
int lwtunnel_build_state(struct net_device *dev, u16 encap_type,
- struct nlattr *encap, struct lwtunnel_state **lws)
+ struct nlattr *encap, unsigned int family,
+ const void *cfg, struct lwtunnel_state **lws)
{
const struct lwtunnel_encap_ops *ops;
int ret = -EINVAL;
@@ -85,7 +86,7 @@ int lwtunnel_build_state(struct net_device *dev, u16 encap_type,
rcu_read_lock();
ops = rcu_dereference(lwtun_encaps[encap_type]);
if (likely(ops && ops->build_state))
- ret = ops->build_state(dev, encap, lws);
+ ret = ops->build_state(dev, encap, family, cfg, lws);
rcu_read_unlock();
return ret;
@@ -179,14 +180,16 @@ int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
}
EXPORT_SYMBOL(lwtunnel_cmp_encap);
-int __lwtunnel_output(struct sock *sk, struct sk_buff *skb,
- struct lwtunnel_state *lwtstate)
+int lwtunnel_output(struct sock *sk, struct sk_buff *skb)
{
+ struct dst_entry *dst = skb_dst(skb);
const struct lwtunnel_encap_ops *ops;
+ struct lwtunnel_state *lwtstate;
int ret = -EINVAL;
- if (!lwtstate)
+ if (!dst)
goto drop;
+ lwtstate = dst->lwtstate;
if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
lwtstate->type > LWTUNNEL_ENCAP_MAX)
@@ -209,35 +212,38 @@ drop:
return ret;
}
+EXPORT_SYMBOL(lwtunnel_output);
-int lwtunnel_output6(struct sock *sk, struct sk_buff *skb)
+int lwtunnel_input(struct sk_buff *skb)
{
- struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
- struct lwtunnel_state *lwtstate = NULL;
+ struct dst_entry *dst = skb_dst(skb);
+ const struct lwtunnel_encap_ops *ops;
+ struct lwtunnel_state *lwtstate;
+ int ret = -EINVAL;
- if (rt) {
- lwtstate = rt->rt6i_lwtstate;
- skb->dev = rt->dst.dev;
- }
+ if (!dst)
+ goto drop;
+ lwtstate = dst->lwtstate;
- skb->protocol = htons(ETH_P_IPV6);
+ if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
+ lwtstate->type > LWTUNNEL_ENCAP_MAX)
+ return 0;
- return __lwtunnel_output(sk, skb, lwtstate);
-}
-EXPORT_SYMBOL(lwtunnel_output6);
+ ret = -EOPNOTSUPP;
+ rcu_read_lock();
+ ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
+ if (likely(ops && ops->input))
+ ret = ops->input(skb);
+ rcu_read_unlock();
-int lwtunnel_output(struct sock *sk, struct sk_buff *skb)
-{
- struct rtable *rt = (struct rtable *)skb_dst(skb);
- struct lwtunnel_state *lwtstate = NULL;
+ if (ret == -EOPNOTSUPP)
+ goto drop;
- if (rt) {
- lwtstate = rt->rt_lwtstate;
- skb->dev = rt->dst.dev;
- }
+ return ret;
- skb->protocol = htons(ETH_P_IP);
+drop:
+ kfree_skb(skb);
- return __lwtunnel_output(sk, skb, lwtstate);
+ return ret;
}
-EXPORT_SYMBOL(lwtunnel_output);
+EXPORT_SYMBOL(lwtunnel_input);
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index ba3c0120786c..adef015b2f41 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -31,6 +31,7 @@
#include <trace/events/napi.h>
#include <trace/events/sock.h>
#include <trace/events/udp.h>
+#include <trace/events/fib.h>
EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index c126a878c47c..6aa3db8dfc3b 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -380,6 +380,8 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
static atomic_t ip_ident;
struct ipv6hdr *ip6h;
+ WARN_ON_ONCE(!irqs_disabled());
+
udp_len = len + sizeof(*udph);
if (np->ipv6)
ip_len = udp_len + sizeof(*ip6h);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b6a19ca0f99e..dad4dd37e2aa 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -340,7 +340,7 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
if (skb && frag_size) {
skb->head_frag = 1;
- if (virt_to_head_page(data)->pfmemalloc)
+ if (page_is_pfmemalloc(virt_to_head_page(data)))
skb->pfmemalloc = 1;
}
return skb;
@@ -392,7 +392,7 @@ EXPORT_SYMBOL(napi_alloc_frag);
/**
* __netdev_alloc_skb - allocate an skbuff for rx on a specific device
* @dev: network device to receive on
- * @length: length to allocate
+ * @len: length to allocate
* @gfp_mask: get_free_pages mask, passed to alloc_skb
*
* Allocate a new &sk_buff and assign it a usage count of one. The
@@ -461,7 +461,7 @@ EXPORT_SYMBOL(__netdev_alloc_skb);
/**
* __napi_alloc_skb - allocate skbuff for rx in a specific NAPI instance
* @napi: napi instance this buffer was allocated for
- * @length: length to allocate
+ * @len: length to allocate
* @gfp_mask: get_free_pages mask, passed to alloc_skb and alloc_pages
*
* Allocate a new sk_buff for use in NAPI receive. This buffer will
@@ -4022,8 +4022,8 @@ EXPORT_SYMBOL(skb_checksum_setup);
* Otherwise returns the provided skb. Returns NULL in error cases
* (e.g. transport_len exceeds skb length or out-of-memory).
*
- * Caller needs to set the skb transport header and release the returned skb.
- * Provided skb is consumed.
+ * Caller needs to set the skb transport header and free any returned skb if it
+ * differs from the provided skb.
*/
static struct sk_buff *skb_checksum_maybe_trim(struct sk_buff *skb,
unsigned int transport_len)
@@ -4032,16 +4032,12 @@ static struct sk_buff *skb_checksum_maybe_trim(struct sk_buff *skb,
unsigned int len = skb_transport_offset(skb) + transport_len;
int ret;
- if (skb->len < len) {
- kfree_skb(skb);
+ if (skb->len < len)
return NULL;
- } else if (skb->len == len) {
+ else if (skb->len == len)
return skb;
- }
skb_chk = skb_clone(skb, GFP_ATOMIC);
- kfree_skb(skb);
-
if (!skb_chk)
return NULL;
@@ -4066,8 +4062,8 @@ static struct sk_buff *skb_checksum_maybe_trim(struct sk_buff *skb,
* If the skb has data beyond the given transport length, then a
* trimmed & cloned skb is checked and returned.
*
- * Caller needs to set the skb transport header and release the returned skb.
- * Provided skb is consumed.
+ * Caller needs to set the skb transport header and free any returned skb if it
+ * differs from the provided skb.
*/
struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb,
unsigned int transport_len,
@@ -4079,23 +4075,26 @@ struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb,
skb_chk = skb_checksum_maybe_trim(skb, transport_len);
if (!skb_chk)
- return NULL;
+ goto err;
- if (!pskb_may_pull(skb_chk, offset)) {
- kfree_skb(skb_chk);
- return NULL;
- }
+ if (!pskb_may_pull(skb_chk, offset))
+ goto err;
__skb_pull(skb_chk, offset);
ret = skb_chkf(skb_chk);
__skb_push(skb_chk, offset);
- if (ret) {
- kfree_skb(skb_chk);
- return NULL;
- }
+ if (ret)
+ goto err;
return skb_chk;
+
+err:
+ if (skb_chk && skb_chk != skb)
+ kfree_skb(skb_chk);
+
+ return NULL;
+
}
EXPORT_SYMBOL(skb_checksum_trimmed);
diff --git a/net/core/sock.c b/net/core/sock.c
index 193901d09757..ca2984afe16e 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2078,7 +2078,7 @@ suppress_allocation:
EXPORT_SYMBOL(__sk_mem_schedule);
/**
- * __sk_reclaim - reclaim memory_allocated
+ * __sk_mem_reclaim - reclaim memory_allocated
* @sk: socket
* @amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple)
*/
diff --git a/net/core/utils.c b/net/core/utils.c
index a7732a068043..3dffce953c39 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -301,7 +301,7 @@ out:
EXPORT_SYMBOL(in6_pton);
void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
- __be32 from, __be32 to, int pseudohdr)
+ __be32 from, __be32 to, bool pseudohdr)
{
if (skb->ip_summed != CHECKSUM_PARTIAL) {
csum_replace4(sum, from, to);
@@ -318,7 +318,7 @@ EXPORT_SYMBOL(inet_proto_csum_replace4);
void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
const __be32 *from, const __be32 *to,
- int pseudohdr)
+ bool pseudohdr)
{
__be32 diff[] = {
~from[0], ~from[1], ~from[2], ~from[3],
@@ -336,6 +336,19 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
}
EXPORT_SYMBOL(inet_proto_csum_replace16);
+void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb,
+ __wsum diff, bool pseudohdr)
+{
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ *sum = csum_fold(csum_add(diff, ~csum_unfold(*sum)));
+ if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
+ skb->csum = ~csum_add(diff, ~skb->csum);
+ } else if (pseudohdr) {
+ *sum = ~csum_fold(csum_add(diff, csum_unfold(*sum)));
+ }
+}
+EXPORT_SYMBOL(inet_proto_csum_replace_by_diff);
+
struct __net_random_once_work {
struct work_struct work;
struct static_key *key;
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 78d4ac97aae3..053eb2b8e682 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -554,6 +554,31 @@ static int dsa_of_setup_routing_table(struct dsa_platform_data *pd,
return 0;
}
+static int dsa_of_probe_links(struct dsa_platform_data *pd,
+ struct dsa_chip_data *cd,
+ int chip_index, int port_index,
+ struct device_node *port,
+ const char *port_name)
+{
+ struct device_node *link;
+ int link_index;
+ int ret;
+
+ for (link_index = 0;; link_index++) {
+ link = of_parse_phandle(port, "link", link_index);
+ if (!link)
+ break;
+
+ if (!strcmp(port_name, "dsa") && pd->nr_chips > 1) {
+ ret = dsa_of_setup_routing_table(pd, cd, chip_index,
+ port_index, link);
+ if (ret)
+ return ret;
+ }
+ }
+ return 0;
+}
+
static void dsa_of_free_platform_data(struct dsa_platform_data *pd)
{
int i;
@@ -573,7 +598,7 @@ static void dsa_of_free_platform_data(struct dsa_platform_data *pd)
static int dsa_of_probe(struct device *dev)
{
struct device_node *np = dev->of_node;
- struct device_node *child, *mdio, *ethernet, *port, *link;
+ struct device_node *child, *mdio, *ethernet, *port;
struct mii_bus *mdio_bus, *mdio_bus_switch;
struct net_device *ethernet_dev;
struct dsa_platform_data *pd;
@@ -668,15 +693,10 @@ static int dsa_of_probe(struct device *dev)
goto out_free_chip;
}
- link = of_parse_phandle(port, "link", 0);
-
- if (!strcmp(port_name, "dsa") && link &&
- pd->nr_chips > 1) {
- ret = dsa_of_setup_routing_table(pd, cd,
- chip_index, port_index, link);
- if (ret)
- goto out_free_chip;
- }
+ ret = dsa_of_probe_links(pd, cd, chip_index,
+ port_index, port, port_name);
+ if (ret)
+ goto out_free_chip;
}
}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 373ff315030d..cce97385f743 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1147,7 +1147,7 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
slave_dev->features = master->vlan_features;
slave_dev->ethtool_ops = &dsa_slave_ethtool_ops;
eth_hw_addr_inherit(slave_dev, master);
- slave_dev->tx_queue_len = 0;
+ slave_dev->priv_flags |= IFF_NO_QUEUE;
slave_dev->netdev_ops = &dsa_slave_netdev_ops;
slave_dev->switchdev_ops = &dsa_slave_switchdev_ops;
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 44d27469ae55..35a9788bb3ae 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -392,7 +392,7 @@ void hsr_dev_setup(struct net_device *dev)
dev->header_ops = &hsr_header_ops;
dev->netdev_ops = &hsr_device_ops;
SET_NETDEV_DEVTYPE(dev, &hsr_type);
- dev->tx_queue_len = 0;
+ dev->priv_flags |= IFF_NO_QUEUE;
dev->destructor = hsr_dev_destroy;
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 27c25ad935b4..953b1c49f5d1 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -90,7 +90,7 @@ static void lowpan_setup(struct net_device *dev)
dev->hard_header_len = 2 + 1 + 20 + 14;
dev->needed_tailroom = 2; /* FCS */
dev->mtu = IPV6_MIN_MTU;
- dev->tx_queue_len = 0;
+ dev->priv_flags |= IFF_NO_QUEUE;
dev->flags = IFF_BROADCAST | IFF_MULTICAST;
dev->watchdog_timeo = 0;
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 6fb3c90ad726..416dfa004cfb 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -331,20 +331,6 @@ config NET_FOU_IP_TUNNELS
When this option is enabled IP tunnels can be configured to use
FOU or GUE encapsulation.
-config GENEVE_CORE
- tristate "Generic Network Virtualization Encapsulation library"
- depends on INET
- select NET_UDP_TUNNEL
- ---help---
- This allows one to create Geneve virtual interfaces that provide
- Layer 2 Networks over Layer 3 Networks. Geneve is often used
- to tunnel virtual network infrastructure in virtualized environments.
- For more information see:
- http://tools.ietf.org/html/draft-gross-geneve-01
-
- To compile this driver as a module, choose M here: the module
-
-
config INET_AH
tristate "IP: AH transformation"
select XFRM_ALGO
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index efc43f300b8c..89aacb630a53 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -57,7 +57,6 @@ obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
-obj-$(CONFIG_GENEVE_CORE) += geneve_core.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o xfrm4_protocol.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index c8b855882fa5..675e88cac2b4 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -450,15 +450,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
goto out;
}
- if (sk->sk_bound_dev_if) {
- struct net_device *dev;
-
- rcu_read_lock();
- dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
- if (dev)
- tb_id = vrf_dev_table_rcu(dev) ? : tb_id;
- rcu_read_unlock();
- }
+ tb_id = vrf_dev_table_ifindex(net, sk->sk_bound_dev_if) ? : tb_id;
chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id);
/* Not specified by any standard per-se, however it breaks too
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index ac9a32ec3ee4..f2a71025a770 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -360,8 +360,10 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
work_iph = ah_alloc_tmp(ahash, nfrags + sglists, ihl +
ahp->icv_trunc_len + seqhi_len);
- if (!work_iph)
+ if (!work_iph) {
+ err = -ENOMEM;
goto out;
+ }
seqhi = (__be32 *)((char *)work_iph + ihl);
auth_data = ah_tmp_auth(seqhi, seqhi_len);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 7fa277176c33..4036c94dfbe1 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -46,6 +46,7 @@
#include <net/rtnetlink.h>
#include <net/xfrm.h>
#include <net/vrf.h>
+#include <trace/events/fib.h>
#ifndef CONFIG_IP_MULTIPLE_TABLES
@@ -344,6 +345,8 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
fl4.flowi4_mark = IN_DEV_SRC_VMARK(idev) ? skb->mark : 0;
+ trace_fib_validate_source(dev, &fl4);
+
net = dev_net(dev);
if (fib_lookup(net, &fl4, &res, 0))
goto last_resort;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index c8025851dac7..1b2d01170a4d 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -511,7 +511,8 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
dev = __dev_get_by_index(net, cfg->fc_oif);
ret = lwtunnel_build_state(dev, nla_get_u16(
nla_entype),
- nla, &lwtstate);
+ nla, AF_INET, cfg,
+ &lwtstate);
if (ret)
goto errout;
nexthop_nh->nh_lwtstate =
@@ -533,25 +534,28 @@ errout:
#endif
-int fib_encap_match(struct net *net, u16 encap_type,
- struct nlattr *encap,
- int oif, const struct fib_nh *nh)
+static int fib_encap_match(struct net *net, u16 encap_type,
+ struct nlattr *encap,
+ int oif, const struct fib_nh *nh,
+ const struct fib_config *cfg)
{
struct lwtunnel_state *lwtstate;
struct net_device *dev = NULL;
- int ret;
+ int ret, result = 0;
if (encap_type == LWTUNNEL_ENCAP_NONE)
return 0;
if (oif)
dev = __dev_get_by_index(net, oif);
- ret = lwtunnel_build_state(dev, encap_type,
- encap, &lwtstate);
- if (!ret)
- return lwtunnel_cmp_encap(lwtstate, nh->nh_lwtstate);
+ ret = lwtunnel_build_state(dev, encap_type, encap,
+ AF_INET, cfg, &lwtstate);
+ if (!ret) {
+ result = lwtunnel_cmp_encap(lwtstate, nh->nh_lwtstate);
+ lwtstate_free(lwtstate);
+ }
- return 0;
+ return result;
}
int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
@@ -569,7 +573,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
if (cfg->fc_encap) {
if (fib_encap_match(net, cfg->fc_encap_type,
cfg->fc_encap, cfg->fc_oif,
- fi->fib_nh))
+ fi->fib_nh, cfg))
return 1;
}
if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
@@ -661,7 +665,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
struct fib_nh *nh)
{
- int err;
+ int err = 0;
struct net *net;
struct net_device *dev;
@@ -710,9 +714,16 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
err = fib_table_lookup(tbl, &fl4, &res,
FIB_LOOKUP_IGNORE_LINKSTATE |
FIB_LOOKUP_NOREF);
- else
+
+ /* on error or if no table given do full lookup. This
+ * is needed for example when nexthops are in the local
+ * table rather than the given table
+ */
+ if (!tbl || err) {
err = fib_lookup(net, &fl4, &res,
FIB_LOOKUP_IGNORE_LINKSTATE);
+ }
+
if (err) {
rcu_read_unlock();
return err;
@@ -996,7 +1007,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
if (cfg->fc_oif)
dev = __dev_get_by_index(net, cfg->fc_oif);
err = lwtunnel_build_state(dev, cfg->fc_encap_type,
- cfg->fc_encap, &lwtstate);
+ cfg->fc_encap, AF_INET, cfg,
+ &lwtstate);
if (err)
goto failure;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 1243c79cb5b0..26d6ffb6d23c 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -81,6 +81,7 @@
#include <net/sock.h>
#include <net/ip_fib.h>
#include <net/switchdev.h>
+#include <trace/events/fib.h>
#include "fib_lookup.h"
#define MAX_STAT_DEPTH 32
@@ -1278,6 +1279,8 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
unsigned long index;
t_key cindex;
+ trace_fib_table_lookup(tb->tb_id, flp);
+
pn = t->kv;
cindex = 0;
@@ -1442,6 +1445,8 @@ found:
#ifdef CONFIG_IP_FIB_TRIE_STATS
this_cpu_inc(stats->semantic_match_passed);
#endif
+ trace_fib_table_lookup_nh(nh);
+
return err;
}
}
@@ -2468,7 +2473,7 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
key = l->key + 1;
iter->pos++;
- if (pos-- <= 0)
+ if (--pos <= 0)
break;
l = NULL;
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 34968cd5c146..e0fcbbbcfe54 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -79,7 +79,11 @@ static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr,
__be16 *pd = data;
size_t start = ntohs(pd[0]);
size_t offset = ntohs(pd[1]);
- size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start);
+ size_t plen = sizeof(struct udphdr) + hdrlen +
+ max_t(size_t, offset + sizeof(u16), start);
+
+ if (skb->remcsum_offload)
+ return guehdr;
if (!pskb_may_pull(skb, plen))
return NULL;
@@ -221,29 +225,21 @@ out_unlock:
static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off,
struct guehdr *guehdr, void *data,
- size_t hdrlen, u8 ipproto,
- struct gro_remcsum *grc, bool nopartial)
+ size_t hdrlen, struct gro_remcsum *grc,
+ bool nopartial)
{
__be16 *pd = data;
size_t start = ntohs(pd[0]);
size_t offset = ntohs(pd[1]);
- size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start);
if (skb->remcsum_offload)
- return NULL;
+ return guehdr;
if (!NAPI_GRO_CB(skb)->csum_valid)
return NULL;
- /* Pull checksum that will be written */
- if (skb_gro_header_hard(skb, off + plen)) {
- guehdr = skb_gro_header_slow(skb, off + plen, off);
- if (!guehdr)
- return NULL;
- }
-
- skb_gro_remcsum_process(skb, (void *)guehdr + hdrlen,
- start, offset, grc, nopartial);
+ guehdr = skb_gro_remcsum_process(skb, (void *)guehdr, off, hdrlen,
+ start, offset, grc, nopartial);
skb->remcsum_offload = 1;
@@ -307,10 +303,10 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head,
if (flags & GUE_PFLAG_REMCSUM) {
guehdr = gue_gro_remcsum(skb, off, guehdr,
- data + doffset, hdrlen,
- guehdr->proto_ctype, &grc,
+ data + doffset, hdrlen, &grc,
!!(fou->flags &
FOU_F_REMCSUM_NOPARTIAL));
+
if (!guehdr)
goto out;
@@ -351,7 +347,7 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head,
rcu_read_lock();
offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
ops = rcu_dereference(offloads[guehdr->proto_ctype]);
- if (WARN_ON(!ops || !ops->callbacks.gro_receive))
+ if (WARN_ON_ONCE(!ops || !ops->callbacks.gro_receive))
goto out_unlock;
pp = ops->callbacks.gro_receive(head, skb);
@@ -570,7 +566,7 @@ static int parse_nl_config(struct genl_info *info,
if (info->attrs[FOU_ATTR_AF]) {
u8 family = nla_get_u8(info->attrs[FOU_ATTR_AF]);
- if (family != AF_INET && family != AF_INET6)
+ if (family != AF_INET)
return -EINVAL;
cfg->udp_config.family = family;
diff --git a/net/ipv4/geneve_core.c b/net/ipv4/geneve_core.c
deleted file mode 100644
index 311a4ba6950a..000000000000
--- a/net/ipv4/geneve_core.c
+++ /dev/null
@@ -1,447 +0,0 @@
-/*
- * Geneve: Generic Network Virtualization Encapsulation
- *
- * Copyright (c) 2014 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-#include <linux/skbuff.h>
-#include <linux/list.h>
-#include <linux/netdevice.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-#include <linux/igmp.h>
-#include <linux/etherdevice.h>
-#include <linux/if_ether.h>
-#include <linux/if_vlan.h>
-#include <linux/ethtool.h>
-#include <linux/mutex.h>
-#include <net/arp.h>
-#include <net/ndisc.h>
-#include <net/ip.h>
-#include <net/ip_tunnels.h>
-#include <net/icmp.h>
-#include <net/udp.h>
-#include <net/rtnetlink.h>
-#include <net/route.h>
-#include <net/dsfield.h>
-#include <net/inet_ecn.h>
-#include <net/net_namespace.h>
-#include <net/netns/generic.h>
-#include <net/geneve.h>
-#include <net/protocol.h>
-#include <net/udp_tunnel.h>
-#if IS_ENABLED(CONFIG_IPV6)
-#include <net/ipv6.h>
-#include <net/addrconf.h>
-#include <net/ip6_tunnel.h>
-#include <net/ip6_checksum.h>
-#endif
-
-/* Protects sock_list and refcounts. */
-static DEFINE_MUTEX(geneve_mutex);
-
-/* per-network namespace private data for this module */
-struct geneve_net {
- struct list_head sock_list;
-};
-
-static int geneve_net_id;
-
-static struct geneve_sock *geneve_find_sock(struct net *net,
- sa_family_t family, __be16 port)
-{
- struct geneve_net *gn = net_generic(net, geneve_net_id);
- struct geneve_sock *gs;
-
- list_for_each_entry(gs, &gn->sock_list, list) {
- if (inet_sk(gs->sock->sk)->inet_sport == port &&
- inet_sk(gs->sock->sk)->sk.sk_family == family)
- return gs;
- }
-
- return NULL;
-}
-
-static void geneve_build_header(struct genevehdr *geneveh,
- __be16 tun_flags, u8 vni[3],
- u8 options_len, u8 *options)
-{
- geneveh->ver = GENEVE_VER;
- geneveh->opt_len = options_len / 4;
- geneveh->oam = !!(tun_flags & TUNNEL_OAM);
- geneveh->critical = !!(tun_flags & TUNNEL_CRIT_OPT);
- geneveh->rsvd1 = 0;
- memcpy(geneveh->vni, vni, 3);
- geneveh->proto_type = htons(ETH_P_TEB);
- geneveh->rsvd2 = 0;
-
- memcpy(geneveh->options, options, options_len);
-}
-
-/* Transmit a fully formatted Geneve frame.
- *
- * When calling this function. The skb->data should point
- * to the geneve header which is fully formed.
- *
- * This function will add other UDP tunnel headers.
- */
-int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt,
- struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos,
- __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port,
- __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt,
- bool csum, bool xnet)
-{
- struct genevehdr *gnvh;
- int min_headroom;
- int err;
-
- min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
- + GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr)
- + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
-
- err = skb_cow_head(skb, min_headroom);
- if (unlikely(err)) {
- kfree_skb(skb);
- return err;
- }
-
- skb = vlan_hwaccel_push_inside(skb);
- if (unlikely(!skb))
- return -ENOMEM;
-
- skb = udp_tunnel_handle_offloads(skb, csum);
- if (IS_ERR(skb))
- return PTR_ERR(skb);
-
- gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len);
- geneve_build_header(gnvh, tun_flags, vni, opt_len, opt);
-
- skb_set_inner_protocol(skb, htons(ETH_P_TEB));
-
- return udp_tunnel_xmit_skb(rt, gs->sock->sk, skb, src, dst,
- tos, ttl, df, src_port, dst_port, xnet,
- !csum);
-}
-EXPORT_SYMBOL_GPL(geneve_xmit_skb);
-
-static int geneve_hlen(struct genevehdr *gh)
-{
- return sizeof(*gh) + gh->opt_len * 4;
-}
-
-static struct sk_buff **geneve_gro_receive(struct sk_buff **head,
- struct sk_buff *skb,
- struct udp_offload *uoff)
-{
- struct sk_buff *p, **pp = NULL;
- struct genevehdr *gh, *gh2;
- unsigned int hlen, gh_len, off_gnv;
- const struct packet_offload *ptype;
- __be16 type;
- int flush = 1;
-
- off_gnv = skb_gro_offset(skb);
- hlen = off_gnv + sizeof(*gh);
- gh = skb_gro_header_fast(skb, off_gnv);
- if (skb_gro_header_hard(skb, hlen)) {
- gh = skb_gro_header_slow(skb, hlen, off_gnv);
- if (unlikely(!gh))
- goto out;
- }
-
- if (gh->ver != GENEVE_VER || gh->oam)
- goto out;
- gh_len = geneve_hlen(gh);
-
- hlen = off_gnv + gh_len;
- if (skb_gro_header_hard(skb, hlen)) {
- gh = skb_gro_header_slow(skb, hlen, off_gnv);
- if (unlikely(!gh))
- goto out;
- }
-
- flush = 0;
-
- for (p = *head; p; p = p->next) {
- if (!NAPI_GRO_CB(p)->same_flow)
- continue;
-
- gh2 = (struct genevehdr *)(p->data + off_gnv);
- if (gh->opt_len != gh2->opt_len ||
- memcmp(gh, gh2, gh_len)) {
- NAPI_GRO_CB(p)->same_flow = 0;
- continue;
- }
- }
-
- type = gh->proto_type;
-
- rcu_read_lock();
- ptype = gro_find_receive_by_type(type);
- if (!ptype) {
- flush = 1;
- goto out_unlock;
- }
-
- skb_gro_pull(skb, gh_len);
- skb_gro_postpull_rcsum(skb, gh, gh_len);
- pp = ptype->callbacks.gro_receive(head, skb);
-
-out_unlock:
- rcu_read_unlock();
-out:
- NAPI_GRO_CB(skb)->flush |= flush;
-
- return pp;
-}
-
-static int geneve_gro_complete(struct sk_buff *skb, int nhoff,
- struct udp_offload *uoff)
-{
- struct genevehdr *gh;
- struct packet_offload *ptype;
- __be16 type;
- int gh_len;
- int err = -ENOSYS;
-
- udp_tunnel_gro_complete(skb, nhoff);
-
- gh = (struct genevehdr *)(skb->data + nhoff);
- gh_len = geneve_hlen(gh);
- type = gh->proto_type;
-
- rcu_read_lock();
- ptype = gro_find_complete_by_type(type);
- if (ptype)
- err = ptype->callbacks.gro_complete(skb, nhoff + gh_len);
-
- rcu_read_unlock();
- return err;
-}
-
-static void geneve_notify_add_rx_port(struct geneve_sock *gs)
-{
- struct sock *sk = gs->sock->sk;
- sa_family_t sa_family = sk->sk_family;
- int err;
-
- if (sa_family == AF_INET) {
- err = udp_add_offload(&gs->udp_offloads);
- if (err)
- pr_warn("geneve: udp_add_offload failed with status %d\n",
- err);
- }
-}
-
-static void geneve_notify_del_rx_port(struct geneve_sock *gs)
-{
- struct sock *sk = gs->sock->sk;
- sa_family_t sa_family = sk->sk_family;
-
- if (sa_family == AF_INET)
- udp_del_offload(&gs->udp_offloads);
-}
-
-/* Callback from net/ipv4/udp.c to receive packets */
-static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
-{
- struct genevehdr *geneveh;
- struct geneve_sock *gs;
- int opts_len;
-
- /* Need Geneve and inner Ethernet header to be present */
- if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN)))
- goto error;
-
- /* Return packets with reserved bits set */
- geneveh = geneve_hdr(skb);
-
- if (unlikely(geneveh->ver != GENEVE_VER))
- goto error;
-
- if (unlikely(geneveh->proto_type != htons(ETH_P_TEB)))
- goto error;
-
- opts_len = geneveh->opt_len * 4;
- if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len,
- htons(ETH_P_TEB)))
- goto drop;
-
- gs = rcu_dereference_sk_user_data(sk);
- if (!gs)
- goto drop;
-
- gs->rcv(gs, skb);
- return 0;
-
-drop:
- /* Consume bad packet */
- kfree_skb(skb);
- return 0;
-
-error:
- /* Let the UDP layer deal with the skb */
- return 1;
-}
-
-static struct socket *geneve_create_sock(struct net *net, bool ipv6,
- __be16 port)
-{
- struct socket *sock;
- struct udp_port_cfg udp_conf;
- int err;
-
- memset(&udp_conf, 0, sizeof(udp_conf));
-
- if (ipv6) {
- udp_conf.family = AF_INET6;
- } else {
- udp_conf.family = AF_INET;
- udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
- }
-
- udp_conf.local_udp_port = port;
-
- /* Open UDP socket */
- err = udp_sock_create(net, &udp_conf, &sock);
- if (err < 0)
- return ERR_PTR(err);
-
- return sock;
-}
-
-/* Create new listen socket if needed */
-static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port,
- geneve_rcv_t *rcv, void *data,
- bool ipv6)
-{
- struct geneve_net *gn = net_generic(net, geneve_net_id);
- struct geneve_sock *gs;
- struct socket *sock;
- struct udp_tunnel_sock_cfg tunnel_cfg;
-
- gs = kzalloc(sizeof(*gs), GFP_KERNEL);
- if (!gs)
- return ERR_PTR(-ENOMEM);
-
- sock = geneve_create_sock(net, ipv6, port);
- if (IS_ERR(sock)) {
- kfree(gs);
- return ERR_CAST(sock);
- }
-
- gs->sock = sock;
- gs->refcnt = 1;
- gs->rcv = rcv;
- gs->rcv_data = data;
-
- /* Initialize the geneve udp offloads structure */
- gs->udp_offloads.port = port;
- gs->udp_offloads.callbacks.gro_receive = geneve_gro_receive;
- gs->udp_offloads.callbacks.gro_complete = geneve_gro_complete;
- geneve_notify_add_rx_port(gs);
-
- /* Mark socket as an encapsulation socket */
- tunnel_cfg.sk_user_data = gs;
- tunnel_cfg.encap_type = 1;
- tunnel_cfg.encap_rcv = geneve_udp_encap_recv;
- tunnel_cfg.encap_destroy = NULL;
- setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
-
- list_add(&gs->list, &gn->sock_list);
-
- return gs;
-}
-
-struct geneve_sock *geneve_sock_add(struct net *net, __be16 port,
- geneve_rcv_t *rcv, void *data,
- bool no_share, bool ipv6)
-{
- struct geneve_sock *gs;
-
- mutex_lock(&geneve_mutex);
-
- gs = geneve_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port);
- if (gs) {
- if (!no_share && gs->rcv == rcv)
- gs->refcnt++;
- else
- gs = ERR_PTR(-EBUSY);
- } else {
- gs = geneve_socket_create(net, port, rcv, data, ipv6);
- }
-
- mutex_unlock(&geneve_mutex);
-
- return gs;
-}
-EXPORT_SYMBOL_GPL(geneve_sock_add);
-
-void geneve_sock_release(struct geneve_sock *gs)
-{
- mutex_lock(&geneve_mutex);
-
- if (--gs->refcnt)
- goto unlock;
-
- list_del(&gs->list);
- geneve_notify_del_rx_port(gs);
- udp_tunnel_sock_release(gs->sock);
- kfree_rcu(gs, rcu);
-
-unlock:
- mutex_unlock(&geneve_mutex);
-}
-EXPORT_SYMBOL_GPL(geneve_sock_release);
-
-static __net_init int geneve_init_net(struct net *net)
-{
- struct geneve_net *gn = net_generic(net, geneve_net_id);
-
- INIT_LIST_HEAD(&gn->sock_list);
-
- return 0;
-}
-
-static struct pernet_operations geneve_net_ops = {
- .init = geneve_init_net,
- .id = &geneve_net_id,
- .size = sizeof(struct geneve_net),
-};
-
-static int __init geneve_init_module(void)
-{
- int rc;
-
- rc = register_pernet_subsys(&geneve_net_ops);
- if (rc)
- return rc;
-
- pr_info("Geneve core logic\n");
-
- return 0;
-}
-module_init(geneve_init_module);
-
-static void __exit geneve_cleanup_module(void)
-{
- unregister_pernet_subsys(&geneve_net_ops);
-}
-module_exit(geneve_cleanup_module);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jesse Gross <jesse@nicira.com>");
-MODULE_DESCRIPTION("Driver library for GENEVE encapsulated traffic");
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index c6f1ce149ffb..79fe05befcae 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -309,9 +309,10 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
rc = false;
if (icmp_global_allow()) {
+ int vif = vrf_master_ifindex(dst->dev);
struct inet_peer *peer;
- peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, 1);
+ peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif, 1);
rc = inet_peer_xrlim_allow(peer,
net->ipv4.sysctl_icmp_ratelimit);
if (peer)
@@ -426,7 +427,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
fl4.flowi4_mark = mark;
fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
fl4.flowi4_proto = IPPROTO_ICMP;
- fl4.flowi4_oif = vrf_master_ifindex_rcu(skb->dev) ? : skb->dev->ifindex;
+ fl4.flowi4_oif = vrf_master_ifindex(skb->dev) ? : skb->dev->ifindex;
security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
rt = ip_route_output_key(net, &fl4);
if (IS_ERR(rt))
@@ -460,7 +461,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
fl4->flowi4_proto = IPPROTO_ICMP;
fl4->fl4_icmp_type = type;
fl4->fl4_icmp_code = code;
- fl4->flowi4_oif = vrf_master_ifindex_rcu(skb_in->dev) ? : skb_in->dev->ifindex;
+ fl4->flowi4_oif = vrf_master_ifindex(skb_in->dev) ? : skb_in->dev->ifindex;
security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
rt = __ip_route_output_key(net, fl4);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 651cdf648ec4..d38b8b61eaee 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -110,6 +110,9 @@
#define IP_MAX_MEMBERSHIPS 20
#define IP_MAX_MSF 10
+/* IGMP reports for link-local multicast groups are enabled by default */
+int sysctl_igmp_llm_reports __read_mostly = 1;
+
#ifdef CONFIG_IP_MULTICAST
/* Parameter names and values are taken from igmp-v2-06 draft */
@@ -437,6 +440,8 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
if (pmc->multiaddr == IGMP_ALL_HOSTS)
return skb;
+ if (ipv4_is_local_multicast(pmc->multiaddr) && !sysctl_igmp_llm_reports)
+ return skb;
isquery = type == IGMPV3_MODE_IS_INCLUDE ||
type == IGMPV3_MODE_IS_EXCLUDE;
@@ -545,6 +550,9 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc)
for_each_pmc_rcu(in_dev, pmc) {
if (pmc->multiaddr == IGMP_ALL_HOSTS)
continue;
+ if (ipv4_is_local_multicast(pmc->multiaddr) &&
+ !sysctl_igmp_llm_reports)
+ continue;
spin_lock_bh(&pmc->lock);
if (pmc->sfcount[MCAST_EXCLUDE])
type = IGMPV3_MODE_IS_EXCLUDE;
@@ -678,7 +686,11 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
if (type == IGMPV3_HOST_MEMBERSHIP_REPORT)
return igmpv3_send_report(in_dev, pmc);
- else if (type == IGMP_HOST_LEAVE_MESSAGE)
+
+ if (ipv4_is_local_multicast(group) && !sysctl_igmp_llm_reports)
+ return 0;
+
+ if (type == IGMP_HOST_LEAVE_MESSAGE)
dst = IGMP_ALL_ROUTER;
else
dst = group;
@@ -851,6 +863,8 @@ static bool igmp_heard_report(struct in_device *in_dev, __be32 group)
if (group == IGMP_ALL_HOSTS)
return false;
+ if (ipv4_is_local_multicast(group) && !sysctl_igmp_llm_reports)
+ return false;
rcu_read_lock();
for_each_pmc_rcu(in_dev, im) {
@@ -957,6 +971,9 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
continue;
if (im->multiaddr == IGMP_ALL_HOSTS)
continue;
+ if (ipv4_is_local_multicast(im->multiaddr) &&
+ !sysctl_igmp_llm_reports)
+ continue;
spin_lock_bh(&im->lock);
if (im->tm_running)
im->gsquery = im->gsquery && mark;
@@ -1181,6 +1198,8 @@ static void igmp_group_dropped(struct ip_mc_list *im)
#ifdef CONFIG_IP_MULTICAST
if (im->multiaddr == IGMP_ALL_HOSTS)
return;
+ if (ipv4_is_local_multicast(im->multiaddr) && !sysctl_igmp_llm_reports)
+ return;
reporter = im->reporter;
igmp_stop_timer(im);
@@ -1213,6 +1232,8 @@ static void igmp_group_added(struct ip_mc_list *im)
#ifdef CONFIG_IP_MULTICAST
if (im->multiaddr == IGMP_ALL_HOSTS)
return;
+ if (ipv4_is_local_multicast(im->multiaddr) && !sysctl_igmp_llm_reports)
+ return;
if (in_dev->dead)
return;
@@ -1435,33 +1456,35 @@ static int __ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed)
struct sk_buff *skb_chk;
unsigned int transport_len;
unsigned int len = skb_transport_offset(skb) + sizeof(struct igmphdr);
- int ret;
+ int ret = -EINVAL;
transport_len = ntohs(ip_hdr(skb)->tot_len) - ip_hdrlen(skb);
- skb_get(skb);
skb_chk = skb_checksum_trimmed(skb, transport_len,
ip_mc_validate_checksum);
if (!skb_chk)
- return -EINVAL;
+ goto err;
- if (!pskb_may_pull(skb_chk, len)) {
- kfree_skb(skb_chk);
- return -EINVAL;
- }
+ if (!pskb_may_pull(skb_chk, len))
+ goto err;
ret = ip_mc_check_igmp_msg(skb_chk);
- if (ret) {
- kfree_skb(skb_chk);
- return ret;
- }
+ if (ret)
+ goto err;
if (skb_trimmed)
*skb_trimmed = skb_chk;
- else
+ /* free now unneeded clone */
+ else if (skb_chk != skb)
kfree_skb(skb_chk);
- return 0;
+ ret = 0;
+
+err:
+ if (ret && skb_chk && skb_chk != skb)
+ kfree_skb(skb_chk);
+
+ return ret;
}
/**
@@ -1470,7 +1493,7 @@ static int __ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed)
* @skb_trimmed: to store an skb pointer trimmed to IPv4 packet tail (optional)
*
* Checks whether an IPv4 packet is a valid IGMP packet. If so sets
- * skb network and transport headers accordingly and returns zero.
+ * skb transport header accordingly and returns zero.
*
* -EINVAL: A broken packet was detected, i.e. it violates some internet
* standard
@@ -1485,7 +1508,8 @@ static int __ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed)
* to leave the original skb and its full frame unchanged (which might be
* desirable for layer 2 frame jugglers).
*
- * The caller needs to release a reference count from any returned skb_trimmed.
+ * Caller needs to set the skb network header and free any returned skb if it
+ * differs from the provided skb.
*/
int ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed)
{
@@ -1515,6 +1539,9 @@ static void ip_mc_rejoin_groups(struct in_device *in_dev)
for_each_pmc_rtnl(in_dev, im) {
if (im->multiaddr == IGMP_ALL_HOSTS)
continue;
+ if (ipv4_is_local_multicast(im->multiaddr) &&
+ !sysctl_igmp_llm_reports)
+ continue;
/* a failover is happening and switches
* must be notified immediately
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 05e3145f7dc3..134957159c27 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -593,7 +593,7 @@ static bool reqsk_queue_unlink(struct request_sock_queue *queue,
}
spin_unlock(&queue->syn_wait_lock);
- if (del_timer_sync(&req->rsk_timer))
+ if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer))
reqsk_put(req);
return found;
}
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 241afd743d2c..86fa45809540 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -157,22 +157,6 @@ void __init inet_initpeers(void)
INIT_DEFERRABLE_WORK(&gc_work, inetpeer_gc_worker);
}
-static int addr_compare(const struct inetpeer_addr *a,
- const struct inetpeer_addr *b)
-{
- int i, n = (a->family == AF_INET ? 1 : 4);
-
- for (i = 0; i < n; i++) {
- if (a->addr.a6[i] == b->addr.a6[i])
- continue;
- if ((__force u32)a->addr.a6[i] < (__force u32)b->addr.a6[i])
- return -1;
- return 1;
- }
-
- return 0;
-}
-
#define rcu_deref_locked(X, BASE) \
rcu_dereference_protected(X, lockdep_is_held(&(BASE)->lock.lock))
@@ -188,7 +172,7 @@ static int addr_compare(const struct inetpeer_addr *a,
*stackptr++ = &_base->root; \
for (u = rcu_deref_locked(_base->root, _base); \
u != peer_avl_empty;) { \
- int cmp = addr_compare(_daddr, &u->daddr); \
+ int cmp = inetpeer_addr_cmp(_daddr, &u->daddr); \
if (cmp == 0) \
break; \
if (cmp == -1) \
@@ -215,7 +199,7 @@ static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr,
int count = 0;
while (u != peer_avl_empty) {
- int cmp = addr_compare(daddr, &u->daddr);
+ int cmp = inetpeer_addr_cmp(daddr, &u->daddr);
if (cmp == 0) {
/* Before taking a reference, check if this entry was
* deleted (refcnt=-1)
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 15762e758861..fa7f15305f9a 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -151,7 +151,8 @@ static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
qp->vif = arg->vif;
qp->user = arg->user;
qp->peer = sysctl_ipfrag_max_dist ?
- inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, 1) : NULL;
+ inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, arg->vif, 1) :
+ NULL;
}
static void ip4_frag_free(struct inet_frag_queue *q)
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index fb44d693796e..bd0679d90519 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -400,25 +400,14 @@ static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
if (tunnel) {
skb_pop_mac_header(skb);
if (tunnel->collect_md) {
- struct ip_tunnel_info *info;
+ __be16 flags;
+ __be64 tun_id;
- tun_dst = metadata_dst_alloc(0, GFP_ATOMIC);
+ flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
+ tun_id = key_to_tunnel_id(tpi->key);
+ tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
if (!tun_dst)
return PACKET_REJECT;
-
- info = &tun_dst->u.tun_info;
- info->key.ipv4_src = iph->saddr;
- info->key.ipv4_dst = iph->daddr;
- info->key.ipv4_tos = iph->tos;
- info->key.ipv4_ttl = iph->ttl;
-
- info->mode = IP_TUNNEL_INFO_RX;
- info->key.tun_flags = tpi->flags &
- (TUNNEL_CSUM | TUNNEL_KEY);
- info->key.tun_id = key_to_tunnel_id(tpi->key);
-
- info->key.tp_src = 0;
- info->key.tp_dst = 0;
}
ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
@@ -521,15 +510,16 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
__be16 df, flags;
int err;
- tun_info = skb_tunnel_info(skb, AF_INET);
- if (unlikely(!tun_info || tun_info->mode != IP_TUNNEL_INFO_TX))
+ tun_info = skb_tunnel_info(skb);
+ if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+ ip_tunnel_info_af(tun_info) != AF_INET))
goto err_free_skb;
key = &tun_info->key;
memset(&fl, 0, sizeof(fl));
- fl.daddr = key->ipv4_dst;
- fl.saddr = key->ipv4_src;
- fl.flowi4_tos = RT_TOS(key->ipv4_tos);
+ fl.daddr = key->u.ipv4.dst;
+ fl.saddr = key->u.ipv4.src;
+ fl.flowi4_tos = RT_TOS(key->tos);
fl.flowi4_mark = skb->mark;
fl.flowi4_proto = IPPROTO_GRE;
@@ -564,8 +554,8 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
err = iptunnel_xmit(skb->sk, rt, skb, fl.saddr,
- key->ipv4_dst, IPPROTO_GRE,
- key->ipv4_tos, key->ipv4_ttl, df, false);
+ key->u.ipv4.dst, IPPROTO_GRE,
+ key->tos, key->ttl, df, false);
iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
return;
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index fd6319681c50..0c756ade1cf7 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -204,6 +204,7 @@ static const struct nla_policy ip_tun_policy[LWTUNNEL_IP_MAX + 1] = {
};
static int ip_tun_build_state(struct net_device *dev, struct nlattr *attr,
+ unsigned int family, const void *cfg,
struct lwtunnel_state **ts)
{
struct ip_tunnel_info *tun_info;
@@ -227,16 +228,16 @@ static int ip_tun_build_state(struct net_device *dev, struct nlattr *attr,
tun_info->key.tun_id = nla_get_u64(tb[LWTUNNEL_IP_ID]);
if (tb[LWTUNNEL_IP_DST])
- tun_info->key.ipv4_dst = nla_get_be32(tb[LWTUNNEL_IP_DST]);
+ tun_info->key.u.ipv4.dst = nla_get_be32(tb[LWTUNNEL_IP_DST]);
if (tb[LWTUNNEL_IP_SRC])
- tun_info->key.ipv4_src = nla_get_be32(tb[LWTUNNEL_IP_SRC]);
+ tun_info->key.u.ipv4.src = nla_get_be32(tb[LWTUNNEL_IP_SRC]);
if (tb[LWTUNNEL_IP_TTL])
- tun_info->key.ipv4_ttl = nla_get_u8(tb[LWTUNNEL_IP_TTL]);
+ tun_info->key.ttl = nla_get_u8(tb[LWTUNNEL_IP_TTL]);
if (tb[LWTUNNEL_IP_TOS])
- tun_info->key.ipv4_tos = nla_get_u8(tb[LWTUNNEL_IP_TOS]);
+ tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP_TOS]);
if (tb[LWTUNNEL_IP_SPORT])
tun_info->key.tp_src = nla_get_be16(tb[LWTUNNEL_IP_SPORT]);
@@ -262,10 +263,10 @@ static int ip_tun_fill_encap_info(struct sk_buff *skb,
struct ip_tunnel_info *tun_info = lwt_tun_info(lwtstate);
if (nla_put_u64(skb, LWTUNNEL_IP_ID, tun_info->key.tun_id) ||
- nla_put_be32(skb, LWTUNNEL_IP_DST, tun_info->key.ipv4_dst) ||
- nla_put_be32(skb, LWTUNNEL_IP_SRC, tun_info->key.ipv4_src) ||
- nla_put_u8(skb, LWTUNNEL_IP_TOS, tun_info->key.ipv4_tos) ||
- nla_put_u8(skb, LWTUNNEL_IP_TTL, tun_info->key.ipv4_ttl) ||
+ nla_put_be32(skb, LWTUNNEL_IP_DST, tun_info->key.u.ipv4.dst) ||
+ nla_put_be32(skb, LWTUNNEL_IP_SRC, tun_info->key.u.ipv4.src) ||
+ nla_put_u8(skb, LWTUNNEL_IP_TOS, tun_info->key.tos) ||
+ nla_put_u8(skb, LWTUNNEL_IP_TTL, tun_info->key.ttl) ||
nla_put_u16(skb, LWTUNNEL_IP_SPORT, tun_info->key.tp_src) ||
nla_put_u16(skb, LWTUNNEL_IP_DPORT, tun_info->key.tp_dst) ||
nla_put_u16(skb, LWTUNNEL_IP_FLAGS, tun_info->key.tun_flags))
@@ -286,15 +287,125 @@ static int ip_tun_encap_nlsize(struct lwtunnel_state *lwtstate)
+ nla_total_size(2); /* LWTUNNEL_IP_FLAGS */
}
+static int ip_tun_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
+{
+ return memcmp(lwt_tun_info(a), lwt_tun_info(b),
+ sizeof(struct ip_tunnel_info));
+}
+
static const struct lwtunnel_encap_ops ip_tun_lwt_ops = {
.build_state = ip_tun_build_state,
.fill_encap = ip_tun_fill_encap_info,
.get_encap_size = ip_tun_encap_nlsize,
+ .cmp_encap = ip_tun_cmp_encap,
+};
+
+static const struct nla_policy ip6_tun_policy[LWTUNNEL_IP6_MAX + 1] = {
+ [LWTUNNEL_IP6_ID] = { .type = NLA_U64 },
+ [LWTUNNEL_IP6_DST] = { .len = sizeof(struct in6_addr) },
+ [LWTUNNEL_IP6_SRC] = { .len = sizeof(struct in6_addr) },
+ [LWTUNNEL_IP6_HOPLIMIT] = { .type = NLA_U8 },
+ [LWTUNNEL_IP6_TC] = { .type = NLA_U8 },
+ [LWTUNNEL_IP6_SPORT] = { .type = NLA_U16 },
+ [LWTUNNEL_IP6_DPORT] = { .type = NLA_U16 },
+ [LWTUNNEL_IP6_FLAGS] = { .type = NLA_U16 },
+};
+
+static int ip6_tun_build_state(struct net_device *dev, struct nlattr *attr,
+ unsigned int family, const void *cfg,
+ struct lwtunnel_state **ts)
+{
+ struct ip_tunnel_info *tun_info;
+ struct lwtunnel_state *new_state;
+ struct nlattr *tb[LWTUNNEL_IP6_MAX + 1];
+ int err;
+
+ err = nla_parse_nested(tb, LWTUNNEL_IP6_MAX, attr, ip6_tun_policy);
+ if (err < 0)
+ return err;
+
+ new_state = lwtunnel_state_alloc(sizeof(*tun_info));
+ if (!new_state)
+ return -ENOMEM;
+
+ new_state->type = LWTUNNEL_ENCAP_IP6;
+
+ tun_info = lwt_tun_info(new_state);
+
+ if (tb[LWTUNNEL_IP6_ID])
+ tun_info->key.tun_id = nla_get_u64(tb[LWTUNNEL_IP6_ID]);
+
+ if (tb[LWTUNNEL_IP6_DST])
+ tun_info->key.u.ipv6.dst = nla_get_in6_addr(tb[LWTUNNEL_IP6_DST]);
+
+ if (tb[LWTUNNEL_IP6_SRC])
+ tun_info->key.u.ipv6.src = nla_get_in6_addr(tb[LWTUNNEL_IP6_SRC]);
+
+ if (tb[LWTUNNEL_IP6_HOPLIMIT])
+ tun_info->key.ttl = nla_get_u8(tb[LWTUNNEL_IP6_HOPLIMIT]);
+
+ if (tb[LWTUNNEL_IP6_TC])
+ tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP6_TC]);
+
+ if (tb[LWTUNNEL_IP6_SPORT])
+ tun_info->key.tp_src = nla_get_be16(tb[LWTUNNEL_IP6_SPORT]);
+
+ if (tb[LWTUNNEL_IP6_DPORT])
+ tun_info->key.tp_dst = nla_get_be16(tb[LWTUNNEL_IP6_DPORT]);
+
+ if (tb[LWTUNNEL_IP6_FLAGS])
+ tun_info->key.tun_flags = nla_get_u16(tb[LWTUNNEL_IP6_FLAGS]);
+
+ tun_info->mode = IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_IPV6;
+ tun_info->options = NULL;
+ tun_info->options_len = 0;
+
+ *ts = new_state;
+
+ return 0;
+}
+
+static int ip6_tun_fill_encap_info(struct sk_buff *skb,
+ struct lwtunnel_state *lwtstate)
+{
+ struct ip_tunnel_info *tun_info = lwt_tun_info(lwtstate);
+
+ if (nla_put_u64(skb, LWTUNNEL_IP6_ID, tun_info->key.tun_id) ||
+ nla_put_in6_addr(skb, LWTUNNEL_IP6_DST, &tun_info->key.u.ipv6.dst) ||
+ nla_put_in6_addr(skb, LWTUNNEL_IP6_SRC, &tun_info->key.u.ipv6.src) ||
+ nla_put_u8(skb, LWTUNNEL_IP6_HOPLIMIT, tun_info->key.ttl) || /* hop limit is kept in key.ttl */
+ nla_put_u8(skb, LWTUNNEL_IP6_TC, tun_info->key.tos) || /* traffic class is kept in key.tos */
+ nla_put_u16(skb, LWTUNNEL_IP6_SPORT, tun_info->key.tp_src) ||
+ nla_put_u16(skb, LWTUNNEL_IP6_DPORT, tun_info->key.tp_dst) ||
+ nla_put_u16(skb, LWTUNNEL_IP6_FLAGS, tun_info->key.tun_flags))
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int ip6_tun_encap_nlsize(struct lwtunnel_state *lwtstate)
+{
+ return nla_total_size(8) /* LWTUNNEL_IP6_ID */
+ + nla_total_size(16) /* LWTUNNEL_IP6_DST */
+ + nla_total_size(16) /* LWTUNNEL_IP6_SRC */
+ + nla_total_size(1) /* LWTUNNEL_IP6_HOPLIMIT */
+ + nla_total_size(1) /* LWTUNNEL_IP6_TC */
+ + nla_total_size(2) /* LWTUNNEL_IP6_SPORT */
+ + nla_total_size(2) /* LWTUNNEL_IP6_DPORT */
+ + nla_total_size(2); /* LWTUNNEL_IP6_FLAGS */
+}
+
+static const struct lwtunnel_encap_ops ip6_tun_lwt_ops = {
+ .build_state = ip6_tun_build_state,
+ .fill_encap = ip6_tun_fill_encap_info,
+ .get_encap_size = ip6_tun_encap_nlsize,
+ .cmp_encap = ip_tun_cmp_encap,
};
void __init ip_tunnel_core_init(void)
{
lwtunnel_encap_add_ops(&ip_tun_lwt_ops, LWTUNNEL_ENCAP_IP);
+ lwtunnel_encap_add_ops(&ip6_tun_lwt_ops, LWTUNNEL_ENCAP_IP6);
}
struct static_key ip_tunnel_metadata_cnt = STATIC_KEY_INIT_FALSE;
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 2199a5db25e6..690d27d3f2f9 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -58,6 +58,12 @@ config NFT_REJECT_IPV4
default NFT_REJECT
tristate
+config NFT_DUP_IPV4
+ tristate "IPv4 nf_tables packet duplication support"
+ select NF_DUP_IPV4
+ help
+ This module enables IPv4 packet duplication support for nf_tables.
+
endif # NF_TABLES_IPV4
config NF_TABLES_ARP
@@ -67,6 +73,12 @@ config NF_TABLES_ARP
endif # NF_TABLES
+config NF_DUP_IPV4
+ tristate "Netfilter IPv4 packet duplication to alternate destination"
+ help
+ This option enables the nf_dup_ipv4 core, which duplicates an IPv4
+ packet to be rerouted to another destination.
+
config NF_LOG_ARP
tristate "ARP packet logging"
default m if NETFILTER_ADVANCED=n
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 7fe6c703528f..87b073da14c9 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -41,6 +41,7 @@ obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
obj-$(CONFIG_NFT_MASQ_IPV4) += nft_masq_ipv4.o
obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o
+obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o
obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
# generic IP tables
@@ -70,3 +71,5 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o
# just filtering instance of ARP tables for now
obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o
+
+obj-$(CONFIG_NF_DUP_IPV4) += nf_dup_ipv4.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index c416cb355cb0..8f87fc38ccde 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -367,13 +367,10 @@ static inline bool unconditional(const struct arpt_arp *arp)
/* Figures out from what hook each rule can be called: returns 0 if
* there are loops. Puts hook bitmask in comefrom.
- *
- * Keeps track of largest call depth seen and stores it in newinfo->stacksize.
*/
-static int mark_source_chains(struct xt_table_info *newinfo,
+static int mark_source_chains(const struct xt_table_info *newinfo,
unsigned int valid_hooks, void *entry0)
{
- unsigned int calldepth, max_calldepth = 0;
unsigned int hook;
/* No recursion; use packet counter to save back ptrs (reset
@@ -389,7 +386,6 @@ static int mark_source_chains(struct xt_table_info *newinfo,
/* Set initial back pointer. */
e->counters.pcnt = pos;
- calldepth = 0;
for (;;) {
const struct xt_standard_target *t
@@ -444,8 +440,6 @@ static int mark_source_chains(struct xt_table_info *newinfo,
(entry0 + pos + size);
e->counters.pcnt = pos;
pos += size;
- if (calldepth > 0)
- --calldepth;
} else {
int newpos = t->verdict;
@@ -460,10 +454,6 @@ static int mark_source_chains(struct xt_table_info *newinfo,
return 0;
}
- if (entry0 + newpos != arpt_next_entry(e) &&
- ++calldepth > max_calldepth)
- max_calldepth = calldepth;
-
/* This is a jump; chase it. */
duprintf("Jump rule %u -> %u\n",
pos, newpos);
@@ -480,7 +470,6 @@ static int mark_source_chains(struct xt_table_info *newinfo,
next:
duprintf("Finished chain %u\n", hook);
}
- newinfo->stacksize = max_calldepth;
return 1;
}
@@ -670,6 +659,9 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
if (ret != 0)
break;
++i;
+ if (strcmp(arpt_get_target(iter)->u.user.name,
+ XT_ERROR_TARGET) == 0)
+ ++newinfo->stacksize;
}
duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
if (ret != 0)
@@ -1442,6 +1434,9 @@ static int translate_compat_table(const char *name,
break;
}
++i;
+ if (strcmp(arpt_get_target(iter1)->u.user.name,
+ XT_ERROR_TARGET) == 0)
+ ++newinfo->stacksize;
}
if (ret) {
/*
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 787f99ed55e2..b0a86e73451c 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -443,15 +443,11 @@ ipt_do_table(struct sk_buff *skb,
}
/* Figures out from what hook each rule can be called: returns 0 if
- * there are loops. Puts hook bitmask in comefrom.
- *
- * Keeps track of largest call depth seen and stores it in newinfo->stacksize.
- */
+ there are loops. Puts hook bitmask in comefrom. */
static int
-mark_source_chains(struct xt_table_info *newinfo,
+mark_source_chains(const struct xt_table_info *newinfo,
unsigned int valid_hooks, void *entry0)
{
- unsigned int calldepth, max_calldepth = 0;
unsigned int hook;
/* No recursion; use packet counter to save back ptrs (reset
@@ -465,7 +461,6 @@ mark_source_chains(struct xt_table_info *newinfo,
/* Set initial back pointer. */
e->counters.pcnt = pos;
- calldepth = 0;
for (;;) {
const struct xt_standard_target *t
@@ -527,9 +522,6 @@ mark_source_chains(struct xt_table_info *newinfo,
(entry0 + pos + size);
e->counters.pcnt = pos;
pos += size;
- WARN_ON_ONCE(calldepth == 0);
- if (calldepth > 0)
- --calldepth;
} else {
int newpos = t->verdict;
@@ -543,14 +535,9 @@ mark_source_chains(struct xt_table_info *newinfo,
newpos);
return 0;
}
- if (entry0 + newpos != ipt_next_entry(e) &&
- !(e->ip.flags & IPT_F_GOTO) &&
- ++calldepth > max_calldepth)
- max_calldepth = calldepth;
-
/* This is a jump; chase it. */
- duprintf("Jump rule %u -> %u, calldepth %d\n",
- pos, newpos, calldepth);
+ duprintf("Jump rule %u -> %u\n",
+ pos, newpos);
} else {
/* ... this is a fallthru */
newpos = pos + e->next_offset;
@@ -564,7 +551,6 @@ mark_source_chains(struct xt_table_info *newinfo,
next:
duprintf("Finished chain %u\n", hook);
}
- newinfo->stacksize = max_calldepth;
return 1;
}
@@ -844,6 +830,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
if (ret != 0)
return ret;
++i;
+ if (strcmp(ipt_get_target(iter)->u.user.name,
+ XT_ERROR_TARGET) == 0)
+ ++newinfo->stacksize;
}
if (i != repl->num_entries) {
@@ -1759,6 +1748,9 @@ translate_compat_table(struct net *net,
if (ret != 0)
break;
++i;
+ if (strcmp(ipt_get_target(iter1)->u.user.name,
+ XT_ERROR_TARGET) == 0)
+ ++newinfo->stacksize;
}
if (ret) {
/*
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index 4bf3dc49ad1e..270765236f5e 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -72,7 +72,7 @@ set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo)
tcph->cwr = einfo->proto.tcp.cwr;
inet_proto_csum_replace2(&tcph->check, skb,
- oldval, ((__be16 *)tcph)[6], 0);
+ oldval, ((__be16 *)tcph)[6], false);
return true;
}
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 30ad9554b5e9..8a2caaf3940b 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -280,7 +280,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
return -EINVAL;
}
- h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple);
+ h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
if (h) {
struct sockaddr_in sin;
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 80d5554b9a88..cdde3ec496e9 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -134,9 +134,11 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
struct nf_conntrack_tuple innertuple, origtuple;
const struct nf_conntrack_l4proto *innerproto;
const struct nf_conntrack_tuple_hash *h;
- u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
+ const struct nf_conntrack_zone *zone;
+ struct nf_conntrack_zone tmp;
NF_CT_ASSERT(skb->nfct == NULL);
+ zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
/* Are they talking about one of our connections? */
if (!nf_ct_get_tuplepr(skb,
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index b69e82bda215..9306ec4fab41 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -43,19 +43,22 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum,
struct sk_buff *skb)
{
- u16 zone = NF_CT_DEFAULT_ZONE;
-
+ u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
- if (skb->nfct)
- zone = nf_ct_zone((struct nf_conn *)skb->nfct);
+ if (skb->nfct) {
+ enum ip_conntrack_info ctinfo;
+ const struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+
+ zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo));
+ }
#endif
if (nf_bridge_in_prerouting(skb))
- return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
+ return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id;
if (hooknum == NF_INET_PRE_ROUTING)
- return IP_DEFRAG_CONNTRACK_IN + zone;
+ return IP_DEFRAG_CONNTRACK_IN + zone_id;
else
- return IP_DEFRAG_CONNTRACK_OUT + zone;
+ return IP_DEFRAG_CONNTRACK_OUT + zone_id;
}
static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops,
diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c
new file mode 100644
index 000000000000..b5bb37564b0e
--- /dev/null
+++ b/net/ipv4/netfilter/nf_dup_ipv4.c
@@ -0,0 +1,120 @@
+/*
+ * (C) 2007 by Sebastian Claßen <sebastian.classen@freenet.ag>
+ * (C) 2007-2010 by Jan Engelhardt <jengelh@medozas.de>
+ *
+ * Extracted from xt_TEE.c
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 or later, as
+ * published by the Free Software Foundation.
+ */
+#include <linux/ip.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/route.h>
+#include <linux/skbuff.h>
+#include <net/checksum.h>
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <net/netfilter/ipv4/nf_dup_ipv4.h>
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
+/*
+ * Determine the network namespace a packet belongs to.
+ *
+ * With CONFIG_NET_NS the device attached to the skb is preferred, then the
+ * device of the skb's dst entry. If neither is set, or namespaces are
+ * compiled out, &init_net is returned.
+ */
+static struct net *pick_net(struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_NS
+	if (skb->dev) {
+		return dev_net(skb->dev);
+	} else {
+		const struct dst_entry *route = skb_dst(skb);
+
+		if (route && route->dev)
+			return dev_net(route->dev);
+	}
+#endif
+	return &init_net;
+}
+
+/*
+ * Look up an output route towards @gw and attach it to @skb.
+ *
+ * @skb: packet to be rerouted (its IPv4 header supplies the TOS)
+ * @gw:  address used as the destination of the route lookup
+ * @oif: output interface index for the lookup, or -1 to leave it unset
+ *
+ * Returns true when a route was found; the skb's previous dst is dropped and
+ * its dst, device and protocol are updated. Returns false when the lookup
+ * fails, in which case the skb is left as it was.
+ */
+static bool nf_dup_ipv4_route(struct sk_buff *skb, const struct in_addr *gw,
+			      int oif)
+{
+	struct rtable *route;
+	struct flowi4 key;
+
+	/* Build the lookup key from a fully zeroed structure. */
+	memset(&key, 0, sizeof(key));
+	key.flowi4_flags = FLOWI_FLAG_KNOWN_NH;
+	key.flowi4_scope = RT_SCOPE_UNIVERSE;
+	key.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
+	key.daddr = gw->s_addr;
+	if (oif != -1)
+		key.flowi4_oif = oif;
+
+	route = ip_route_output_key(pick_net(skb), &key);
+	if (IS_ERR(route))
+		return false;
+
+	/* Swap the old dst for the new route and point the skb at its device. */
+	skb_dst_drop(skb);
+	skb_dst_set(skb, &route->dst);
+	skb->dev = route->dst.dev;
+	skb->protocol = htons(ETH_P_IP);
+
+	return true;
+}
+
+/*
+ * nf_dup_ipv4 - send a copy of an IPv4 packet towards @gw
+ * @skb:     packet to duplicate; every change below is made to the copy
+ * @hooknum: netfilter hook being traversed; the TTL of the copy is only
+ *           decremented for NF_INET_PRE_ROUTING and NF_INET_LOCAL_IN
+ * @gw:      address used as the route lookup destination for the copy
+ * @oif:     output interface index for the lookup, or -1 for none
+ *
+ * Returns without doing anything if the per-cpu nf_skb_duplicated flag is
+ * already set or if the copy cannot be allocated. If no route to @gw can be
+ * found, the copy is freed.
+ */
+void nf_dup_ipv4(struct sk_buff *skb, unsigned int hooknum,
+ const struct in_addr *gw, int oif)
+{
+ struct iphdr *iph;
+
+ if (this_cpu_read(nf_skb_duplicated))
+ return;
+ /*
+ * Copy the skb, and route the copy. Will later return %XT_CONTINUE for
+ * the original skb, which should continue on its way as if nothing has
+ * happened. The copy should be independently delivered to the gateway.
+ */
+ skb = pskb_copy(skb, GFP_ATOMIC);
+ if (skb == NULL)
+ return;
+
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ /* Avoid counting cloned packets towards the original connection. */
+ nf_conntrack_put(skb->nfct);
+ skb->nfct = &nf_ct_untracked_get()->ct_general;
+ skb->nfctinfo = IP_CT_NEW;
+ nf_conntrack_get(skb->nfct);
+#endif
+ /*
+ * If we are in PREROUTING/INPUT, the checksum must be recalculated
+ * since the length could have changed as a result of defragmentation.
+ *
+ * We also decrease the TTL to mitigate potential loops between two
+ * hosts.
+ *
+ * Set %IP_DF so that the original source is notified of a potentially
+ * decreased MTU on the clone route. IPv6 does this too.
+ */
+ iph = ip_hdr(skb);
+ iph->frag_off |= htons(IP_DF);
+ if (hooknum == NF_INET_PRE_ROUTING ||
+ hooknum == NF_INET_LOCAL_IN)
+ --iph->ttl;
+ ip_send_check(iph);
+
+ if (nf_dup_ipv4_route(skb, gw, oif)) {
+ /*
+ * The flag is held only while the copy is being sent; the check at
+ * the top of this function returns early while it is set.
+ * NOTE(review): presumably this keeps the copy from being duplicated
+ * again if it passes through this function during ip_local_out() -
+ * confirm.
+ */
+ __this_cpu_write(nf_skb_duplicated, true);
+ ip_local_out(skb);
+ __this_cpu_write(nf_skb_duplicated, false);
+ } else {
+ kfree_skb(skb);
+ }
+}
+EXPORT_SYMBOL_GPL(nf_dup_ipv4);
+
+MODULE_AUTHOR("Sebastian Claßen <sebastian.classen@freenet.ag>");
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
+MODULE_DESCRIPTION("nf_dup_ipv4: Duplicate IPv4 packet");
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index e59cc05c09e9..22f4579b0c2a 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -120,7 +120,7 @@ static void nf_nat_ipv4_csum_update(struct sk_buff *skb,
oldip = iph->daddr;
newip = t->dst.u3.ip;
}
- inet_proto_csum_replace4(check, skb, oldip, newip, 1);
+ inet_proto_csum_replace4(check, skb, oldip, newip, true);
}
static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
@@ -151,7 +151,7 @@ static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
}
} else
inet_proto_csum_replace2(check, skb,
- htons(oldlen), htons(datalen), 1);
+ htons(oldlen), htons(datalen), true);
}
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
index 4557b4ab8342..7b98baa13ede 100644
--- a/net/ipv4/netfilter/nf_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c
@@ -67,7 +67,7 @@ icmp_manip_pkt(struct sk_buff *skb,
hdr = (struct icmphdr *)(skb->data + hdroff);
inet_proto_csum_replace2(&hdr->checksum, skb,
- hdr->un.echo.id, tuple->src.u.icmp.id, 0);
+ hdr->un.echo.id, tuple->src.u.icmp.id, false);
hdr->un.echo.id = tuple->src.u.icmp.id;
return true;
}
diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c
new file mode 100644
index 000000000000..b45932d43b69
--- /dev/null
+++ b/net/ipv4/netfilter/nft_dup_ipv4.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2015 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/ipv4/nf_dup_ipv4.h>
+
+/* Private data of an IPv4 "dup" expression: the source registers it reads. */
+struct nft_dup_ipv4 {
+ enum nft_registers sreg_addr:8; /* register whose value is used as the gateway address */
+ enum nft_registers sreg_dev:8; /* register whose value is used as the output interface index; only set when NFTA_DUP_SREG_DEV is given */
+};
+
+/*
+ * Evaluate a "dup" expression: duplicate the packet towards the gateway
+ * address held in register sreg_addr, optionally through the interface whose
+ * index is held in register sreg_dev.
+ *
+ * @expr: expression being evaluated (its private area is struct nft_dup_ipv4)
+ * @regs: register file the source registers are read from
+ * @pkt:  packet context supplying the skb and the hook number
+ */
+static void nft_dup_ipv4_eval(const struct nft_expr *expr,
+			      struct nft_regs *regs,
+			      const struct nft_pktinfo *pkt)
+{
+	struct nft_dup_ipv4 *priv = nft_expr_priv(expr);
+	struct in_addr gw = {
+		.s_addr = (__force __be32)regs->data[priv->sreg_addr],
+	};
+	/*
+	 * NFTA_DUP_SREG_DEV is optional. When it was not supplied, sreg_dev
+	 * was never written by the init callback and is expected to be 0
+	 * (assumes the expression private area is zero-initialised - TODO
+	 * confirm). Reading regs->data[0] in that case would hand an
+	 * arbitrary value to the route lookup as the interface index, so
+	 * pass -1, which nf_dup_ipv4() treats as "no output interface".
+	 */
+	int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1;
+
+	nf_dup_ipv4(pkt->skb, pkt->ops->hooknum, &gw, oif);
+}
+
+/*
+ * Parse and validate the netlink attributes of a "dup" expression.
+ *
+ * NFTA_DUP_SREG_ADDR is mandatory and must name a register that can be
+ * loaded with sizeof(struct in_addr) bytes. NFTA_DUP_SREG_DEV is optional
+ * and, when present, must name a register that can be loaded with
+ * sizeof(int) bytes.
+ *
+ * Returns 0 on success, -EINVAL if the address attribute is missing, or the
+ * error reported by nft_validate_register_load().
+ */
+static int nft_dup_ipv4_init(const struct nft_ctx *ctx,
+			     const struct nft_expr *expr,
+			     const struct nlattr * const tb[])
+{
+	struct nft_dup_ipv4 *priv = nft_expr_priv(expr);
+	const struct nlattr *addr_attr = tb[NFTA_DUP_SREG_ADDR];
+	const struct nlattr *dev_attr = tb[NFTA_DUP_SREG_DEV];
+	int ret;
+
+	if (!addr_attr)
+		return -EINVAL;
+
+	priv->sreg_addr = nft_parse_register(addr_attr);
+	ret = nft_validate_register_load(priv->sreg_addr,
+					 sizeof(struct in_addr));
+	if (ret < 0)
+		return ret;
+
+	/* The device register is optional; nothing more to do without it. */
+	if (!dev_attr)
+		return 0;
+
+	priv->sreg_dev = nft_parse_register(dev_attr);
+	return nft_validate_register_load(priv->sreg_dev, sizeof(int));
+}
+
+/*
+ * Dump the expression's configuration into a netlink message.
+ *
+ * @skb:  netlink message being built
+ * @expr: expression whose private data is dumped
+ *
+ * Returns 0 on success, -1 if an attribute could not be added.
+ */
+static int nft_dup_ipv4_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	struct nft_dup_ipv4 *priv = nft_expr_priv(expr);
+
+	if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr))
+		goto nla_put_failure;
+
+	/*
+	 * NFTA_DUP_SREG_DEV is optional at init time; only report it when a
+	 * device register was actually configured, so the dump mirrors what
+	 * was supplied. An unset sreg_dev is expected to be 0 (assumes the
+	 * expression private area is zero-initialised - TODO confirm).
+	 */
+	if (priv->sreg_dev &&
+	    nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev))
+		goto nla_put_failure;
+
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+/* Tentative definition: the ops table below points at the type before it is fully defined. */
+static struct nft_expr_type nft_dup_ipv4_type;
+/* Callbacks and private-data size of the IPv4 "dup" expression. */
+static const struct nft_expr_ops nft_dup_ipv4_ops = {
+ .type = &nft_dup_ipv4_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_dup_ipv4)),
+ .eval = nft_dup_ipv4_eval,
+ .init = nft_dup_ipv4_init,
+ .dump = nft_dup_ipv4_dump,
+};
+
+/* Netlink attribute policy: both source-register attributes are 32-bit values. */
+static const struct nla_policy nft_dup_ipv4_policy[NFTA_DUP_MAX + 1] = {
+ [NFTA_DUP_SREG_ADDR] = { .type = NLA_U32 },
+ [NFTA_DUP_SREG_DEV] = { .type = NLA_U32 },
+};
+
+/* Expression type registered under the name "dup" for the NFPROTO_IPV4 family. */
+static struct nft_expr_type nft_dup_ipv4_type __read_mostly = {
+ .family = NFPROTO_IPV4,
+ .name = "dup",
+ .ops = &nft_dup_ipv4_ops,
+ .policy = nft_dup_ipv4_policy,
+ .maxattr = NFTA_DUP_MAX,
+ .owner = THIS_MODULE,
+};
+
+/* Module entry point: register the "dup" expression type; returns nft_register_expr()'s result. */
+static int __init nft_dup_ipv4_module_init(void)
+{
+ return nft_register_expr(&nft_dup_ipv4_type);
+}
+
+/* Module exit point: unregister the expression type registered at init. */
+static void __exit nft_dup_ipv4_module_exit(void)
+{
+ nft_unregister_expr(&nft_dup_ipv4_type);
+}
+
+module_init(nft_dup_ipv4_module_init);
+module_exit(nft_dup_ipv4_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "dup");
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 2c89d294b669..5f4a5565ad8b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -838,6 +838,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
struct inet_peer *peer;
struct net *net;
int log_martians;
+ int vif;
rcu_read_lock();
in_dev = __in_dev_get_rcu(rt->dst.dev);
@@ -846,10 +847,11 @@ void ip_rt_send_redirect(struct sk_buff *skb)
return;
}
log_martians = IN_DEV_LOG_MARTIANS(in_dev);
+ vif = vrf_master_ifindex_rcu(rt->dst.dev);
rcu_read_unlock();
net = dev_net(rt->dst.dev);
- peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
+ peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif, 1);
if (!peer) {
icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
rt_nexthop(rt, ip_hdr(skb)->daddr));
@@ -938,7 +940,8 @@ static int ip_error(struct sk_buff *skb)
break;
}
- peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
+ peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr,
+ vrf_master_ifindex(skb->dev), 1);
send = true;
if (peer) {
@@ -1359,7 +1362,6 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
list_del(&rt->rt_uncached);
spin_unlock_bh(&ul->lock);
}
- lwtstate_put(rt->rt_lwtstate);
}
void rt_flush_dev(struct net_device *dev)
@@ -1408,7 +1410,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
#ifdef CONFIG_IP_ROUTE_CLASSID
rt->dst.tclassid = nh->nh_tclassid;
#endif
- rt->rt_lwtstate = lwtstate_get(nh->nh_lwtstate);
+ rt->dst.lwtstate = lwtstate_get(nh->nh_lwtstate);
if (unlikely(fnhe))
cached = rt_bind_exception(rt, fnhe, daddr);
else if (!(rt->dst.flags & DST_NOCACHE))
@@ -1494,7 +1496,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth->rt_gateway = 0;
rth->rt_uses_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached);
- rth->rt_lwtstate = NULL;
if (our) {
rth->dst.input= ip_local_deliver;
rth->rt_flags |= RTCF_LOCAL;
@@ -1624,15 +1625,20 @@ static int __mkroute_input(struct sk_buff *skb,
rth->rt_gateway = 0;
rth->rt_uses_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached);
- rth->rt_lwtstate = NULL;
RT_CACHE_STAT_INC(in_slow_tot);
rth->dst.input = ip_forward;
rth->dst.output = ip_output;
rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag);
- if (lwtunnel_output_redirect(rth->rt_lwtstate))
+ if (lwtunnel_output_redirect(rth->dst.lwtstate)) {
+ rth->dst.lwtstate->orig_output = rth->dst.output;
rth->dst.output = lwtunnel_output;
+ }
+ if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
+ rth->dst.lwtstate->orig_input = rth->dst.input;
+ rth->dst.input = lwtunnel_input;
+ }
skb_dst_set(skb, &rth->dst);
out:
err = 0;
@@ -1689,8 +1695,8 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
by fib_lookup.
*/
- tun_info = skb_tunnel_info(skb, AF_INET);
- if (tun_info && tun_info->mode == IP_TUNNEL_INFO_RX)
+ tun_info = skb_tunnel_info(skb);
+ if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
fl4.flowi4_tun_key.tun_id = tun_info->key.tun_id;
else
fl4.flowi4_tun_key.tun_id = 0;
@@ -1809,7 +1815,6 @@ local_input:
rth->rt_gateway = 0;
rth->rt_uses_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached);
- rth->rt_lwtstate = NULL;
RT_CACHE_STAT_INC(in_slow_tot);
if (res.type == RTN_UNREACHABLE) {
@@ -2000,7 +2005,6 @@ add:
rth->rt_gateway = 0;
rth->rt_uses_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached);
- rth->rt_lwtstate = NULL;
RT_CACHE_STAT_INC(out_slow_tot);
if (flags & RTCF_LOCAL)
@@ -2023,7 +2027,7 @@ add:
}
rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0);
- if (lwtunnel_output_redirect(rth->rt_lwtstate))
+ if (lwtunnel_output_redirect(rth->dst.lwtstate))
rth->dst.output = lwtunnel_output;
return rth;
@@ -2287,7 +2291,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
rt->rt_uses_gateway = ort->rt_uses_gateway;
INIT_LIST_HEAD(&rt->rt_uncached);
- rt->rt_lwtstate = NULL;
dst_free(new);
}
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 433231ccfb17..894da3a70aff 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -29,6 +29,7 @@
static int zero;
static int one = 1;
static int four = 4;
+static int thousand = 1000;
static int gso_max_segs = GSO_MAX_SEGS;
static int tcp_retr1_max = 255;
static int ip_local_port_range_min[] = { 1, 1 };
@@ -41,8 +42,6 @@ static int tcp_syn_retries_min = 1;
static int tcp_syn_retries_max = MAX_TCP_SYNCNT;
static int ip_ping_group_range_min[] = { 0, 0 };
static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
-static int min_sndbuf = SOCK_MIN_SNDBUF;
-static int min_rcvbuf = SOCK_MIN_RCVBUF;
/* Update system visible IP port range */
static void set_local_port_range(struct net *net, int range[2])
@@ -530,7 +529,7 @@ static struct ctl_table ipv4_table[] = {
.maxlen = sizeof(sysctl_tcp_wmem),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &min_sndbuf,
+ .extra1 = &one,
},
{
.procname = "tcp_notsent_lowat",
@@ -545,7 +544,7 @@ static struct ctl_table ipv4_table[] = {
.maxlen = sizeof(sysctl_tcp_rmem),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &min_rcvbuf,
+ .extra1 = &one,
},
{
.procname = "tcp_app_win",
@@ -714,6 +713,24 @@ static struct ctl_table ipv4_table[] = {
.extra2 = &gso_max_segs,
},
{
+ .procname = "tcp_pacing_ss_ratio",
+ .data = &sysctl_tcp_pacing_ss_ratio,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &thousand,
+ },
+ {
+ .procname = "tcp_pacing_ca_ratio",
+ .data = &sysctl_tcp_pacing_ca_ratio,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &thousand,
+ },
+ {
.procname = "tcp_autocorking",
.data = &sysctl_tcp_autocorking,
.maxlen = sizeof(int),
@@ -758,7 +775,7 @@ static struct ctl_table ipv4_table[] = {
.maxlen = sizeof(sysctl_udp_rmem_min),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &min_rcvbuf,
+ .extra1 = &one
},
{
.procname = "udp_wmem_min",
@@ -766,7 +783,7 @@ static struct ctl_table ipv4_table[] = {
.maxlen = sizeof(sysctl_udp_wmem_min),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &min_sndbuf,
+ .extra1 = &one
},
{ }
};
@@ -912,6 +929,13 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "igmp_link_local_mcast_reports",
+ .data = &sysctl_igmp_llm_reports,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
{ }
};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 45534a5ab430..b8b8fa184f75 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -627,6 +627,8 @@ static void skb_entail(struct sock *sk, struct sk_buff *skb)
sk_mem_charge(sk, skb->truesize);
if (tp->nonagle & TCP_NAGLE_PUSH)
tp->nonagle &= ~TCP_NAGLE_PUSH;
+
+ tcp_slow_start_after_idle_check(sk);
}
static inline void tcp_mark_urg(struct tcp_sock *tp, int flags)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 4e4d6bcd0ca9..dc08e2352665 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -753,13 +753,29 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
* TCP pacing, to smooth the burst on large writes when packets
* in flight is significantly lower than cwnd (or rwin)
*/
+int sysctl_tcp_pacing_ss_ratio __read_mostly = 200;
+int sysctl_tcp_pacing_ca_ratio __read_mostly = 120;
+
static void tcp_update_pacing_rate(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
u64 rate;
/* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */
- rate = (u64)tp->mss_cache * 2 * (USEC_PER_SEC << 3);
+ rate = (u64)tp->mss_cache * ((USEC_PER_SEC / 100) << 3);
+
+ /* current rate is (cwnd * mss) / srtt
+ * In Slow Start [1], set sk_pacing_rate to 200 % the current rate.
+ * In Congestion Avoidance phase, set it to 120 % the current rate.
+ *
+ * [1] : Normal Slow Start condition is (tp->snd_cwnd < tp->snd_ssthresh)
+ * If snd_cwnd >= (tp->snd_ssthresh / 2), we are approaching
+ * end of slow start and should slow down.
+ */
+ if (tp->snd_cwnd < tp->snd_ssthresh / 2)
+ rate *= sysctl_tcp_pacing_ss_ratio;
+ else
+ rate *= sysctl_tcp_pacing_ca_ratio;
rate *= max(tp->snd_cwnd, tp->packets_out);
@@ -3332,6 +3348,9 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
tp->pred_flags = 0;
tcp_fast_path_check(sk);
+ if (tcp_send_head(sk))
+ tcp_slow_start_after_idle_check(sk);
+
if (nwin > tp->max_window) {
tp->max_window = nwin;
tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index b3d64f61d922..c8cbc2b4b792 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -81,11 +81,7 @@ static void tcp_metric_set(struct tcp_metrics_block *tm,
static bool addr_same(const struct inetpeer_addr *a,
const struct inetpeer_addr *b)
{
- if (a->family != b->family)
- return false;
- if (a->family == AF_INET)
- return a->addr.a4 == b->addr.a4;
- return ipv6_addr_equal(&a->addr.in6, &b->addr.in6);
+ return inetpeer_addr_cmp(a, b) == 0;
}
struct tcpm_hash_bucket {
@@ -247,14 +243,14 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req,
daddr.family = req->rsk_ops->family;
switch (daddr.family) {
case AF_INET:
- saddr.addr.a4 = inet_rsk(req)->ir_loc_addr;
- daddr.addr.a4 = inet_rsk(req)->ir_rmt_addr;
- hash = (__force unsigned int) daddr.addr.a4;
+ inetpeer_set_addr_v4(&saddr, inet_rsk(req)->ir_loc_addr);
+ inetpeer_set_addr_v4(&daddr, inet_rsk(req)->ir_rmt_addr);
+ hash = ipv4_addr_hash(inet_rsk(req)->ir_rmt_addr);
break;
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
- saddr.addr.in6 = inet_rsk(req)->ir_v6_loc_addr;
- daddr.addr.in6 = inet_rsk(req)->ir_v6_rmt_addr;
+ inetpeer_set_addr_v6(&saddr, &inet_rsk(req)->ir_v6_loc_addr);
+ inetpeer_set_addr_v6(&daddr, &inet_rsk(req)->ir_v6_rmt_addr);
hash = ipv6_addr_hash(&inet_rsk(req)->ir_v6_rmt_addr);
break;
#endif
@@ -285,25 +281,19 @@ static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock
struct net *net;
if (tw->tw_family == AF_INET) {
- saddr.family = AF_INET;
- saddr.addr.a4 = tw->tw_rcv_saddr;
- daddr.family = AF_INET;
- daddr.addr.a4 = tw->tw_daddr;
- hash = (__force unsigned int) daddr.addr.a4;
+ inetpeer_set_addr_v4(&saddr, tw->tw_rcv_saddr);
+ inetpeer_set_addr_v4(&daddr, tw->tw_daddr);
+ hash = ipv4_addr_hash(tw->tw_daddr);
}
#if IS_ENABLED(CONFIG_IPV6)
else if (tw->tw_family == AF_INET6) {
if (ipv6_addr_v4mapped(&tw->tw_v6_daddr)) {
- saddr.family = AF_INET;
- saddr.addr.a4 = tw->tw_rcv_saddr;
- daddr.family = AF_INET;
- daddr.addr.a4 = tw->tw_daddr;
- hash = (__force unsigned int) daddr.addr.a4;
+ inetpeer_set_addr_v4(&saddr, tw->tw_rcv_saddr);
+ inetpeer_set_addr_v4(&daddr, tw->tw_daddr);
+ hash = ipv4_addr_hash(tw->tw_daddr);
} else {
- saddr.family = AF_INET6;
- saddr.addr.in6 = tw->tw_v6_rcv_saddr;
- daddr.family = AF_INET6;
- daddr.addr.in6 = tw->tw_v6_daddr;
+ inetpeer_set_addr_v6(&saddr, &tw->tw_v6_rcv_saddr);
+ inetpeer_set_addr_v6(&daddr, &tw->tw_v6_daddr);
hash = ipv6_addr_hash(&tw->tw_v6_daddr);
}
}
@@ -335,25 +325,19 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk,
struct net *net;
if (sk->sk_family == AF_INET) {
- saddr.family = AF_INET;
- saddr.addr.a4 = inet_sk(sk)->inet_saddr;
- daddr.family = AF_INET;
- daddr.addr.a4 = inet_sk(sk)->inet_daddr;
- hash = (__force unsigned int) daddr.addr.a4;
+ inetpeer_set_addr_v4(&saddr, inet_sk(sk)->inet_saddr);
+ inetpeer_set_addr_v4(&daddr, inet_sk(sk)->inet_daddr);
+ hash = ipv4_addr_hash(inet_sk(sk)->inet_daddr);
}
#if IS_ENABLED(CONFIG_IPV6)
else if (sk->sk_family == AF_INET6) {
if (ipv6_addr_v4mapped(&sk->sk_v6_daddr)) {
- saddr.family = AF_INET;
- saddr.addr.a4 = inet_sk(sk)->inet_saddr;
- daddr.family = AF_INET;
- daddr.addr.a4 = inet_sk(sk)->inet_daddr;
- hash = (__force unsigned int) daddr.addr.a4;
+ inetpeer_set_addr_v4(&saddr, inet_sk(sk)->inet_saddr);
+ inetpeer_set_addr_v4(&daddr, inet_sk(sk)->inet_daddr);
+ hash = ipv4_addr_hash(inet_sk(sk)->inet_daddr);
} else {
- saddr.family = AF_INET6;
- saddr.addr.in6 = sk->sk_v6_rcv_saddr;
- daddr.family = AF_INET6;
- daddr.addr.in6 = sk->sk_v6_daddr;
+ inetpeer_set_addr_v6(&saddr, &sk->sk_v6_rcv_saddr);
+ inetpeer_set_addr_v6(&daddr, &sk->sk_v6_daddr);
hash = ipv6_addr_hash(&sk->sk_v6_daddr);
}
}
@@ -796,18 +780,18 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
switch (tm->tcpm_daddr.family) {
case AF_INET:
if (nla_put_in_addr(msg, TCP_METRICS_ATTR_ADDR_IPV4,
- tm->tcpm_daddr.addr.a4) < 0)
+ inetpeer_get_addr_v4(&tm->tcpm_daddr)) < 0)
goto nla_put_failure;
if (nla_put_in_addr(msg, TCP_METRICS_ATTR_SADDR_IPV4,
- tm->tcpm_saddr.addr.a4) < 0)
+ inetpeer_get_addr_v4(&tm->tcpm_saddr)) < 0)
goto nla_put_failure;
break;
case AF_INET6:
if (nla_put_in6_addr(msg, TCP_METRICS_ATTR_ADDR_IPV6,
- &tm->tcpm_daddr.addr.in6) < 0)
+ inetpeer_get_addr_v6(&tm->tcpm_daddr)) < 0)
goto nla_put_failure;
if (nla_put_in6_addr(msg, TCP_METRICS_ATTR_SADDR_IPV6,
- &tm->tcpm_saddr.addr.in6) < 0)
+ inetpeer_get_addr_v6(&tm->tcpm_saddr)) < 0)
goto nla_put_failure;
break;
default:
@@ -956,20 +940,21 @@ static int __parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr,
a = info->attrs[v4];
if (a) {
- addr->family = AF_INET;
- addr->addr.a4 = nla_get_in_addr(a);
+ inetpeer_set_addr_v4(addr, nla_get_in_addr(a));
if (hash)
- *hash = (__force unsigned int) addr->addr.a4;
+ *hash = ipv4_addr_hash(inetpeer_get_addr_v4(addr));
return 0;
}
a = info->attrs[v6];
if (a) {
+ struct in6_addr in6;
+
if (nla_len(a) != sizeof(struct in6_addr))
return -EINVAL;
- addr->family = AF_INET6;
- addr->addr.in6 = nla_get_in6_addr(a);
+ in6 = nla_get_in6_addr(a);
+ inetpeer_set_addr_v6(addr, &in6);
if (hash)
- *hash = ipv6_addr_hash(&addr->addr.in6);
+ *hash = ipv6_addr_hash(inetpeer_get_addr_v6(addr));
return 0;
}
return optional ? 1 : -EAFNOSUPPORT;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 444ab5beecbd..1188e4fcf23b 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -137,12 +137,12 @@ static __u16 tcp_advertise_mss(struct sock *sk)
}
/* RFC2861. Reset CWND after idle period longer than RTO to "restart window".
- * This is the first part of cwnd validation mechanism. */
-static void tcp_cwnd_restart(struct sock *sk, const struct dst_entry *dst)
+ * This is the first part of cwnd validation mechanism.
+ */
+void tcp_cwnd_restart(struct sock *sk, s32 delta)
{
struct tcp_sock *tp = tcp_sk(sk);
- s32 delta = tcp_time_stamp - tp->lsndtime;
- u32 restart_cwnd = tcp_init_cwnd(tp, dst);
+ u32 restart_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
u32 cwnd = tp->snd_cwnd;
tcp_ca_event(sk, CA_EVENT_CWND_RESTART);
@@ -164,10 +164,6 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
struct inet_connection_sock *icsk = inet_csk(sk);
const u32 now = tcp_time_stamp;
- if (sysctl_tcp_slow_start_after_idle &&
- (!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto))
- tcp_cwnd_restart(sk, __sk_dst_get(sk));
-
tp->lsndtime = now;
/* If it is a reply for ato after last received
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index 933ea903f7b8..aba428626b52 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -4,9 +4,10 @@
#include <linux/udp.h>
#include <linux/types.h>
#include <linux/kernel.h>
+#include <net/dst_metadata.h>
+#include <net/net_namespace.h>
#include <net/udp.h>
#include <net/udp_tunnel.h>
-#include <net/net_namespace.h>
int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
struct socket **sockp)
@@ -103,4 +104,26 @@ void udp_tunnel_sock_release(struct socket *sock)
}
EXPORT_SYMBOL_GPL(udp_tunnel_sock_release);
+/*
+ * Build the receive-side tunnel metadata dst for a UDP-encapsulated packet.
+ *
+ * @skb:       received packet; its UDP header supplies ports and checksum
+ * @family:    AF_INET selects ip_tun_rx_dst(); any other value selects
+ *             ipv6_tun_rx_dst()
+ * @flags:     tunnel flags forwarded to the per-family helper
+ * @tunnel_id: tunnel id forwarded to the per-family helper
+ * @md_size:   metadata size forwarded to the per-family helper
+ *
+ * On success the tunnel key's tp_src/tp_dst are filled from the UDP header
+ * and TUNNEL_CSUM is set when the UDP checksum field is non-zero.
+ * Returns NULL if the per-family helper fails.
+ */
+struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family,
+				    __be16 flags, __be64 tunnel_id, int md_size)
+{
+	struct metadata_dst *md;
+	struct ip_tunnel_info *tun;
+	const struct udphdr *uh;
+
+	md = (family == AF_INET) ?
+		ip_tun_rx_dst(skb, flags, tunnel_id, md_size) :
+		ipv6_tun_rx_dst(skb, flags, tunnel_id, md_size);
+	if (!md)
+		return NULL;
+
+	uh = udp_hdr(skb);
+	tun = &md->u.tun_info;
+	tun->key.tp_src = uh->source;
+	tun->key.tp_dst = uh->dest;
+	if (uh->check)
+		tun->key.tun_flags |= TUNNEL_CSUM;
+
+	return md;
+}
+EXPORT_SYMBOL_GPL(udp_tun_rx_dst);
+
MODULE_LICENSE("GPL");
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 55b3c0f4dde5..bb919b28619f 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -15,6 +15,7 @@
#include <net/dst.h>
#include <net/xfrm.h>
#include <net/ip.h>
+#include <net/vrf.h>
static struct xfrm_policy_afinfo xfrm4_policy_afinfo;
@@ -107,8 +108,10 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
struct flowi4 *fl4 = &fl->u.ip4;
int oif = 0;
- if (skb_dst(skb))
- oif = skb_dst(skb)->dev->ifindex;
+ if (skb_dst(skb)) {
+ oif = vrf_master_ifindex(skb_dst(skb)->dev) ?
+ : skb_dst(skb)->dev->ifindex;
+ }
memset(fl4, 0, sizeof(struct flowi4));
fl4->flowi4_mark = skb->mark;
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 643f61339e7b..983bb999738c 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -92,6 +92,25 @@ config IPV6_MIP6
If unsure, say N.
+config IPV6_ILA
+ tristate "IPv6: Identifier Locator Addressing (ILA)"
+ select LWTUNNEL
+ ---help---
+ Support for IPv6 Identifier Locator Addressing (ILA).
+
+ ILA is a mechanism to do network virtualization without
+ encapsulation. The basic concept of ILA is that we split an
+ IPv6 address into a 64 bit locator and 64 bit identifier. The
+ identifier is the identity of an entity in communication
+ ("who") and the locator expresses the location of the
+ entity ("where").
+
+ ILA can be configured using the "encap ila" option with
+ the "ip -6 route" command. ILA is described in
+ https://tools.ietf.org/html/draft-herbert-nvo3-ila-00.
+
+ If unsure, say N.
+
config INET6_XFRM_TUNNEL
tristate
select INET6_TUNNEL
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 0f3f1999719a..2c900c7b7eb1 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -34,6 +34,7 @@ obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o
obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o
obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o
obj-$(CONFIG_IPV6_MIP6) += mip6.o
+obj-$(CONFIG_IPV6_ILA) += ila.o
obj-$(CONFIG_NETFILTER) += netfilter/
obj-$(CONFIG_IPV6_VTI) += ip6_vti.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 59242399b0b5..0f08d3b9e238 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3656,7 +3656,7 @@ static void addrconf_dad_work(struct work_struct *w)
/* send a neighbour solicitation for our addr */
addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
- ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any);
+ ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any, NULL);
out:
in6_ifa_put(ifp);
rtnl_unlock();
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index ed7d4e3f9c10..0630a4d5daaa 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -577,8 +577,10 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
work_iph = ah_alloc_tmp(ahash, nfrags + sglists, hdr_len +
ahp->icv_trunc_len + seqhi_len);
- if (!work_iph)
+ if (!work_iph) {
+ err = -ENOMEM;
goto out;
+ }
auth_data = ah_tmp_auth((u8 *)work_iph, hdr_len);
seqhi = (__be32 *)(auth_data + ahp->icv_trunc_len);
diff --git a/net/ipv6/ila.c b/net/ipv6/ila.c
new file mode 100644
index 000000000000..678d2df4b8d9
--- /dev/null
+++ b/net/ipv6/ila.c
@@ -0,0 +1,229 @@
+#include <linux/errno.h>
+#include <linux/ip.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/types.h>
+#include <net/checksum.h>
+#include <net/ip.h>
+#include <net/ip6_fib.h>
+#include <net/lwtunnel.h>
+#include <net/protocol.h>
+#include <uapi/linux/ila.h>
+
+struct ila_params {
+ __be64 locator;
+ __be64 locator_match;
+ __wsum csum_diff;
+};
+
+static inline struct ila_params *ila_params_lwtunnel(
+ struct lwtunnel_state *lwstate)
+{
+ return (struct ila_params *)lwstate->data;
+}
+
+static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to)
+{
+ __be32 diff[] = {
+ ~from[0], ~from[1], to[0], to[1],
+ };
+
+ return csum_partial(diff, sizeof(diff), 0);
+}
+
+static inline __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p)
+{
+ if (*(__be64 *)&ip6h->daddr == p->locator_match)
+ return p->csum_diff;
+ else
+ return compute_csum_diff8((__be32 *)&ip6h->daddr,
+ (__be32 *)&p->locator);
+}
+
+static void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p)
+{
+ __wsum diff;
+ struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ size_t nhoff = sizeof(struct ipv6hdr);
+
+ /* First update checksum */
+ switch (ip6h->nexthdr) {
+ case NEXTHDR_TCP:
+ if (likely(pskb_may_pull(skb, nhoff + sizeof(struct tcphdr)))) {
+ struct tcphdr *th = (struct tcphdr *)
+ (skb_network_header(skb) + nhoff);
+
+ diff = get_csum_diff(ip6h, p);
+ inet_proto_csum_replace_by_diff(&th->check, skb,
+ diff, true);
+ }
+ break;
+ case NEXTHDR_UDP:
+ if (likely(pskb_may_pull(skb, nhoff + sizeof(struct udphdr)))) {
+ struct udphdr *uh = (struct udphdr *)
+ (skb_network_header(skb) + nhoff);
+
+ if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+ diff = get_csum_diff(ip6h, p);
+ inet_proto_csum_replace_by_diff(&uh->check, skb,
+ diff, true);
+ if (!uh->check)
+ uh->check = CSUM_MANGLED_0;
+ }
+ }
+ break;
+ case NEXTHDR_ICMP:
+ if (likely(pskb_may_pull(skb,
+ nhoff + sizeof(struct icmp6hdr)))) {
+ struct icmp6hdr *ih = (struct icmp6hdr *)
+ (skb_network_header(skb) + nhoff);
+
+ diff = get_csum_diff(ip6h, p);
+ inet_proto_csum_replace_by_diff(&ih->icmp6_cksum, skb,
+ diff, true);
+ }
+ break;
+ }
+
+ /* Now change destination address */
+ *(__be64 *)&ip6h->daddr = p->locator;
+}
+
+static int ila_output(struct sock *sk, struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+
+ if (skb->protocol != htons(ETH_P_IPV6))
+ goto drop;
+
+ update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate));
+
+ return dst->lwtstate->orig_output(sk, skb);
+
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+static int ila_input(struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+
+ if (skb->protocol != htons(ETH_P_IPV6))
+ goto drop;
+
+ update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate));
+
+ return dst->lwtstate->orig_input(skb);
+
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
+ [ILA_ATTR_LOCATOR] = { .type = NLA_U64, },
+};
+
+static int ila_build_state(struct net_device *dev, struct nlattr *nla,
+ unsigned int family, const void *cfg,
+ struct lwtunnel_state **ts)
+{
+ struct ila_params *p;
+ struct nlattr *tb[ILA_ATTR_MAX + 1];
+ size_t encap_len = sizeof(*p);
+ struct lwtunnel_state *newts;
+ const struct fib6_config *cfg6 = cfg;
+ int ret;
+
+ if (family != AF_INET6)
+ return -EINVAL;
+
+ ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla,
+ ila_nl_policy);
+ if (ret < 0)
+ return ret;
+
+ if (!tb[ILA_ATTR_LOCATOR])
+ return -EINVAL;
+
+ newts = lwtunnel_state_alloc(encap_len);
+ if (!newts)
+ return -ENOMEM;
+
+ newts->len = encap_len;
+ p = ila_params_lwtunnel(newts);
+
+ p->locator = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]);
+
+ if (cfg6->fc_dst_len > sizeof(__be64)) {
+ /* Precompute checksum difference for translation since we
+ * know both the old locator and the new one.
+ */
+ p->locator_match = *(__be64 *)&cfg6->fc_dst;
+ p->csum_diff = compute_csum_diff8(
+ (__be32 *)&p->locator_match, (__be32 *)&p->locator);
+ }
+
+ newts->type = LWTUNNEL_ENCAP_ILA;
+ newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT |
+ LWTUNNEL_STATE_INPUT_REDIRECT;
+
+ *ts = newts;
+
+ return 0;
+}
+
+static int ila_fill_encap_info(struct sk_buff *skb,
+ struct lwtunnel_state *lwtstate)
+{
+ struct ila_params *p = ila_params_lwtunnel(lwtstate);
+
+ if (nla_put_u64(skb, ILA_ATTR_LOCATOR, (__force u64)p->locator))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static int ila_encap_nlsize(struct lwtunnel_state *lwtstate)
+{
+ /* No encapsulation overhead */
+ return 0;
+}
+
+static int ila_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
+{
+ struct ila_params *a_p = ila_params_lwtunnel(a);
+ struct ila_params *b_p = ila_params_lwtunnel(b);
+
+ return (a_p->locator != b_p->locator);
+}
+
+static const struct lwtunnel_encap_ops ila_encap_ops = {
+ .build_state = ila_build_state,
+ .output = ila_output,
+ .input = ila_input,
+ .fill_encap = ila_fill_encap_info,
+ .get_encap_size = ila_encap_nlsize,
+ .cmp_encap = ila_encap_cmp,
+};
+
+static int __init ila_init(void)
+{
+ return lwtunnel_encap_add_ops(&ila_encap_ops, LWTUNNEL_ENCAP_ILA);
+}
+
+static void __exit ila_fini(void)
+{
+ lwtunnel_encap_del_ops(&ila_encap_ops, LWTUNNEL_ENCAP_ILA);
+}
+
+module_init(ila_init);
+module_exit(ila_fini);
+MODULE_AUTHOR("Tom Herbert <tom@herbertland.com>");
+MODULE_LICENSE("GPL");
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 5693b5eb8482..418d9823692b 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -173,12 +173,13 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
*ppcpu_rt = NULL;
}
}
+
+ non_pcpu_rt->rt6i_pcpu = NULL;
}
static void rt6_release(struct rt6_info *rt)
{
if (atomic_dec_and_test(&rt->rt6i_ref)) {
- lwtstate_put(rt->rt6i_lwtstate);
rt6_free_pcpu(rt);
dst_free(&rt->dst);
}
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 34f121812a14..4038c694ec03 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -361,6 +361,7 @@ static void ip6gre_tunnel_uninit(struct net_device *dev)
struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
ip6gre_tunnel_unlink(ign, t);
+ ip6_tnl_dst_reset(t);
dev_put(dev);
}
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
index e1a1136bda7c..14dacf1df529 100644
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -23,6 +23,15 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
if (err < 0)
goto error;
+ if (cfg->ipv6_v6only) {
+ int val = 1;
+
+ err = kernel_setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY,
+ (char *) &val, sizeof(val));
+ if (err < 0)
+ goto error;
+ }
+
udp6_addr.sin6_family = AF_INET6;
memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6,
sizeof(udp6_addr.sin6_addr));
diff --git a/net/ipv6/mcast_snoop.c b/net/ipv6/mcast_snoop.c
index df8afe5ab31e..9405b04eecc6 100644
--- a/net/ipv6/mcast_snoop.c
+++ b/net/ipv6/mcast_snoop.c
@@ -143,34 +143,36 @@ static int __ipv6_mc_check_mld(struct sk_buff *skb,
struct sk_buff *skb_chk = NULL;
unsigned int transport_len;
unsigned int len = skb_transport_offset(skb) + sizeof(struct mld_msg);
- int ret;
+ int ret = -EINVAL;
transport_len = ntohs(ipv6_hdr(skb)->payload_len);
transport_len -= skb_transport_offset(skb) - sizeof(struct ipv6hdr);
- skb_get(skb);
skb_chk = skb_checksum_trimmed(skb, transport_len,
ipv6_mc_validate_checksum);
if (!skb_chk)
- return -EINVAL;
+ goto err;
- if (!pskb_may_pull(skb_chk, len)) {
- kfree_skb(skb_chk);
- return -EINVAL;
- }
+ if (!pskb_may_pull(skb_chk, len))
+ goto err;
ret = ipv6_mc_check_mld_msg(skb_chk);
- if (ret) {
- kfree_skb(skb_chk);
- return ret;
- }
+ if (ret)
+ goto err;
if (skb_trimmed)
*skb_trimmed = skb_chk;
- else
+ /* free now unneeded clone */
+ else if (skb_chk != skb)
kfree_skb(skb_chk);
- return 0;
+ ret = 0;
+
+err:
+ if (ret && skb_chk && skb_chk != skb)
+ kfree_skb(skb_chk);
+
+ return ret;
}
/**
@@ -179,7 +181,7 @@ static int __ipv6_mc_check_mld(struct sk_buff *skb,
* @skb_trimmed: to store an skb pointer trimmed to IPv6 packet tail (optional)
*
* Checks whether an IPv6 packet is a valid MLD packet. If so sets
- * skb network and transport headers accordingly and returns zero.
+ * skb transport header accordingly and returns zero.
*
* -EINVAL: A broken packet was detected, i.e. it violates some internet
* standard
@@ -194,7 +196,8 @@ static int __ipv6_mc_check_mld(struct sk_buff *skb,
* to leave the original skb and its full frame unchanged (which might be
* desirable for layer 2 frame jugglers).
*
- * The caller needs to release a reference count from any returned skb_trimmed.
+ * Caller needs to set the skb network header and free any returned skb if it
+ * differs from the provided skb.
*/
int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed)
{
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index b3054611f88a..13d3c2beb93e 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -553,7 +553,8 @@ static void ndisc_send_unsol_na(struct net_device *dev)
void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
const struct in6_addr *solicit,
- const struct in6_addr *daddr, const struct in6_addr *saddr)
+ const struct in6_addr *daddr, const struct in6_addr *saddr,
+ struct sk_buff *oskb)
{
struct sk_buff *skb;
struct in6_addr addr_buf;
@@ -589,6 +590,9 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
dev->dev_addr);
+ if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE) && oskb)
+ skb_dst_copy(skb, oskb);
+
ndisc_send_skb(skb, daddr, saddr);
}
@@ -675,12 +679,12 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
"%s: trying to ucast probe in NUD_INVALID: %pI6\n",
__func__, target);
}
- ndisc_send_ns(dev, neigh, target, target, saddr);
+ ndisc_send_ns(dev, neigh, target, target, saddr, skb);
} else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) {
neigh_app_ns(neigh);
} else {
addrconf_addr_solict_mult(target, &mcaddr);
- ndisc_send_ns(dev, NULL, target, &mcaddr, saddr);
+ ndisc_send_ns(dev, NULL, target, &mcaddr, saddr, skb);
}
}
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index b552cf0d6198..96833e4b3193 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -47,9 +47,21 @@ config NFT_REJECT_IPV6
default NFT_REJECT
tristate
+config NFT_DUP_IPV6
+ tristate "IPv6 nf_tables packet duplication support"
+ select NF_DUP_IPV6
+ help
+ This module enables IPv6 packet duplication support for nf_tables.
+
endif # NF_TABLES_IPV6
endif # NF_TABLES
+config NF_DUP_IPV6
+ tristate "Netfilter IPv6 packet duplication to alternate destination"
+ help
+ This option enables the nf_dup_ipv6 core, which duplicates an IPv6
+ packet to be rerouted to another destination.
+
config NF_REJECT_IPV6
tristate "IPv6 packet rejection"
default m if NETFILTER_ADVANCED=n
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index c36e0a5490de..b4f7d0b4e2af 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -30,6 +30,8 @@ obj-$(CONFIG_NF_LOG_IPV6) += nf_log_ipv6.o
# reject
obj-$(CONFIG_NF_REJECT_IPV6) += nf_reject_ipv6.o
+obj-$(CONFIG_NF_DUP_IPV6) += nf_dup_ipv6.o
+
# nf_tables
obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o
obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o
@@ -37,6 +39,7 @@ obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o
obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
obj-$(CONFIG_NFT_MASQ_IPV6) += nft_masq_ipv6.o
obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o
+obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o
# matches
obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 4e21f80228be..0771991ed812 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -455,15 +455,11 @@ ip6t_do_table(struct sk_buff *skb,
}
/* Figures out from what hook each rule can be called: returns 0 if
- * there are loops. Puts hook bitmask in comefrom.
- *
- * Keeps track of largest call depth seen and stores it in newinfo->stacksize.
- */
+ there are loops. Puts hook bitmask in comefrom. */
static int
-mark_source_chains(struct xt_table_info *newinfo,
+mark_source_chains(const struct xt_table_info *newinfo,
unsigned int valid_hooks, void *entry0)
{
- unsigned int calldepth, max_calldepth = 0;
unsigned int hook;
/* No recursion; use packet counter to save back ptrs (reset
@@ -477,7 +473,6 @@ mark_source_chains(struct xt_table_info *newinfo,
/* Set initial back pointer. */
e->counters.pcnt = pos;
- calldepth = 0;
for (;;) {
const struct xt_standard_target *t
@@ -539,8 +534,6 @@ mark_source_chains(struct xt_table_info *newinfo,
(entry0 + pos + size);
e->counters.pcnt = pos;
pos += size;
- if (calldepth > 0)
- --calldepth;
} else {
int newpos = t->verdict;
@@ -554,11 +547,6 @@ mark_source_chains(struct xt_table_info *newinfo,
newpos);
return 0;
}
- if (entry0 + newpos != ip6t_next_entry(e) &&
- !(e->ipv6.flags & IP6T_F_GOTO) &&
- ++calldepth > max_calldepth)
- max_calldepth = calldepth;
-
/* This a jump; chase it. */
duprintf("Jump rule %u -> %u\n",
pos, newpos);
@@ -575,7 +563,6 @@ mark_source_chains(struct xt_table_info *newinfo,
next:
duprintf("Finished chain %u\n", hook);
}
- newinfo->stacksize = max_calldepth;
return 1;
}
@@ -855,6 +842,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
if (ret != 0)
return ret;
++i;
+ if (strcmp(ip6t_get_target(iter)->u.user.name,
+ XT_ERROR_TARGET) == 0)
+ ++newinfo->stacksize;
}
if (i != repl->num_entries) {
@@ -1767,6 +1757,9 @@ translate_compat_table(struct net *net,
if (ret != 0)
break;
++i;
+ if (strcmp(ip6t_get_target(iter1)->u.user.name,
+ XT_ERROR_TARGET) == 0)
+ ++newinfo->stacksize;
}
if (ret) {
/*
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 567367a75172..0ed841a3fa33 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -63,6 +63,12 @@ reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
case IP6T_TCP_RESET:
nf_send_reset6(net, skb, par->hooknum);
break;
+ case IP6T_ICMP6_POLICY_FAIL:
+ nf_send_unreach6(net, skb, ICMPV6_POLICY_FAIL, par->hooknum);
+ break;
+ case IP6T_ICMP6_REJECT_ROUTE:
+ nf_send_unreach6(net, skb, ICMPV6_REJECT_ROUTE, par->hooknum);
+ break;
}
return NF_DROP;
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index ebbb754c2111..1e4bf99ed16e 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -237,7 +237,7 @@ synproxy_send_client_ack(const struct synproxy_net *snet,
nth->ack_seq = th->ack_seq;
tcp_flag_word(nth) = TCP_FLAG_ACK;
nth->doff = tcp_hdr_size / 4;
- nth->window = ntohs(htons(th->window) >> opts->wscale);
+ nth->window = htons(ntohs(th->window) >> opts->wscale);
nth->check = 0;
nth->urg_ptr = 0;
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 4ba0c34c627b..7302900c321a 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -251,7 +251,7 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
if (*len < 0 || (unsigned int) *len < sizeof(sin6))
return -EINVAL;
- h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple);
+ h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
if (!h) {
pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n",
&tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port),
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 90388d606483..0e6fae103d33 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -150,7 +150,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
struct nf_conntrack_tuple intuple, origtuple;
const struct nf_conntrack_tuple_hash *h;
const struct nf_conntrack_l4proto *inproto;
- u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
+ struct nf_conntrack_zone tmp;
NF_CT_ASSERT(skb->nfct == NULL);
@@ -177,7 +177,8 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
*ctinfo = IP_CT_RELATED;
- h = nf_conntrack_find_get(net, zone, &intuple);
+ h = nf_conntrack_find_get(net, nf_ct_zone_tmpl(tmpl, skb, &tmp),
+ &intuple);
if (!h) {
pr_debug("icmpv6_error: no match\n");
return -NF_ACCEPT;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 6d02498172c1..701cd2bae0a9 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -633,6 +633,7 @@ ret_orig:
kfree_skb(clone);
return skb;
}
+EXPORT_SYMBOL_GPL(nf_ct_frag6_gather);
void nf_ct_frag6_consume_orig(struct sk_buff *skb)
{
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index 267fb8d5876e..6d9c0b3d5b8c 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -33,20 +33,22 @@
static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
struct sk_buff *skb)
{
- u16 zone = NF_CT_DEFAULT_ZONE;
-
+ u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
- if (skb->nfct)
- zone = nf_ct_zone((struct nf_conn *)skb->nfct);
+ if (skb->nfct) {
+ enum ip_conntrack_info ctinfo;
+ const struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+
+ zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo));
+ }
#endif
if (nf_bridge_in_prerouting(skb))
- return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
+ return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id;
if (hooknum == NF_INET_PRE_ROUTING)
- return IP6_DEFRAG_CONNTRACK_IN + zone;
+ return IP6_DEFRAG_CONNTRACK_IN + zone_id;
else
- return IP6_DEFRAG_CONNTRACK_OUT + zone;
-
+ return IP6_DEFRAG_CONNTRACK_OUT + zone_id;
}
static unsigned int ipv6_defrag(const struct nf_hook_ops *ops,
diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c
new file mode 100644
index 000000000000..c5c87e921ccd
--- /dev/null
+++ b/net/ipv6/netfilter/nf_dup_ipv6.c
@@ -0,0 +1,96 @@
+/*
+ * (C) 2007 by Sebastian Claßen <sebastian.classen@freenet.ag>
+ * (C) 2007-2010 by Jan Engelhardt <jengelh@medozas.de>
+ *
+ * Extracted from xt_TEE.c
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 or later, as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/skbuff.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/netfilter/ipv6/nf_dup_ipv6.h>
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
+static struct net *pick_net(struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_NS
+ const struct dst_entry *dst;
+
+ if (skb->dev != NULL)
+ return dev_net(skb->dev);
+ dst = skb_dst(skb);
+ if (dst != NULL && dst->dev != NULL)
+ return dev_net(dst->dev);
+#endif
+ return &init_net;
+}
+
+static bool nf_dup_ipv6_route(struct sk_buff *skb, const struct in6_addr *gw,
+ int oif)
+{
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+ struct net *net = pick_net(skb);
+ struct dst_entry *dst;
+ struct flowi6 fl6;
+
+ memset(&fl6, 0, sizeof(fl6));
+ if (oif != -1)
+ fl6.flowi6_oif = oif;
+
+ fl6.daddr = *gw;
+ fl6.flowlabel = (__force __be32)(((iph->flow_lbl[0] & 0xF) << 16) |
+ (iph->flow_lbl[1] << 8) | iph->flow_lbl[2]);
+ dst = ip6_route_output(net, NULL, &fl6);
+ if (dst->error) {
+ dst_release(dst);
+ return false;
+ }
+ skb_dst_drop(skb);
+ skb_dst_set(skb, dst);
+ skb->dev = dst->dev;
+ skb->protocol = htons(ETH_P_IPV6);
+
+ return true;
+}
+
+void nf_dup_ipv6(struct sk_buff *skb, unsigned int hooknum,
+ const struct in6_addr *gw, int oif)
+{
+ if (this_cpu_read(nf_skb_duplicated))
+ return;
+ skb = pskb_copy(skb, GFP_ATOMIC);
+ if (skb == NULL)
+ return;
+
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ nf_conntrack_put(skb->nfct);
+ skb->nfct = &nf_ct_untracked_get()->ct_general;
+ skb->nfctinfo = IP_CT_NEW;
+ nf_conntrack_get(skb->nfct);
+#endif
+ if (hooknum == NF_INET_PRE_ROUTING ||
+ hooknum == NF_INET_LOCAL_IN) {
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+ --iph->hop_limit;
+ }
+ if (nf_dup_ipv6_route(skb, gw, oif)) {
+ __this_cpu_write(nf_skb_duplicated, true);
+ ip6_local_out(skb);
+ __this_cpu_write(nf_skb_duplicated, false);
+ } else {
+ kfree_skb(skb);
+ }
+}
+EXPORT_SYMBOL_GPL(nf_dup_ipv6);
+
+MODULE_AUTHOR("Sebastian Claßen <sebastian.classen@freenet.ag>");
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
+MODULE_DESCRIPTION("nf_dup_ipv6: IPv6 packet duplication");
+MODULE_LICENSE("GPL");
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index e76900e0aa92..70fbaed49edb 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -124,7 +124,7 @@ static void nf_nat_ipv6_csum_update(struct sk_buff *skb,
newip = &t->dst.u3.in6;
}
inet_proto_csum_replace16(check, skb, oldip->s6_addr32,
- newip->s6_addr32, 1);
+ newip->s6_addr32, true);
}
static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
@@ -155,7 +155,7 @@ static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
}
} else
inet_proto_csum_replace2(check, skb,
- htons(oldlen), htons(datalen), 1);
+ htons(oldlen), htons(datalen), true);
}
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
diff --git a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
index 2205e8eeeacf..57593b00c5b4 100644
--- a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
@@ -73,7 +73,7 @@ icmpv6_manip_pkt(struct sk_buff *skb,
hdr->icmp6_type == ICMPV6_ECHO_REPLY) {
inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
hdr->icmp6_identifier,
- tuple->src.u.icmp.id, 0);
+ tuple->src.u.icmp.id, false);
hdr->icmp6_identifier = tuple->src.u.icmp.id;
}
return true;
diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c
new file mode 100644
index 000000000000..0eaa4f65fdea
--- /dev/null
+++ b/net/ipv6/netfilter/nft_dup_ipv6.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2015 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/ipv6/nf_dup_ipv6.h>
+
+struct nft_dup_ipv6 {
+ enum nft_registers sreg_addr:8;
+ enum nft_registers sreg_dev:8;
+};
+
+static void nft_dup_ipv6_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_dup_ipv6 *priv = nft_expr_priv(expr);
+ struct in6_addr *gw = (struct in6_addr *)&regs->data[priv->sreg_addr];
+ int oif = regs->data[priv->sreg_dev];
+
+ nf_dup_ipv6(pkt->skb, pkt->ops->hooknum, gw, oif);
+}
+
+static int nft_dup_ipv6_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_dup_ipv6 *priv = nft_expr_priv(expr);
+ int err;
+
+ if (tb[NFTA_DUP_SREG_ADDR] == NULL)
+ return -EINVAL;
+
+ priv->sreg_addr = nft_parse_register(tb[NFTA_DUP_SREG_ADDR]);
+ err = nft_validate_register_load(priv->sreg_addr, sizeof(struct in6_addr));
+ if (err < 0)
+ return err;
+
+ if (tb[NFTA_DUP_SREG_DEV] != NULL) {
+ priv->sreg_dev = nft_parse_register(tb[NFTA_DUP_SREG_DEV]);
+ return nft_validate_register_load(priv->sreg_dev, sizeof(int));
+ }
+ return 0;
+}
+
+static int nft_dup_ipv6_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ struct nft_dup_ipv6 *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr) ||
+ nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct nft_expr_type nft_dup_ipv6_type;
+static const struct nft_expr_ops nft_dup_ipv6_ops = {
+ .type = &nft_dup_ipv6_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_dup_ipv6)),
+ .eval = nft_dup_ipv6_eval,
+ .init = nft_dup_ipv6_init,
+ .dump = nft_dup_ipv6_dump,
+};
+
+static const struct nla_policy nft_dup_ipv6_policy[NFTA_DUP_MAX + 1] = {
+ [NFTA_DUP_SREG_ADDR] = { .type = NLA_U32 },
+ [NFTA_DUP_SREG_DEV] = { .type = NLA_U32 },
+};
+
+static struct nft_expr_type nft_dup_ipv6_type __read_mostly = {
+ .family = NFPROTO_IPV6,
+ .name = "dup",
+ .ops = &nft_dup_ipv6_ops,
+ .policy = nft_dup_ipv6_policy,
+ .maxattr = NFTA_DUP_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_dup_ipv6_module_init(void)
+{
+ return nft_register_expr(&nft_dup_ipv6_type);
+}
+
+static void __exit nft_dup_ipv6_module_exit(void)
+{
+ nft_unregister_expr(&nft_dup_ipv6_type);
+}
+
+module_init(nft_dup_ipv6_module_init);
+module_exit(nft_dup_ipv6_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "dup");
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 1c0217e61357..308dd5f9158f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -54,11 +54,13 @@
#include <net/tcp.h>
#include <linux/rtnetlink.h>
#include <net/dst.h>
+#include <net/dst_metadata.h>
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <net/nexthop.h>
#include <net/lwtunnel.h>
+#include <net/ip_tunnels.h>
#include <asm/uaccess.h>
@@ -319,8 +321,7 @@ static const struct rt6_info ip6_blk_hole_entry_template = {
/* allocate dst with ip6_dst_ops */
static struct rt6_info *__ip6_dst_alloc(struct net *net,
struct net_device *dev,
- int flags,
- struct fib6_table *table)
+ int flags)
{
struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
0, DST_OBSOLETE_FORCE_CHK, flags);
@@ -337,10 +338,9 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net,
static struct rt6_info *ip6_dst_alloc(struct net *net,
struct net_device *dev,
- int flags,
- struct fib6_table *table)
+ int flags)
{
- struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags, table);
+ struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
if (rt) {
rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
@@ -538,7 +538,7 @@ static void rt6_probe_deferred(struct work_struct *w)
container_of(w, struct __rt6_probe_work, work);
addrconf_addr_solict_mult(&work->target, &mcaddr);
- ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
+ ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL, NULL);
dev_put(work->dev);
kfree(work);
}
@@ -957,8 +957,7 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
ort = (struct rt6_info *)ort->dst.from;
- rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev,
- 0, ort->rt6i_table);
+ rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
if (!rt)
return NULL;
@@ -990,8 +989,7 @@ static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
struct rt6_info *pcpu_rt;
pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
- rt->dst.dev, rt->dst.flags,
- rt->rt6i_table);
+ rt->dst.dev, rt->dst.flags);
if (!pcpu_rt)
return NULL;
@@ -1004,32 +1002,53 @@ static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
/* It should be called with read_lock_bh(&tb6_lock) acquired */
static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
{
- struct rt6_info *pcpu_rt, *prev, **p;
+ struct rt6_info *pcpu_rt, **p;
p = this_cpu_ptr(rt->rt6i_pcpu);
pcpu_rt = *p;
- if (pcpu_rt)
- goto done;
+ if (pcpu_rt) {
+ dst_hold(&pcpu_rt->dst);
+ rt6_dst_from_metrics_check(pcpu_rt);
+ }
+ return pcpu_rt;
+}
+
+static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
+{
+ struct fib6_table *table = rt->rt6i_table;
+ struct rt6_info *pcpu_rt, *prev, **p;
pcpu_rt = ip6_rt_pcpu_alloc(rt);
if (!pcpu_rt) {
struct net *net = dev_net(rt->dst.dev);
- pcpu_rt = net->ipv6.ip6_null_entry;
- goto done;
+ dst_hold(&net->ipv6.ip6_null_entry->dst);
+ return net->ipv6.ip6_null_entry;
}
- prev = cmpxchg(p, NULL, pcpu_rt);
- if (prev) {
- /* If someone did it before us, return prev instead */
+ read_lock_bh(&table->tb6_lock);
+ if (rt->rt6i_pcpu) {
+ p = this_cpu_ptr(rt->rt6i_pcpu);
+ prev = cmpxchg(p, NULL, pcpu_rt);
+ if (prev) {
+ /* If someone did it before us, return prev instead */
+ dst_destroy(&pcpu_rt->dst);
+ pcpu_rt = prev;
+ }
+ } else {
+ /* rt has been removed from the fib6 tree
+ * before we have a chance to acquire the read_lock.
+ * In this case, don't bother to create a pcpu rt
+ * since rt is going away anyway. The next
+ * dst_check() will trigger a re-lookup.
+ */
dst_destroy(&pcpu_rt->dst);
- pcpu_rt = prev;
+ pcpu_rt = rt;
}
-
-done:
dst_hold(&pcpu_rt->dst);
rt6_dst_from_metrics_check(pcpu_rt);
+ read_unlock_bh(&table->tb6_lock);
return pcpu_rt;
}
@@ -1104,9 +1123,22 @@ redo_rt6_select:
rt->dst.lastuse = jiffies;
rt->dst.__use++;
pcpu_rt = rt6_get_pcpu_route(rt);
- read_unlock_bh(&table->tb6_lock);
+
+ if (pcpu_rt) {
+ read_unlock_bh(&table->tb6_lock);
+ } else {
+ /* We have to do the read_unlock first
+ * because rt6_make_pcpu_route() may trigger
+ * ip6_dst_gc() which will take the write_lock.
+ */
+ dst_hold(&rt->dst);
+ read_unlock_bh(&table->tb6_lock);
+ pcpu_rt = rt6_make_pcpu_route(rt);
+ dst_release(&rt->dst);
+ }
return pcpu_rt;
+
}
}
@@ -1131,6 +1163,7 @@ void ip6_route_input(struct sk_buff *skb)
const struct ipv6hdr *iph = ipv6_hdr(skb);
struct net *net = dev_net(skb->dev);
int flags = RT6_LOOKUP_F_HAS_SADDR;
+ struct ip_tunnel_info *tun_info;
struct flowi6 fl6 = {
.flowi6_iif = skb->dev->ifindex,
.daddr = iph->daddr,
@@ -1140,6 +1173,10 @@ void ip6_route_input(struct sk_buff *skb)
.flowi6_proto = iph->nexthdr,
};
+ tun_info = skb_tunnel_info(skb);
+ if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
+ fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
+ skb_dst_drop(skb);
skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
}
@@ -1562,7 +1599,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
if (unlikely(!idev))
return ERR_PTR(-ENODEV);
- rt = ip6_dst_alloc(net, dev, 0, NULL);
+ rt = ip6_dst_alloc(net, dev, 0);
if (unlikely(!rt)) {
in6_dev_put(idev);
dst = ERR_PTR(-ENOMEM);
@@ -1749,7 +1786,8 @@ int ip6_route_add(struct fib6_config *cfg)
if (!table)
goto out;
- rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
+ rt = ip6_dst_alloc(net, NULL,
+ (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
if (!rt) {
err = -ENOMEM;
@@ -1781,12 +1819,19 @@ int ip6_route_add(struct fib6_config *cfg)
struct lwtunnel_state *lwtstate;
err = lwtunnel_build_state(dev, cfg->fc_encap_type,
- cfg->fc_encap, &lwtstate);
+ cfg->fc_encap, AF_INET6, cfg,
+ &lwtstate);
if (err)
goto out;
- rt->rt6i_lwtstate = lwtstate_get(lwtstate);
- if (lwtunnel_output_redirect(rt->rt6i_lwtstate))
- rt->dst.output = lwtunnel_output6;
+ rt->dst.lwtstate = lwtstate_get(lwtstate);
+ if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
+ rt->dst.lwtstate->orig_output = rt->dst.output;
+ rt->dst.output = lwtunnel_output;
+ }
+ if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
+ rt->dst.lwtstate->orig_input = rt->dst.input;
+ rt->dst.input = lwtunnel_input;
+ }
}
ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
@@ -2168,7 +2213,7 @@ static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
#endif
rt->rt6i_prefsrc = ort->rt6i_prefsrc;
rt->rt6i_table = ort->rt6i_table;
- rt->rt6i_lwtstate = lwtstate_get(ort->rt6i_lwtstate);
+ rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
}
#ifdef CONFIG_IPV6_ROUTE_INFO
@@ -2419,7 +2464,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
{
struct net *net = dev_net(idev->dev);
struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
- DST_NOCOUNT, NULL);
+ DST_NOCOUNT);
if (!rt)
return ERR_PTR(-ENOMEM);
@@ -2832,7 +2877,7 @@ static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
+ nla_total_size(sizeof(struct rta_cacheinfo))
+ nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
+ nla_total_size(1) /* RTA_PREF */
- + lwtunnel_get_encap_size(rt->rt6i_lwtstate);
+ + lwtunnel_get_encap_size(rt->dst.lwtstate);
}
static int rt6_fill_node(struct net *net,
@@ -2985,7 +3030,7 @@ static int rt6_fill_node(struct net *net,
if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
goto nla_put_failure;
- lwtunnel_fill_encap(skb, rt->rt6i_lwtstate);
+ lwtunnel_fill_encap(skb, rt->dst.lwtstate);
nlmsg_end(skb, nlh);
return 0;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index a74013d3eceb..30caa289c5db 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -20,6 +20,7 @@
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
+#include <net/vrf.h>
#if IS_ENABLED(CONFIG_IPV6_MIP6)
#include <net/mip6.h>
#endif
@@ -131,8 +132,10 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
nexthdr = nh[nhoff];
- if (skb_dst(skb))
- oif = skb_dst(skb)->dev->ifindex;
+ if (skb_dst(skb)) {
+ oif = vrf_master_ifindex(skb_dst(skb)->dev) ?
+ : skb_dst(skb)->dev->ifindex;
+ }
memset(fl6, 0, sizeof(struct flowi6));
fl6->flowi6_mark = skb->mark;
diff --git a/net/key/af_key.c b/net/key/af_key.c
index b397f0aa9005..83a70688784b 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -219,7 +219,7 @@ static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2,
#define BROADCAST_ONE 1
#define BROADCAST_REGISTERED 2
#define BROADCAST_PROMISC_ONLY 4
-static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,
+static int pfkey_broadcast(struct sk_buff *skb,
int broadcast_flags, struct sock *one_sk,
struct net *net)
{
@@ -244,7 +244,7 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,
* socket.
*/
if (pfk->promisc)
- pfkey_broadcast_one(skb, &skb2, allocation, sk);
+ pfkey_broadcast_one(skb, &skb2, GFP_ATOMIC, sk);
/* the exact target will be processed later */
if (sk == one_sk)
@@ -259,7 +259,7 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,
continue;
}
- err2 = pfkey_broadcast_one(skb, &skb2, allocation, sk);
+ err2 = pfkey_broadcast_one(skb, &skb2, GFP_ATOMIC, sk);
/* Error is cleare after succecful sending to at least one
* registered KM */
@@ -269,7 +269,7 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,
rcu_read_unlock();
if (one_sk != NULL)
- err = pfkey_broadcast_one(skb, &skb2, allocation, one_sk);
+ err = pfkey_broadcast_one(skb, &skb2, GFP_KERNEL, one_sk);
kfree_skb(skb2);
kfree_skb(skb);
@@ -292,7 +292,7 @@ static int pfkey_do_dump(struct pfkey_sock *pfk)
hdr = (struct sadb_msg *) pfk->dump.skb->data;
hdr->sadb_msg_seq = 0;
hdr->sadb_msg_errno = rc;
- pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE,
+ pfkey_broadcast(pfk->dump.skb, BROADCAST_ONE,
&pfk->sk, sock_net(&pfk->sk));
pfk->dump.skb = NULL;
}
@@ -333,7 +333,7 @@ static int pfkey_error(const struct sadb_msg *orig, int err, struct sock *sk)
hdr->sadb_msg_len = (sizeof(struct sadb_msg) /
sizeof(uint64_t));
- pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ONE, sk, sock_net(sk));
+ pfkey_broadcast(skb, BROADCAST_ONE, sk, sock_net(sk));
return 0;
}
@@ -1365,7 +1365,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_
xfrm_state_put(x);
- pfkey_broadcast(resp_skb, GFP_KERNEL, BROADCAST_ONE, sk, net);
+ pfkey_broadcast(resp_skb, BROADCAST_ONE, sk, net);
return 0;
}
@@ -1452,7 +1452,7 @@ static int key_notify_sa(struct xfrm_state *x, const struct km_event *c)
hdr->sadb_msg_seq = c->seq;
hdr->sadb_msg_pid = c->portid;
- pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xs_net(x));
+ pfkey_broadcast(skb, BROADCAST_ALL, NULL, xs_net(x));
return 0;
}
@@ -1565,7 +1565,7 @@ static int pfkey_get(struct sock *sk, struct sk_buff *skb, const struct sadb_msg
out_hdr->sadb_msg_reserved = 0;
out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
- pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk, sock_net(sk));
+ pfkey_broadcast(out_skb, BROADCAST_ONE, sk, sock_net(sk));
return 0;
}
@@ -1670,7 +1670,7 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sad
return -ENOBUFS;
}
- pfkey_broadcast(supp_skb, GFP_KERNEL, BROADCAST_REGISTERED, sk, sock_net(sk));
+ pfkey_broadcast(supp_skb, BROADCAST_REGISTERED, sk, sock_net(sk));
return 0;
}
@@ -1689,7 +1689,7 @@ static int unicast_flush_resp(struct sock *sk, const struct sadb_msg *ihdr)
hdr->sadb_msg_errno = (uint8_t) 0;
hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
- return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ONE, sk, sock_net(sk));
+ return pfkey_broadcast(skb, BROADCAST_ONE, sk, sock_net(sk));
}
static int key_notify_sa_flush(const struct km_event *c)
@@ -1710,7 +1710,7 @@ static int key_notify_sa_flush(const struct km_event *c)
hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
hdr->sadb_msg_reserved = 0;
- pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net);
+ pfkey_broadcast(skb, BROADCAST_ALL, NULL, c->net);
return 0;
}
@@ -1767,7 +1767,7 @@ static int dump_sa(struct xfrm_state *x, int count, void *ptr)
out_hdr->sadb_msg_pid = pfk->dump.msg_portid;
if (pfk->dump.skb)
- pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE,
+ pfkey_broadcast(pfk->dump.skb, BROADCAST_ONE,
&pfk->sk, sock_net(&pfk->sk));
pfk->dump.skb = out_skb;
@@ -1847,7 +1847,7 @@ static int pfkey_promisc(struct sock *sk, struct sk_buff *skb, const struct sadb
new_hdr->sadb_msg_errno = 0;
}
- pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ALL, NULL, sock_net(sk));
+ pfkey_broadcast(skb, BROADCAST_ALL, NULL, sock_net(sk));
return 0;
}
@@ -2181,7 +2181,7 @@ static int key_notify_policy(struct xfrm_policy *xp, int dir, const struct km_ev
out_hdr->sadb_msg_errno = 0;
out_hdr->sadb_msg_seq = c->seq;
out_hdr->sadb_msg_pid = c->portid;
- pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xp_net(xp));
+ pfkey_broadcast(out_skb, BROADCAST_ALL, NULL, xp_net(xp));
return 0;
}
@@ -2401,7 +2401,7 @@ static int key_pol_get_resp(struct sock *sk, struct xfrm_policy *xp, const struc
out_hdr->sadb_msg_errno = 0;
out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
- pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk, xp_net(xp));
+ pfkey_broadcast(out_skb, BROADCAST_ONE, sk, xp_net(xp));
err = 0;
out:
@@ -2655,7 +2655,7 @@ static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr)
out_hdr->sadb_msg_pid = pfk->dump.msg_portid;
if (pfk->dump.skb)
- pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE,
+ pfkey_broadcast(pfk->dump.skb, BROADCAST_ONE,
&pfk->sk, sock_net(&pfk->sk));
pfk->dump.skb = out_skb;
@@ -2708,7 +2708,7 @@ static int key_notify_policy_flush(const struct km_event *c)
hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC;
hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
hdr->sadb_msg_reserved = 0;
- pfkey_broadcast(skb_out, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net);
+ pfkey_broadcast(skb_out, BROADCAST_ALL, NULL, c->net);
return 0;
}
@@ -2770,7 +2770,7 @@ static int pfkey_process(struct sock *sk, struct sk_buff *skb, const struct sadb
void *ext_hdrs[SADB_EXT_MAX];
int err;
- pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL,
+ pfkey_broadcast(skb_clone(skb, GFP_KERNEL),
BROADCAST_PROMISC_ONLY, NULL, sock_net(sk));
memset(ext_hdrs, 0, sizeof(ext_hdrs));
@@ -2992,7 +2992,7 @@ static int key_notify_sa_expire(struct xfrm_state *x, const struct km_event *c)
out_hdr->sadb_msg_seq = 0;
out_hdr->sadb_msg_pid = 0;
- pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, xs_net(x));
+ pfkey_broadcast(out_skb, BROADCAST_REGISTERED, NULL, xs_net(x));
return 0;
}
@@ -3182,7 +3182,7 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
xfrm_ctx->ctx_len);
}
- return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, xs_net(x));
+ return pfkey_broadcast(skb, BROADCAST_REGISTERED, NULL, xs_net(x));
}
static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt,
@@ -3380,7 +3380,7 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
n_port->sadb_x_nat_t_port_port = sport;
n_port->sadb_x_nat_t_port_reserved = 0;
- return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, xs_net(x));
+ return pfkey_broadcast(skb, BROADCAST_REGISTERED, NULL, xs_net(x));
}
#ifdef CONFIG_NET_KEY_MIGRATE
@@ -3572,7 +3572,7 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
}
/* broadcast migrate message to sockets */
- pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, &init_net);
+ pfkey_broadcast(skb, BROADCAST_ALL, NULL, &init_net);
return 0;
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index 247552a7f6c2..3ece7d1034c8 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -92,14 +92,15 @@ int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_ewma)
static inline void
minstrel_sort_best_tp_rates(struct minstrel_sta_info *mi, int i, u8 *tp_list)
{
- int j = MAX_THR_RATES;
- struct minstrel_rate_stats *tmp_mrs = &mi->r[j - 1].stats;
+ int j;
+ struct minstrel_rate_stats *tmp_mrs;
struct minstrel_rate_stats *cur_mrs = &mi->r[i].stats;
- while (j > 0 && (minstrel_get_tp_avg(&mi->r[i], cur_mrs->prob_ewma) >
- minstrel_get_tp_avg(&mi->r[tp_list[j - 1]], tmp_mrs->prob_ewma))) {
- j--;
+ for (j = MAX_THR_RATES; j > 0; --j) {
tmp_mrs = &mi->r[tp_list[j - 1]].stats;
+ if (minstrel_get_tp_avg(&mi->r[i], cur_mrs->prob_ewma) <=
+ minstrel_get_tp_avg(&mi->r[tp_list[j - 1]], tmp_mrs->prob_ewma))
+ break;
}
if (j < MAX_THR_RATES - 1)
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index 276f8c992218..21e70bc9af98 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -48,7 +48,6 @@ int mpls_output(struct sock *sk, struct sk_buff *skb)
struct dst_entry *dst = skb_dst(skb);
struct rtable *rt = NULL;
struct rt6_info *rt6 = NULL;
- struct lwtunnel_state *lwtstate = NULL;
int err = 0;
bool bos;
int i;
@@ -58,11 +57,9 @@ int mpls_output(struct sock *sk, struct sk_buff *skb)
if (skb->protocol == htons(ETH_P_IP)) {
ttl = ip_hdr(skb)->ttl;
rt = (struct rtable *)dst;
- lwtstate = rt->rt_lwtstate;
} else if (skb->protocol == htons(ETH_P_IPV6)) {
ttl = ipv6_hdr(skb)->hop_limit;
rt6 = (struct rt6_info *)dst;
- lwtstate = rt6->rt6i_lwtstate;
} else {
goto drop;
}
@@ -72,12 +69,12 @@ int mpls_output(struct sock *sk, struct sk_buff *skb)
/* Find the output device */
out_dev = dst->dev;
if (!mpls_output_possible(out_dev) ||
- !lwtstate || skb_warn_if_lro(skb))
+ !dst->lwtstate || skb_warn_if_lro(skb))
goto drop;
skb_forward_csum(skb);
- tun_encap_info = mpls_lwtunnel_encap(lwtstate);
+ tun_encap_info = mpls_lwtunnel_encap(dst->lwtstate);
/* Verify the destination can hold the packet */
new_header_size = mpls_encap_size(tun_encap_info);
@@ -126,6 +123,7 @@ drop:
}
static int mpls_build_state(struct net_device *dev, struct nlattr *nla,
+ unsigned int family, const void *cfg,
struct lwtunnel_state **ts)
{
struct mpls_iptunnel_encap *tun_encap_info;
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 6eae69a698ed..3e1b4abf1897 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -867,6 +867,8 @@ config NETFILTER_XT_TARGET_TEE
depends on NETFILTER_ADVANCED
depends on IPV6 || IPV6=n
depends on !NF_CONNTRACK || NF_CONNTRACK
+ select NF_DUP_IPV4
+ select NF_DUP_IPV6 if IP6_NF_IPTABLES
---help---
This option adds a "TEE" target with which a packet can be cloned and
this clone be rerouted to another nexthop.
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 2a5a0704245c..0b939b7ad724 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -388,9 +388,6 @@ EXPORT_SYMBOL(nf_conntrack_destroy);
struct nfq_ct_hook __rcu *nfq_ct_hook __read_mostly;
EXPORT_SYMBOL_GPL(nfq_ct_hook);
-struct nfq_ct_nat_hook __rcu *nfq_ct_nat_hook __read_mostly;
-EXPORT_SYMBOL_GPL(nfq_ct_nat_hook);
-
#endif /* CONFIG_NF_CONNTRACK */
#ifdef CONFIG_NF_NAT_NEEDED
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index 3b6929dec748..b32fb0dbe237 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -162,6 +162,17 @@ config IP_VS_FO
If you want to compile it in kernel, say Y. To compile it as a
module, choose M here. If unsure, say N.
+config IP_VS_OVF
+ tristate "weighted overflow scheduling"
+ ---help---
+ The weighted overflow scheduling algorithm directs network
+ connections to the server with the highest weight that is
+ currently available and overflows to the next when active
+ connections exceed the node's weight.
+
+ If you want to compile it in kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
config IP_VS_LBLC
tristate "locality-based least-connection scheduling"
---help---
diff --git a/net/netfilter/ipvs/Makefile b/net/netfilter/ipvs/Makefile
index 38b2723b2e3d..67f3f4389602 100644
--- a/net/netfilter/ipvs/Makefile
+++ b/net/netfilter/ipvs/Makefile
@@ -27,6 +27,7 @@ obj-$(CONFIG_IP_VS_WRR) += ip_vs_wrr.o
obj-$(CONFIG_IP_VS_LC) += ip_vs_lc.o
obj-$(CONFIG_IP_VS_WLC) += ip_vs_wlc.o
obj-$(CONFIG_IP_VS_FO) += ip_vs_fo.o
+obj-$(CONFIG_IP_VS_OVF) += ip_vs_ovf.o
obj-$(CONFIG_IP_VS_LBLC) += ip_vs_lblc.o
obj-$(CONFIG_IP_VS_LBLCR) += ip_vs_lblcr.o
obj-$(CONFIG_IP_VS_DH) += ip_vs_dh.o
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 24c554201a76..1a23e91d50d8 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2335,13 +2335,23 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
cmd == IP_VS_SO_SET_STOPDAEMON) {
struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
- mutex_lock(&ipvs->sync_mutex);
- if (cmd == IP_VS_SO_SET_STARTDAEMON)
- ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
- dm->syncid);
- else
+ if (cmd == IP_VS_SO_SET_STARTDAEMON) {
+ struct ipvs_sync_daemon_cfg cfg;
+
+ memset(&cfg, 0, sizeof(cfg));
+ strlcpy(cfg.mcast_ifn, dm->mcast_ifn,
+ sizeof(cfg.mcast_ifn));
+ cfg.syncid = dm->syncid;
+ rtnl_lock();
+ mutex_lock(&ipvs->sync_mutex);
+ ret = start_sync_thread(net, &cfg, dm->state);
+ mutex_unlock(&ipvs->sync_mutex);
+ rtnl_unlock();
+ } else {
+ mutex_lock(&ipvs->sync_mutex);
ret = stop_sync_thread(net, dm->state);
- mutex_unlock(&ipvs->sync_mutex);
+ mutex_unlock(&ipvs->sync_mutex);
+ }
goto out_dec;
}
@@ -2645,15 +2655,15 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
mutex_lock(&ipvs->sync_mutex);
if (ipvs->sync_state & IP_VS_STATE_MASTER) {
d[0].state = IP_VS_STATE_MASTER;
- strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
+ strlcpy(d[0].mcast_ifn, ipvs->mcfg.mcast_ifn,
sizeof(d[0].mcast_ifn));
- d[0].syncid = ipvs->master_syncid;
+ d[0].syncid = ipvs->mcfg.syncid;
}
if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
d[1].state = IP_VS_STATE_BACKUP;
- strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
+ strlcpy(d[1].mcast_ifn, ipvs->bcfg.mcast_ifn,
sizeof(d[1].mcast_ifn));
- d[1].syncid = ipvs->backup_syncid;
+ d[1].syncid = ipvs->bcfg.syncid;
}
if (copy_to_user(user, &d, sizeof(d)) != 0)
ret = -EFAULT;
@@ -2808,6 +2818,11 @@ static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
[IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
.len = IP_VS_IFNAME_MAXLEN },
[IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
+ [IPVS_DAEMON_ATTR_SYNC_MAXLEN] = { .type = NLA_U16 },
+ [IPVS_DAEMON_ATTR_MCAST_GROUP] = { .type = NLA_U32 },
+ [IPVS_DAEMON_ATTR_MCAST_GROUP6] = { .len = sizeof(struct in6_addr) },
+ [IPVS_DAEMON_ATTR_MCAST_PORT] = { .type = NLA_U16 },
+ [IPVS_DAEMON_ATTR_MCAST_TTL] = { .type = NLA_U8 },
};
/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
@@ -3266,7 +3281,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
}
static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state,
- const char *mcast_ifn, __u32 syncid)
+ struct ipvs_sync_daemon_cfg *c)
{
struct nlattr *nl_daemon;
@@ -3275,9 +3290,23 @@ static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state,
return -EMSGSIZE;
if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) ||
- nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn) ||
- nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid))
+ nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, c->mcast_ifn) ||
+ nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, c->syncid) ||
+ nla_put_u16(skb, IPVS_DAEMON_ATTR_SYNC_MAXLEN, c->sync_maxlen) ||
+ nla_put_u16(skb, IPVS_DAEMON_ATTR_MCAST_PORT, c->mcast_port) ||
+ nla_put_u8(skb, IPVS_DAEMON_ATTR_MCAST_TTL, c->mcast_ttl))
goto nla_put_failure;
+#ifdef CONFIG_IP_VS_IPV6
+ if (c->mcast_af == AF_INET6) {
+ if (nla_put_in6_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP6,
+ &c->mcast_group.in6))
+ goto nla_put_failure;
+ } else
+#endif
+ if (c->mcast_af == AF_INET &&
+ nla_put_in_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP,
+ c->mcast_group.ip))
+ goto nla_put_failure;
nla_nest_end(skb, nl_daemon);
return 0;
@@ -3288,7 +3317,7 @@ nla_put_failure:
}
static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state,
- const char *mcast_ifn, __u32 syncid,
+ struct ipvs_sync_daemon_cfg *c,
struct netlink_callback *cb)
{
void *hdr;
@@ -3298,7 +3327,7 @@ static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state,
if (!hdr)
return -EMSGSIZE;
- if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
+ if (ip_vs_genl_fill_daemon(skb, state, c))
goto nla_put_failure;
genlmsg_end(skb, hdr);
@@ -3318,8 +3347,7 @@ static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
mutex_lock(&ipvs->sync_mutex);
if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
- ipvs->master_mcast_ifn,
- ipvs->master_syncid, cb) < 0)
+ &ipvs->mcfg, cb) < 0)
goto nla_put_failure;
cb->args[0] = 1;
@@ -3327,8 +3355,7 @@ static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
- ipvs->backup_mcast_ifn,
- ipvs->backup_syncid, cb) < 0)
+ &ipvs->bcfg, cb) < 0)
goto nla_put_failure;
cb->args[1] = 1;
@@ -3342,30 +3369,83 @@ nla_put_failure:
static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ struct ipvs_sync_daemon_cfg c;
+ struct nlattr *a;
+ int ret;
+
+ memset(&c, 0, sizeof(c));
if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
return -EINVAL;
+ strlcpy(c.mcast_ifn, nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
+ sizeof(c.mcast_ifn));
+ c.syncid = nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]);
+
+ a = attrs[IPVS_DAEMON_ATTR_SYNC_MAXLEN];
+ if (a)
+ c.sync_maxlen = nla_get_u16(a);
+
+ a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP];
+ if (a) {
+ c.mcast_af = AF_INET;
+ c.mcast_group.ip = nla_get_in_addr(a);
+ if (!ipv4_is_multicast(c.mcast_group.ip))
+ return -EINVAL;
+ } else {
+ a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP6];
+ if (a) {
+#ifdef CONFIG_IP_VS_IPV6
+ int addr_type;
+
+ c.mcast_af = AF_INET6;
+ c.mcast_group.in6 = nla_get_in6_addr(a);
+ addr_type = ipv6_addr_type(&c.mcast_group.in6);
+ if (!(addr_type & IPV6_ADDR_MULTICAST))
+ return -EINVAL;
+#else
+ return -EAFNOSUPPORT;
+#endif
+ }
+ }
+
+ a = attrs[IPVS_DAEMON_ATTR_MCAST_PORT];
+ if (a)
+ c.mcast_port = nla_get_u16(a);
+
+ a = attrs[IPVS_DAEMON_ATTR_MCAST_TTL];
+ if (a)
+ c.mcast_ttl = nla_get_u8(a);
/* The synchronization protocol is incompatible with mixed family
* services
*/
- if (net_ipvs(net)->mixed_address_family_dests > 0)
+ if (ipvs->mixed_address_family_dests > 0)
return -EINVAL;
- return start_sync_thread(net,
- nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
- nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
- nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
+ rtnl_lock();
+ mutex_lock(&ipvs->sync_mutex);
+ ret = start_sync_thread(net, &c,
+ nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
+ mutex_unlock(&ipvs->sync_mutex);
+ rtnl_unlock();
+ return ret;
}
static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ int ret;
+
if (!attrs[IPVS_DAEMON_ATTR_STATE])
return -EINVAL;
- return stop_sync_thread(net,
- nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
+ mutex_lock(&ipvs->sync_mutex);
+ ret = stop_sync_thread(net,
+ nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
+ mutex_unlock(&ipvs->sync_mutex);
+ return ret;
}
static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
@@ -3389,7 +3469,7 @@ static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
{
- int ret = 0, cmd;
+ int ret = -EINVAL, cmd;
struct net *net;
struct netns_ipvs *ipvs;
@@ -3400,22 +3480,19 @@ static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) {
struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
- mutex_lock(&ipvs->sync_mutex);
if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
info->attrs[IPVS_CMD_ATTR_DAEMON],
- ip_vs_daemon_policy)) {
- ret = -EINVAL;
+ ip_vs_daemon_policy))
goto out;
- }
if (cmd == IPVS_CMD_NEW_DAEMON)
ret = ip_vs_genl_new_daemon(net, daemon_attrs);
else
ret = ip_vs_genl_del_daemon(net, daemon_attrs);
-out:
- mutex_unlock(&ipvs->sync_mutex);
}
+
+out:
return ret;
}
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
index 5882bbfd198c..136184572fc9 100644
--- a/net/netfilter/ipvs/ip_vs_nfct.c
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -274,7 +274,7 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
" for conn " FMT_CONN "\n",
__func__, ARG_TUPLE(&tuple), ARG_CONN(cp));
- h = nf_conntrack_find_get(ip_vs_conn_net(cp), NF_CT_DEFAULT_ZONE,
+ h = nf_conntrack_find_get(ip_vs_conn_net(cp), &nf_ct_zone_dflt,
&tuple);
if (h) {
ct = nf_ct_tuplehash_to_ctrack(h);
diff --git a/net/netfilter/ipvs/ip_vs_ovf.c b/net/netfilter/ipvs/ip_vs_ovf.c
new file mode 100644
index 000000000000..f7d62c3b7329
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_ovf.c
@@ -0,0 +1,86 @@
+/*
+ * IPVS: Overflow-Connection Scheduling module
+ *
+ * Authors: Raducu Deaconu <rhadoo_io@yahoo.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Scheduler implements "overflow" loadbalancing according to number of active
+ * connections, will keep all connections to the node with the highest weight
+ * and overflow to the next node if the number of connections exceeds the node's
+ * weight.
+ * Note that this scheduler might not be suitable for UDP because it only uses
+ * active connections
+ *
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+/* OVF scheduling: return the eligible dest with the largest weight, or NULL */
+static struct ip_vs_dest *
+ip_vs_ovf_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+ struct ip_vs_iphdr *iph)
+{
+ struct ip_vs_dest *dest, *h = NULL;
+ int hw = 0, w;
+
+ IP_VS_DBG(6, "ip_vs_ovf_schedule(): Scheduling...\n");
+ /* Skip dests that are overloaded, have zero weight, or whose active
+ * connections exceed their weight; keep the heaviest one (first wins ties).
+ */
+ list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
+ w = atomic_read(&dest->weight);
+ if ((dest->flags & IP_VS_DEST_F_OVERLOAD) ||
+ atomic_read(&dest->activeconns) > w ||
+ w == 0)
+ continue;
+ if (!h || w > hw) {
+ h = dest;
+ hw = w;
+ }
+ }
+
+ if (h) {
+ IP_VS_DBG_BUF(6, "OVF: server %s:%u active %d w %d\n",
+ IP_VS_DBG_ADDR(h->af, &h->addr),
+ ntohs(h->port),
+ atomic_read(&h->activeconns),
+ atomic_read(&h->weight));
+ return h;
+ }
+
+ ip_vs_scheduler_err(svc, "no destination available");
+ return NULL;
+}
+
+static struct ip_vs_scheduler ip_vs_ovf_scheduler = {
+ .name = "ovf",
+ .refcnt = ATOMIC_INIT(0),
+ .module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_ovf_scheduler.n_list),
+ .schedule = ip_vs_ovf_schedule,
+};
+
+static int __init ip_vs_ovf_init(void)
+{
+ return register_ip_vs_scheduler(&ip_vs_ovf_scheduler);
+}
+
+static void __exit ip_vs_ovf_cleanup(void)
+{
+ unregister_ip_vs_scheduler(&ip_vs_ovf_scheduler);
+ synchronize_rcu();
+}
+
+module_init(ip_vs_ovf_init);
+module_exit(ip_vs_ovf_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index d99ad93eb855..43f140950075 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -262,6 +262,11 @@ struct ip_vs_sync_mesg {
/* ip_vs_sync_conn entries start here */
};
+union ipvs_sockaddr {
+ struct sockaddr_in in;
+ struct sockaddr_in6 in6;
+};
+
struct ip_vs_sync_buff {
struct list_head list;
unsigned long firstuse;
@@ -320,26 +325,28 @@ sb_dequeue(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
* Create a new sync buffer for Version 1 proto.
*/
static inline struct ip_vs_sync_buff *
-ip_vs_sync_buff_create(struct netns_ipvs *ipvs)
+ip_vs_sync_buff_create(struct netns_ipvs *ipvs, unsigned int len)
{
struct ip_vs_sync_buff *sb;
if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
return NULL;
- sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
+ len = max_t(unsigned int, len + sizeof(struct ip_vs_sync_mesg),
+ ipvs->mcfg.sync_maxlen);
+ sb->mesg = kmalloc(len, GFP_ATOMIC);
if (!sb->mesg) {
kfree(sb);
return NULL;
}
sb->mesg->reserved = 0; /* old nr_conns i.e. must be zero now */
sb->mesg->version = SYNC_PROTO_VER;
- sb->mesg->syncid = ipvs->master_syncid;
+ sb->mesg->syncid = ipvs->mcfg.syncid;
sb->mesg->size = htons(sizeof(struct ip_vs_sync_mesg));
sb->mesg->nr_conns = 0;
sb->mesg->spare = 0;
sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg);
- sb->end = (unsigned char *)sb->mesg + ipvs->send_mesg_maxlen;
+ sb->end = (unsigned char *)sb->mesg + len;
sb->firstuse = jiffies;
return sb;
@@ -402,7 +409,7 @@ select_master_thread_id(struct netns_ipvs *ipvs, struct ip_vs_conn *cp)
* Create a new sync buffer for Version 0 proto.
*/
static inline struct ip_vs_sync_buff *
-ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
+ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs, unsigned int len)
{
struct ip_vs_sync_buff *sb;
struct ip_vs_sync_mesg_v0 *mesg;
@@ -410,17 +417,19 @@ ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
return NULL;
- sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
+ len = max_t(unsigned int, len + sizeof(struct ip_vs_sync_mesg_v0),
+ ipvs->mcfg.sync_maxlen);
+ sb->mesg = kmalloc(len, GFP_ATOMIC);
if (!sb->mesg) {
kfree(sb);
return NULL;
}
mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg;
mesg->nr_conns = 0;
- mesg->syncid = ipvs->master_syncid;
+ mesg->syncid = ipvs->mcfg.syncid;
mesg->size = htons(sizeof(struct ip_vs_sync_mesg_v0));
sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0);
- sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen;
+ sb->end = (unsigned char *)mesg + len;
sb->firstuse = jiffies;
return sb;
}
@@ -533,7 +542,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
struct ip_vs_sync_buff *buff;
struct ipvs_master_sync_state *ms;
int id;
- int len;
+ unsigned int len;
if (unlikely(cp->af != AF_INET))
return;
@@ -553,17 +562,19 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
id = select_master_thread_id(ipvs, cp);
ms = &ipvs->ms[id];
buff = ms->sync_buff;
+ len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
+ SIMPLE_CONN_SIZE;
if (buff) {
m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
/* Send buffer if it is for v1 */
- if (!m->nr_conns) {
+ if (buff->head + len > buff->end || !m->nr_conns) {
sb_queue_tail(ipvs, ms);
ms->sync_buff = NULL;
buff = NULL;
}
}
if (!buff) {
- buff = ip_vs_sync_buff_create_v0(ipvs);
+ buff = ip_vs_sync_buff_create_v0(ipvs, len);
if (!buff) {
spin_unlock_bh(&ipvs->sync_buff_lock);
pr_err("ip_vs_sync_buff_create failed.\n");
@@ -572,8 +583,6 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
ms->sync_buff = buff;
}
- len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
- SIMPLE_CONN_SIZE;
m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
s = (struct ip_vs_sync_conn_v0 *) buff->head;
@@ -597,12 +606,6 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
m->nr_conns++;
m->size = htons(ntohs(m->size) + len);
buff->head += len;
-
- /* check if there is a space for next one */
- if (buff->head + FULL_CONN_SIZE > buff->end) {
- sb_queue_tail(ipvs, ms);
- ms->sync_buff = NULL;
- }
spin_unlock_bh(&ipvs->sync_buff_lock);
/* synchronize its controller if it has */
@@ -694,7 +697,7 @@ sloop:
}
if (!buff) {
- buff = ip_vs_sync_buff_create(ipvs);
+ buff = ip_vs_sync_buff_create(ipvs, len);
if (!buff) {
spin_unlock_bh(&ipvs->sync_buff_lock);
pr_err("ip_vs_sync_buff_create failed.\n");
@@ -1219,7 +1222,7 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer,
return;
}
/* SyncID sanity check */
- if (ipvs->backup_syncid != 0 && m2->syncid != ipvs->backup_syncid) {
+ if (ipvs->bcfg.syncid != 0 && m2->syncid != ipvs->bcfg.syncid) {
IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid);
return;
}
@@ -1303,6 +1306,14 @@ static void set_mcast_loop(struct sock *sk, u_char loop)
/* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */
lock_sock(sk);
inet->mc_loop = loop ? 1 : 0;
+#ifdef CONFIG_IP_VS_IPV6
+ if (sk->sk_family == AF_INET6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ /* IPV6_MULTICAST_LOOP */
+ np->mc_loop = loop ? 1 : 0;
+ }
+#endif
release_sock(sk);
}
@@ -1316,6 +1327,33 @@ static void set_mcast_ttl(struct sock *sk, u_char ttl)
/* setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); */
lock_sock(sk);
inet->mc_ttl = ttl;
+#ifdef CONFIG_IP_VS_IPV6
+ if (sk->sk_family == AF_INET6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ /* IPV6_MULTICAST_HOPS */
+ np->mcast_hops = ttl;
+ }
+#endif
+ release_sock(sk);
+}
+
+/* Control fragmentation of messages */
+static void set_mcast_pmtudisc(struct sock *sk, int val)
+{
+ struct inet_sock *inet = inet_sk(sk);
+
+ /* setsockopt(sock, SOL_IP, IP_MTU_DISCOVER, &val, sizeof(val)); */
+ lock_sock(sk);
+ inet->pmtudisc = val;
+#ifdef CONFIG_IP_VS_IPV6
+ if (sk->sk_family == AF_INET6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ /* IPV6_MTU_DISCOVER */
+ np->pmtudisc = val;
+ }
+#endif
release_sock(sk);
}
@@ -1338,44 +1376,15 @@ static int set_mcast_if(struct sock *sk, char *ifname)
lock_sock(sk);
inet->mc_index = dev->ifindex;
/* inet->mc_addr = 0; */
- release_sock(sk);
-
- return 0;
-}
-
+#ifdef CONFIG_IP_VS_IPV6
+ if (sk->sk_family == AF_INET6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
-/*
- * Set the maximum length of sync message according to the
- * specified interface's MTU.
- */
-static int set_sync_mesg_maxlen(struct net *net, int sync_state)
-{
- struct netns_ipvs *ipvs = net_ipvs(net);
- struct net_device *dev;
- int num;
-
- if (sync_state == IP_VS_STATE_MASTER) {
- dev = __dev_get_by_name(net, ipvs->master_mcast_ifn);
- if (!dev)
- return -ENODEV;
-
- num = (dev->mtu - sizeof(struct iphdr) -
- sizeof(struct udphdr) -
- SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE;
- ipvs->send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
- SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF);
- IP_VS_DBG(7, "setting the maximum length of sync sending "
- "message %d.\n", ipvs->send_mesg_maxlen);
- } else if (sync_state == IP_VS_STATE_BACKUP) {
- dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn);
- if (!dev)
- return -ENODEV;
-
- ipvs->recv_mesg_maxlen = dev->mtu -
- sizeof(struct iphdr) - sizeof(struct udphdr);
- IP_VS_DBG(7, "setting the maximum length of sync receiving "
- "message %d.\n", ipvs->recv_mesg_maxlen);
+ /* IPV6_MULTICAST_IF */
+ np->mcast_oif = dev->ifindex;
}
+#endif
+ release_sock(sk);
return 0;
}
@@ -1405,15 +1414,34 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
mreq.imr_ifindex = dev->ifindex;
- rtnl_lock();
lock_sock(sk);
ret = ip_mc_join_group(sk, &mreq);
release_sock(sk);
- rtnl_unlock();
return ret;
}
+#ifdef CONFIG_IP_VS_IPV6
+static int join_mcast_group6(struct sock *sk, struct in6_addr *addr,
+ char *ifname)
+{
+ struct net *net = sock_net(sk);
+ struct net_device *dev;
+ int ret;
+
+ dev = __dev_get_by_name(net, ifname);
+ if (!dev)
+ return -ENODEV;
+ if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
+ return -EINVAL;
+
+ lock_sock(sk);
+ ret = ipv6_sock_mc_join(sk, dev->ifindex, addr);
+ release_sock(sk);
+
+ return ret;
+}
+#endif
static int bind_mcastif_addr(struct socket *sock, char *ifname)
{
@@ -1442,6 +1470,26 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname)
return sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin));
}
+static void get_mcast_sockaddr(union ipvs_sockaddr *sa, int *salen,
+ struct ipvs_sync_daemon_cfg *c, int id)
+{
+ if (AF_INET6 == c->mcast_af) {
+ sa->in6 = (struct sockaddr_in6) {
+ .sin6_family = AF_INET6,
+ .sin6_port = htons(c->mcast_port + id),
+ };
+ sa->in6.sin6_addr = c->mcast_group.in6;
+ *salen = sizeof(sa->in6);
+ } else {
+ sa->in = (struct sockaddr_in) {
+ .sin_family = AF_INET,
+ .sin_port = htons(c->mcast_port + id),
+ };
+ sa->in.sin_addr = c->mcast_group.in;
+ *salen = sizeof(sa->in);
+ }
+}
+
/*
* Set up sending multicast socket over UDP
*/
@@ -1449,40 +1497,43 @@ static struct socket *make_send_sock(struct net *net, int id)
{
struct netns_ipvs *ipvs = net_ipvs(net);
/* multicast addr */
- struct sockaddr_in mcast_addr = {
- .sin_family = AF_INET,
- .sin_port = cpu_to_be16(IP_VS_SYNC_PORT + id),
- .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),
- };
+ union ipvs_sockaddr mcast_addr;
struct socket *sock;
- int result;
+ int result, salen;
/* First create a socket */
- result = sock_create_kern(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+ result = sock_create_kern(net, ipvs->mcfg.mcast_af, SOCK_DGRAM,
+ IPPROTO_UDP, &sock);
if (result < 0) {
pr_err("Error during creation of socket; terminating\n");
return ERR_PTR(result);
}
- result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn);
+ result = set_mcast_if(sock->sk, ipvs->mcfg.mcast_ifn);
if (result < 0) {
pr_err("Error setting outbound mcast interface\n");
goto error;
}
set_mcast_loop(sock->sk, 0);
- set_mcast_ttl(sock->sk, 1);
+ set_mcast_ttl(sock->sk, ipvs->mcfg.mcast_ttl);
+ /* Allow fragmentation if MTU changes */
+ set_mcast_pmtudisc(sock->sk, IP_PMTUDISC_DONT);
result = sysctl_sync_sock_size(ipvs);
if (result > 0)
set_sock_size(sock->sk, 1, result);
- result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn);
+ if (AF_INET == ipvs->mcfg.mcast_af)
+ result = bind_mcastif_addr(sock, ipvs->mcfg.mcast_ifn);
+ else
+ result = 0;
if (result < 0) {
pr_err("Error binding address of the mcast interface\n");
goto error;
}
+ get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->mcfg, id);
result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr,
- sizeof(struct sockaddr), 0);
+ salen, 0);
if (result < 0) {
pr_err("Error connecting to the multicast addr\n");
goto error;
@@ -1503,16 +1554,13 @@ static struct socket *make_receive_sock(struct net *net, int id)
{
struct netns_ipvs *ipvs = net_ipvs(net);
/* multicast addr */
- struct sockaddr_in mcast_addr = {
- .sin_family = AF_INET,
- .sin_port = cpu_to_be16(IP_VS_SYNC_PORT + id),
- .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),
- };
+ union ipvs_sockaddr mcast_addr;
struct socket *sock;
- int result;
+ int result, salen;
/* First create a socket */
- result = sock_create_kern(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+ result = sock_create_kern(net, ipvs->bcfg.mcast_af, SOCK_DGRAM,
+ IPPROTO_UDP, &sock);
if (result < 0) {
pr_err("Error during creation of socket; terminating\n");
return ERR_PTR(result);
@@ -1523,17 +1571,22 @@ static struct socket *make_receive_sock(struct net *net, int id)
if (result > 0)
set_sock_size(sock->sk, 0, result);
- result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr,
- sizeof(struct sockaddr));
+ get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->bcfg, id);
+ result = sock->ops->bind(sock, (struct sockaddr *)&mcast_addr, salen);
if (result < 0) {
pr_err("Error binding to the multicast addr\n");
goto error;
}
/* join the multicast group */
- result = join_mcast_group(sock->sk,
- (struct in_addr *) &mcast_addr.sin_addr,
- ipvs->backup_mcast_ifn);
+#ifdef CONFIG_IP_VS_IPV6
+ if (ipvs->bcfg.mcast_af == AF_INET6)
+ result = join_mcast_group6(sock->sk, &mcast_addr.in6.sin6_addr,
+ ipvs->bcfg.mcast_ifn);
+ else
+#endif
+ result = join_mcast_group(sock->sk, &mcast_addr.in.sin_addr,
+ ipvs->bcfg.mcast_ifn);
if (result < 0) {
pr_err("Error joining to the multicast group\n");
goto error;
@@ -1641,7 +1694,7 @@ static int sync_thread_master(void *data)
pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
"syncid = %d, id = %d\n",
- ipvs->master_mcast_ifn, ipvs->master_syncid, tinfo->id);
+ ipvs->mcfg.mcast_ifn, ipvs->mcfg.syncid, tinfo->id);
for (;;) {
sb = next_sync_buff(ipvs, ms);
@@ -1695,7 +1748,7 @@ static int sync_thread_backup(void *data)
pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
"syncid = %d, id = %d\n",
- ipvs->backup_mcast_ifn, ipvs->backup_syncid, tinfo->id);
+ ipvs->bcfg.mcast_ifn, ipvs->bcfg.syncid, tinfo->id);
while (!kthread_should_stop()) {
wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
@@ -1705,7 +1758,7 @@ static int sync_thread_backup(void *data)
/* do we have data now? */
while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
len = ip_vs_receive(tinfo->sock, tinfo->buf,
- ipvs->recv_mesg_maxlen);
+ ipvs->bcfg.sync_maxlen);
if (len <= 0) {
if (len != -EAGAIN)
pr_err("receiving message error\n");
@@ -1725,16 +1778,19 @@ static int sync_thread_backup(void *data)
}
-int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
+int start_sync_thread(struct net *net, struct ipvs_sync_daemon_cfg *c,
+ int state)
{
struct ip_vs_sync_thread_data *tinfo;
struct task_struct **array = NULL, *task;
struct socket *sock;
struct netns_ipvs *ipvs = net_ipvs(net);
+ struct net_device *dev;
char *name;
int (*threadfn)(void *data);
- int id, count;
+ int id, count, hlen;
int result = -ENOMEM;
+ u16 mtu, min_mtu;
IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
@@ -1746,22 +1802,46 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
} else
count = ipvs->threads_mask + 1;
+ if (c->mcast_af == AF_UNSPEC) {
+ c->mcast_af = AF_INET;
+ c->mcast_group.ip = cpu_to_be32(IP_VS_SYNC_GROUP);
+ }
+ if (!c->mcast_port)
+ c->mcast_port = IP_VS_SYNC_PORT;
+ if (!c->mcast_ttl)
+ c->mcast_ttl = 1;
+
+ dev = __dev_get_by_name(net, c->mcast_ifn);
+ if (!dev) {
+ pr_err("Unknown mcast interface: %s\n", c->mcast_ifn);
+ return -ENODEV;
+ }
+ hlen = (AF_INET6 == c->mcast_af) ?
+ sizeof(struct ipv6hdr) + sizeof(struct udphdr) :
+ sizeof(struct iphdr) + sizeof(struct udphdr);
+ mtu = (state == IP_VS_STATE_BACKUP) ?
+ clamp(dev->mtu, 1500U, 65535U) : 1500U;
+ min_mtu = (state == IP_VS_STATE_BACKUP) ? 1024 : 1;
+
+ if (c->sync_maxlen)
+ c->sync_maxlen = clamp_t(unsigned int,
+ c->sync_maxlen, min_mtu,
+ 65535 - hlen);
+ else
+ c->sync_maxlen = mtu - hlen;
+
if (state == IP_VS_STATE_MASTER) {
if (ipvs->ms)
return -EEXIST;
- strlcpy(ipvs->master_mcast_ifn, mcast_ifn,
- sizeof(ipvs->master_mcast_ifn));
- ipvs->master_syncid = syncid;
+ ipvs->mcfg = *c;
name = "ipvs-m:%d:%d";
threadfn = sync_thread_master;
} else if (state == IP_VS_STATE_BACKUP) {
if (ipvs->backup_threads)
return -EEXIST;
- strlcpy(ipvs->backup_mcast_ifn, mcast_ifn,
- sizeof(ipvs->backup_mcast_ifn));
- ipvs->backup_syncid = syncid;
+ ipvs->bcfg = *c;
name = "ipvs-b:%d:%d";
threadfn = sync_thread_backup;
} else {
@@ -1789,7 +1869,6 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
if (!array)
goto out;
}
- set_sync_mesg_maxlen(net, state);
tinfo = NULL;
for (id = 0; id < count; id++) {
@@ -1807,7 +1886,7 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
tinfo->net = net;
tinfo->sock = sock;
if (state == IP_VS_STATE_BACKUP) {
- tinfo->buf = kmalloc(ipvs->recv_mesg_maxlen,
+ tinfo->buf = kmalloc(ipvs->bcfg.sync_maxlen,
GFP_KERNEL);
if (!tinfo->buf)
goto outtinfo;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 3c20d02aee73..ac3be9b0629b 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -126,7 +126,7 @@ EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
unsigned int nf_conntrack_hash_rnd __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_hash_rnd);
-static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone)
+static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple)
{
unsigned int n;
@@ -135,7 +135,7 @@ static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone)
* three bytes manually.
*/
n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
- return jhash2((u32 *)tuple, n, zone ^ nf_conntrack_hash_rnd ^
+ return jhash2((u32 *)tuple, n, nf_conntrack_hash_rnd ^
(((__force __u16)tuple->dst.u.all << 16) |
tuple->dst.protonum));
}
@@ -151,15 +151,15 @@ static u32 hash_bucket(u32 hash, const struct net *net)
}
static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
- u16 zone, unsigned int size)
+ unsigned int size)
{
- return __hash_bucket(hash_conntrack_raw(tuple, zone), size);
+ return __hash_bucket(hash_conntrack_raw(tuple), size);
}
-static inline u_int32_t hash_conntrack(const struct net *net, u16 zone,
+static inline u_int32_t hash_conntrack(const struct net *net,
const struct nf_conntrack_tuple *tuple)
{
- return __hash_conntrack(tuple, zone, net->ct.htable_size);
+ return __hash_conntrack(tuple, net->ct.htable_size);
}
bool
@@ -288,7 +288,9 @@ static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct)
}
/* Released via destroy_conntrack() */
-struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags)
+struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
+ const struct nf_conntrack_zone *zone,
+ gfp_t flags)
{
struct nf_conn *tmpl;
@@ -299,24 +301,15 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags)
tmpl->status = IPS_TEMPLATE;
write_pnet(&tmpl->ct_net, net);
-#ifdef CONFIG_NF_CONNTRACK_ZONES
- if (zone) {
- struct nf_conntrack_zone *nf_ct_zone;
+ if (nf_ct_zone_add(tmpl, flags, zone) < 0)
+ goto out_free;
- nf_ct_zone = nf_ct_ext_add(tmpl, NF_CT_EXT_ZONE, flags);
- if (!nf_ct_zone)
- goto out_free;
- nf_ct_zone->id = zone;
- }
-#endif
atomic_set(&tmpl->ct_general.use, 0);
return tmpl;
-#ifdef CONFIG_NF_CONNTRACK_ZONES
out_free:
kfree(tmpl);
return NULL;
-#endif
}
EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc);
@@ -373,7 +366,6 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
unsigned int hash, reply_hash;
- u16 zone = nf_ct_zone(ct);
unsigned int sequence;
nf_ct_helper_destroy(ct);
@@ -381,9 +373,9 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct)
local_bh_disable();
do {
sequence = read_seqcount_begin(&net->ct.generation);
- hash = hash_conntrack(net, zone,
+ hash = hash_conntrack(net,
&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
- reply_hash = hash_conntrack(net, zone,
+ reply_hash = hash_conntrack(net,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
@@ -431,8 +423,8 @@ static void death_by_timeout(unsigned long ul_conntrack)
static inline bool
nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
- const struct nf_conntrack_tuple *tuple,
- u16 zone)
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_zone *zone)
{
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
@@ -440,8 +432,8 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
* so we need to check that the conntrack is confirmed
*/
return nf_ct_tuple_equal(tuple, &h->tuple) &&
- nf_ct_zone(ct) == zone &&
- nf_ct_is_confirmed(ct);
+ nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h)) &&
+ nf_ct_is_confirmed(ct);
}
/*
@@ -450,7 +442,7 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
* and recheck nf_ct_tuple_equal(tuple, &h->tuple)
*/
static struct nf_conntrack_tuple_hash *
-____nf_conntrack_find(struct net *net, u16 zone,
+____nf_conntrack_find(struct net *net, const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *tuple, u32 hash)
{
struct nf_conntrack_tuple_hash *h;
@@ -486,7 +478,7 @@ begin:
/* Find a connection corresponding to a tuple. */
static struct nf_conntrack_tuple_hash *
-__nf_conntrack_find_get(struct net *net, u16 zone,
+__nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *tuple, u32 hash)
{
struct nf_conntrack_tuple_hash *h;
@@ -513,11 +505,11 @@ begin:
}
struct nf_conntrack_tuple_hash *
-nf_conntrack_find_get(struct net *net, u16 zone,
+nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *tuple)
{
return __nf_conntrack_find_get(net, zone, tuple,
- hash_conntrack_raw(tuple, zone));
+ hash_conntrack_raw(tuple));
}
EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
@@ -536,11 +528,11 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct,
int
nf_conntrack_hash_check_insert(struct nf_conn *ct)
{
+ const struct nf_conntrack_zone *zone;
struct net *net = nf_ct_net(ct);
unsigned int hash, reply_hash;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
- u16 zone;
unsigned int sequence;
zone = nf_ct_zone(ct);
@@ -548,9 +540,9 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
local_bh_disable();
do {
sequence = read_seqcount_begin(&net->ct.generation);
- hash = hash_conntrack(net, zone,
+ hash = hash_conntrack(net,
&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
- reply_hash = hash_conntrack(net, zone,
+ reply_hash = hash_conntrack(net,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
@@ -558,12 +550,14 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
&h->tuple) &&
- zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
+ nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
+ NF_CT_DIRECTION(h)))
goto out;
hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode)
if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
&h->tuple) &&
- zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
+ nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
+ NF_CT_DIRECTION(h)))
goto out;
add_timer(&ct->timeout);
@@ -588,6 +582,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
int
__nf_conntrack_confirm(struct sk_buff *skb)
{
+ const struct nf_conntrack_zone *zone;
unsigned int hash, reply_hash;
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
@@ -596,7 +591,6 @@ __nf_conntrack_confirm(struct sk_buff *skb)
struct hlist_nulls_node *n;
enum ip_conntrack_info ctinfo;
struct net *net;
- u16 zone;
unsigned int sequence;
ct = nf_ct_get(skb, &ctinfo);
@@ -617,7 +611,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
/* reuse the hash saved before */
hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
hash = hash_bucket(hash, net);
- reply_hash = hash_conntrack(net, zone,
+ reply_hash = hash_conntrack(net,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
@@ -649,12 +643,14 @@ __nf_conntrack_confirm(struct sk_buff *skb)
hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
&h->tuple) &&
- zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
+ nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
+ NF_CT_DIRECTION(h)))
goto out;
hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode)
if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
&h->tuple) &&
- zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
+ nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
+ NF_CT_DIRECTION(h)))
goto out;
/* Timer relative to confirmation time, not original
@@ -707,11 +703,14 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
const struct nf_conn *ignored_conntrack)
{
struct net *net = nf_ct_net(ignored_conntrack);
+ const struct nf_conntrack_zone *zone;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
struct nf_conn *ct;
- u16 zone = nf_ct_zone(ignored_conntrack);
- unsigned int hash = hash_conntrack(net, zone, tuple);
+ unsigned int hash;
+
+ zone = nf_ct_zone(ignored_conntrack);
+ hash = hash_conntrack(net, tuple);
/* Disable BHs the entire time since we need to disable them at
* least once for the stats anyway.
@@ -721,7 +720,7 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
ct = nf_ct_tuplehash_to_ctrack(h);
if (ct != ignored_conntrack &&
nf_ct_tuple_equal(tuple, &h->tuple) &&
- nf_ct_zone(ct) == zone) {
+ nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h))) {
NF_CT_STAT_INC(net, found);
rcu_read_unlock_bh();
return 1;
@@ -810,7 +809,8 @@ void init_nf_conntrack_hash_rnd(void)
}
static struct nf_conn *
-__nf_conntrack_alloc(struct net *net, u16 zone,
+__nf_conntrack_alloc(struct net *net,
+ const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *orig,
const struct nf_conntrack_tuple *repl,
gfp_t gfp, u32 hash)
@@ -820,7 +820,7 @@ __nf_conntrack_alloc(struct net *net, u16 zone,
if (unlikely(!nf_conntrack_hash_rnd)) {
init_nf_conntrack_hash_rnd();
/* recompute the hash as nf_conntrack_hash_rnd is initialized */
- hash = hash_conntrack_raw(orig, zone);
+ hash = hash_conntrack_raw(orig);
}
/* We don't want any race condition at early drop stage */
@@ -840,10 +840,9 @@ __nf_conntrack_alloc(struct net *net, u16 zone,
* SLAB_DESTROY_BY_RCU.
*/
ct = kmem_cache_alloc(net->ct.nf_conntrack_cachep, gfp);
- if (ct == NULL) {
- atomic_dec(&net->ct.count);
- return ERR_PTR(-ENOMEM);
- }
+ if (ct == NULL)
+ goto out;
+
spin_lock_init(&ct->lock);
ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
@@ -857,31 +856,24 @@ __nf_conntrack_alloc(struct net *net, u16 zone,
memset(&ct->__nfct_init_offset[0], 0,
offsetof(struct nf_conn, proto) -
offsetof(struct nf_conn, __nfct_init_offset[0]));
-#ifdef CONFIG_NF_CONNTRACK_ZONES
- if (zone) {
- struct nf_conntrack_zone *nf_ct_zone;
- nf_ct_zone = nf_ct_ext_add(ct, NF_CT_EXT_ZONE, GFP_ATOMIC);
- if (!nf_ct_zone)
- goto out_free;
- nf_ct_zone->id = zone;
- }
-#endif
+ if (zone && nf_ct_zone_add(ct, GFP_ATOMIC, zone) < 0)
+ goto out_free;
+
/* Because we use RCU lookups, we set ct_general.use to zero before
* this is inserted in any list.
*/
atomic_set(&ct->ct_general.use, 0);
return ct;
-
-#ifdef CONFIG_NF_CONNTRACK_ZONES
out_free:
- atomic_dec(&net->ct.count);
kmem_cache_free(net->ct.nf_conntrack_cachep, ct);
+out:
+ atomic_dec(&net->ct.count);
return ERR_PTR(-ENOMEM);
-#endif
}
-struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
+struct nf_conn *nf_conntrack_alloc(struct net *net,
+ const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *orig,
const struct nf_conntrack_tuple *repl,
gfp_t gfp)
@@ -923,8 +915,9 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
struct nf_conntrack_tuple repl_tuple;
struct nf_conntrack_ecache *ecache;
struct nf_conntrack_expect *exp = NULL;
- u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
+ const struct nf_conntrack_zone *zone;
struct nf_conn_timeout *timeout_ext;
+ struct nf_conntrack_zone tmp;
unsigned int *timeouts;
if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) {
@@ -932,6 +925,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
return NULL;
}
+ zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC,
hash);
if (IS_ERR(ct))
@@ -1026,10 +1020,11 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
int *set_reply,
enum ip_conntrack_info *ctinfo)
{
+ const struct nf_conntrack_zone *zone;
struct nf_conntrack_tuple tuple;
struct nf_conntrack_tuple_hash *h;
+ struct nf_conntrack_zone tmp;
struct nf_conn *ct;
- u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
u32 hash;
if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
@@ -1040,7 +1035,8 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
}
/* look for tuple match */
- hash = hash_conntrack_raw(&tuple, zone);
+ zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
+ hash = hash_conntrack_raw(&tuple);
h = __nf_conntrack_find_get(net, zone, &tuple, hash);
if (!h) {
h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
@@ -1290,6 +1286,13 @@ bool __nf_ct_kill_acct(struct nf_conn *ct,
}
EXPORT_SYMBOL_GPL(__nf_ct_kill_acct);
+/* Built-in default zone used e.g. by modules. */
+const struct nf_conntrack_zone nf_ct_zone_dflt = {
+ .id = NF_CT_DEFAULT_ZONE_ID,
+ .dir = NF_CT_DEFAULT_ZONE_DIR,
+};
+EXPORT_SYMBOL_GPL(nf_ct_zone_dflt);
+
#ifdef CONFIG_NF_CONNTRACK_ZONES
static struct nf_ct_ext_type nf_ct_zone_extend __read_mostly = {
.len = sizeof(struct nf_conntrack_zone),
@@ -1596,8 +1599,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
struct nf_conntrack_tuple_hash, hnnode);
ct = nf_ct_tuplehash_to_ctrack(h);
hlist_nulls_del_rcu(&h->hnnode);
- bucket = __hash_conntrack(&h->tuple, nf_ct_zone(ct),
- hashsize);
+ bucket = __hash_conntrack(&h->tuple, hashsize);
hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
}
}
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index b45a4223cb05..acf5c7b3f378 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -88,7 +88,8 @@ static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple
}
struct nf_conntrack_expect *
-__nf_ct_expect_find(struct net *net, u16 zone,
+__nf_ct_expect_find(struct net *net,
+ const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *tuple)
{
struct nf_conntrack_expect *i;
@@ -100,7 +101,7 @@ __nf_ct_expect_find(struct net *net, u16 zone,
h = nf_ct_expect_dst_hash(tuple);
hlist_for_each_entry_rcu(i, &net->ct.expect_hash[h], hnode) {
if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
- nf_ct_zone(i->master) == zone)
+ nf_ct_zone_equal_any(i->master, zone))
return i;
}
return NULL;
@@ -109,7 +110,8 @@ EXPORT_SYMBOL_GPL(__nf_ct_expect_find);
/* Just find a expectation corresponding to a tuple. */
struct nf_conntrack_expect *
-nf_ct_expect_find_get(struct net *net, u16 zone,
+nf_ct_expect_find_get(struct net *net,
+ const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *tuple)
{
struct nf_conntrack_expect *i;
@@ -127,7 +129,8 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
/* If an expectation for this connection is found, it gets delete from
* global list then returned. */
struct nf_conntrack_expect *
-nf_ct_find_expectation(struct net *net, u16 zone,
+nf_ct_find_expectation(struct net *net,
+ const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *tuple)
{
struct nf_conntrack_expect *i, *exp = NULL;
@@ -140,7 +143,7 @@ nf_ct_find_expectation(struct net *net, u16 zone,
hlist_for_each_entry(i, &net->ct.expect_hash[h], hnode) {
if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
- nf_ct_zone(i->master) == zone) {
+ nf_ct_zone_equal_any(i->master, zone)) {
exp = i;
break;
}
@@ -220,16 +223,16 @@ static inline int expect_clash(const struct nf_conntrack_expect *a,
}
return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask) &&
- nf_ct_zone(a->master) == nf_ct_zone(b->master);
+ nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master));
}
static inline int expect_matches(const struct nf_conntrack_expect *a,
const struct nf_conntrack_expect *b)
{
return a->master == b->master && a->class == b->class &&
- nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
- nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
- nf_ct_zone(a->master) == nf_ct_zone(b->master);
+ nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
+ nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
+ nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master));
}
/* Generally a bad idea to call this: could have matched already. */
diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c
index bb53f120e79c..3ce5c314ea4b 100644
--- a/net/netfilter/nf_conntrack_labels.c
+++ b/net/netfilter/nf_conntrack_labels.c
@@ -14,6 +14,8 @@
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_labels.h>
+static spinlock_t nf_connlabels_lock;
+
static unsigned int label_bits(const struct nf_conn_labels *l)
{
unsigned int longs = l->words;
@@ -48,7 +50,6 @@ int nf_connlabel_set(struct nf_conn *ct, u16 bit)
}
EXPORT_SYMBOL_GPL(nf_connlabel_set);
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
static void replace_u32(u32 *address, u32 mask, u32 new)
{
u32 old, tmp;
@@ -89,7 +90,35 @@ int nf_connlabels_replace(struct nf_conn *ct,
return 0;
}
EXPORT_SYMBOL_GPL(nf_connlabels_replace);
-#endif
+
+int nf_connlabels_get(struct net *net, unsigned int n_bits)
+{
+ size_t words;
+
+ if (n_bits > (NF_CT_LABELS_MAX_SIZE * BITS_PER_BYTE))
+ return -ERANGE;
+
+ words = BITS_TO_LONGS(n_bits);
+
+ spin_lock(&nf_connlabels_lock);
+ net->ct.labels_used++;
+ if (words > net->ct.label_words)
+ net->ct.label_words = words;
+ spin_unlock(&nf_connlabels_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_connlabels_get);
+
+void nf_connlabels_put(struct net *net)
+{
+ spin_lock(&nf_connlabels_lock);
+ net->ct.labels_used--;
+ if (net->ct.labels_used == 0)
+ net->ct.label_words = 0;
+ spin_unlock(&nf_connlabels_lock);
+}
+EXPORT_SYMBOL_GPL(nf_connlabels_put);
static struct nf_ct_ext_type labels_extend __read_mostly = {
.len = sizeof(struct nf_conn_labels),
@@ -99,6 +128,7 @@ static struct nf_ct_ext_type labels_extend __read_mostly = {
int nf_conntrack_labels_init(void)
{
+ spin_lock_init(&nf_connlabels_lock);
return nf_ct_extend_register(&labels_extend);
}
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 6b8b0abbfab4..94a66541e0b7 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -128,6 +128,20 @@ ctnetlink_dump_tuples(struct sk_buff *skb,
}
static inline int
+ctnetlink_dump_zone_id(struct sk_buff *skb, int attrtype,
+ const struct nf_conntrack_zone *zone, int dir)
+{
+ if (zone->id == NF_CT_DEFAULT_ZONE_ID || zone->dir != dir)
+ return 0;
+ if (nla_put_be16(skb, attrtype, htons(zone->id)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static inline int
ctnetlink_dump_status(struct sk_buff *skb, const struct nf_conn *ct)
{
if (nla_put_be32(skb, CTA_STATUS, htonl(ct->status)))
@@ -458,6 +472,7 @@ static int
ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
struct nf_conn *ct)
{
+ const struct nf_conntrack_zone *zone;
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
struct nlattr *nest_parms;
@@ -473,11 +488,16 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = 0;
+ zone = nf_ct_zone(ct);
+
nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
if (!nest_parms)
goto nla_put_failure;
if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
goto nla_put_failure;
+ if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone,
+ NF_CT_ZONE_DIR_ORIG) < 0)
+ goto nla_put_failure;
nla_nest_end(skb, nest_parms);
nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED);
@@ -485,10 +505,13 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
goto nla_put_failure;
if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0)
goto nla_put_failure;
+ if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone,
+ NF_CT_ZONE_DIR_REPL) < 0)
+ goto nla_put_failure;
nla_nest_end(skb, nest_parms);
- if (nf_ct_zone(ct) &&
- nla_put_be16(skb, CTA_ZONE, htons(nf_ct_zone(ct))))
+ if (ctnetlink_dump_zone_id(skb, CTA_ZONE, zone,
+ NF_CT_DEFAULT_ZONE_DIR) < 0)
goto nla_put_failure;
if (ctnetlink_dump_status(skb, ct) < 0 ||
@@ -598,7 +621,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct)
+ nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */
#endif
#ifdef CONFIG_NF_CONNTRACK_ZONES
- + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE */
+ + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE|CTA_TUPLE_ZONE */
#endif
+ ctnetlink_proto_size(ct)
+ ctnetlink_label_size(ct)
@@ -609,6 +632,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct)
static int
ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
{
+ const struct nf_conntrack_zone *zone;
struct net *net;
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
@@ -655,11 +679,16 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
nfmsg->res_id = 0;
rcu_read_lock();
+ zone = nf_ct_zone(ct);
+
nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
if (!nest_parms)
goto nla_put_failure;
if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
goto nla_put_failure;
+ if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone,
+ NF_CT_ZONE_DIR_ORIG) < 0)
+ goto nla_put_failure;
nla_nest_end(skb, nest_parms);
nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED);
@@ -667,10 +696,13 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
goto nla_put_failure;
if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0)
goto nla_put_failure;
+ if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone,
+ NF_CT_ZONE_DIR_REPL) < 0)
+ goto nla_put_failure;
nla_nest_end(skb, nest_parms);
- if (nf_ct_zone(ct) &&
- nla_put_be16(skb, CTA_ZONE, htons(nf_ct_zone(ct))))
+ if (ctnetlink_dump_zone_id(skb, CTA_ZONE, zone,
+ NF_CT_DEFAULT_ZONE_DIR) < 0)
goto nla_put_failure;
if (ctnetlink_dump_id(skb, ct) < 0)
@@ -920,15 +952,54 @@ ctnetlink_parse_tuple_proto(struct nlattr *attr,
return ret;
}
+static int
+ctnetlink_parse_zone(const struct nlattr *attr,
+ struct nf_conntrack_zone *zone)
+{
+ nf_ct_zone_init(zone, NF_CT_DEFAULT_ZONE_ID,
+ NF_CT_DEFAULT_ZONE_DIR, 0);
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ if (attr)
+ zone->id = ntohs(nla_get_be16(attr));
+#else
+ if (attr)
+ return -EOPNOTSUPP;
+#endif
+ return 0;
+}
+
+static int
+ctnetlink_parse_tuple_zone(struct nlattr *attr, enum ctattr_type type,
+ struct nf_conntrack_zone *zone)
+{
+ int ret;
+ /* Reject a per-tuple zone when the caller's zone id is already non-default. */
+ if (zone->id != NF_CT_DEFAULT_ZONE_ID)
+ return -EINVAL;
+ /* Resets *zone to the defaults, then takes the id from attr when one is given. */
+ ret = ctnetlink_parse_zone(attr, zone);
+ if (ret < 0)
+ return ret;
+ /* Tie the zone to one direction: reply for CTA_TUPLE_REPLY, original otherwise. */
+ if (type == CTA_TUPLE_REPLY)
+ zone->dir = NF_CT_ZONE_DIR_REPL;
+ else
+ zone->dir = NF_CT_ZONE_DIR_ORIG;
+
+ return 0;
+}
+
static const struct nla_policy tuple_nla_policy[CTA_TUPLE_MAX+1] = {
[CTA_TUPLE_IP] = { .type = NLA_NESTED },
[CTA_TUPLE_PROTO] = { .type = NLA_NESTED },
+ [CTA_TUPLE_ZONE] = { .type = NLA_U16 },
};
static int
ctnetlink_parse_tuple(const struct nlattr * const cda[],
struct nf_conntrack_tuple *tuple,
- enum ctattr_type type, u_int8_t l3num)
+ enum ctattr_type type, u_int8_t l3num,
+ struct nf_conntrack_zone *zone)
{
struct nlattr *tb[CTA_TUPLE_MAX+1];
int err;
@@ -955,6 +1026,16 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[],
if (err < 0)
return err;
+ if (tb[CTA_TUPLE_ZONE]) {
+ if (!zone)
+ return -EINVAL;
+
+ err = ctnetlink_parse_tuple_zone(tb[CTA_TUPLE_ZONE],
+ type, zone);
+ if (err < 0)
+ return err;
+ }
+
/* orig and expect tuples get DIR_ORIGINAL */
if (type == CTA_TUPLE_REPLY)
tuple->dst.dir = IP_CT_DIR_REPLY;
@@ -964,21 +1045,6 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[],
return 0;
}
-static int
-ctnetlink_parse_zone(const struct nlattr *attr, u16 *zone)
-{
- if (attr)
-#ifdef CONFIG_NF_CONNTRACK_ZONES
- *zone = ntohs(nla_get_be16(attr));
-#else
- return -EOPNOTSUPP;
-#endif
- else
- *zone = 0;
-
- return 0;
-}
-
static const struct nla_policy help_nla_policy[CTA_HELP_MAX+1] = {
[CTA_HELP_NAME] = { .type = NLA_NUL_STRING,
.len = NF_CT_HELPER_NAME_LEN - 1 },
@@ -1058,7 +1124,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
struct nf_conn *ct;
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int8_t u3 = nfmsg->nfgen_family;
- u16 zone;
+ struct nf_conntrack_zone zone;
int err;
err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone);
@@ -1066,9 +1132,11 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
return err;
if (cda[CTA_TUPLE_ORIG])
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3);
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG,
+ u3, &zone);
else if (cda[CTA_TUPLE_REPLY])
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3);
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY,
+ u3, &zone);
else {
return ctnetlink_flush_conntrack(net, cda,
NETLINK_CB(skb).portid,
@@ -1078,7 +1146,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
if (err < 0)
return err;
- h = nf_conntrack_find_get(net, zone, &tuple);
+ h = nf_conntrack_find_get(net, &zone, &tuple);
if (!h)
return -ENOENT;
@@ -1112,7 +1180,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
struct sk_buff *skb2 = NULL;
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int8_t u3 = nfmsg->nfgen_family;
- u16 zone;
+ struct nf_conntrack_zone zone;
int err;
if (nlh->nlmsg_flags & NLM_F_DUMP) {
@@ -1138,16 +1206,18 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
return err;
if (cda[CTA_TUPLE_ORIG])
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3);
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG,
+ u3, &zone);
else if (cda[CTA_TUPLE_REPLY])
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3);
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY,
+ u3, &zone);
else
return -EINVAL;
if (err < 0)
return err;
- h = nf_conntrack_find_get(net, zone, &tuple);
+ h = nf_conntrack_find_get(net, &zone, &tuple);
if (!h)
return -ENOENT;
@@ -1645,7 +1715,8 @@ ctnetlink_change_conntrack(struct nf_conn *ct,
}
static struct nf_conn *
-ctnetlink_create_conntrack(struct net *net, u16 zone,
+ctnetlink_create_conntrack(struct net *net,
+ const struct nf_conntrack_zone *zone,
const struct nlattr * const cda[],
struct nf_conntrack_tuple *otuple,
struct nf_conntrack_tuple *rtuple,
@@ -1761,7 +1832,8 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
struct nf_conntrack_tuple_hash *master_h;
struct nf_conn *master_ct;
- err = ctnetlink_parse_tuple(cda, &master, CTA_TUPLE_MASTER, u3);
+ err = ctnetlink_parse_tuple(cda, &master, CTA_TUPLE_MASTER,
+ u3, NULL);
if (err < 0)
goto err2;
@@ -1804,7 +1876,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct nf_conn *ct;
u_int8_t u3 = nfmsg->nfgen_family;
- u16 zone;
+ struct nf_conntrack_zone zone;
int err;
err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone);
@@ -1812,21 +1884,23 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
return err;
if (cda[CTA_TUPLE_ORIG]) {
- err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG, u3);
+ err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG,
+ u3, &zone);
if (err < 0)
return err;
}
if (cda[CTA_TUPLE_REPLY]) {
- err = ctnetlink_parse_tuple(cda, &rtuple, CTA_TUPLE_REPLY, u3);
+ err = ctnetlink_parse_tuple(cda, &rtuple, CTA_TUPLE_REPLY,
+ u3, &zone);
if (err < 0)
return err;
}
if (cda[CTA_TUPLE_ORIG])
- h = nf_conntrack_find_get(net, zone, &otuple);
+ h = nf_conntrack_find_get(net, &zone, &otuple);
else if (cda[CTA_TUPLE_REPLY])
- h = nf_conntrack_find_get(net, zone, &rtuple);
+ h = nf_conntrack_find_get(net, &zone, &rtuple);
if (h == NULL) {
err = -ENOENT;
@@ -1836,7 +1910,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
if (!cda[CTA_TUPLE_ORIG] || !cda[CTA_TUPLE_REPLY])
return -EINVAL;
- ct = ctnetlink_create_conntrack(net, zone, cda, &otuple,
+ ct = ctnetlink_create_conntrack(net, &zone, cda, &otuple,
&rtuple, u3);
if (IS_ERR(ct))
return PTR_ERR(ct);
@@ -2082,7 +2156,7 @@ ctnetlink_nfqueue_build_size(const struct nf_conn *ct)
+ nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */
#endif
#ifdef CONFIG_NF_CONNTRACK_ZONES
- + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE */
+ + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE|CTA_TUPLE_ZONE */
#endif
+ ctnetlink_proto_size(ct)
;
@@ -2091,14 +2165,20 @@ ctnetlink_nfqueue_build_size(const struct nf_conn *ct)
static int
ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct)
{
+ const struct nf_conntrack_zone *zone;
struct nlattr *nest_parms;
rcu_read_lock();
+ zone = nf_ct_zone(ct);
+
nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
if (!nest_parms)
goto nla_put_failure;
if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
goto nla_put_failure;
+ if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone,
+ NF_CT_ZONE_DIR_ORIG) < 0)
+ goto nla_put_failure;
nla_nest_end(skb, nest_parms);
nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED);
@@ -2106,12 +2186,14 @@ ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct)
goto nla_put_failure;
if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0)
goto nla_put_failure;
+ if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone,
+ NF_CT_ZONE_DIR_REPL) < 0)
+ goto nla_put_failure;
nla_nest_end(skb, nest_parms);
- if (nf_ct_zone(ct)) {
- if (nla_put_be16(skb, CTA_ZONE, htons(nf_ct_zone(ct))))
- goto nla_put_failure;
- }
+ if (ctnetlink_dump_zone_id(skb, CTA_ZONE, zone,
+ NF_CT_DEFAULT_ZONE_DIR) < 0)
+ goto nla_put_failure;
if (ctnetlink_dump_id(skb, ct) < 0)
goto nla_put_failure;
@@ -2218,12 +2300,12 @@ static int ctnetlink_nfqueue_exp_parse(const struct nlattr * const *cda,
int err;
err = ctnetlink_parse_tuple(cda, tuple, CTA_EXPECT_TUPLE,
- nf_ct_l3num(ct));
+ nf_ct_l3num(ct), NULL);
if (err < 0)
return err;
return ctnetlink_parse_tuple(cda, mask, CTA_EXPECT_MASK,
- nf_ct_l3num(ct));
+ nf_ct_l3num(ct), NULL);
}
static int
@@ -2612,23 +2694,22 @@ static int ctnetlink_dump_exp_ct(struct sock *ctnl, struct sk_buff *skb,
struct nf_conntrack_tuple tuple;
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
- u16 zone = 0;
+ struct nf_conntrack_zone zone;
struct netlink_dump_control c = {
.dump = ctnetlink_exp_ct_dump_table,
.done = ctnetlink_exp_done,
};
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3);
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER,
+ u3, NULL);
if (err < 0)
return err;
- if (cda[CTA_EXPECT_ZONE]) {
- err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone);
- if (err < 0)
- return err;
- }
+ err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone);
+ if (err < 0)
+ return err;
- h = nf_conntrack_find_get(net, zone, &tuple);
+ h = nf_conntrack_find_get(net, &zone, &tuple);
if (!h)
return -ENOENT;
@@ -2652,7 +2733,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
struct sk_buff *skb2;
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int8_t u3 = nfmsg->nfgen_family;
- u16 zone;
+ struct nf_conntrack_zone zone;
int err;
if (nlh->nlmsg_flags & NLM_F_DUMP) {
@@ -2672,16 +2753,18 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
return err;
if (cda[CTA_EXPECT_TUPLE])
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE,
+ u3, NULL);
else if (cda[CTA_EXPECT_MASTER])
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3);
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER,
+ u3, NULL);
else
return -EINVAL;
if (err < 0)
return err;
- exp = nf_ct_expect_find_get(net, zone, &tuple);
+ exp = nf_ct_expect_find_get(net, &zone, &tuple);
if (!exp)
return -ENOENT;
@@ -2732,8 +2815,8 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct hlist_node *next;
u_int8_t u3 = nfmsg->nfgen_family;
+ struct nf_conntrack_zone zone;
unsigned int i;
- u16 zone;
int err;
if (cda[CTA_EXPECT_TUPLE]) {
@@ -2742,12 +2825,13 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
if (err < 0)
return err;
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE,
+ u3, NULL);
if (err < 0)
return err;
/* bump usage count to 2 */
- exp = nf_ct_expect_find_get(net, zone, &tuple);
+ exp = nf_ct_expect_find_get(net, &zone, &tuple);
if (!exp)
return -ENOENT;
@@ -2849,7 +2933,8 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr,
return -EINVAL;
err = ctnetlink_parse_tuple((const struct nlattr * const *)tb,
- &nat_tuple, CTA_EXPECT_NAT_TUPLE, u3);
+ &nat_tuple, CTA_EXPECT_NAT_TUPLE,
+ u3, NULL);
if (err < 0)
return err;
@@ -2937,7 +3022,8 @@ err_out:
}
static int
-ctnetlink_create_expect(struct net *net, u16 zone,
+ctnetlink_create_expect(struct net *net,
+ const struct nf_conntrack_zone *zone,
const struct nlattr * const cda[],
u_int8_t u3, u32 portid, int report)
{
@@ -2949,13 +3035,16 @@ ctnetlink_create_expect(struct net *net, u16 zone,
int err;
/* caller guarantees that those three CTA_EXPECT_* exist */
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE,
+ u3, NULL);
if (err < 0)
return err;
- err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK, u3);
+ err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK,
+ u3, NULL);
if (err < 0)
return err;
- err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER, u3);
+ err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER,
+ u3, NULL);
if (err < 0)
return err;
@@ -3011,7 +3100,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
struct nf_conntrack_expect *exp;
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int8_t u3 = nfmsg->nfgen_family;
- u16 zone;
+ struct nf_conntrack_zone zone;
int err;
if (!cda[CTA_EXPECT_TUPLE]
@@ -3023,19 +3112,18 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
if (err < 0)
return err;
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE,
+ u3, NULL);
if (err < 0)
return err;
spin_lock_bh(&nf_conntrack_expect_lock);
- exp = __nf_ct_expect_find(net, zone, &tuple);
-
+ exp = __nf_ct_expect_find(net, &zone, &tuple);
if (!exp) {
spin_unlock_bh(&nf_conntrack_expect_lock);
err = -ENOENT;
if (nlh->nlmsg_flags & NLM_F_CREATE) {
- err = ctnetlink_create_expect(net, zone, cda,
- u3,
+ err = ctnetlink_create_expect(net, &zone, cda, u3,
NETLINK_CB(skb).portid,
nlmsg_report(nlh));
}
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 825c3e3f8305..5588c7ae1ac2 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -143,13 +143,14 @@ static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct,
const struct nf_conntrack_tuple *t)
{
const struct nf_conntrack_tuple_hash *h;
+ const struct nf_conntrack_zone *zone;
struct nf_conntrack_expect *exp;
struct nf_conn *sibling;
- u16 zone = nf_ct_zone(ct);
pr_debug("trying to timeout ct or exp for tuple ");
nf_ct_dump_tuple(t);
+ zone = nf_ct_zone(ct);
h = nf_conntrack_find_get(net, zone, t);
if (h) {
sibling = nf_ct_tuplehash_to_ctrack(h);
diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c
index ce3e840c8704..dff0f0cc59e4 100644
--- a/net/netfilter/nf_conntrack_seqadj.c
+++ b/net/netfilter/nf_conntrack_seqadj.c
@@ -103,9 +103,9 @@ static void nf_ct_sack_block_adjust(struct sk_buff *skb,
ntohl(sack->end_seq), ntohl(new_end_seq));
inet_proto_csum_replace4(&tcph->check, skb,
- sack->start_seq, new_start_seq, 0);
+ sack->start_seq, new_start_seq, false);
inet_proto_csum_replace4(&tcph->check, skb,
- sack->end_seq, new_end_seq, 0);
+ sack->end_seq, new_end_seq, false);
sack->start_seq = new_start_seq;
sack->end_seq = new_end_seq;
sackoff += sizeof(*sack);
@@ -193,8 +193,9 @@ int nf_ct_seq_adjust(struct sk_buff *skb,
newseq = htonl(ntohl(tcph->seq) + seqoff);
newack = htonl(ntohl(tcph->ack_seq) - ackoff);
- inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0);
- inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0);
+ inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, false);
+ inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack,
+ false);
pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index fc823fa5dcf5..1fb3cacc04e1 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -140,6 +140,35 @@ static inline void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
}
#endif
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+static void ct_show_zone(struct seq_file *s, const struct nf_conn *ct,
+ int dir)
+{
+ const struct nf_conntrack_zone *zone = nf_ct_zone(ct);
+ /* Print only when the zone's direction is the one the caller asked for. */
+ if (zone->dir != dir)
+ return;
+ switch (zone->dir) {
+ case NF_CT_DEFAULT_ZONE_DIR:
+ seq_printf(s, "zone=%u ", zone->id);
+ break;
+ case NF_CT_ZONE_DIR_ORIG:
+ seq_printf(s, "zone-orig=%u ", zone->id);
+ break;
+ case NF_CT_ZONE_DIR_REPL:
+ seq_printf(s, "zone-reply=%u ", zone->id);
+ break;
+ default:
+ break;
+ }
+}
+#else
+static inline void ct_show_zone(struct seq_file *s, const struct nf_conn *ct,
+ int dir)
+{
+}
+#endif
+
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
static void ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
{
@@ -202,6 +231,8 @@ static int ct_seq_show(struct seq_file *s, void *v)
print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
l3proto, l4proto);
+ ct_show_zone(s, ct, NF_CT_ZONE_DIR_ORIG);
+
if (seq_has_overflowed(s))
goto release;
@@ -214,6 +245,8 @@ static int ct_seq_show(struct seq_file *s, void *v)
print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
l3proto, l4proto);
+ ct_show_zone(s, ct, NF_CT_ZONE_DIR_REPL);
+
if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
goto release;
@@ -228,11 +261,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
#endif
ct_show_secctx(s, ct);
-
-#ifdef CONFIG_NF_CONNTRACK_ZONES
- seq_printf(s, "zone=%u ", nf_ct_zone(ct));
-#endif
-
+ ct_show_zone(s, ct, NF_CT_DEFAULT_ZONE_DIR);
ct_show_delta_time(s, ct);
seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use));
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 4e0b47831d43..5113dfd39df9 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -118,14 +118,13 @@ EXPORT_SYMBOL(nf_xfrm_me_harder);
/* We keep an extra hash for each conntrack, for fast searching. */
static inline unsigned int
-hash_by_src(const struct net *net, u16 zone,
- const struct nf_conntrack_tuple *tuple)
+hash_by_src(const struct net *net, const struct nf_conntrack_tuple *tuple)
{
unsigned int hash;
/* Original src, to ensure we map it consistently if poss. */
hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32),
- tuple->dst.protonum ^ zone ^ nf_conntrack_hash_rnd);
+ tuple->dst.protonum ^ nf_conntrack_hash_rnd);
return reciprocal_scale(hash, net->ct.nat_htable_size);
}
@@ -185,20 +184,22 @@ same_src(const struct nf_conn *ct,
/* Only called for SRC manip */
static int
-find_appropriate_src(struct net *net, u16 zone,
+find_appropriate_src(struct net *net,
+ const struct nf_conntrack_zone *zone,
const struct nf_nat_l3proto *l3proto,
const struct nf_nat_l4proto *l4proto,
const struct nf_conntrack_tuple *tuple,
struct nf_conntrack_tuple *result,
const struct nf_nat_range *range)
{
- unsigned int h = hash_by_src(net, zone, tuple);
+ unsigned int h = hash_by_src(net, tuple);
const struct nf_conn_nat *nat;
const struct nf_conn *ct;
hlist_for_each_entry_rcu(nat, &net->ct.nat_bysource[h], bysource) {
ct = nat->ct;
- if (same_src(ct, tuple) && nf_ct_zone(ct) == zone) {
+ if (same_src(ct, tuple) &&
+ nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) {
/* Copy source part from reply tuple. */
nf_ct_invert_tuplepr(result,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
@@ -218,7 +219,8 @@ find_appropriate_src(struct net *net, u16 zone,
* the ip with the lowest src-ip/dst-ip/proto usage.
*/
static void
-find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
+find_best_ips_proto(const struct nf_conntrack_zone *zone,
+ struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range,
const struct nf_conn *ct,
enum nf_nat_manip_type maniptype)
@@ -258,7 +260,7 @@ find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
*/
j = jhash2((u32 *)&tuple->src.u3, sizeof(tuple->src.u3) / sizeof(u32),
range->flags & NF_NAT_RANGE_PERSISTENT ?
- 0 : (__force u32)tuple->dst.u3.all[max] ^ zone);
+ 0 : (__force u32)tuple->dst.u3.all[max] ^ zone->id);
full_range = false;
for (i = 0; i <= max; i++) {
@@ -297,10 +299,12 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
struct nf_conn *ct,
enum nf_nat_manip_type maniptype)
{
+ const struct nf_conntrack_zone *zone;
const struct nf_nat_l3proto *l3proto;
const struct nf_nat_l4proto *l4proto;
struct net *net = nf_ct_net(ct);
- u16 zone = nf_ct_zone(ct);
+
+ zone = nf_ct_zone(ct);
rcu_read_lock();
l3proto = __nf_nat_l3proto_find(orig_tuple->src.l3num);
@@ -420,7 +424,7 @@ nf_nat_setup_info(struct nf_conn *ct,
if (maniptype == NF_NAT_MANIP_SRC) {
unsigned int srchash;
- srchash = hash_by_src(net, nf_ct_zone(ct),
+ srchash = hash_by_src(net,
&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
spin_lock_bh(&nf_nat_lock);
/* nf_conntrack_alter_reply might re-allocate extension aera */
diff --git a/net/netfilter/nf_nat_proto_dccp.c b/net/netfilter/nf_nat_proto_dccp.c
index b8067b53ff3a..15c47b246d0d 100644
--- a/net/netfilter/nf_nat_proto_dccp.c
+++ b/net/netfilter/nf_nat_proto_dccp.c
@@ -69,7 +69,7 @@ dccp_manip_pkt(struct sk_buff *skb,
l3proto->csum_update(skb, iphdroff, &hdr->dccph_checksum,
tuple, maniptype);
inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport,
- 0);
+ false);
return true;
}
diff --git a/net/netfilter/nf_nat_proto_tcp.c b/net/netfilter/nf_nat_proto_tcp.c
index 37f5505f4529..4f8820fc5148 100644
--- a/net/netfilter/nf_nat_proto_tcp.c
+++ b/net/netfilter/nf_nat_proto_tcp.c
@@ -70,7 +70,7 @@ tcp_manip_pkt(struct sk_buff *skb,
return true;
l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
- inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0);
+ inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, false);
return true;
}
diff --git a/net/netfilter/nf_nat_proto_udp.c b/net/netfilter/nf_nat_proto_udp.c
index b0ede2f0d8bc..b1e627227b6e 100644
--- a/net/netfilter/nf_nat_proto_udp.c
+++ b/net/netfilter/nf_nat_proto_udp.c
@@ -57,7 +57,7 @@ udp_manip_pkt(struct sk_buff *skb,
l3proto->csum_update(skb, iphdroff, &hdr->check,
tuple, maniptype);
inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
- 0);
+ false);
if (!hdr->check)
hdr->check = CSUM_MANGLED_0;
}
diff --git a/net/netfilter/nf_nat_proto_udplite.c b/net/netfilter/nf_nat_proto_udplite.c
index 368f14e01e75..58340c97bd83 100644
--- a/net/netfilter/nf_nat_proto_udplite.c
+++ b/net/netfilter/nf_nat_proto_udplite.c
@@ -56,7 +56,7 @@ udplite_manip_pkt(struct sk_buff *skb,
}
l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
- inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, 0);
+ inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, false);
if (!hdr->check)
hdr->check = CSUM_MANGLED_0;
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index d7f168527903..888b9558415e 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -17,10 +17,12 @@
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_tcpudp.h>
#include <linux/netfilter/xt_SYNPROXY.h>
+
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_extend.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_synproxy.h>
+#include <net/netfilter/nf_conntrack_zones.h>
int synproxy_net_id;
EXPORT_SYMBOL_GPL(synproxy_net_id);
@@ -186,7 +188,7 @@ unsigned int synproxy_tstamp_adjust(struct sk_buff *skb,
const struct nf_conn_synproxy *synproxy)
{
unsigned int optoff, optend;
- u32 *ptr, old;
+ __be32 *ptr, old;
if (synproxy->tsoff == 0)
return 1;
@@ -214,18 +216,18 @@ unsigned int synproxy_tstamp_adjust(struct sk_buff *skb,
if (op[0] == TCPOPT_TIMESTAMP &&
op[1] == TCPOLEN_TIMESTAMP) {
if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
- ptr = (u32 *)&op[2];
+ ptr = (__be32 *)&op[2];
old = *ptr;
*ptr = htonl(ntohl(*ptr) -
synproxy->tsoff);
} else {
- ptr = (u32 *)&op[6];
+ ptr = (__be32 *)&op[6];
old = *ptr;
*ptr = htonl(ntohl(*ptr) +
synproxy->tsoff);
}
inet_proto_csum_replace4(&th->check, skb,
- old, *ptr, 0);
+ old, *ptr, false);
return 1;
}
optoff += op[1];
@@ -352,7 +354,7 @@ static int __net_init synproxy_net_init(struct net *net)
struct nf_conn *ct;
int err = -ENOMEM;
- ct = nf_ct_tmpl_alloc(net, 0, GFP_KERNEL);
+ ct = nf_ct_tmpl_alloc(net, &nf_ct_zone_dflt, GFP_KERNEL);
if (!ct)
goto err1;
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index c18af2f63eef..fefbf5f0b28d 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -27,8 +27,6 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_DESCRIPTION("nfacct: Extended Netfilter accounting infrastructure");
-static LIST_HEAD(nfnl_acct_list);
-
struct nf_acct {
atomic64_t pkts;
atomic64_t bytes;
@@ -53,6 +51,7 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
const struct nlmsghdr *nlh, const struct nlattr * const tb[])
{
struct nf_acct *nfacct, *matching = NULL;
+ struct net *net = sock_net(nfnl);
char *acct_name;
unsigned int size = 0;
u32 flags = 0;
@@ -64,7 +63,7 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
if (strlen(acct_name) == 0)
return -EINVAL;
- list_for_each_entry(nfacct, &nfnl_acct_list, head) {
+ list_for_each_entry(nfacct, &net->nfnl_acct_list, head) {
if (strncmp(nfacct->name, acct_name, NFACCT_NAME_MAX) != 0)
continue;
@@ -124,7 +123,7 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
be64_to_cpu(nla_get_be64(tb[NFACCT_PKTS])));
}
atomic_set(&nfacct->refcnt, 1);
- list_add_tail_rcu(&nfacct->head, &nfnl_acct_list);
+ list_add_tail_rcu(&nfacct->head, &net->nfnl_acct_list);
return 0;
}
@@ -185,6 +184,7 @@ nla_put_failure:
static int
nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct net *net = sock_net(skb->sk);
struct nf_acct *cur, *last;
const struct nfacct_filter *filter = cb->data;
@@ -196,7 +196,7 @@ nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
cb->args[1] = 0;
rcu_read_lock();
- list_for_each_entry_rcu(cur, &nfnl_acct_list, head) {
+ list_for_each_entry_rcu(cur, &net->nfnl_acct_list, head) {
if (last) {
if (cur != last)
continue;
@@ -257,6 +257,7 @@ static int
nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb,
const struct nlmsghdr *nlh, const struct nlattr * const tb[])
{
+ struct net *net = sock_net(nfnl);
int ret = -ENOENT;
struct nf_acct *cur;
char *acct_name;
@@ -283,7 +284,7 @@ nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb,
return -EINVAL;
acct_name = nla_data(tb[NFACCT_NAME]);
- list_for_each_entry(cur, &nfnl_acct_list, head) {
+ list_for_each_entry(cur, &net->nfnl_acct_list, head) {
struct sk_buff *skb2;
if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0)
@@ -336,19 +337,20 @@ static int
nfnl_acct_del(struct sock *nfnl, struct sk_buff *skb,
const struct nlmsghdr *nlh, const struct nlattr * const tb[])
{
+ struct net *net = sock_net(nfnl);
char *acct_name;
struct nf_acct *cur;
int ret = -ENOENT;
if (!tb[NFACCT_NAME]) {
- list_for_each_entry(cur, &nfnl_acct_list, head)
+ list_for_each_entry(cur, &net->nfnl_acct_list, head)
nfnl_acct_try_del(cur);
return 0;
}
acct_name = nla_data(tb[NFACCT_NAME]);
- list_for_each_entry(cur, &nfnl_acct_list, head) {
+ list_for_each_entry(cur, &net->nfnl_acct_list, head) {
if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX) != 0)
continue;
@@ -394,12 +396,12 @@ static const struct nfnetlink_subsystem nfnl_acct_subsys = {
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ACCT);
-struct nf_acct *nfnl_acct_find_get(const char *acct_name)
+struct nf_acct *nfnl_acct_find_get(struct net *net, const char *acct_name)
{
struct nf_acct *cur, *acct = NULL;
rcu_read_lock();
- list_for_each_entry_rcu(cur, &nfnl_acct_list, head) {
+ list_for_each_entry_rcu(cur, &net->nfnl_acct_list, head) {
if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0)
continue;
@@ -422,7 +424,9 @@ EXPORT_SYMBOL_GPL(nfnl_acct_find_get);
void nfnl_acct_put(struct nf_acct *acct)
{
- atomic_dec(&acct->refcnt);
+ if (atomic_dec_and_test(&acct->refcnt))
+ kfree_rcu(acct, rcu_head);
+
module_put(THIS_MODULE);
}
EXPORT_SYMBOL_GPL(nfnl_acct_put);
@@ -478,34 +482,59 @@ int nfnl_acct_overquota(const struct sk_buff *skb, struct nf_acct *nfacct)
}
EXPORT_SYMBOL_GPL(nfnl_acct_overquota);
+static int __net_init nfnl_acct_net_init(struct net *net)
+{
+ INIT_LIST_HEAD(&net->nfnl_acct_list);
+
+ return 0;
+}
+
+static void __net_exit nfnl_acct_net_exit(struct net *net)
+{
+ struct nf_acct *cur, *tmp;
+
+ list_for_each_entry_safe(cur, tmp, &net->nfnl_acct_list, head) {
+ list_del_rcu(&cur->head);
+ /* Drop one reference (refcnt is set to 1 in nfnl_acct_new); free once it hits zero. */
+ if (atomic_dec_and_test(&cur->refcnt))
+ kfree_rcu(cur, rcu_head);
+ }
+}
+
+static struct pernet_operations nfnl_acct_ops = {
+ .init = nfnl_acct_net_init,
+ .exit = nfnl_acct_net_exit,
+};
+
static int __init nfnl_acct_init(void)
{
int ret;
+ ret = register_pernet_subsys(&nfnl_acct_ops);
+ if (ret < 0) {
+ pr_err("nfnl_acct_init: failed to register pernet ops\n");
+ goto err_out;
+ }
+
pr_info("nfnl_acct: registering with nfnetlink.\n");
ret = nfnetlink_subsys_register(&nfnl_acct_subsys);
if (ret < 0) {
pr_err("nfnl_acct_init: cannot register with nfnetlink.\n");
- goto err_out;
+ goto cleanup_pernet;
}
return 0;
+
+cleanup_pernet:
+ unregister_pernet_subsys(&nfnl_acct_ops);
err_out:
return ret;
}
static void __exit nfnl_acct_exit(void)
{
- struct nf_acct *cur, *tmp;
-
pr_info("nfnl_acct: unregistering from nfnetlink.\n");
nfnetlink_subsys_unregister(&nfnl_acct_subsys);
-
- list_for_each_entry_safe(cur, tmp, &nfnl_acct_list, head) {
- list_del_rcu(&cur->head);
- /* We are sure that our objects have no clients at this point,
- * it's safe to release them all without checking refcnt. */
- kfree_rcu(cur, rcu_head);
- }
+ unregister_pernet_subsys(&nfnl_acct_ops);
}
module_init(nfnl_acct_init);
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index 17591239229f..1067fb4c1ffa 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -18,39 +18,59 @@
#include <net/netfilter/nf_tables.h>
struct nft_counter {
- seqlock_t lock;
u64 bytes;
u64 packets;
};
+struct nft_counter_percpu {
+ struct nft_counter counter;
+ struct u64_stats_sync syncp;
+};
+
+struct nft_counter_percpu_priv {
+ struct nft_counter_percpu __percpu *counter;
+};
+
static void nft_counter_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
- struct nft_counter *priv = nft_expr_priv(expr);
-
- write_seqlock_bh(&priv->lock);
- priv->bytes += pkt->skb->len;
- priv->packets++;
- write_sequnlock_bh(&priv->lock);
+ struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
+ struct nft_counter_percpu *this_cpu;
+
+ local_bh_disable();
+ this_cpu = this_cpu_ptr(priv->counter);
+ u64_stats_update_begin(&this_cpu->syncp);
+ this_cpu->counter.bytes += pkt->skb->len;
+ this_cpu->counter.packets++;
+ u64_stats_update_end(&this_cpu->syncp);
+ local_bh_enable();
}
static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
- struct nft_counter *priv = nft_expr_priv(expr);
+ struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
+ struct nft_counter_percpu *cpu_stats;
+ struct nft_counter total;
+ u64 bytes, packets;
unsigned int seq;
- u64 bytes;
- u64 packets;
-
- do {
- seq = read_seqbegin(&priv->lock);
- bytes = priv->bytes;
- packets = priv->packets;
- } while (read_seqretry(&priv->lock, seq));
-
- if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(bytes)))
- goto nla_put_failure;
- if (nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(packets)))
+ int cpu;
+
+ memset(&total, 0, sizeof(total));
+ for_each_possible_cpu(cpu) {
+ cpu_stats = per_cpu_ptr(priv->counter, cpu);
+ do {
+ seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
+ bytes = cpu_stats->counter.bytes;
+ packets = cpu_stats->counter.packets;
+ } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq));
+
+ total.packets += packets;
+ total.bytes += bytes;
+ }
+
+ if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes)) ||
+ nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.packets)))
goto nla_put_failure;
return 0;
@@ -67,23 +87,44 @@ static int nft_counter_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
- struct nft_counter *priv = nft_expr_priv(expr);
+ struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
+ struct nft_counter_percpu __percpu *cpu_stats;
+ struct nft_counter_percpu *this_cpu;
+
+ cpu_stats = netdev_alloc_pcpu_stats(struct nft_counter_percpu);
+ if (cpu_stats == NULL)
+ return -ENOMEM;
+ /* Seed only this CPU's slot with any initial values; the dump path sums all CPUs. */
+ preempt_disable();
+ this_cpu = this_cpu_ptr(cpu_stats);
+ if (tb[NFTA_COUNTER_PACKETS]) {
+ this_cpu->counter.packets =
+ be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
+ }
+ if (tb[NFTA_COUNTER_BYTES]) {
+ this_cpu->counter.bytes =
+ be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
+ }
+ preempt_enable();
+ priv->counter = cpu_stats;
+ return 0;
+}
- if (tb[NFTA_COUNTER_PACKETS])
- priv->packets = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
- if (tb[NFTA_COUNTER_BYTES])
- priv->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
+static void nft_counter_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
- seqlock_init(&priv->lock);
- return 0;
+ free_percpu(priv->counter);
}
static struct nft_expr_type nft_counter_type;
static const struct nft_expr_ops nft_counter_ops = {
.type = &nft_counter_type,
- .size = NFT_EXPR_SIZE(sizeof(struct nft_counter)),
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_counter_percpu_priv)),
.eval = nft_counter_eval,
.init = nft_counter_init,
+ .destroy = nft_counter_destroy,
.dump = nft_counter_dump,
};
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index 435c1ccd6c0e..5d67938f8b2f 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -20,63 +20,79 @@
static DEFINE_SPINLOCK(limit_lock);
struct nft_limit {
+ u64 last;
u64 tokens;
+ u64 tokens_max;
u64 rate;
- u64 unit;
- unsigned long stamp;
+ u64 nsecs;
+ u32 burst;
};
-static void nft_limit_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
+static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost)
{
- struct nft_limit *priv = nft_expr_priv(expr);
+ u64 now, tokens;
+ s64 delta;
spin_lock_bh(&limit_lock);
- if (time_after_eq(jiffies, priv->stamp)) {
- priv->tokens = priv->rate;
- priv->stamp = jiffies + priv->unit * HZ;
- }
-
- if (priv->tokens >= 1) {
- priv->tokens--;
+ now = ktime_get_ns();
+ tokens = limit->tokens + now - limit->last;
+ if (tokens > limit->tokens_max)
+ tokens = limit->tokens_max;
+
+ limit->last = now;
+ delta = tokens - cost;
+ if (delta >= 0) {
+ limit->tokens = delta;
spin_unlock_bh(&limit_lock);
- return;
+ return false;
}
+ limit->tokens = tokens;
spin_unlock_bh(&limit_lock);
-
- regs->verdict.code = NFT_BREAK;
+ return true;
}
-static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = {
- [NFTA_LIMIT_RATE] = { .type = NLA_U64 },
- [NFTA_LIMIT_UNIT] = { .type = NLA_U64 },
-};
-
-static int nft_limit_init(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
+static int nft_limit_init(struct nft_limit *limit,
const struct nlattr * const tb[])
{
- struct nft_limit *priv = nft_expr_priv(expr);
+ u64 unit;
if (tb[NFTA_LIMIT_RATE] == NULL ||
tb[NFTA_LIMIT_UNIT] == NULL)
return -EINVAL;
- priv->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE]));
- priv->unit = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT]));
- priv->stamp = jiffies + priv->unit * HZ;
- priv->tokens = priv->rate;
+ limit->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE]));
+ unit = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT]));
+ limit->nsecs = unit * NSEC_PER_SEC;
+ if (limit->rate == 0 || limit->nsecs < unit)
+ return -EOVERFLOW;
+ limit->tokens = limit->tokens_max = limit->nsecs;
+
+ if (tb[NFTA_LIMIT_BURST]) {
+ u64 rate;
+
+ limit->burst = ntohl(nla_get_be32(tb[NFTA_LIMIT_BURST]));
+
+ rate = limit->rate + limit->burst;
+ if (rate < limit->rate)
+ return -EOVERFLOW;
+
+ limit->rate = rate;
+ }
+ limit->last = ktime_get_ns();
+
return 0;
}
-static int nft_limit_dump(struct sk_buff *skb, const struct nft_expr *expr)
+static int nft_limit_dump(struct sk_buff *skb, const struct nft_limit *limit,
+ enum nft_limit_type type)
{
- const struct nft_limit *priv = nft_expr_priv(expr);
+ u64 secs = div_u64(limit->nsecs, NSEC_PER_SEC);
+ u64 rate = limit->rate - limit->burst;
- if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(priv->rate)))
- goto nla_put_failure;
- if (nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(priv->unit)))
+ if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(rate)) ||
+ nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(secs)) ||
+ nla_put_be32(skb, NFTA_LIMIT_BURST, htonl(limit->burst)) ||
+ nla_put_be32(skb, NFTA_LIMIT_TYPE, htonl(type)))
goto nla_put_failure;
return 0;
@@ -84,18 +100,114 @@ nla_put_failure:
return -1;
}
+struct nft_limit_pkts {
+ struct nft_limit limit;
+ u64 cost;
+};
+
+static void nft_limit_pkts_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_limit_pkts *priv = nft_expr_priv(expr);
+
+ if (nft_limit_eval(&priv->limit, priv->cost))
+ regs->verdict.code = NFT_BREAK;
+}
+
+static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = {
+ [NFTA_LIMIT_RATE] = { .type = NLA_U64 },
+ [NFTA_LIMIT_UNIT] = { .type = NLA_U64 },
+ [NFTA_LIMIT_BURST] = { .type = NLA_U32 },
+ [NFTA_LIMIT_TYPE] = { .type = NLA_U32 },
+};
+
+/*
+ * Set up a packet-rate limit expression: parse the common limit attributes
+ * and precompute the per-packet token cost (nanoseconds per packet).
+ *
+ * Returns 0 on success or the negative errno from nft_limit_init().
+ */
+static int nft_limit_pkts_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_limit_pkts *priv = nft_expr_priv(expr);
+ int err;
+
+ err = nft_limit_init(&priv->limit, tb);
+ if (err < 0)
+ return err;
+
+ /*
+ * limit.rate is a u64. div_u64() only takes a 32-bit divisor, so a
+ * rate that does not fit in 32 bits would be truncated (possibly to
+ * zero); div64_u64() divides by the full 64-bit value.
+ * nft_limit_init() has already rejected rate == 0.
+ */
+ priv->cost = div64_u64(priv->limit.nsecs, priv->limit.rate);
+ return 0;
+}
+
+static int nft_limit_pkts_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_limit_pkts *priv = nft_expr_priv(expr);
+
+ return nft_limit_dump(skb, &priv->limit, NFT_LIMIT_PKTS);
+}
+
static struct nft_expr_type nft_limit_type;
-static const struct nft_expr_ops nft_limit_ops = {
+static const struct nft_expr_ops nft_limit_pkts_ops = {
+ .type = &nft_limit_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_limit_pkts)),
+ .eval = nft_limit_pkts_eval,
+ .init = nft_limit_pkts_init,
+ .dump = nft_limit_pkts_dump,
+};
+
+/*
+ * Byte-rate limit evaluation: charge the packet a token cost proportional
+ * to its length and set the verdict to NFT_BREAK when nft_limit_eval()
+ * reports that the limit was exceeded.
+ */
+static void nft_limit_pkt_bytes_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_limit *priv = nft_expr_priv(expr);
+ /*
+ * priv->rate is a u64; div_u64() would silently truncate it to 32 bits
+ * (possibly to zero), so divide by the full 64-bit value instead.
+ */
+ u64 cost = div64_u64(priv->nsecs * pkt->skb->len, priv->rate);
+
+ if (nft_limit_eval(priv, cost))
+ regs->verdict.code = NFT_BREAK;
+}
+
+static int nft_limit_pkt_bytes_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_limit *priv = nft_expr_priv(expr);
+
+ return nft_limit_init(priv, tb);
+}
+
+static int nft_limit_pkt_bytes_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
+{
+ const struct nft_limit *priv = nft_expr_priv(expr);
+
+ return nft_limit_dump(skb, priv, NFT_LIMIT_PKT_BYTES);
+}
+
+static const struct nft_expr_ops nft_limit_pkt_bytes_ops = {
.type = &nft_limit_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_limit)),
- .eval = nft_limit_eval,
- .init = nft_limit_init,
- .dump = nft_limit_dump,
+ .eval = nft_limit_pkt_bytes_eval,
+ .init = nft_limit_pkt_bytes_init,
+ .dump = nft_limit_pkt_bytes_dump,
};
+/*
+ * Pick the expression ops matching the requested limit type. A missing
+ * NFTA_LIMIT_TYPE attribute selects the packet-based ops; an unknown type
+ * yields ERR_PTR(-EOPNOTSUPP).
+ */
+static const struct nft_expr_ops *
+nft_limit_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ u32 type = NFT_LIMIT_PKTS;
+
+ if (tb[NFTA_LIMIT_TYPE] != NULL)
+ type = ntohl(nla_get_be32(tb[NFTA_LIMIT_TYPE]));
+
+ if (type == NFT_LIMIT_PKTS)
+ return &nft_limit_pkts_ops;
+ if (type == NFT_LIMIT_PKT_BYTES)
+ return &nft_limit_pkt_bytes_ops;
+
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
static struct nft_expr_type nft_limit_type __read_mostly = {
.name = "limit",
- .ops = &nft_limit_ops,
+ .select_ops = nft_limit_select_ops,
.policy = nft_limit_policy,
.maxattr = NFTA_LIMIT_MAX,
.flags = NFT_EXPR_STATEFUL,
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 94fb3b27a2c5..09b4b07eb676 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -9,6 +9,7 @@
*/
#include <linux/kernel.h>
+#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netlink.h>
@@ -17,6 +18,53 @@
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>
+/*
+ * Copy @len bytes, starting @offset bytes into the link-layer header, into
+ * the buffer @d as if the VLAN header were still part of the frame, for the
+ * case where the tag was removed by offloads.
+ *
+ * A struct vlan_ethhdr is rebuilt on the stack from the first ETH_HLEN bytes
+ * at the skb's mac header plus skb->vlan_proto, skb_vlan_tag_get() and
+ * skb->protocol. Bytes inside that header are served from the rebuilt copy;
+ * a range starting at or beyond VLAN_ETH_HLEN is read from the skb with the
+ * offset reduced by VLAN_HLEN.
+ *
+ * Returns true on success, false if skb_copy_bits() fails.
+ */
+static bool
+nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len)
+{
+ int mac_off = skb_mac_header(skb) - skb->data; /* mac header position relative to skb->data */
+ u8 vlan_len, *vlanh, *dst_u8 = (u8 *) d;
+ struct vlan_ethhdr veth;
+
+ vlanh = (u8 *) &veth;
+ if (offset < ETH_HLEN) { /* range starts inside the first ETH_HLEN bytes */
+ u8 ethlen = min_t(u8, len, ETH_HLEN - offset);
+
+ if (skb_copy_bits(skb, mac_off, &veth, ETH_HLEN))
+ return false;
+
+ veth.h_vlan_proto = skb->vlan_proto;
+
+ memcpy(dst_u8, vlanh + offset, ethlen);
+
+ len -= ethlen;
+ if (len == 0)
+ return true;
+
+ dst_u8 += ethlen;
+ offset = ETH_HLEN; /* continue with the VLAN part of the rebuilt header */
+ } else if (offset >= VLAN_ETH_HLEN) { /* range starts after the VLAN header */
+ offset -= VLAN_HLEN; /* translate to an offset without the VLAN_HLEN tag bytes */
+ goto skip;
+ }
+
+ veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb));
+ veth.h_vlan_encapsulated_proto = skb->protocol;
+
+ vlanh += offset;
+
+ vlan_len = min_t(u8, len, VLAN_ETH_HLEN - offset);
+ memcpy(dst_u8, vlanh, vlan_len);
+
+ len -= vlan_len;
+ if (!len)
+ return true;
+
+ dst_u8 += vlan_len;
+ skip:
+ return skb_copy_bits(skb, offset + mac_off, dst_u8, len) == 0; /* remaining bytes come straight from the skb */
+}
+
static void nft_payload_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -26,10 +74,18 @@ static void nft_payload_eval(const struct nft_expr *expr,
u32 *dest = &regs->data[priv->dreg];
int offset;
+ dest[priv->len / NFT_REG32_SIZE] = 0;
switch (priv->base) {
case NFT_PAYLOAD_LL_HEADER:
if (!skb_mac_header_was_set(skb))
goto err;
+
+ if (skb_vlan_tag_present(skb)) {
+ if (!nft_payload_copy_vlan(dest, skb,
+ priv->offset, priv->len))
+ goto err;
+ return;
+ }
offset = skb_mac_header(skb) - skb->data;
break;
case NFT_PAYLOAD_NETWORK_HEADER:
@@ -43,7 +99,6 @@ static void nft_payload_eval(const struct nft_expr *expr,
}
offset += priv->offset;
- dest[priv->len / NFT_REG32_SIZE] = 0;
if (skb_copy_bits(skb, offset, dest, priv->len) < 0)
goto err;
return;
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 43ddeee404e9..8e524898ccea 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -181,9 +181,23 @@ out:
#endif
}
+/*
+ * Translate the XT_CT_ZONE_DIR_* bits of info->flags into a conntrack zone
+ * direction. Exactly one bit set selects that direction; neither or both
+ * bits set fall back to NF_CT_DEFAULT_ZONE_DIR.
+ */
+static u16 xt_ct_flags_to_dir(const struct xt_ct_target_info_v1 *info)
+{
+ const unsigned int dir_mask = XT_CT_ZONE_DIR_ORIG | XT_CT_ZONE_DIR_REPL;
+ const unsigned int dir_bits = info->flags & dir_mask;
+
+ if (dir_bits == XT_CT_ZONE_DIR_ORIG)
+ return NF_CT_ZONE_DIR_ORIG;
+ if (dir_bits == XT_CT_ZONE_DIR_REPL)
+ return NF_CT_ZONE_DIR_REPL;
+
+ return NF_CT_DEFAULT_ZONE_DIR;
+}
+
static int xt_ct_tg_check(const struct xt_tgchk_param *par,
struct xt_ct_target_info_v1 *info)
{
+ struct nf_conntrack_zone zone;
struct nf_conn *ct;
int ret = -EOPNOTSUPP;
@@ -193,7 +207,9 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
}
#ifndef CONFIG_NF_CONNTRACK_ZONES
- if (info->zone)
+ if (info->zone || info->flags & (XT_CT_ZONE_DIR_ORIG |
+ XT_CT_ZONE_DIR_REPL |
+ XT_CT_ZONE_MARK))
goto err1;
#endif
@@ -201,7 +217,13 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
if (ret < 0)
goto err1;
- ct = nf_ct_tmpl_alloc(par->net, info->zone, GFP_KERNEL);
+ memset(&zone, 0, sizeof(zone));
+ zone.id = info->zone;
+ zone.dir = xt_ct_flags_to_dir(info);
+ if (info->flags & XT_CT_ZONE_MARK)
+ zone.flags |= NF_CT_FLAG_MARK;
+
+ ct = nf_ct_tmpl_alloc(par->net, &zone, GFP_KERNEL);
if (!ct) {
ret = -ENOMEM;
goto err2;
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 8c3190e2fc6a..8c02501a530f 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -144,7 +144,7 @@ tcpmss_mangle_packet(struct sk_buff *skb,
inet_proto_csum_replace2(&tcph->check, skb,
htons(oldmss), htons(newmss),
- 0);
+ false);
return 0;
}
}
@@ -185,18 +185,18 @@ tcpmss_mangle_packet(struct sk_buff *skb,
memmove(opt + TCPOLEN_MSS, opt, len - sizeof(struct tcphdr));
inet_proto_csum_replace2(&tcph->check, skb,
- htons(len), htons(len + TCPOLEN_MSS), 1);
+ htons(len), htons(len + TCPOLEN_MSS), true);
opt[0] = TCPOPT_MSS;
opt[1] = TCPOLEN_MSS;
opt[2] = (newmss & 0xff00) >> 8;
opt[3] = newmss & 0x00ff;
- inet_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), 0);
+ inet_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), false);
oldval = ((__be16 *)tcph)[6];
tcph->doff += TCPOLEN_MSS/4;
inet_proto_csum_replace2(&tcph->check, skb,
- oldval, ((__be16 *)tcph)[6], 0);
+ oldval, ((__be16 *)tcph)[6], false);
return TCPOLEN_MSS;
}
diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c
index 625fa1d636a0..eb92bffff11c 100644
--- a/net/netfilter/xt_TCPOPTSTRIP.c
+++ b/net/netfilter/xt_TCPOPTSTRIP.c
@@ -80,7 +80,7 @@ tcpoptstrip_mangle_packet(struct sk_buff *skb,
n <<= 8;
}
inet_proto_csum_replace2(&tcph->check, skb, htons(o),
- htons(n), 0);
+ htons(n), false);
}
memset(opt + i, TCPOPT_NOP, optl);
}
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index c5d6556dbc5e..fd980aa7715d 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -10,26 +10,15 @@
* modify it under the terms of the GNU General Public License
* version 2 or later, as published by the Free Software Foundation.
*/
-#include <linux/ip.h>
#include <linux/module.h>
-#include <linux/percpu.h>
-#include <linux/route.h>
#include <linux/skbuff.h>
-#include <linux/notifier.h>
-#include <net/checksum.h>
-#include <net/icmp.h>
-#include <net/ip.h>
-#include <net/ipv6.h>
-#include <net/ip6_route.h>
-#include <net/route.h>
+#include <linux/route.h>
#include <linux/netfilter/x_tables.h>
+#include <net/route.h>
+#include <net/netfilter/ipv4/nf_dup_ipv4.h>
+#include <net/netfilter/ipv6/nf_dup_ipv6.h>
#include <linux/netfilter/xt_TEE.h>
-#if IS_ENABLED(CONFIG_NF_CONNTRACK)
-# define WITH_CONNTRACK 1
-# include <net/netfilter/nf_conntrack.h>
-#endif
-
struct xt_tee_priv {
struct notifier_block notifier;
struct xt_tee_tginfo *tginfo;
@@ -38,161 +27,24 @@ struct xt_tee_priv {
static const union nf_inet_addr tee_zero_address;
-static struct net *pick_net(struct sk_buff *skb)
-{
-#ifdef CONFIG_NET_NS
- const struct dst_entry *dst;
-
- if (skb->dev != NULL)
- return dev_net(skb->dev);
- dst = skb_dst(skb);
- if (dst != NULL && dst->dev != NULL)
- return dev_net(dst->dev);
-#endif
- return &init_net;
-}
-
-static bool
-tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info)
-{
- const struct iphdr *iph = ip_hdr(skb);
- struct net *net = pick_net(skb);
- struct rtable *rt;
- struct flowi4 fl4;
-
- memset(&fl4, 0, sizeof(fl4));
- if (info->priv) {
- if (info->priv->oif == -1)
- return false;
- fl4.flowi4_oif = info->priv->oif;
- }
- fl4.daddr = info->gw.ip;
- fl4.flowi4_tos = RT_TOS(iph->tos);
- fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
- fl4.flowi4_flags = FLOWI_FLAG_KNOWN_NH;
- rt = ip_route_output_key(net, &fl4);
- if (IS_ERR(rt))
- return false;
-
- skb_dst_drop(skb);
- skb_dst_set(skb, &rt->dst);
- skb->dev = rt->dst.dev;
- skb->protocol = htons(ETH_P_IP);
- return true;
-}
-
static unsigned int
tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_tee_tginfo *info = par->targinfo;
- struct iphdr *iph;
- if (__this_cpu_read(nf_skb_duplicated))
- return XT_CONTINUE;
- /*
- * Copy the skb, and route the copy. Will later return %XT_CONTINUE for
- * the original skb, which should continue on its way as if nothing has
- * happened. The copy should be independently delivered to the TEE
- * --gateway.
- */
- skb = pskb_copy(skb, GFP_ATOMIC);
- if (skb == NULL)
- return XT_CONTINUE;
-
-#ifdef WITH_CONNTRACK
- /* Avoid counting cloned packets towards the original connection. */
- nf_conntrack_put(skb->nfct);
- skb->nfct = &nf_ct_untracked_get()->ct_general;
- skb->nfctinfo = IP_CT_NEW;
- nf_conntrack_get(skb->nfct);
-#endif
- /*
- * If we are in PREROUTING/INPUT, the checksum must be recalculated
- * since the length could have changed as a result of defragmentation.
- *
- * We also decrease the TTL to mitigate potential TEE loops
- * between two hosts.
- *
- * Set %IP_DF so that the original source is notified of a potentially
- * decreased MTU on the clone route. IPv6 does this too.
- */
- iph = ip_hdr(skb);
- iph->frag_off |= htons(IP_DF);
- if (par->hooknum == NF_INET_PRE_ROUTING ||
- par->hooknum == NF_INET_LOCAL_IN)
- --iph->ttl;
- ip_send_check(iph);
+ /*
+ * info->priv may be NULL: the tee_tg_route4() code this replaces
+ * tested info->priv before reading ->oif and otherwise left the
+ * output interface at 0. Keep that behaviour rather than
+ * dereferencing a NULL pointer.
+ * NOTE(review): nf_dup_ipv4() is defined elsewhere; confirm it treats
+ * an oif of 0 as "no output interface".
+ */
+ int oif = info->priv ? info->priv->oif : 0;
+
+ /* Hand a copy of the packet to the gateway; the original continues. */
+ nf_dup_ipv4(skb, par->hooknum, &info->gw.in, oif);
- if (tee_tg_route4(skb, info)) {
- __this_cpu_write(nf_skb_duplicated, true);
- ip_local_out(skb);
- __this_cpu_write(nf_skb_duplicated, false);
- } else {
- kfree_skb(skb);
- }
return XT_CONTINUE;
}
-#if IS_ENABLED(CONFIG_IPV6)
-static bool
-tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info)
-{
- const struct ipv6hdr *iph = ipv6_hdr(skb);
- struct net *net = pick_net(skb);
- struct dst_entry *dst;
- struct flowi6 fl6;
-
- memset(&fl6, 0, sizeof(fl6));
- if (info->priv) {
- if (info->priv->oif == -1)
- return false;
- fl6.flowi6_oif = info->priv->oif;
- }
- fl6.daddr = info->gw.in6;
- fl6.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) |
- (iph->flow_lbl[1] << 8) | iph->flow_lbl[2];
- fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
- dst = ip6_route_output(net, NULL, &fl6);
- if (dst->error) {
- dst_release(dst);
- return false;
- }
- skb_dst_drop(skb);
- skb_dst_set(skb, dst);
- skb->dev = dst->dev;
- skb->protocol = htons(ETH_P_IPV6);
- return true;
-}
-
+#if IS_ENABLED(CONFIG_NF_DUP_IPV6)
static unsigned int
tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_tee_tginfo *info = par->targinfo;
- if (__this_cpu_read(nf_skb_duplicated))
- return XT_CONTINUE;
- skb = pskb_copy(skb, GFP_ATOMIC);
- if (skb == NULL)
- return XT_CONTINUE;
+ /*
+ * info->priv may be NULL: the tee_tg_route6() code this replaces
+ * tested info->priv before reading ->oif and otherwise left the
+ * output interface at 0. Keep that behaviour rather than
+ * dereferencing a NULL pointer.
+ * NOTE(review): nf_dup_ipv6() is defined elsewhere; confirm it treats
+ * an oif of 0 as "no output interface".
+ */
+ int oif = info->priv ? info->priv->oif : 0;
+
+ /* Hand a copy of the packet to the gateway; the original continues. */
+ nf_dup_ipv6(skb, par->hooknum, &info->gw.in6, oif);
-#ifdef WITH_CONNTRACK
- nf_conntrack_put(skb->nfct);
- skb->nfct = &nf_ct_untracked_get()->ct_general;
- skb->nfctinfo = IP_CT_NEW;
- nf_conntrack_get(skb->nfct);
-#endif
- if (par->hooknum == NF_INET_PRE_ROUTING ||
- par->hooknum == NF_INET_LOCAL_IN) {
- struct ipv6hdr *iph = ipv6_hdr(skb);
- --iph->hop_limit;
- }
- if (tee_tg_route6(skb, info)) {
- __this_cpu_write(nf_skb_duplicated, true);
- ip6_local_out(skb);
- __this_cpu_write(nf_skb_duplicated, false);
- } else {
- kfree_skb(skb);
- }
return XT_CONTINUE;
}
#endif
@@ -277,7 +129,7 @@ static struct xt_target tee_tg_reg[] __read_mostly = {
.destroy = tee_tg_destroy,
.me = THIS_MODULE,
},
-#if IS_ENABLED(CONFIG_IPV6)
+#if IS_ENABLED(CONFIG_NF_DUP_IPV6)
{
.name = "TEE",
.revision = 1,
diff --git a/net/netfilter/xt_connlabel.c b/net/netfilter/xt_connlabel.c
index 9f8719df2001..bb9cbeb18868 100644
--- a/net/netfilter/xt_connlabel.c
+++ b/net/netfilter/xt_connlabel.c
@@ -42,10 +42,6 @@ static int connlabel_mt_check(const struct xt_mtchk_param *par)
XT_CONNLABEL_OP_SET;
struct xt_connlabel_mtinfo *info = par->matchinfo;
int ret;
- size_t words;
-
- if (info->bit > XT_CONNLABEL_MAXBIT)
- return -ERANGE;
if (info->options & ~options) {
pr_err("Unknown options in mask %x\n", info->options);
@@ -59,19 +55,15 @@ static int connlabel_mt_check(const struct xt_mtchk_param *par)
return ret;
}
- par->net->ct.labels_used++;
- words = BITS_TO_LONGS(info->bit+1);
- if (words > par->net->ct.label_words)
- par->net->ct.label_words = words;
-
+ ret = nf_connlabels_get(par->net, info->bit + 1);
+ if (ret < 0)
+ nf_ct_l3proto_module_put(par->family);
return ret;
}
static void connlabel_mt_destroy(const struct xt_mtdtor_param *par)
{
- par->net->ct.labels_used--;
- if (par->net->ct.labels_used == 0)
- par->net->ct.label_words = 0;
+ nf_connlabels_put(par->net);
nf_ct_l3proto_module_put(par->family);
}
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 29ba6218a820..075d89d94d28 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -134,7 +134,7 @@ static bool add_hlist(struct hlist_head *head,
static unsigned int check_hlist(struct net *net,
struct hlist_head *head,
const struct nf_conntrack_tuple *tuple,
- u16 zone,
+ const struct nf_conntrack_zone *zone,
bool *addit)
{
const struct nf_conntrack_tuple_hash *found;
@@ -201,7 +201,7 @@ static unsigned int
count_tree(struct net *net, struct rb_root *root,
const struct nf_conntrack_tuple *tuple,
const union nf_inet_addr *addr, const union nf_inet_addr *mask,
- u8 family, u16 zone)
+ u8 family, const struct nf_conntrack_zone *zone)
{
struct xt_connlimit_rb *gc_nodes[CONNLIMIT_GC_MAX_NODES];
struct rb_node **rbnode, *parent;
@@ -290,7 +290,8 @@ static int count_them(struct net *net,
const struct nf_conntrack_tuple *tuple,
const union nf_inet_addr *addr,
const union nf_inet_addr *mask,
- u_int8_t family, u16 zone)
+ u_int8_t family,
+ const struct nf_conntrack_zone *zone)
{
struct rb_root *root;
int count;
@@ -321,10 +322,10 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
union nf_inet_addr addr;
struct nf_conntrack_tuple tuple;
const struct nf_conntrack_tuple *tuple_ptr = &tuple;
+ const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
enum ip_conntrack_info ctinfo;
const struct nf_conn *ct;
unsigned int connections;
- u16 zone = NF_CT_DEFAULT_ZONE;
ct = nf_ct_get(skb, &ctinfo);
if (ct != NULL) {
diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
index 8c646ed9c921..3048a7e3a90a 100644
--- a/net/netfilter/xt_nfacct.c
+++ b/net/netfilter/xt_nfacct.c
@@ -37,7 +37,7 @@ nfacct_mt_checkentry(const struct xt_mtchk_param *par)
struct xt_nfacct_match_info *info = par->matchinfo;
struct nf_acct *nfacct;
- nfacct = nfnl_acct_find_get(info->name);
+ nfacct = nfnl_acct_find_get(par->net, info->name);
if (nfacct == NULL) {
pr_info("xt_nfacct: accounting object with name `%s' "
"does not exists\n", info->name);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 67d210477863..a774985489e2 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2401,7 +2401,7 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
* sendmsg(), but that's what we've got...
*/
if (netlink_tx_is_mmaped(sk) &&
- msg->msg_iter.type == ITER_IOVEC &&
+ iter_is_iovec(&msg->msg_iter) &&
msg->msg_iter.nr_segs == 1 &&
msg->msg_iter.iov->iov_base == NULL) {
err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group,
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 95af2d24d5be..943889b87a34 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -351,6 +351,20 @@ int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, __u8 *payload)
}
EXPORT_SYMBOL(nci_prop_cmd);
+int nci_core_reset(struct nci_dev *ndev)
+{
+ return __nci_request(ndev, nci_reset_req, 0,
+ msecs_to_jiffies(NCI_RESET_TIMEOUT));
+}
+EXPORT_SYMBOL(nci_core_reset);
+
+int nci_core_init(struct nci_dev *ndev)
+{
+ return __nci_request(ndev, nci_init_req, 0,
+ msecs_to_jiffies(NCI_INIT_TIMEOUT));
+}
+EXPORT_SYMBOL(nci_core_init);
+
static int nci_open_device(struct nci_dev *ndev)
{
int rc = 0;
@@ -388,6 +402,10 @@ static int nci_open_device(struct nci_dev *ndev)
msecs_to_jiffies(NCI_INIT_TIMEOUT));
}
+ if (ndev->ops->post_setup) {
+ rc = ndev->ops->post_setup(ndev);
+ }
+
if (!rc) {
rc = __nci_request(ndev, nci_init_complete_req, 0,
msecs_to_jiffies(NCI_INIT_TIMEOUT));
diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c
index af002df640c7..609f92283d1b 100644
--- a/net/nfc/nci/hci.c
+++ b/net/nfc/nci/hci.c
@@ -233,7 +233,7 @@ int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd,
r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data,
msecs_to_jiffies(NCI_DATA_TIMEOUT));
- if (r == NCI_STATUS_OK)
+ if (r == NCI_STATUS_OK && skb)
*skb = conn_info->rx_skb;
return r;
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index f85f37ed19b2..853172c27f68 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -63,6 +63,8 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = {
[NFC_ATTR_FIRMWARE_NAME] = { .type = NLA_STRING,
.len = NFC_FIRMWARE_NAME_MAXSIZE },
[NFC_ATTR_SE_APDU] = { .type = NLA_BINARY },
+ [NFC_ATTR_VENDOR_DATA] = { .type = NLA_BINARY },
+
};
static const struct nla_policy nfc_sdp_genl_policy[NFC_SDP_ATTR_MAX + 1] = {
@@ -1503,7 +1505,7 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb,
u32 dev_idx, vid, subcmd;
u8 *data;
size_t data_len;
- int i;
+ int i, err;
if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
!info->attrs[NFC_ATTR_VENDOR_ID] ||
@@ -1518,12 +1520,13 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb,
if (!dev || !dev->vendor_cmds || !dev->n_vendor_cmds)
return -ENODEV;
- data = nla_data(info->attrs[NFC_ATTR_VENDOR_DATA]);
- if (data) {
+ if (info->attrs[NFC_ATTR_VENDOR_DATA]) {
+ data = nla_data(info->attrs[NFC_ATTR_VENDOR_DATA]);
data_len = nla_len(info->attrs[NFC_ATTR_VENDOR_DATA]);
if (data_len == 0)
return -EINVAL;
} else {
+ data = NULL;
data_len = 0;
}
@@ -1533,12 +1536,92 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb,
if (cmd->vendor_id != vid || cmd->subcmd != subcmd)
continue;
- return cmd->doit(dev, data, data_len);
+ dev->cur_cmd_info = info;
+ err = cmd->doit(dev, data, data_len);
+ dev->cur_cmd_info = NULL;
+ return err;
}
return -EOPNOTSUPP;
}
+/* message building helper */
+static inline void *nfc_hdr_put(struct sk_buff *skb, u32 portid, u32 seq,
+ int flags, u8 cmd)
+{
+ /* since there is no private header just add the generic one */
+ return genlmsg_put(skb, portid, seq, &nfc_genl_family, flags, cmd);
+}
+
+static struct sk_buff *
+__nfc_alloc_vendor_cmd_skb(struct nfc_dev *dev, int approxlen,
+ u32 portid, u32 seq,
+ enum nfc_attrs attr,
+ u32 oui, u32 subcmd, gfp_t gfp)
+{
+ struct sk_buff *skb;
+ void *hdr;
+
+ skb = nlmsg_new(approxlen + 100, gfp);
+ if (!skb)
+ return NULL;
+
+ hdr = nfc_hdr_put(skb, portid, seq, 0, NFC_CMD_VENDOR);
+ if (!hdr) {
+ kfree_skb(skb);
+ return NULL;
+ }
+
+ if (nla_put_u32(skb, NFC_ATTR_DEVICE_INDEX, dev->idx))
+ goto nla_put_failure;
+ if (nla_put_u32(skb, NFC_ATTR_VENDOR_ID, oui))
+ goto nla_put_failure;
+ if (nla_put_u32(skb, NFC_ATTR_VENDOR_SUBCMD, subcmd))
+ goto nla_put_failure;
+
+ ((void **)skb->cb)[0] = dev;
+ ((void **)skb->cb)[1] = hdr;
+
+ return skb;
+
+nla_put_failure:
+ kfree_skb(skb);
+ return NULL;
+}
+
+struct sk_buff *__nfc_alloc_vendor_cmd_reply_skb(struct nfc_dev *dev,
+ enum nfc_attrs attr,
+ u32 oui, u32 subcmd,
+ int approxlen)
+{
+ if (WARN_ON(!dev->cur_cmd_info))
+ return NULL;
+
+ return __nfc_alloc_vendor_cmd_skb(dev, approxlen,
+ dev->cur_cmd_info->snd_portid,
+ dev->cur_cmd_info->snd_seq, attr,
+ oui, subcmd, GFP_KERNEL);
+}
+EXPORT_SYMBOL(__nfc_alloc_vendor_cmd_reply_skb);
+
+int nfc_vendor_cmd_reply(struct sk_buff *skb)
+{
+ struct nfc_dev *dev = ((void **)skb->cb)[0];
+ void *hdr = ((void **)skb->cb)[1];
+
+ /* clear CB data for netlink core to own from now on */
+ memset(skb->cb, 0, sizeof(skb->cb));
+
+ if (WARN_ON(!dev->cur_cmd_info)) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ genlmsg_end(skb, hdr);
+ return genlmsg_reply(skb, dev->cur_cmd_info);
+}
+EXPORT_SYMBOL(nfc_vendor_cmd_reply);
+
static const struct genl_ops nfc_genl_ops[] = {
{
.cmd = NFC_CMD_GET_DEVICE,
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index 422dc0567de9..af7cdef42066 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -31,6 +31,17 @@ config OPENVSWITCH
If unsure, say N.
+config OPENVSWITCH_CONNTRACK
+ bool "Open vSwitch conntrack action support"
+ depends on OPENVSWITCH
+ depends on NF_CONNTRACK
+ default OPENVSWITCH
+ ---help---
+ If you say Y here, then Open vSwitch module will be able to pass
+ packets through conntrack.
+
+ Say N to exclude this support and reduce the binary size.
+
config OPENVSWITCH_GRE
tristate "Open vSwitch GRE tunneling support"
depends on OPENVSWITCH
@@ -59,7 +70,7 @@ config OPENVSWITCH_VXLAN
config OPENVSWITCH_GENEVE
tristate "Open vSwitch Geneve tunneling support"
depends on OPENVSWITCH
- depends on GENEVE_CORE
+ depends on GENEVE
default OPENVSWITCH
---help---
If you say Y here, then the Open vSwitch will be able create geneve vport.
diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
index 6e1701de04d8..5b5913b06f54 100644
--- a/net/openvswitch/Makefile
+++ b/net/openvswitch/Makefile
@@ -15,6 +15,8 @@ openvswitch-y := \
vport-internal_dev.o \
vport-netdev.o
+openvswitch-$(CONFIG_OPENVSWITCH_CONNTRACK) += conntrack.o
+
obj-$(CONFIG_OPENVSWITCH_VXLAN)+= vport-vxlan.o
obj-$(CONFIG_OPENVSWITCH_GENEVE)+= vport-geneve.o
obj-$(CONFIG_OPENVSWITCH_GRE) += vport-gre.o
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 14da52ddd327..4487543806bb 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -22,6 +22,7 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/openvswitch.h>
+#include <linux/netfilter_ipv6.h>
#include <linux/sctp.h>
#include <linux/tcp.h>
#include <linux/udp.h>
@@ -29,8 +30,10 @@
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
+#include <net/dst.h>
#include <net/ip.h>
#include <net/ipv6.h>
+#include <net/ip6_fib.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/mpls.h>
@@ -38,6 +41,7 @@
#include "datapath.h"
#include "flow.h"
+#include "conntrack.h"
#include "vport.h"
static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
@@ -52,6 +56,20 @@ struct deferred_action {
struct sw_flow_key pkt_key;
};
+#define MAX_L2_LEN (VLAN_ETH_HLEN + 3 * MPLS_HLEN)
+struct ovs_frag_data {
+ unsigned long dst;
+ struct vport *vport;
+ struct ovs_skb_cb cb;
+ __be16 inner_protocol;
+ __u16 vlan_tci;
+ __be16 vlan_proto;
+ unsigned int l2_len;
+ u8 l2_data[MAX_L2_LEN];
+};
+
+static DEFINE_PER_CPU(struct ovs_frag_data, ovs_frag_data_storage);
+
#define DEFERRED_ACTION_FIFO_SIZE 10
struct action_fifo {
int head;
@@ -185,10 +203,6 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
return 0;
}
-/* 'KEY' must not have any bits set outside of the 'MASK' */
-#define MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK)))
-#define SET_MASKED(OLD, KEY, MASK) ((OLD) = MASKED(OLD, KEY, MASK))
-
static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
const __be32 *mpls_lse, const __be32 *mask)
{
@@ -201,7 +215,7 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
return err;
stack = (__be32 *)skb_mpls_header(skb);
- lse = MASKED(*stack, *mpls_lse, *mask);
+ lse = OVS_MASKED(*stack, *mpls_lse, *mask);
if (skb->ip_summed == CHECKSUM_COMPLETE) {
__be32 diff[] = { ~(*stack), lse };
@@ -244,9 +258,9 @@ static void ether_addr_copy_masked(u8 *dst_, const u8 *src_, const u8 *mask_)
const u16 *src = (const u16 *)src_;
const u16 *mask = (const u16 *)mask_;
- SET_MASKED(dst[0], src[0], mask[0]);
- SET_MASKED(dst[1], src[1], mask[1]);
- SET_MASKED(dst[2], src[2], mask[2]);
+ OVS_SET_MASKED(dst[0], src[0], mask[0]);
+ OVS_SET_MASKED(dst[1], src[1], mask[1]);
+ OVS_SET_MASKED(dst[2], src[2], mask[2]);
}
static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key,
@@ -284,14 +298,14 @@ static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh,
if (nh->protocol == IPPROTO_TCP) {
if (likely(transport_len >= sizeof(struct tcphdr)))
inet_proto_csum_replace4(&tcp_hdr(skb)->check, skb,
- addr, new_addr, 1);
+ addr, new_addr, true);
} else if (nh->protocol == IPPROTO_UDP) {
if (likely(transport_len >= sizeof(struct udphdr))) {
struct udphdr *uh = udp_hdr(skb);
if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
inet_proto_csum_replace4(&uh->check, skb,
- addr, new_addr, 1);
+ addr, new_addr, true);
if (!uh->check)
uh->check = CSUM_MANGLED_0;
}
@@ -316,14 +330,14 @@ static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto,
if (l4_proto == NEXTHDR_TCP) {
if (likely(transport_len >= sizeof(struct tcphdr)))
inet_proto_csum_replace16(&tcp_hdr(skb)->check, skb,
- addr, new_addr, 1);
+ addr, new_addr, true);
} else if (l4_proto == NEXTHDR_UDP) {
if (likely(transport_len >= sizeof(struct udphdr))) {
struct udphdr *uh = udp_hdr(skb);
if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
inet_proto_csum_replace16(&uh->check, skb,
- addr, new_addr, 1);
+ addr, new_addr, true);
if (!uh->check)
uh->check = CSUM_MANGLED_0;
}
@@ -331,17 +345,17 @@ static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto,
} else if (l4_proto == NEXTHDR_ICMP) {
if (likely(transport_len >= sizeof(struct icmp6hdr)))
inet_proto_csum_replace16(&icmp6_hdr(skb)->icmp6_cksum,
- skb, addr, new_addr, 1);
+ skb, addr, new_addr, true);
}
}
static void mask_ipv6_addr(const __be32 old[4], const __be32 addr[4],
const __be32 mask[4], __be32 masked[4])
{
- masked[0] = MASKED(old[0], addr[0], mask[0]);
- masked[1] = MASKED(old[1], addr[1], mask[1]);
- masked[2] = MASKED(old[2], addr[2], mask[2]);
- masked[3] = MASKED(old[3], addr[3], mask[3]);
+ masked[0] = OVS_MASKED(old[0], addr[0], mask[0]);
+ masked[1] = OVS_MASKED(old[1], addr[1], mask[1]);
+ masked[2] = OVS_MASKED(old[2], addr[2], mask[2]);
+ masked[3] = OVS_MASKED(old[3], addr[3], mask[3]);
}
static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
@@ -358,15 +372,15 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask)
{
/* Bits 21-24 are always unmasked, so this retains their values. */
- SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16));
- SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8));
- SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask);
+ OVS_SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16));
+ OVS_SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8));
+ OVS_SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask);
}
static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl,
u8 mask)
{
- new_ttl = MASKED(nh->ttl, new_ttl, mask);
+ new_ttl = OVS_MASKED(nh->ttl, new_ttl, mask);
csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8));
nh->ttl = new_ttl;
@@ -392,7 +406,7 @@ static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *flow_key,
* makes sense to check if the value actually changed.
*/
if (mask->ipv4_src) {
- new_addr = MASKED(nh->saddr, key->ipv4_src, mask->ipv4_src);
+ new_addr = OVS_MASKED(nh->saddr, key->ipv4_src, mask->ipv4_src);
if (unlikely(new_addr != nh->saddr)) {
set_ip_addr(skb, nh, &nh->saddr, new_addr);
@@ -400,7 +414,7 @@ static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *flow_key,
}
}
if (mask->ipv4_dst) {
- new_addr = MASKED(nh->daddr, key->ipv4_dst, mask->ipv4_dst);
+ new_addr = OVS_MASKED(nh->daddr, key->ipv4_dst, mask->ipv4_dst);
if (unlikely(new_addr != nh->daddr)) {
set_ip_addr(skb, nh, &nh->daddr, new_addr);
@@ -488,7 +502,8 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
*(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
}
if (mask->ipv6_hlimit) {
- SET_MASKED(nh->hop_limit, key->ipv6_hlimit, mask->ipv6_hlimit);
+ OVS_SET_MASKED(nh->hop_limit, key->ipv6_hlimit,
+ mask->ipv6_hlimit);
flow_key->ip.ttl = nh->hop_limit;
}
return 0;
@@ -498,7 +513,7 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
static void set_tp_port(struct sk_buff *skb, __be16 *port,
__be16 new_port, __sum16 *check)
{
- inet_proto_csum_replace2(check, skb, *port, new_port, 0);
+ inet_proto_csum_replace2(check, skb, *port, new_port, false);
*port = new_port;
}
@@ -517,8 +532,8 @@ static int set_udp(struct sk_buff *skb, struct sw_flow_key *flow_key,
uh = udp_hdr(skb);
/* Either of the masks is non-zero, so do not bother checking them. */
- src = MASKED(uh->source, key->udp_src, mask->udp_src);
- dst = MASKED(uh->dest, key->udp_dst, mask->udp_dst);
+ src = OVS_MASKED(uh->source, key->udp_src, mask->udp_src);
+ dst = OVS_MASKED(uh->dest, key->udp_dst, mask->udp_dst);
if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) {
if (likely(src != uh->source)) {
@@ -558,12 +573,12 @@ static int set_tcp(struct sk_buff *skb, struct sw_flow_key *flow_key,
return err;
th = tcp_hdr(skb);
- src = MASKED(th->source, key->tcp_src, mask->tcp_src);
+ src = OVS_MASKED(th->source, key->tcp_src, mask->tcp_src);
if (likely(src != th->source)) {
set_tp_port(skb, &th->source, src, &th->check);
flow_key->tp.src = src;
}
- dst = MASKED(th->dest, key->tcp_dst, mask->tcp_dst);
+ dst = OVS_MASKED(th->dest, key->tcp_dst, mask->tcp_dst);
if (likely(dst != th->dest)) {
set_tp_port(skb, &th->dest, dst, &th->check);
flow_key->tp.dst = dst;
@@ -590,8 +605,8 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
old_csum = sh->checksum;
old_correct_csum = sctp_compute_cksum(skb, sctphoff);
- sh->source = MASKED(sh->source, key->sctp_src, mask->sctp_src);
- sh->dest = MASKED(sh->dest, key->sctp_dst, mask->sctp_dst);
+ sh->source = OVS_MASKED(sh->source, key->sctp_src, mask->sctp_src);
+ sh->dest = OVS_MASKED(sh->dest, key->sctp_dst, mask->sctp_dst);
new_csum = sctp_compute_cksum(skb, sctphoff);
@@ -605,14 +620,145 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
return 0;
}
-static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
+/* Output callback handed to the IP fragmentation code by ovs_fragment().
+ *
+ * Restores onto each fragment the state that prepare_frag() stashed in the
+ * per-CPU ovs_frag_data_storage (dst, OVS control block, inner protocol,
+ * VLAN fields and the L2 header bytes), then sends it out the saved vport.
+ *
+ * Returns 0 after handing the skb to ovs_vport_send(), or -ENOMEM (after
+ * freeing the skb) when headroom for the L2 header cannot be obtained.
+ */
+static int ovs_vport_output(struct sock *sock, struct sk_buff *skb)
+{
+ struct ovs_frag_data *data = this_cpu_ptr(&ovs_frag_data_storage);
+ struct vport *vport = data->vport;
+
+ /* Make sure there is writable headroom for the saved L2 header. */
+ if (skb_cow_head(skb, data->l2_len) < 0) {
+ kfree_skb(skb);
+ return -ENOMEM;
+ }
+
+ /* Put back the per-packet metadata captured by prepare_frag(). */
+ __skb_dst_copy(skb, data->dst);
+ *OVS_CB(skb) = data->cb;
+ skb->inner_protocol = data->inner_protocol;
+ skb->vlan_tci = data->vlan_tci;
+ skb->vlan_proto = data->vlan_proto;
+
+ /* Reconstruct the MAC header. */
+ skb_push(skb, data->l2_len);
+ memcpy(skb->data, &data->l2_data, data->l2_len);
+ ovs_skb_postpush_rcsum(skb, skb->data, data->l2_len);
+ skb_reset_mac_header(skb);
+
+ ovs_vport_send(vport, skb);
+ return 0;
+}
+
+/* dst_ops .mtu callback: report the MTU of the device attached to 'dst'. */
+static unsigned int ovs_dst_get_mtu(const struct dst_entry *dst)
+{
+	const struct net_device *dev = dst->dev;
+
+	return dev->mtu;
+}
+
+/* Minimal dst_ops used for the temporary on-stack dst entries that
+ * ovs_fragment() attaches to an skb; only the MTU lookup is provided.
+ */
+static struct dst_ops ovs_dst_ops = {
+ .family = AF_UNSPEC,
+ .mtu = ovs_dst_get_mtu,
+};
+
+/* prepare_frag() is called once per (larger-than-MTU) frame; its inverse is
+ * ovs_vport_output(), which is called once per fragmented packet.
+ *
+ * Saves into the per-CPU ovs_frag_data_storage everything that
+ * ovs_vport_output() later restores: the dst reference word, the output
+ * vport, the OVS control block, inner protocol, VLAN fields and the first
+ * skb_network_offset() bytes of the packet (the L2 header).  It then clears
+ * the IP control block and pulls the L2 header so that skb->data points at
+ * the network header.
+ *
+ * The caller (ovs_fragment()) rejects packets whose network offset exceeds
+ * MAX_L2_LEN before calling here; presumably l2_data is sized to
+ * MAX_L2_LEN -- confirm against struct ovs_frag_data.
+ */
+static void prepare_frag(struct vport *vport, struct sk_buff *skb)
+{
+ unsigned int hlen = skb_network_offset(skb);
+ struct ovs_frag_data *data;
+
+ data = this_cpu_ptr(&ovs_frag_data_storage);
+ data->dst = skb->_skb_refdst;
+ data->vport = vport;
+ data->cb = *OVS_CB(skb);
+ data->inner_protocol = skb->inner_protocol;
+ data->vlan_tci = skb->vlan_tci;
+ data->vlan_proto = skb->vlan_proto;
+ data->l2_len = hlen;
+ memcpy(&data->l2_data, skb->data, hlen);
+
+ memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
+ skb_pull(skb, hlen);
+}
+
+/* Fragment an over-sized 'skb' and transmit the pieces through 'vport'.
+ *
+ * @vport:     output port; its device backs the temporary dst entry whose
+ *             MTU is reported through ovs_dst_ops.
+ * @skb:       packet to fragment.  Ownership passes to this function.
+ * @mru:       maximum fragment size, stored as frag_max_size in the IP
+ *             control block before fragmenting.
+ * @ethertype: network-layer protocol in network byte order; only
+ *             ETH_P_IP and ETH_P_IPV6 can be fragmented.
+ *
+ * do_output() never touches 'skb' again after calling here, so every
+ * failure path must free it.  All such paths funnel through the 'err'
+ * label; previously the "L2 header too long" case returned without
+ * freeing and leaked the packet.
+ */
+static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru,
+			 __be16 ethertype)
+{
+	/* prepare_frag() copies the L2 header into fixed per-CPU storage. */
+	if (skb_network_offset(skb) > MAX_L2_LEN) {
+		OVS_NLERR(1, "L2 header too long to fragment");
+		goto err;
+	}
+
+	if (ethertype == htons(ETH_P_IP)) {
+		struct dst_entry ovs_dst;
+		unsigned long orig_dst;
+
+		prepare_frag(vport, skb);
+		dst_init(&ovs_dst, &ovs_dst_ops, NULL, 1,
+			 DST_OBSOLETE_NONE, DST_NOCOUNT);
+		ovs_dst.dev = vport->dev;
+
+		/* Swap in the on-stack dst without taking a reference; the
+		 * original reference is dropped once fragmenting is done.
+		 */
+		orig_dst = skb->_skb_refdst;
+		skb_dst_set_noref(skb, &ovs_dst);
+		IPCB(skb)->frag_max_size = mru;
+
+		ip_do_fragment(skb->sk, skb, ovs_vport_output);
+		refdst_drop(orig_dst);
+	} else if (ethertype == htons(ETH_P_IPV6)) {
+		const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
+		unsigned long orig_dst;
+		struct rt6_info ovs_rt;
+
+		/* No IPv6 fragmentation hook registered: drop the packet. */
+		if (!v6ops)
+			goto err;
+
+		prepare_frag(vport, skb);
+		memset(&ovs_rt, 0, sizeof(ovs_rt));
+		dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1,
+			 DST_OBSOLETE_NONE, DST_NOCOUNT);
+		ovs_rt.dst.dev = vport->dev;
+
+		orig_dst = skb->_skb_refdst;
+		skb_dst_set_noref(skb, &ovs_rt.dst);
+		IP6CB(skb)->frag_max_size = mru;
+
+		v6ops->fragment(skb->sk, skb, ovs_vport_output);
+		refdst_drop(orig_dst);
+	} else {
+		WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.",
+			  ovs_vport_name(vport), ntohs(ethertype), mru,
+			  vport->dev->mtu);
+		goto err;
+	}
+
+	return;
+err:
+	kfree_skb(skb);
+}
+
+/* Send 'skb' out of datapath port 'out_port', consuming the skb.
+ *
+ * If the packet carries no MRU in its OVS control block, or is no longer
+ * than mru + ETH_HLEN, it is sent as-is.  Otherwise, when the MRU fits the
+ * output device MTU, it is re-fragmented via ovs_fragment(); the ethertype
+ * comes from 'key', or from the skb itself when the key is not valid.
+ * Packets for an unknown port, or whose MRU exceeds the device MTU, are
+ * dropped.
+ */
+static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
+ struct sw_flow_key *key)
{
struct vport *vport = ovs_vport_rcu(dp, out_port);
- if (likely(vport))
- ovs_vport_send(vport, skb);
- else
+ if (likely(vport)) {
+ u16 mru = OVS_CB(skb)->mru;
+
+ if (likely(!mru || (skb->len <= mru + ETH_HLEN))) {
+ ovs_vport_send(vport, skb);
+ } else if (mru <= vport->dev->mtu) {
+ __be16 ethertype = key->eth.type;
+
+ if (!is_flow_key_valid(key)) {
+ if (eth_p_mpls(skb->protocol))
+ ethertype = skb->inner_protocol;
+ else
+ ethertype = vlan_get_protocol(skb);
+ }
+
+ ovs_fragment(vport, skb, mru, ethertype);
+ } else {
+ kfree_skb(skb);
+ }
+ } else {
kfree_skb(skb);
+ }
}
static int output_userspace(struct datapath *dp, struct sk_buff *skb,
@@ -626,6 +772,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
memset(&upcall, 0, sizeof(upcall));
upcall.cmd = OVS_PACKET_CMD_ACTION;
+ upcall.mru = OVS_CB(skb)->mru;
for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
a = nla_next(a, &rem)) {
@@ -770,12 +917,13 @@ static int execute_masked_set_action(struct sk_buff *skb,
switch (nla_type(a)) {
case OVS_KEY_ATTR_PRIORITY:
- SET_MASKED(skb->priority, nla_get_u32(a), *get_mask(a, u32 *));
+ OVS_SET_MASKED(skb->priority, nla_get_u32(a),
+ *get_mask(a, u32 *));
flow_key->phy.priority = skb->priority;
break;
case OVS_KEY_ATTR_SKB_MARK:
- SET_MASKED(skb->mark, nla_get_u32(a), *get_mask(a, u32 *));
+ OVS_SET_MASKED(skb->mark, nla_get_u32(a), *get_mask(a, u32 *));
flow_key->phy.skb_mark = skb->mark;
break;
@@ -818,6 +966,13 @@ static int execute_masked_set_action(struct sk_buff *skb,
err = set_mpls(skb, flow_key, nla_data(a), get_mask(a,
__be32 *));
break;
+
+ case OVS_KEY_ATTR_CT_STATE:
+ case OVS_KEY_ATTR_CT_ZONE:
+ case OVS_KEY_ATTR_CT_MARK:
+ case OVS_KEY_ATTR_CT_LABEL:
+ err = -EINVAL;
+ break;
}
return err;
@@ -887,7 +1042,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
struct sk_buff *out_skb = skb_clone(skb, GFP_ATOMIC);
if (out_skb)
- do_output(dp, out_skb, prev_port);
+ do_output(dp, out_skb, prev_port, key);
prev_port = -1;
}
@@ -944,6 +1099,15 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
case OVS_ACTION_ATTR_SAMPLE:
err = sample(dp, skb, key, a, attr, len);
break;
+
+ case OVS_ACTION_ATTR_CT:
+ err = ovs_ct_execute(ovs_dp_get_net(dp), skb, key,
+ nla_data(a));
+
+ /* Hide stolen IP fragments from user space. */
+ if (err == -EINPROGRESS)
+ return 0;
+ break;
}
if (unlikely(err)) {
@@ -953,7 +1117,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
}
if (prev_port != -1)
- do_output(dp, skb, prev_port);
+ do_output(dp, skb, prev_port, key);
else
consume_skb(skb);
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
new file mode 100644
index 000000000000..e8e524ad8a01
--- /dev/null
+++ b/net/openvswitch/conntrack.c
@@ -0,0 +1,755 @@
+/*
+ * Copyright (c) 2015 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/openvswitch.h>
+#include <net/ip.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_labels.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+
+#include "datapath.h"
+#include "conntrack.h"
+#include "flow.h"
+#include "flow_netlink.h"
+
+/* Allowed payload length range, in bytes, for one conntrack attribute type;
+ * parse_ct() rejects attributes whose nla_len() falls outside it.
+ */
+struct ovs_ct_len_tbl {
+ size_t maxlen;
+ size_t minlen;
+};
+
+/* Metadata mark for masked write to conntrack mark.  ovs_ct_set_mark()
+ * computes the new mark as value | (old_mark & ~mask).
+ */
+struct md_mark {
+ u32 value;
+ u32 mask;
+};
+
+/* Metadata label for masked write to conntrack label.  Both members are
+ * passed to nf_connlabels_replace() by ovs_ct_set_label(); an all-zero
+ * mask means the action does not touch the label.
+ */
+struct md_label {
+ struct ovs_key_ct_label value;
+ struct ovs_key_ct_label mask;
+};
+
+/* Conntrack action context for execution.  Built by ovs_ct_copy_action()
+ * from the netlink attributes and stored as the payload of an
+ * OVS_ACTION_ATTR_CT action.
+ */
+struct ovs_conntrack_info {
+ struct nf_conntrack_helper *helper; /* set by ovs_ct_add_helper(), else NULL */
+ struct nf_conntrack_zone zone; /* zone from OVS_CT_ATTR_ZONE, or default */
+ struct nf_conn *ct; /* template from nf_ct_tmpl_alloc() */
+ u32 flags; /* OVS_CT_ATTR_FLAGS, e.g. OVS_CT_F_COMMIT */
+ u16 family; /* NFPROTO_* derived by key_to_nfproto() */
+ struct md_mark mark; /* masked mark write; skipped if mask == 0 */
+ struct md_label label; /* masked label write; skipped if mask is zero */
+};
+
+/* Translate the flow key's ethertype into a netfilter protocol family;
+ * anything other than IPv4/IPv6 yields NFPROTO_UNSPEC.
+ */
+static u16 key_to_nfproto(const struct sw_flow_key *key)
+{
+	const __be16 eth_type = key->eth.type;
+
+	if (eth_type == htons(ETH_P_IP))
+		return NFPROTO_IPV4;
+	if (eth_type == htons(ETH_P_IPV6))
+		return NFPROTO_IPV6;
+
+	return NFPROTO_UNSPEC;
+}
+
+/* Convert the conntrack info attached to an skb into OVS_CS_F_* flow state
+ * bits.  The result always has OVS_CS_F_TRACKED set; reply-direction values
+ * additionally get OVS_CS_F_REPLY_DIR.
+ */
+static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo)
+{
+	u8 flags = OVS_CS_F_TRACKED;
+
+	switch (ctinfo) {
+	case IP_CT_ESTABLISHED_REPLY:
+		flags |= OVS_CS_F_REPLY_DIR;
+		/* fall through */
+	case IP_CT_ESTABLISHED:
+		flags |= OVS_CS_F_ESTABLISHED;
+		break;
+	case IP_CT_RELATED_REPLY:
+		flags |= OVS_CS_F_REPLY_DIR;
+		/* fall through */
+	case IP_CT_RELATED:
+		flags |= OVS_CS_F_RELATED;
+		break;
+	case IP_CT_NEW_REPLY:
+		flags |= OVS_CS_F_REPLY_DIR;
+		/* fall through */
+	case IP_CT_NEW:
+		flags |= OVS_CS_F_NEW;
+		break;
+	default:
+		break;
+	}
+
+	return flags;
+}
+
+/* Return the connection mark of 'ct', or 0 when there is no connection or
+ * conntrack marks are compiled out.
+ */
+static u32 ovs_ct_get_mark(const struct nf_conn *ct)
+{
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
+	if (!ct)
+		return 0;
+
+	return ct->mark;
+#else
+	return 0;
+#endif
+}
+
+/* Copy the connection label of 'ct' into 'label' (OVS_CT_LABEL_LEN bytes).
+ * A missing connection or label extension yields an all-zero label; a
+ * shorter kernel label is zero-padded and a longer one is truncated.
+ */
+static void ovs_ct_get_label(const struct nf_conn *ct,
+			     struct ovs_key_ct_label *label)
+{
+	struct nf_conn_labels *cl;
+	size_t avail;
+
+	cl = ct ? nf_ct_labels_find(ct) : NULL;
+	if (!cl) {
+		memset(label, 0, OVS_CT_LABEL_LEN);
+		return;
+	}
+
+	avail = cl->words * sizeof(long);
+	if (avail < OVS_CT_LABEL_LEN)
+		memset(label, 0, OVS_CT_LABEL_LEN);
+	else
+		avail = OVS_CT_LABEL_LEN;
+
+	memcpy(label, cl->bits, avail);
+}
+
+/* Fill the conntrack fields of 'key' from the given state bits and zone,
+ * and from the mark and label of 'ct' ('ct' may be NULL).
+ */
+static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state,
+				const struct nf_conntrack_zone *zone,
+				const struct nf_conn *ct)
+{
+	ovs_ct_get_label(ct, &key->ct.label);
+	key->ct.mark = ovs_ct_get_mark(ct);
+	key->ct.zone = zone->id;
+	key->ct.state = state;
+}
+
+/* Refresh the conntrack fields of 'key' from the connection attached to
+ * 'skb'.  'post_ct' tells whether OVS has already run the packet through
+ * the ct action: in that case a packet with no connection is reported as
+ * tracked-but-invalid rather than untracked.
+ */
+static void ovs_ct_update_key(const struct sk_buff *skb,
+			      struct sw_flow_key *key, bool post_ct)
+{
+	const struct nf_conntrack_zone *zone;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	u8 state;
+
+	if (!ct) {
+		zone = &nf_ct_zone_dflt;
+		state = post_ct ? (OVS_CS_F_TRACKED | OVS_CS_F_INVALID) : 0;
+	} else {
+		zone = nf_ct_zone(ct);
+		state = ovs_ct_get_state(ctinfo);
+		/* A connection with a master is related to that master. */
+		if (ct->master)
+			state |= OVS_CS_F_RELATED;
+	}
+
+	__ovs_ct_update_key(key, state, zone, ct);
+}
+
+/* Initialise the conntrack fields of 'key' from whatever connection is
+ * already attached to 'skb', treating the packet as not yet processed by
+ * the ct action (post_ct == false).
+ */
+void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key)
+{
+ ovs_ct_update_key(skb, key, false);
+}
+
+/* Serialise the conntrack fields of 'key' as OVS_KEY_ATTR_CT_* netlink
+ * attributes in 'skb'.  Zone, mark and label are emitted only when the
+ * matching conntrack feature is compiled in.
+ *
+ * Returns 0 on success or -EMSGSIZE if any attribute does not fit.
+ */
+int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb)
+{
+	if (nla_put_u8(skb, OVS_KEY_ATTR_CT_STATE, key->ct.state) ||
+	    (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
+	     nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, key->ct.zone)) ||
+	    (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
+	     nla_put_u32(skb, OVS_KEY_ATTR_CT_MARK, key->ct.mark)) ||
+	    (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
+	     nla_put(skb, OVS_KEY_ATTR_CT_LABEL, sizeof(key->ct.label),
+		     &key->ct.label)))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+/* Apply a masked write to the mark of the connection attached to 'skb' and
+ * mirror the result into 'key'.  Bits outside 'mask' keep their old value.
+ *
+ * Returns 0 (also when the packet has no connection, which makes this a
+ * no-op), or -ENOTSUPP when conntrack marks are compiled out.
+ */
+static int ovs_ct_set_mark(struct sk_buff *skb, struct sw_flow_key *key,
+			   u32 ct_mark, u32 mask)
+{
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	u32 merged;
+
+	/* An invalid connection has nothing to mark. */
+	if (!ct)
+		return 0;
+
+	merged = ct_mark | (ct->mark & ~mask);
+	if (merged == ct->mark)
+		return 0;
+
+	ct->mark = merged;
+	nf_conntrack_event_cache(IPCT_MARK, ct);
+	key->ct.mark = merged;
+
+	return 0;
+#else
+	return -ENOTSUPP;
+#endif
+}
+
+/* Apply a masked write to the label of the connection attached to 'skb'
+ * and refresh the label stored in 'key'.
+ *
+ * Returns 0 on success or when the packet has no connection (no-op),
+ * -ENOTSUPP when conntrack labels are compiled out, -ENOSPC when the label
+ * extension is missing or too small, or the error from
+ * nf_connlabels_replace().
+ */
+static int ovs_ct_set_label(struct sk_buff *skb, struct sw_flow_key *key,
+			    const struct ovs_key_ct_label *label,
+			    const struct ovs_key_ct_label *mask)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn_labels *labels;
+	struct nf_conn *ct;
+	int ret;
+
+	if (!IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS))
+		return -ENOTSUPP;
+
+	/* An invalid connection has no label to update. */
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct)
+		return 0;
+
+	/* Add the label extension on demand if it is not there yet. */
+	labels = nf_ct_labels_find(ct);
+	if (!labels) {
+		nf_ct_labels_ext_add(ct);
+		labels = nf_ct_labels_find(ct);
+		if (!labels)
+			return -ENOSPC;
+	}
+	if (labels->words * sizeof(long) < OVS_CT_LABEL_LEN)
+		return -ENOSPC;
+
+	ret = nf_connlabels_replace(ct, (u32 *)label, (u32 *)mask,
+				    OVS_CT_LABEL_LEN / sizeof(u32));
+	if (ret)
+		return ret;
+
+	ovs_ct_get_label(ct, &key->ct.label);
+	return 0;
+}
+
+/* Run the conntrack helper (if any) of the connection attached to 'skb'.
+ *
+ * 'skb' should already be pulled to nh_ofs.  'proto' is the NFPROTO_*
+ * family used to locate the transport header.
+ *
+ * Returns the helper's verdict, NF_ACCEPT when there is nothing to do (no
+ * connection, a related reply, no helper, or an IPv6 packet whose
+ * transport header cannot be located), or NF_DROP for a non-IP family.
+ */
+static int ovs_ct_helper(struct sk_buff *skb, u16 proto)
+{
+	const struct nf_conntrack_helper *helper;
+	const struct nf_conn_help *help;
+	enum ip_conntrack_info ctinfo;
+	unsigned int protoff;
+	struct nf_conn *ct;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+		return NF_ACCEPT;
+
+	help = nfct_help(ct);
+	if (!help)
+		return NF_ACCEPT;
+
+	helper = rcu_dereference(help->helper);
+	if (!helper)
+		return NF_ACCEPT;
+
+	switch (proto) {
+	case NFPROTO_IPV4:
+		protoff = ip_hdrlen(skb);
+		break;
+	case NFPROTO_IPV6: {
+		u8 nexthdr = ipv6_hdr(skb)->nexthdr;
+		__be16 frag_off;
+		int ofs;
+
+		/* Keep the result signed: storing it straight into the
+		 * unsigned 'protoff' made the "< 0" failure check always
+		 * false, so a failed header walk was passed to the helper
+		 * as a huge offset.
+		 */
+		ofs = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
+				       &nexthdr, &frag_off);
+		if (ofs < 0 || (frag_off & htons(~0x7)) != 0) {
+			pr_debug("proto header not found\n");
+			return NF_ACCEPT;
+		}
+		protoff = ofs;
+		break;
+	}
+	default:
+		WARN_ONCE(1, "helper invoked on non-IP family!");
+		return NF_DROP;
+	}
+
+	return helper->help(skb, protoff, ct, ctinfo);
+}
+
+/* Pass an IP fragment to the IPv4/IPv6 defragmentation code, using a
+ * defrag user derived from 'zone'.
+ *
+ * On success (return 0) 'skb' holds the reassembled packet, key->ip.frag is
+ * reset to OVS_FRAG_TYPE_NONE, the skb hash is cleared, ignore_df is set
+ * and the saved OVS control block is restored with 'mru' set to the
+ * frag_max_size reported by the defragmentation code.
+ *
+ * Returns the ip_defrag() error for IPv4; for IPv6, -EINPROGRESS when no
+ * reassembled packet is available yet and -EINVAL when the gather call
+ * hands back the same skb; -EPFNOSUPPORT for other ethertypes or when IPv6
+ * defragmentation is compiled out.
+ *
+ * NOTE(review): do_execute_actions() treats only -EINPROGRESS as "skb was
+ * taken over".  Whether ip_defrag() has already freed the skb when it
+ * returns a different error is not visible here -- confirm, since such an
+ * error is propagated to a caller that may free the skb again.
+ */
+static int handle_fragments(struct net *net, struct sw_flow_key *key,
+ u16 zone, struct sk_buff *skb)
+{
+ /* The IP control block shares skb->cb with OVS_CB; keep a copy. */
+ struct ovs_skb_cb ovs_cb = *OVS_CB(skb);
+
+ if (key->eth.type == htons(ETH_P_IP)) {
+ enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
+ int err;
+
+ memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
+ err = ip_defrag(skb, user);
+ if (err)
+ return err;
+
+ ovs_cb.mru = IPCB(skb)->frag_max_size;
+ } else if (key->eth.type == htons(ETH_P_IPV6)) {
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
+ enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
+ struct sk_buff *reasm;
+
+ memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
+ reasm = nf_ct_frag6_gather(skb, user);
+ if (!reasm)
+ return -EINPROGRESS;
+
+ if (skb == reasm)
+ return -EINVAL;
+
+ /* Replace the fragment in 'skb' with the reassembled packet. */
+ key->ip.proto = ipv6_hdr(reasm)->nexthdr;
+ skb_morph(skb, reasm);
+ consume_skb(reasm);
+ ovs_cb.mru = IP6CB(skb)->frag_max_size;
+#else
+ return -EPFNOSUPPORT;
+#endif
+ } else {
+ return -EPFNOSUPPORT;
+ }
+
+ key->ip.frag = OVS_FRAG_TYPE_NONE;
+ skb_clear_hash(skb);
+ skb->ignore_df = 1;
+ *OVS_CB(skb) = ovs_cb;
+
+ return 0;
+}
+
+/* Look up a conntrack expectation matching the packet in 'skb' within
+ * 'zone'.  Returns NULL when no tuple can be extracted from the packet or
+ * no expectation matches.
+ */
+static struct nf_conntrack_expect *
+ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone,
+		   u16 proto, const struct sk_buff *skb)
+{
+	struct nf_conntrack_tuple tuple;
+	bool have_tuple;
+
+	have_tuple = nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto,
+				       &tuple);
+
+	return have_tuple ? __nf_ct_expect_find(net, zone, &tuple) : NULL;
+}
+
+/* Decide whether the connection already attached to 'skb' can stand in for
+ * a fresh conntrack lookup with 'info': it must exist, belong to 'net',
+ * be in the same zone as the template and, when 'info' names a helper, not
+ * carry a different helper.
+ */
+static bool skb_nfct_cached(const struct net *net, const struct sk_buff *skb,
+			    const struct ovs_conntrack_info *info)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn_help *help;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+
+	if (!ct ||
+	    !net_eq(net, read_pnet(&ct->ct_net)) ||
+	    !nf_ct_zone_equal_any(info->ct, nf_ct_zone(ct)))
+		return false;
+
+	if (!info->helper)
+		return true;
+
+	help = nf_ct_ext_find(ct, NF_CT_EXT_HELPER);
+
+	return !(help && rcu_access_pointer(help->helper) != info->helper);
+}
+
+/* Run 'skb' through conntrack (and the connection's helper) unless the
+ * connection already attached to it is still usable for 'info'.
+ *
+ * Returns 0 on success, -ENOENT when nf_conntrack_in() does not accept the
+ * packet, or -EINVAL when the helper rejects it.
+ */
+static int __ovs_ct_lookup(struct net *net, const struct sw_flow_key *key,
+ const struct ovs_conntrack_info *info,
+ struct sk_buff *skb)
+{
+ /* If we are recirculating packets to match on conntrack fields and
+ * committing with a separate conntrack action, then we don't need to
+ * actually run the packet through conntrack twice unless it's for a
+ * different zone.
+ */
+ if (!skb_nfct_cached(net, skb, info)) {
+ struct nf_conn *tmpl = info->ct;
+
+ /* Associate skb with specified zone. */
+ if (tmpl) {
+ /* Drop any previous connection before attaching the template. */
+ if (skb->nfct)
+ nf_conntrack_put(skb->nfct);
+ nf_conntrack_get(&tmpl->ct_general);
+ skb->nfct = &tmpl->ct_general;
+ skb->nfctinfo = IP_CT_NEW;
+ }
+
+ if (nf_conntrack_in(net, info->family, NF_INET_PRE_ROUTING,
+ skb) != NF_ACCEPT)
+ return -ENOENT;
+
+ if (ovs_ct_helper(skb, info->family) != NF_ACCEPT) {
+ WARN_ONCE(1, "helper rejected packet");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+/* Look up the connection for 'skb' and load the result into 'key'.
+ *
+ * A packet matching a pending expectation is reported as tracked, new and
+ * related, with mark and label taken from the expectation's master, and is
+ * not run through conntrack.  Otherwise the packet goes through
+ * __ovs_ct_lookup() and the key is refreshed from the skb.
+ *
+ * Returns 0 on success or the error from __ovs_ct_lookup().
+ */
+static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
+			 const struct ovs_conntrack_info *info,
+			 struct sk_buff *skb)
+{
+	struct nf_conntrack_expect *exp;
+	int err;
+
+	exp = ovs_ct_expect_find(net, &info->zone, info->family, skb);
+	if (exp) {
+		__ovs_ct_update_key(key,
+				    OVS_CS_F_TRACKED | OVS_CS_F_NEW |
+				    OVS_CS_F_RELATED,
+				    &info->zone, exp->master);
+		return 0;
+	}
+
+	err = __ovs_ct_lookup(net, key, info, skb);
+	if (err)
+		return err;
+
+	ovs_ct_update_key(skb, key, true);
+	return 0;
+}
+
+/* Look up the connection for 'skb' and confirm it if it is unconfirmed.
+ *
+ * Nothing is done when 'key' already shows the packet as tracked and not
+ * new in the same zone, since an earlier lookup found a committed
+ * connection.
+ *
+ * Returns 0 on success, the error from __ovs_ct_lookup(), or -EINVAL when
+ * confirmation is refused.
+ */
+static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
+			 const struct ovs_conntrack_info *info,
+			 struct sk_buff *skb)
+{
+	const u8 state = key->ct.state;
+	bool committed;
+	int err;
+
+	committed = (state & OVS_CS_F_TRACKED) && !(state & OVS_CS_F_NEW);
+	if (committed && key->ct.zone == info->zone.id)
+		return 0;
+
+	err = __ovs_ct_lookup(net, key, info, skb);
+	if (err)
+		return err;
+
+	if (nf_conntrack_confirm(skb) != NF_ACCEPT)
+		return -EINVAL;
+
+	ovs_ct_update_key(skb, key, true);
+	return 0;
+}
+
+/* Return true if any byte of 'label' is non-zero. */
+static bool label_nonzero(const struct ovs_key_ct_label *label)
+{
+	size_t remaining = sizeof(*label);
+
+	while (remaining--) {
+		if (label->ct_label[remaining])
+			return true;
+	}
+
+	return false;
+}
+
+/* Execute a conntrack action on 'skb'.
+ *
+ * Temporarily pulls the skb to its network header, reassembles fragments
+ * if the key marks the packet as one, performs a commit or a plain lookup
+ * depending on OVS_CT_F_COMMIT, and then applies the optional masked mark
+ * and label writes from 'info'.  'key' is updated along the way.
+ *
+ * Returns 0 on success or a negative errno.  When handle_fragments() fails
+ * the error is returned directly, without pushing the L2 header back.
+ */
+int ovs_ct_execute(struct net *net, struct sk_buff *skb,
+ struct sw_flow_key *key,
+ const struct ovs_conntrack_info *info)
+{
+ int nh_ofs;
+ int err;
+
+ /* The conntrack module expects to be working at L3. */
+ nh_ofs = skb_network_offset(skb);
+ skb_pull(skb, nh_ofs);
+
+ if (key->ip.frag != OVS_FRAG_TYPE_NONE) {
+ err = handle_fragments(net, key, info->zone.id, skb);
+ if (err)
+ return err;
+ }
+
+ if (info->flags & OVS_CT_F_COMMIT)
+ err = ovs_ct_commit(net, key, info, skb);
+ else
+ err = ovs_ct_lookup(net, key, info, skb);
+ if (err)
+ goto err;
+
+ /* A zero mask means the action does not modify the mark. */
+ if (info->mark.mask) {
+ err = ovs_ct_set_mark(skb, key, info->mark.value,
+ info->mark.mask);
+ if (err)
+ goto err;
+ }
+ if (label_nonzero(&info->label.mask))
+ err = ovs_ct_set_label(skb, key, &info->label.value,
+ &info->label.mask);
+err:
+ /* Undo the pull above so the skb starts at the L2 header again. */
+ skb_push(skb, nh_ofs);
+ return err;
+}
+
+/* Attach the conntrack helper called 'name' to the template in 'info'.
+ *
+ * The helper is looked up for info->family and the flow's IP protocol,
+ * taking a module reference.  On success the reference is kept and
+ * recorded in info->helper.
+ *
+ * Returns 0 on success, -EINVAL for an unknown helper, or -ENOMEM (after
+ * dropping the module reference) when the helper extension cannot be added.
+ */
+static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name,
+			     const struct sw_flow_key *key, bool log)
+{
+	struct nf_conntrack_helper *found;
+	struct nf_conn_help *ext;
+
+	found = nf_conntrack_helper_try_module_get(name, info->family,
+						   key->ip.proto);
+	if (!found) {
+		OVS_NLERR(log, "Unknown helper \"%s\"", name);
+		return -EINVAL;
+	}
+
+	ext = nf_ct_helper_ext_add(info->ct, found, GFP_KERNEL);
+	if (ext) {
+		rcu_assign_pointer(ext->helper, found);
+		info->helper = found;
+		return 0;
+	}
+
+	module_put(found->me);
+	return -ENOMEM;
+}
+
+/* Per-attribute payload length limits checked by parse_ct(), indexed by
+ * OVS_CT_ATTR_* type.  Types without an entry are zero-initialised, i.e.
+ * only a zero-length payload passes the length check for them.
+ */
+static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = {
+ [OVS_CT_ATTR_FLAGS] = { .minlen = sizeof(u32),
+ .maxlen = sizeof(u32) },
+ [OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16),
+ .maxlen = sizeof(u16) },
+ [OVS_CT_ATTR_MARK] = { .minlen = sizeof(struct md_mark),
+ .maxlen = sizeof(struct md_mark) },
+ [OVS_CT_ATTR_LABEL] = { .minlen = sizeof(struct md_label),
+ .maxlen = sizeof(struct md_label) },
+ /* Helper names are variable-length strings. */
+ [OVS_CT_ATTR_HELPER] = { .minlen = 1,
+ .maxlen = NF_CT_HELPER_NAME_LEN }
+};
+
+/* Parse the nested OVS_CT_ATTR_* attributes of a conntrack action.
+ *
+ * @attr:   nested attribute holding the conntrack sub-attributes.
+ * @info:   filled with flags, zone, mark and label as they are found.
+ * @helper: set to point at the helper name inside 'attr' when an
+ *          OVS_CT_ATTR_HELPER attribute is present.
+ * @log:    whether to log parse errors.
+ *
+ * Returns 0 on success or -EINVAL for an unknown type, a bad length, an
+ * unterminated helper name, an attribute whose feature is compiled out, or
+ * trailing bytes.
+ */
+static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
+		    const char **helper, bool log)
+{
+	struct nlattr *a;
+	int rem;
+
+	nla_for_each_nested(a, attr, rem) {
+		int type = nla_type(a);
+		int maxlen;
+		int minlen;
+
+		/* 'type' comes from user space: validate it before using it
+		 * as an index, otherwise ovs_ct_attr_lens[] is read out of
+		 * bounds.
+		 */
+		if (type > OVS_CT_ATTR_MAX) {
+			OVS_NLERR(log,
+				  "Unknown conntrack attr (type=%d, max=%d)",
+				  type, OVS_CT_ATTR_MAX);
+			return -EINVAL;
+		}
+
+		maxlen = ovs_ct_attr_lens[type].maxlen;
+		minlen = ovs_ct_attr_lens[type].minlen;
+		if (nla_len(a) < minlen || nla_len(a) > maxlen) {
+			OVS_NLERR(log,
+				  "Conntrack attr type has unexpected length (type=%d, length=%d, expected=%d)",
+				  type, nla_len(a), maxlen);
+			return -EINVAL;
+		}
+
+		switch (type) {
+		case OVS_CT_ATTR_FLAGS:
+			info->flags = nla_get_u32(a);
+			break;
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+		case OVS_CT_ATTR_ZONE:
+			info->zone.id = nla_get_u16(a);
+			break;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_MARK
+		case OVS_CT_ATTR_MARK: {
+			struct md_mark *mark = nla_data(a);
+
+			info->mark = *mark;
+			break;
+		}
+#endif
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+		case OVS_CT_ATTR_LABEL: {
+			struct md_label *label = nla_data(a);
+
+			info->label = *label;
+			break;
+		}
+#endif
+		case OVS_CT_ATTR_HELPER:
+			/* The name must be NUL-terminated within the payload. */
+			*helper = nla_data(a);
+			if (!memchr(*helper, '\0', nla_len(a))) {
+				OVS_NLERR(log, "Invalid conntrack helper");
+				return -EINVAL;
+			}
+			break;
+		default:
+			OVS_NLERR(log, "Unknown conntrack attr (%d)",
+				  type);
+			return -EINVAL;
+		}
+	}
+
+	if (rem > 0) {
+		OVS_NLERR(log, "Conntrack attr has %d unknown bytes", rem);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Report whether the conntrack key attribute 'attr' is usable in 'net'.
+ * CT_STATE is always supported; zone, mark and label depend on the
+ * matching conntrack feature being compiled in, and label additionally on
+ * the per-namespace xt_label flag set by ovs_ct_init().
+ */
+bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr)
+{
+	switch (attr) {
+	case OVS_KEY_ATTR_CT_STATE:
+		return true;
+	case OVS_KEY_ATTR_CT_ZONE:
+		return IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES);
+	case OVS_KEY_ATTR_CT_MARK:
+		return IS_ENABLED(CONFIG_NF_CONNTRACK_MARK);
+	case OVS_KEY_ATTR_CT_LABEL:
+		if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)) {
+			struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+
+			return ovs_net->xt_label;
+		}
+		return false;
+	default:
+		return false;
+	}
+}
+
+/* Validate a conntrack action from user space and append it to '*sfa'.
+ *
+ * @net:  network namespace in which the conntrack template is allocated.
+ * @attr: nested OVS_CT_ATTR_* attributes describing the action.
+ * @key:  flow key; supplies the L3 family and, for helpers, the IP protocol.
+ * @sfa:  action list the OVS_ACTION_ATTR_CT action is appended to.
+ * @log:  whether to log validation errors.
+ *
+ * Returns 0 on success, -EINVAL for a non-IP flow or malformed attributes,
+ * -ENOMEM when the template cannot be allocated, or the error from adding
+ * the helper or the action.
+ */
+int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
+		       const struct sw_flow_key *key,
+		       struct sw_flow_actions **sfa, bool log)
+{
+	struct ovs_conntrack_info ct_info;
+	const char *helper = NULL;
+	u16 family;
+	int err;
+
+	family = key_to_nfproto(key);
+	if (family == NFPROTO_UNSPEC) {
+		OVS_NLERR(log, "ct family unspecified");
+		return -EINVAL;
+	}
+
+	memset(&ct_info, 0, sizeof(ct_info));
+	ct_info.family = family;
+
+	nf_ct_zone_init(&ct_info.zone, NF_CT_DEFAULT_ZONE_ID,
+			NF_CT_DEFAULT_ZONE_DIR, 0);
+
+	err = parse_ct(attr, &ct_info, &helper, log);
+	if (err)
+		return err;
+
+	/* Set up template for tracking connections in specific zones. */
+	ct_info.ct = nf_ct_tmpl_alloc(net, &ct_info.zone, GFP_KERNEL);
+	if (!ct_info.ct) {
+		OVS_NLERR(log, "Failed to allocate conntrack template");
+		return -ENOMEM;
+	}
+	if (helper) {
+		err = ovs_ct_add_helper(&ct_info, helper, key, log);
+		if (err)
+			goto err_free_ct;
+	}
+
+	err = ovs_nla_add_action(sfa, OVS_ACTION_ATTR_CT, &ct_info,
+				 sizeof(ct_info), log);
+	if (err)
+		goto err_free_ct;
+
+	__set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status);
+	nf_conntrack_get(&ct_info.ct->ct_general);
+	return 0;
+err_free_ct:
+	/* ovs_ct_add_helper() keeps a module reference on success and sets
+	 * ct_info.helper; release it here, as ovs_ct_free_action() would,
+	 * so a failed ovs_nla_add_action() does not leak the reference.
+	 * ct_info.helper stays NULL (from the memset) in every other case.
+	 */
+	if (ct_info.helper)
+		module_put(ct_info.helper->me);
+	nf_conntrack_free(ct_info.ct);
+	return err;
+}
+
+/* Serialise a conntrack action back into a nested OVS_ACTION_ATTR_CT
+ * netlink attribute in 'skb'.  Zone, mark and label are emitted only when
+ * the matching conntrack feature is compiled in; the helper name only when
+ * a helper is attached.
+ *
+ * Returns 0 on success or -EMSGSIZE when 'skb' runs out of room.
+ */
+int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info,
+			  struct sk_buff *skb)
+{
+	struct nlattr *nest = nla_nest_start(skb, OVS_ACTION_ATTR_CT);
+
+	if (!nest)
+		return -EMSGSIZE;
+
+	if (nla_put_u32(skb, OVS_CT_ATTR_FLAGS, ct_info->flags) ||
+	    (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
+	     nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id)) ||
+	    (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
+	     nla_put(skb, OVS_CT_ATTR_MARK, sizeof(ct_info->mark),
+		     &ct_info->mark)) ||
+	    (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
+	     nla_put(skb, OVS_CT_ATTR_LABEL, sizeof(ct_info->label),
+		     &ct_info->label)) ||
+	    (ct_info->helper &&
+	     nla_put_string(skb, OVS_CT_ATTR_HELPER, ct_info->helper->name)))
+		return -EMSGSIZE;
+
+	nla_nest_end(skb, nest);
+
+	return 0;
+}
+
+/* Release what a conntrack action holds: the helper's module reference and
+ * the reference on the conntrack template, when present.
+ */
+void ovs_ct_free_action(const struct nlattr *a)
+{
+	struct ovs_conntrack_info *info = nla_data(a);
+	struct nf_conntrack_helper *helper = info->helper;
+	struct nf_conn *tmpl = info->ct;
+
+	if (helper)
+		module_put(helper->me);
+	if (tmpl)
+		nf_ct_put(tmpl);
+}
+
+/* Per-namespace setup: request connlabel storage wide enough for an OVS
+ * conntrack label and record in ovs_net->xt_label whether that succeeded.
+ */
+void ovs_ct_init(struct net *net)
+{
+	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+	unsigned int n_bits = sizeof(struct ovs_key_ct_label) * BITS_PER_BYTE;
+	bool have_labels = !nf_connlabels_get(net, n_bits);
+
+	ovs_net->xt_label = have_labels;
+	if (!have_labels)
+		OVS_NLERR(true, "Failed to set connlabel length");
+}
+
+/* Per-namespace teardown: give back the connlabel reservation taken by
+ * ovs_ct_init(), if it was obtained.
+ */
+void ovs_ct_exit(struct net *net)
+{
+	const struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+
+	if (!ovs_net->xt_label)
+		return;
+
+	nf_connlabels_put(net);
+}
diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h
new file mode 100644
index 000000000000..3cb30667a7dc
--- /dev/null
+++ b/net/openvswitch/conntrack.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2015 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef OVS_CONNTRACK_H
+#define OVS_CONNTRACK_H 1
+
+#include "flow.h"
+
+struct ovs_conntrack_info;
+enum ovs_key_attr;
+
+/* With conntrack support built in, these are implemented in conntrack.c. */
+#if defined(CONFIG_OPENVSWITCH_CONNTRACK)
+void ovs_ct_init(struct net *);
+void ovs_ct_exit(struct net *);
+bool ovs_ct_verify(struct net *, enum ovs_key_attr attr);
+int ovs_ct_copy_action(struct net *, const struct nlattr *,
+ const struct sw_flow_key *, struct sw_flow_actions **,
+ bool log);
+int ovs_ct_action_to_attr(const struct ovs_conntrack_info *, struct sk_buff *);
+
+int ovs_ct_execute(struct net *, struct sk_buff *, struct sw_flow_key *,
+ const struct ovs_conntrack_info *);
+
+void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key);
+int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb);
+void ovs_ct_free_action(const struct nlattr *a);
+#else
+/* Without conntrack support: no-op stubs.  Action handling reports
+ * -ENOTSUPP, no conntrack key attribute is accepted, and the conntrack
+ * part of a flow key is always zero.
+ */
+#include <linux/errno.h>
+
+static inline void ovs_ct_init(struct net *net) { }
+
+static inline void ovs_ct_exit(struct net *net) { }
+
+static inline bool ovs_ct_verify(struct net *net, int attr)
+{
+ return false;
+}
+
+static inline int ovs_ct_copy_action(struct net *net, const struct nlattr *nla,
+ const struct sw_flow_key *key,
+ struct sw_flow_actions **acts, bool log)
+{
+ return -ENOTSUPP;
+}
+
+static inline int ovs_ct_action_to_attr(const struct ovs_conntrack_info *info,
+ struct sk_buff *skb)
+{
+ return -ENOTSUPP;
+}
+
+static inline int ovs_ct_execute(struct net *net, struct sk_buff *skb,
+ struct sw_flow_key *key,
+ const struct ovs_conntrack_info *info)
+{
+ return -ENOTSUPP;
+}
+
+/* Zero every conntrack field of the key. */
+static inline void ovs_ct_fill_key(const struct sk_buff *skb,
+ struct sw_flow_key *key)
+{
+ key->ct.state = 0;
+ key->ct.zone = 0;
+ key->ct.mark = 0;
+ memset(&key->ct.label, 0, sizeof(key->ct.label));
+}
+
+/* Nothing to serialise: succeeds without adding attributes. */
+static inline int ovs_ct_put_key(const struct sw_flow_key *key,
+ struct sk_buff *skb)
+{
+ return 0;
+}
+
+static inline void ovs_ct_free_action(const struct nlattr *a) { }
+#endif
+#endif /* ovs_conntrack.h */
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index ffe984f5b95c..ec0f8d9cee73 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -275,6 +275,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
memset(&upcall, 0, sizeof(upcall));
upcall.cmd = OVS_PACKET_CMD_MISS;
upcall.portid = ovs_vport_find_upcall_portid(p, skb);
+ upcall.mru = OVS_CB(skb)->mru;
error = ovs_dp_upcall(dp, skb, key, &upcall);
if (unlikely(error))
kfree_skb(skb);
@@ -400,9 +401,23 @@ static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
if (upcall_info->actions_len)
size += nla_total_size(upcall_info->actions_len);
+ /* OVS_PACKET_ATTR_MRU */
+ if (upcall_info->mru)
+ size += nla_total_size(sizeof(upcall_info->mru));
+
return size;
}
+static void pad_packet(struct datapath *dp, struct sk_buff *skb)
+{
+ size_t pad_len = NLA_ALIGN(skb->len) - skb->len;
+
+ /* Skip when OVS_DP_F_UNALIGNED is set or skb->len is already aligned. */
+ if ((dp->user_features & OVS_DP_F_UNALIGNED) || !pad_len)
+ return;
+ memset(skb_put(skb, pad_len), 0, pad_len);
+}
+
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
const struct sw_flow_key *key,
const struct dp_upcall_info *upcall_info)
@@ -492,6 +507,16 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
nla_nest_cancel(user_skb, nla);
}
+ /* Add OVS_PACKET_ATTR_MRU */
+ if (upcall_info->mru) {
+ if (nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU,
+ upcall_info->mru)) {
+ err = -ENOBUFS;
+ goto out;
+ }
+ pad_packet(dp, user_skb);
+ }
+
/* Only reserve room for attribute header, packet data is added
* in skb_zerocopy() */
if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
@@ -505,12 +530,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
goto out;
/* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
- if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
- size_t plen = NLA_ALIGN(user_skb->len) - user_skb->len;
-
- if (plen > 0)
- memset(skb_put(user_skb, plen), 0, plen);
- }
+ pad_packet(dp, user_skb);
((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;
@@ -527,6 +547,7 @@ out:
static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
struct ovs_header *ovs_header = info->userhdr;
+ struct net *net = sock_net(skb->sk);
struct nlattr **a = info->attrs;
struct sw_flow_actions *acts;
struct sk_buff *packet;
@@ -535,6 +556,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
struct ethhdr *eth;
struct vport *input_vport;
+ u16 mru = 0;
int len;
int err;
bool log = !a[OVS_PACKET_ATTR_PROBE];
@@ -564,18 +586,25 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
else
packet->protocol = htons(ETH_P_802_2);
+ /* Set packet's mru */
+ if (a[OVS_PACKET_ATTR_MRU]) {
+ mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
+ packet->ignore_df = 1;
+ }
+ OVS_CB(packet)->mru = mru;
+
/* Build an sw_flow for sending this packet. */
flow = ovs_flow_alloc();
err = PTR_ERR(flow);
if (IS_ERR(flow))
goto err_kfree_skb;
- err = ovs_flow_key_extract_userspace(a[OVS_PACKET_ATTR_KEY], packet,
- &flow->key, log);
+ err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY],
+ packet, &flow->key, log);
if (err)
goto err_flow_free;
- err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS],
+ err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS],
&flow->key, &acts, log);
if (err)
goto err_flow_free;
@@ -586,7 +615,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
packet->mark = flow->key.phy.skb_mark;
rcu_read_lock();
- dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
+ dp = get_dp_rcu(net, ovs_header->dp_ifindex);
err = -ENODEV;
if (!dp)
goto err_unlock;
@@ -598,6 +627,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
if (!input_vport)
goto err_unlock;
+ packet->dev = input_vport->dev;
OVS_CB(packet)->input_vport = input_vport;
sf_acts = rcu_dereference(flow->sf_acts);
@@ -624,6 +654,7 @@ static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
[OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
[OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
+ [OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
};
static const struct genl_ops dp_packet_genl_ops[] = {
@@ -713,7 +744,7 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
/* OVS_FLOW_ATTR_ACTIONS */
if (should_fill_actions(ufid_flags))
- len += nla_total_size(acts->actions_len);
+ len += nla_total_size(acts->orig_len);
return len
+ nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
@@ -880,6 +911,7 @@ static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
+ struct net *net = sock_net(skb->sk);
struct nlattr **a = info->attrs;
struct ovs_header *ovs_header = info->userhdr;
struct sw_flow *flow = NULL, *new_flow;
@@ -915,7 +947,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
/* Extract key. */
ovs_match_init(&match, &key, &mask);
- error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY],
+ error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
a[OVS_FLOW_ATTR_MASK], log);
if (error)
goto err_kfree_flow;
@@ -929,8 +961,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
goto err_kfree_flow;
/* Validate actions. */
- error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key,
- &acts, log);
+ error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
+ &new_flow->key, &acts, log);
if (error) {
OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
goto err_kfree_flow;
@@ -944,7 +976,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
}
ovs_lock();
- dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+ dp = get_dp(net, ovs_header->dp_ifindex);
if (unlikely(!dp)) {
error = -ENODEV;
goto err_unlock_ovs;
@@ -1038,7 +1070,8 @@ error:
}
/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
-static struct sw_flow_actions *get_flow_actions(const struct nlattr *a,
+static struct sw_flow_actions *get_flow_actions(struct net *net,
+ const struct nlattr *a,
const struct sw_flow_key *key,
const struct sw_flow_mask *mask,
bool log)
@@ -1048,7 +1081,7 @@ static struct sw_flow_actions *get_flow_actions(const struct nlattr *a,
int error;
ovs_flow_mask_key(&masked_key, key, mask);
- error = ovs_nla_copy_actions(a, &masked_key, &acts, log);
+ error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log);
if (error) {
OVS_NLERR(log,
"Actions may not be safe on all matching packets");
@@ -1060,6 +1093,7 @@ static struct sw_flow_actions *get_flow_actions(const struct nlattr *a,
static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
+ struct net *net = sock_net(skb->sk);
struct nlattr **a = info->attrs;
struct ovs_header *ovs_header = info->userhdr;
struct sw_flow_key key;
@@ -1084,15 +1118,15 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
ovs_match_init(&match, &key, &mask);
- error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY],
+ error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
a[OVS_FLOW_ATTR_MASK], log);
if (error)
goto error;
/* Validate actions. */
if (a[OVS_FLOW_ATTR_ACTIONS]) {
- acts = get_flow_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, &mask,
- log);
+ acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], &key,
+ &mask, log);
if (IS_ERR(acts)) {
error = PTR_ERR(acts);
goto error;
@@ -1108,7 +1142,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
}
ovs_lock();
- dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+ dp = get_dp(net, ovs_header->dp_ifindex);
if (unlikely(!dp)) {
error = -ENODEV;
goto err_unlock_ovs;
@@ -1174,6 +1208,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
struct ovs_header *ovs_header = info->userhdr;
+ struct net *net = sock_net(skb->sk);
struct sw_flow_key key;
struct sk_buff *reply;
struct sw_flow *flow;
@@ -1188,7 +1223,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
if (a[OVS_FLOW_ATTR_KEY]) {
ovs_match_init(&match, &key, NULL);
- err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL,
+ err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL,
log);
} else if (!ufid_present) {
OVS_NLERR(log,
@@ -1232,6 +1267,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
struct ovs_header *ovs_header = info->userhdr;
+ struct net *net = sock_net(skb->sk);
struct sw_flow_key key;
struct sk_buff *reply;
struct sw_flow *flow = NULL;
@@ -1246,8 +1282,8 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
if (a[OVS_FLOW_ATTR_KEY]) {
ovs_match_init(&match, &key, NULL);
- err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL,
- log);
+ err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
+ NULL, log);
if (unlikely(err))
return err;
}
@@ -2203,6 +2239,7 @@ static int __net_init ovs_init_net(struct net *net)
INIT_LIST_HEAD(&ovs_net->dps);
INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
+ ovs_ct_init(net);
return 0;
}
@@ -2237,6 +2274,7 @@ static void __net_exit ovs_exit_net(struct net *dnet)
struct net *net;
LIST_HEAD(head);
+ ovs_ct_exit(dnet);
ovs_lock();
list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
__dp_destroy(dp);
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 6b28c5cedb23..4e785ab88973 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -27,6 +27,7 @@
#include <linux/u64_stats_sync.h>
#include <net/ip_tunnels.h>
+#include "conntrack.h"
#include "flow.h"
#include "flow_table.h"
#include "vport.h"
@@ -97,10 +98,13 @@ struct datapath {
* NULL if the packet is not being tunneled.
* @input_vport: The original vport packet came in on. This value is cached
* when a packet is received by OVS.
+ * @mru: The maximum received fragment size; 0 if the packet is not
+ * fragmented.
*/
struct ovs_skb_cb {
struct ip_tunnel_info *egress_tun_info;
struct vport *input_vport;
+ u16 mru;
};
#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
@@ -113,6 +117,7 @@ struct ovs_skb_cb {
* then no packet is sent and the packet is accounted in the datapath's @n_lost
* counter.
* @egress_tun_info: If nonnull, becomes %OVS_PACKET_ATTR_EGRESS_TUN_KEY.
+ * @mru: If nonzero, the maximum received IP fragment size.
*/
struct dp_upcall_info {
const struct ip_tunnel_info *egress_tun_info;
@@ -121,6 +126,7 @@ struct dp_upcall_info {
int actions_len;
u32 portid;
u8 cmd;
+ u16 mru;
};
/**
@@ -132,6 +138,9 @@ struct ovs_net {
struct list_head dps;
struct work_struct dp_notify_work;
struct vport_net vport_net;
+
+ /* Module reference for configuring conntrack. */
+ bool xt_label;
};
extern int ovs_net_id;
@@ -200,6 +209,10 @@ void ovs_dp_notify_wq(struct work_struct *work);
int action_fifos_init(void);
void action_fifos_exit(void);
+/* 'KEY' must not have any bits set outside of the 'MASK' */
+#define OVS_MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK)))
+#define OVS_SET_MASKED(OLD, KEY, MASK) ((OLD) = OVS_MASKED(OLD, KEY, MASK))
+
#define OVS_NLERR(logging_allowed, fmt, ...) \
do { \
if (logging_allowed && net_ratelimit()) \
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 8db22ef73626..9760dc43bdb9 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -49,6 +49,7 @@
#include "datapath.h"
#include "flow.h"
#include "flow_netlink.h"
+#include "conntrack.h"
u64 ovs_flow_used_time(unsigned long flow_jiffies)
{
@@ -687,6 +688,8 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
{
/* Extract metadata from packet. */
if (tun_info) {
+ if (ip_tunnel_info_af(tun_info) != AF_INET)
+ return -EINVAL;
memcpy(&key->tun_key, &tun_info->key, sizeof(key->tun_key));
if (tun_info->options) {
@@ -707,13 +710,14 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
key->phy.priority = skb->priority;
key->phy.in_port = OVS_CB(skb)->input_vport->port_no;
key->phy.skb_mark = skb->mark;
+ ovs_ct_fill_key(skb, key);
key->ovs_flow_hash = 0;
key->recirc_id = 0;
return key_extract(skb, key);
}
-int ovs_flow_key_extract_userspace(const struct nlattr *attr,
+int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr,
struct sk_buff *skb,
struct sw_flow_key *key, bool log)
{
@@ -722,7 +726,7 @@ int ovs_flow_key_extract_userspace(const struct nlattr *attr,
memset(key, 0, OVS_SW_FLOW_KEY_METADATA_SIZE);
/* Extract metadata from netlink attributes. */
- err = ovs_nla_get_flow_metadata(attr, key, log);
+ err = ovs_nla_get_flow_metadata(net, attr, key, log);
if (err)
return err;
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index b62cdb3e3589..fe527d2dd4b7 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -111,6 +111,14 @@ struct sw_flow_key {
} nd;
} ipv6;
};
+ struct {
+ /* Connection tracking fields. */
+ u16 zone;
+ u32 mark;
+ u8 state;
+ struct ovs_key_ct_label label;
+ } ct;
+
} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */
struct sw_flow_key_range {
@@ -144,6 +152,7 @@ struct sw_flow_id {
struct sw_flow_actions {
struct rcu_head rcu;
+ size_t orig_len; /* From flow_cmd_new netlink actions size */
u32 actions_len;
struct nlattr actions[];
};
@@ -212,7 +221,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
struct sk_buff *skb,
struct sw_flow_key *key);
/* Extract key from packet coming from userspace. */
-int ovs_flow_key_extract_userspace(const struct nlattr *attr,
+int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr,
struct sk_buff *skb,
struct sw_flow_key *key, bool log);
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index a6eb77ab1a64..e22c5bfe8575 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -281,7 +281,7 @@ size_t ovs_key_attr_size(void)
/* Whenever adding new OVS_KEY_ FIELDS, we should consider
* updating this function.
*/
- BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 22);
+ BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 26);
return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
+ nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */
@@ -290,6 +290,10 @@ size_t ovs_key_attr_size(void)
+ nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */
+ nla_total_size(4) /* OVS_KEY_ATTR_DP_HASH */
+ nla_total_size(4) /* OVS_KEY_ATTR_RECIRC_ID */
+ + nla_total_size(1) /* OVS_KEY_ATTR_CT_STATE */
+ + nla_total_size(2) /* OVS_KEY_ATTR_CT_ZONE */
+ + nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */
+ + nla_total_size(16) /* OVS_KEY_ATTR_CT_LABEL */
+ nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
+ nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
+ nla_total_size(4) /* OVS_KEY_ATTR_VLAN */
@@ -339,6 +343,10 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
[OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED,
.next = ovs_tunnel_key_lens, },
[OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) },
+ [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u8) },
+ [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) },
+ [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) },
+ [OVS_KEY_ATTR_CT_LABEL] = { .len = sizeof(struct ovs_key_ct_label) },
};
static bool is_all_zero(const u8 *fp, size_t size)
@@ -534,19 +542,19 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
tun_flags |= TUNNEL_KEY;
break;
case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
- SW_FLOW_KEY_PUT(match, tun_key.ipv4_src,
+ SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src,
nla_get_in_addr(a), is_mask);
break;
case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
- SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst,
+ SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.dst,
nla_get_in_addr(a), is_mask);
break;
case OVS_TUNNEL_KEY_ATTR_TOS:
- SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos,
+ SW_FLOW_KEY_PUT(match, tun_key.tos,
nla_get_u8(a), is_mask);
break;
case OVS_TUNNEL_KEY_ATTR_TTL:
- SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl,
+ SW_FLOW_KEY_PUT(match, tun_key.ttl,
nla_get_u8(a), is_mask);
ttl = true;
break;
@@ -609,7 +617,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
}
if (!is_mask) {
- if (!match->key->tun_key.ipv4_dst) {
+ if (!match->key->tun_key.u.ipv4.dst) {
OVS_NLERR(log, "IPv4 tunnel dst address is zero");
return -EINVAL;
}
@@ -647,18 +655,18 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
if (output->tun_flags & TUNNEL_KEY &&
nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
return -EMSGSIZE;
- if (output->ipv4_src &&
+ if (output->u.ipv4.src &&
nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC,
- output->ipv4_src))
+ output->u.ipv4.src))
return -EMSGSIZE;
- if (output->ipv4_dst &&
+ if (output->u.ipv4.dst &&
nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST,
- output->ipv4_dst))
+ output->u.ipv4.dst))
return -EMSGSIZE;
- if (output->ipv4_tos &&
- nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos))
+ if (output->tos &&
+ nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->tos))
return -EMSGSIZE;
- if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl))
+ if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ttl))
return -EMSGSIZE;
if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
@@ -715,9 +723,9 @@ int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb,
egress_tun_info->options_len);
}
-static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
- const struct nlattr **a, bool is_mask,
- bool log)
+static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
+ u64 *attrs, const struct nlattr **a,
+ bool is_mask, bool log)
{
if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) {
u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]);
@@ -768,16 +776,47 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
return -EINVAL;
*attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
}
+
+ if (*attrs & (1 << OVS_KEY_ATTR_CT_STATE) &&
+ ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) {
+ u8 ct_state = nla_get_u8(a[OVS_KEY_ATTR_CT_STATE]);
+
+ SW_FLOW_KEY_PUT(match, ct.state, ct_state, is_mask);
+ *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_STATE);
+ }
+ if (*attrs & (1 << OVS_KEY_ATTR_CT_ZONE) &&
+ ovs_ct_verify(net, OVS_KEY_ATTR_CT_ZONE)) {
+ u16 ct_zone = nla_get_u16(a[OVS_KEY_ATTR_CT_ZONE]);
+
+ SW_FLOW_KEY_PUT(match, ct.zone, ct_zone, is_mask);
+ *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ZONE);
+ }
+ if (*attrs & (1 << OVS_KEY_ATTR_CT_MARK) &&
+ ovs_ct_verify(net, OVS_KEY_ATTR_CT_MARK)) {
+ u32 mark = nla_get_u32(a[OVS_KEY_ATTR_CT_MARK]);
+
+ SW_FLOW_KEY_PUT(match, ct.mark, mark, is_mask);
+ *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_MARK);
+ }
+ if (*attrs & (1 << OVS_KEY_ATTR_CT_LABEL) &&
+ ovs_ct_verify(net, OVS_KEY_ATTR_CT_LABEL)) {
+ const struct ovs_key_ct_label *cl;
+
+ cl = nla_data(a[OVS_KEY_ATTR_CT_LABEL]);
+ SW_FLOW_KEY_MEMCPY(match, ct.label, cl->ct_label,
+ sizeof(*cl), is_mask);
+ *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABEL);
+ }
return 0;
}
-static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
- const struct nlattr **a, bool is_mask,
- bool log)
+static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
+ u64 attrs, const struct nlattr **a,
+ bool is_mask, bool log)
{
int err;
- err = metadata_from_nlattrs(match, &attrs, a, is_mask, log);
+ err = metadata_from_nlattrs(net, match, &attrs, a, is_mask, log);
if (err)
return err;
@@ -1029,6 +1068,7 @@ static void mask_set_nlattr(struct nlattr *attr, u8 val)
* mask. In case the 'mask' is NULL, the flow is treated as exact match
* flow. Otherwise, it is treated as a wildcarded flow, except the mask
* does not include any don't care bit.
+ * @net: Used to determine per-namespace field support.
* @match: receives the extracted flow match information.
* @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
* sequence. The fields should of the packet that triggered the creation
@@ -1039,7 +1079,7 @@ static void mask_set_nlattr(struct nlattr *attr, u8 val)
* probing for feature compatibility this should be passed in as false to
* suppress unnecessary error logging.
*/
-int ovs_nla_get_match(struct sw_flow_match *match,
+int ovs_nla_get_match(struct net *net, struct sw_flow_match *match,
const struct nlattr *nla_key,
const struct nlattr *nla_mask,
bool log)
@@ -1089,7 +1129,7 @@ int ovs_nla_get_match(struct sw_flow_match *match,
}
}
- err = ovs_key_from_nlattrs(match, key_attrs, a, false, log);
+ err = ovs_key_from_nlattrs(net, match, key_attrs, a, false, log);
if (err)
return err;
@@ -1116,7 +1156,7 @@ int ovs_nla_get_match(struct sw_flow_match *match,
/* The userspace does not send tunnel attributes that
* are 0, but we should not wildcard them nonetheless.
*/
- if (match->key->tun_key.ipv4_dst)
+ if (match->key->tun_key.u.ipv4.dst)
SW_FLOW_KEY_MEMSET_FIELD(match, tun_key,
0xff, true);
@@ -1169,7 +1209,8 @@ int ovs_nla_get_match(struct sw_flow_match *match,
}
}
- err = ovs_key_from_nlattrs(match, mask_attrs, a, true, log);
+ err = ovs_key_from_nlattrs(net, match, mask_attrs, a, true,
+ log);
if (err)
goto free_newmask;
}
@@ -1250,7 +1291,7 @@ u32 ovs_nla_get_ufid_flags(const struct nlattr *attr)
* extracted from the packet itself.
*/
-int ovs_nla_get_flow_metadata(const struct nlattr *attr,
+int ovs_nla_get_flow_metadata(struct net *net, const struct nlattr *attr,
struct sw_flow_key *key,
bool log)
{
@@ -1266,9 +1307,10 @@ int ovs_nla_get_flow_metadata(const struct nlattr *attr,
memset(&match, 0, sizeof(match));
match.key = key;
+ memset(&key->ct, 0, sizeof(key->ct));
key->phy.in_port = DP_MAX_PORTS;
- return metadata_from_nlattrs(&match, &attrs, a, false, log);
+ return metadata_from_nlattrs(net, &match, &attrs, a, false, log);
}
static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
@@ -1287,7 +1329,7 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
goto nla_put_failure;
- if ((swkey->tun_key.ipv4_dst || is_mask)) {
+ if ((swkey->tun_key.u.ipv4.dst || is_mask)) {
const void *opts = NULL;
if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
@@ -1314,6 +1356,9 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
goto nla_put_failure;
+ if (ovs_ct_put_key(output, skb))
+ goto nla_put_failure;
+
nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
if (!nla)
goto nla_put_failure;
@@ -1574,6 +1619,9 @@ void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
case OVS_ACTION_ATTR_SET:
ovs_nla_free_set_action(a);
break;
+ case OVS_ACTION_ATTR_CT:
+ ovs_ct_free_action(a);
+ break;
}
}
@@ -1619,6 +1667,7 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
acts->actions_len = (*sfa)->actions_len;
+ acts->orig_len = (*sfa)->orig_len;
kfree(*sfa);
*sfa = acts;
@@ -1646,8 +1695,8 @@ static struct nlattr *__add_action(struct sw_flow_actions **sfa,
return a;
}
-static int add_action(struct sw_flow_actions **sfa, int attrtype,
- void *data, int len, bool log)
+int ovs_nla_add_action(struct sw_flow_actions **sfa, int attrtype, void *data,
+ int len, bool log)
{
struct nlattr *a;
@@ -1662,7 +1711,7 @@ static inline int add_nested_action_start(struct sw_flow_actions **sfa,
int used = (*sfa)->actions_len;
int err;
- err = add_action(sfa, attrtype, NULL, 0, log);
+ err = ovs_nla_add_action(sfa, attrtype, NULL, 0, log);
if (err)
return err;
@@ -1678,12 +1727,12 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa,
a->nla_len = sfa->actions_len - st_offset;
}
-static int __ovs_nla_copy_actions(const struct nlattr *attr,
+static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
int depth, struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci, bool log);
-static int validate_and_copy_sample(const struct nlattr *attr,
+static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key, int depth,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci, bool log)
@@ -1715,15 +1764,15 @@ static int validate_and_copy_sample(const struct nlattr *attr,
start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log);
if (start < 0)
return start;
- err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY,
- nla_data(probability), sizeof(u32), log);
+ err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY,
+ nla_data(probability), sizeof(u32), log);
if (err)
return err;
st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS, log);
if (st_acts < 0)
return st_acts;
- err = __ovs_nla_copy_actions(actions, key, depth + 1, sfa,
+ err = __ovs_nla_copy_actions(net, actions, key, depth + 1, sfa,
eth_type, vlan_tci, log);
if (err)
return err;
@@ -1892,6 +1941,8 @@ static int validate_set(const struct nlattr *a,
case OVS_KEY_ATTR_PRIORITY:
case OVS_KEY_ATTR_SKB_MARK:
+ case OVS_KEY_ATTR_CT_MARK:
+ case OVS_KEY_ATTR_CT_LABEL:
case OVS_KEY_ATTR_ETHERNET:
break;
@@ -2057,7 +2108,7 @@ static int copy_action(const struct nlattr *from,
return 0;
}
-static int __ovs_nla_copy_actions(const struct nlattr *attr,
+static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
int depth, struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci, bool log)
@@ -2081,7 +2132,8 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
[OVS_ACTION_ATTR_SET] = (u32)-1,
[OVS_ACTION_ATTR_SET_MASKED] = (u32)-1,
[OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
- [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash)
+ [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash),
+ [OVS_ACTION_ATTR_CT] = (u32)-1,
};
const struct ovs_action_push_vlan *vlan;
int type = nla_type(a);
@@ -2188,13 +2240,20 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
break;
case OVS_ACTION_ATTR_SAMPLE:
- err = validate_and_copy_sample(a, key, depth, sfa,
+ err = validate_and_copy_sample(net, a, key, depth, sfa,
eth_type, vlan_tci, log);
if (err)
return err;
skip_copy = true;
break;
+ case OVS_ACTION_ATTR_CT:
+ err = ovs_ct_copy_action(net, a, key, sfa, log);
+ if (err)
+ return err;
+ skip_copy = true;
+ break;
+
default:
OVS_NLERR(log, "Unknown Action type %d", type);
return -EINVAL;
@@ -2213,7 +2272,7 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
}
/* 'key' must be the masked key. */
-int ovs_nla_copy_actions(const struct nlattr *attr,
+int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa, bool log)
{
@@ -2223,7 +2282,8 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
if (IS_ERR(*sfa))
return PTR_ERR(*sfa);
- err = __ovs_nla_copy_actions(attr, key, 0, sfa, key->eth.type,
+ (*sfa)->orig_len = nla_len(attr);
+ err = __ovs_nla_copy_actions(net, attr, key, 0, sfa, key->eth.type,
key->eth.tci, log);
if (err)
ovs_nla_free_flow_actions(*sfa);
@@ -2348,6 +2408,13 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
if (err)
return err;
break;
+
+ case OVS_ACTION_ATTR_CT:
+ err = ovs_ct_action_to_attr(nla_data(a), skb);
+ if (err)
+ return err;
+ break;
+
default:
if (nla_put(skb, type, nla_len(a), nla_data(a)))
return -EMSGSIZE;
diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
index acd074408f0a..07878e22e783 100644
--- a/net/openvswitch/flow_netlink.h
+++ b/net/openvswitch/flow_netlink.h
@@ -45,15 +45,16 @@ void ovs_match_init(struct sw_flow_match *match,
int ovs_nla_put_key(const struct sw_flow_key *, const struct sw_flow_key *,
int attr, bool is_mask, struct sk_buff *);
-int ovs_nla_get_flow_metadata(const struct nlattr *, struct sw_flow_key *,
- bool log);
+int ovs_nla_get_flow_metadata(struct net *, const struct nlattr *,
+ struct sw_flow_key *, bool log);
int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb);
int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb);
int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb);
-int ovs_nla_get_match(struct sw_flow_match *, const struct nlattr *key,
- const struct nlattr *mask, bool log);
+int ovs_nla_get_match(struct net *, struct sw_flow_match *,
+ const struct nlattr *key, const struct nlattr *mask,
+ bool log);
int ovs_nla_put_egress_tunnel_key(struct sk_buff *,
const struct ip_tunnel_info *);
@@ -62,9 +63,11 @@ int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
const struct sw_flow_key *key, bool log);
u32 ovs_nla_get_ufid_flags(const struct nlattr *attr);
-int ovs_nla_copy_actions(const struct nlattr *attr,
+int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa, bool log);
+int ovs_nla_add_action(struct sw_flow_actions **sfa, int attrtype,
+ void *data, int len, bool log);
int ovs_nla_put_actions(const struct nlattr *attr,
int len, struct sk_buff *skb);
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 3a9d1dde76ed..d22d8e948d0f 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -426,7 +426,7 @@ static u32 flow_hash(const struct sw_flow_key *key,
static int flow_key_start(const struct sw_flow_key *key)
{
- if (key->tun_key.ipv4_dst)
+ if (key->tun_key.u.ipv4.dst)
return 0;
else
return rounddown(offsetof(struct sw_flow_key, phy),
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
index 1da3a14d1010..fa37c95f7339 100644
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -26,95 +26,44 @@
#include "datapath.h"
#include "vport.h"
+#include "vport-netdev.h"
static struct vport_ops ovs_geneve_vport_ops;
-
/**
* struct geneve_port - Keeps track of open UDP ports
- * @gs: The socket created for this port number.
- * @name: vport name.
+ * @port_no: destination port, in host byte order.
*/
struct geneve_port {
- struct geneve_sock *gs;
- char name[IFNAMSIZ];
+ u16 port_no;
};
-static LIST_HEAD(geneve_ports);
-
static inline struct geneve_port *geneve_vport(const struct vport *vport)
{
return vport_priv(vport);
}
-/* Convert 64 bit tunnel ID to 24 bit VNI. */
-static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni)
-{
-#ifdef __BIG_ENDIAN
- vni[0] = (__force __u8)(tun_id >> 16);
- vni[1] = (__force __u8)(tun_id >> 8);
- vni[2] = (__force __u8)tun_id;
-#else
- vni[0] = (__force __u8)((__force u64)tun_id >> 40);
- vni[1] = (__force __u8)((__force u64)tun_id >> 48);
- vni[2] = (__force __u8)((__force u64)tun_id >> 56);
-#endif
-}
-
-/* Convert 24 bit VNI to 64 bit tunnel ID. */
-static __be64 vni_to_tunnel_id(const __u8 *vni)
-{
-#ifdef __BIG_ENDIAN
- return (vni[0] << 16) | (vni[1] << 8) | vni[2];
-#else
- return (__force __be64)(((__force u64)vni[0] << 40) |
- ((__force u64)vni[1] << 48) |
- ((__force u64)vni[2] << 56));
-#endif
-}
-
-static void geneve_rcv(struct geneve_sock *gs, struct sk_buff *skb)
-{
- struct vport *vport = gs->rcv_data;
- struct genevehdr *geneveh = geneve_hdr(skb);
- int opts_len;
- struct ip_tunnel_info tun_info;
- __be64 key;
- __be16 flags;
-
- opts_len = geneveh->opt_len * 4;
-
- flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT |
- (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0) |
- (geneveh->oam ? TUNNEL_OAM : 0) |
- (geneveh->critical ? TUNNEL_CRIT_OPT : 0);
-
- key = vni_to_tunnel_id(geneveh->vni);
-
- ip_tunnel_info_init(&tun_info, ip_hdr(skb),
- udp_hdr(skb)->source, udp_hdr(skb)->dest,
- key, flags, geneveh->options, opts_len);
-
- ovs_vport_receive(vport, skb, &tun_info);
-}
-
static int geneve_get_options(const struct vport *vport,
struct sk_buff *skb)
{
struct geneve_port *geneve_port = geneve_vport(vport);
- struct inet_sock *sk = inet_sk(geneve_port->gs->sock->sk);
- if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(sk->inet_sport)))
+ if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, geneve_port->port_no))
return -EMSGSIZE;
return 0;
}
-static void geneve_tnl_destroy(struct vport *vport)
+static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
+ struct ip_tunnel_info *egress_tun_info)
{
struct geneve_port *geneve_port = geneve_vport(vport);
+ struct net *net = ovs_dp_get_net(vport->dp);
+ __be16 dport = htons(geneve_port->port_no);
+ __be16 sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true);
- geneve_sock_release(geneve_port->gs);
-
- ovs_vport_deferred_free(vport);
+ return ovs_tunnel_get_egress_info(egress_tun_info,
+ ovs_dp_get_net(vport->dp),
+ OVS_CB(skb)->egress_tun_info,
+ IPPROTO_UDP, skb->mark, sport, dport);
}
static struct vport *geneve_tnl_create(const struct vport_parms *parms)
@@ -122,11 +71,11 @@ static struct vport *geneve_tnl_create(const struct vport_parms *parms)
struct net *net = ovs_dp_get_net(parms->dp);
struct nlattr *options = parms->options;
struct geneve_port *geneve_port;
- struct geneve_sock *gs;
+ struct net_device *dev;
struct vport *vport;
struct nlattr *a;
- int err;
u16 dst_port;
+ int err;
if (!options) {
err = -EINVAL;
@@ -148,104 +97,40 @@ static struct vport *geneve_tnl_create(const struct vport_parms *parms)
return vport;
geneve_port = geneve_vport(vport);
- strncpy(geneve_port->name, parms->name, IFNAMSIZ);
+ geneve_port->port_no = dst_port;
- gs = geneve_sock_add(net, htons(dst_port), geneve_rcv, vport, true, 0);
- if (IS_ERR(gs)) {
+ rtnl_lock();
+ dev = geneve_dev_create_fb(net, parms->name, NET_NAME_USER, dst_port);
+ if (IS_ERR(dev)) {
+ rtnl_unlock();
ovs_vport_free(vport);
- return (void *)gs;
+ return ERR_CAST(dev);
}
- geneve_port->gs = gs;
+ dev_change_flags(dev, dev->flags | IFF_UP);
+ rtnl_unlock();
return vport;
error:
return ERR_PTR(err);
}
-static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb)
+static struct vport *geneve_create(const struct vport_parms *parms)
{
- const struct ip_tunnel_key *tun_key;
- struct ip_tunnel_info *tun_info;
- struct net *net = ovs_dp_get_net(vport->dp);
- struct geneve_port *geneve_port = geneve_vport(vport);
- __be16 dport = inet_sk(geneve_port->gs->sock->sk)->inet_sport;
- __be16 sport;
- struct rtable *rt;
- struct flowi4 fl;
- u8 vni[3], opts_len, *opts;
- __be16 df;
- int err;
-
- tun_info = OVS_CB(skb)->egress_tun_info;
- if (unlikely(!tun_info)) {
- err = -EINVAL;
- goto error;
- }
-
- tun_key = &tun_info->key;
- rt = ovs_tunnel_route_lookup(net, tun_key, skb->mark, &fl, IPPROTO_UDP);
- if (IS_ERR(rt)) {
- err = PTR_ERR(rt);
- goto error;
- }
-
- df = tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
- sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true);
- tunnel_id_to_vni(tun_key->tun_id, vni);
- skb->ignore_df = 1;
-
- if (tun_key->tun_flags & TUNNEL_GENEVE_OPT) {
- opts = (u8 *)tun_info->options;
- opts_len = tun_info->options_len;
- } else {
- opts = NULL;
- opts_len = 0;
- }
-
- err = geneve_xmit_skb(geneve_port->gs, rt, skb, fl.saddr,
- tun_key->ipv4_dst, tun_key->ipv4_tos,
- tun_key->ipv4_ttl, df, sport, dport,
- tun_key->tun_flags, vni, opts_len, opts,
- !!(tun_key->tun_flags & TUNNEL_CSUM), false);
- if (err < 0)
- ip_rt_put(rt);
- return err;
-
-error:
- kfree_skb(skb);
- return err;
-}
-
-static const char *geneve_get_name(const struct vport *vport)
-{
- struct geneve_port *geneve_port = geneve_vport(vport);
-
- return geneve_port->name;
-}
+ struct vport *vport;
-static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
- struct ip_tunnel_info *egress_tun_info)
-{
- struct geneve_port *geneve_port = geneve_vport(vport);
- struct net *net = ovs_dp_get_net(vport->dp);
- __be16 dport = inet_sk(geneve_port->gs->sock->sk)->inet_sport;
- __be16 sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true);
+ vport = geneve_tnl_create(parms);
+ if (IS_ERR(vport))
+ return vport;
- /* Get tp_src and tp_dst, refert to geneve_build_header().
- */
- return ovs_tunnel_get_egress_info(egress_tun_info,
- ovs_dp_get_net(vport->dp),
- OVS_CB(skb)->egress_tun_info,
- IPPROTO_UDP, skb->mark, sport, dport);
+ return ovs_netdev_link(vport, parms->name);
}
static struct vport_ops ovs_geneve_vport_ops = {
.type = OVS_VPORT_TYPE_GENEVE,
- .create = geneve_tnl_create,
- .destroy = geneve_tnl_destroy,
- .get_name = geneve_get_name,
+ .create = geneve_create,
+ .destroy = ovs_netdev_tunnel_destroy,
.get_options = geneve_get_options,
- .send = geneve_tnl_send,
+ .send = ovs_netdev_send,
.owner = THIS_MODULE,
.get_egress_tun_info = geneve_get_egress_tun_info,
};
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index c058bbf876c3..80b3e12ec882 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -135,7 +135,7 @@ static void do_setup(struct net_device *netdev)
netdev->netdev_ops = &internal_dev_netdev_ops;
netdev->priv_flags &= ~IFF_TX_SKB_SHARING;
- netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH;
netdev->destructor = internal_dev_destructor;
netdev->ethtool_ops = &internal_dev_ethtool_ops;
netdev->rtnl_link_ops = &internal_dev_link_ops;
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 4b70aaa4a746..a75011505039 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -57,7 +57,7 @@ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb)
skb_push(skb, ETH_HLEN);
ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
- ovs_vport_receive(vport, skb, skb_tunnel_info(skb, AF_INET));
+ ovs_vport_receive(vport, skb, skb_tunnel_info(skb));
return;
error:
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index d14f59403c5e..40164037928e 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -484,6 +484,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
OVS_CB(skb)->input_vport = vport;
OVS_CB(skb)->egress_tun_info = NULL;
+ OVS_CB(skb)->mru = 0;
/* Extract flow from 'skb' into 'key'. */
error = ovs_flow_key_extract(tun_info, skb, &key);
if (unlikely(error)) {
@@ -586,6 +587,8 @@ int ovs_tunnel_get_egress_info(struct ip_tunnel_info *egress_tun_info,
if (unlikely(!tun_info))
return -EINVAL;
+ if (ip_tunnel_info_af(tun_info) != AF_INET)
+ return -EINVAL;
tun_key = &tun_info->key;
@@ -603,9 +606,9 @@ int ovs_tunnel_get_egress_info(struct ip_tunnel_info *egress_tun_info,
* saddr, tp_src and tp_dst
*/
__ip_tunnel_info_init(egress_tun_info,
- fl.saddr, tun_key->ipv4_dst,
- tun_key->ipv4_tos,
- tun_key->ipv4_ttl,
+ fl.saddr, tun_key->u.ipv4.dst,
+ tun_key->tos,
+ tun_key->ttl,
tp_src, tp_dst,
tun_key->tun_id,
tun_key->tun_flags,
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index 1a689c28b5a6..b88b3ee86f07 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -254,9 +254,9 @@ static inline struct rtable *ovs_tunnel_route_lookup(struct net *net,
struct rtable *rt;
memset(fl, 0, sizeof(*fl));
- fl->daddr = key->ipv4_dst;
- fl->saddr = key->ipv4_src;
- fl->flowi4_tos = RT_TOS(key->ipv4_tos);
+ fl->daddr = key->u.ipv4.dst;
+ fl->saddr = key->u.ipv4.src;
+ fl->flowi4_tos = RT_TOS(key->tos);
fl->flowi4_mark = mark;
fl->flowi4_proto = protocol;
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 896834cd3b9a..a2f28a6d4dc5 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -438,6 +438,14 @@ static const struct proto_ops rds_proto_ops = {
.sendpage = sock_no_sendpage,
};
+static void rds_sock_destruct(struct sock *sk)
+{
+ struct rds_sock *rs = rds_sk_to_rs(sk);
+
+ WARN_ON((&rs->rs_item != rs->rs_item.next ||
+ &rs->rs_item != rs->rs_item.prev));
+}
+
static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
{
struct rds_sock *rs;
@@ -445,6 +453,7 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
sock_init_data(sock, sk);
sock->ops = &rds_proto_ops;
sk->sk_protocol = protocol;
+ sk->sk_destruct = rds_sock_destruct;
rs = rds_sk_to_rs(sk);
spin_lock_init(&rs->rs_lock);
diff --git a/net/rds/connection.c b/net/rds/connection.c
index d4fecb21ca25..a50e652eb269 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -301,6 +301,8 @@ void rds_conn_shutdown(struct rds_connection *conn)
wait_event(conn->c_waitq,
!test_bit(RDS_IN_XMIT, &conn->c_flags));
+ wait_event(conn->c_waitq,
+ !test_bit(RDS_RECV_REFILL, &conn->c_flags));
conn->c_trans->conn_shutdown(conn);
rds_conn_reset(conn);
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 13814227b3b2..d020fade312c 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -366,6 +366,7 @@ void rds_ib_exit(void)
rds_ib_sysctl_exit();
rds_ib_recv_exit();
rds_trans_unregister(&rds_ib_transport);
+ rds_ib_fmr_exit();
}
struct rds_transport rds_ib_transport = {
@@ -401,10 +402,14 @@ int rds_ib_init(void)
INIT_LIST_HEAD(&rds_ib_devices);
- ret = ib_register_client(&rds_ib_client);
+ ret = rds_ib_fmr_init();
if (ret)
goto out;
+ ret = ib_register_client(&rds_ib_client);
+ if (ret)
+ goto out_fmr_exit;
+
ret = rds_ib_sysctl_init();
if (ret)
goto out_ibreg;
@@ -427,6 +432,8 @@ out_sysctl:
rds_ib_sysctl_exit();
out_ibreg:
rds_ib_unregister_client();
+out_fmr_exit:
+ rds_ib_fmr_exit();
out:
return ret;
}
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 86d88ec5d556..9fc95e38659a 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -313,6 +313,8 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
void rds_ib_sync_mr(void *trans_private, int dir);
void rds_ib_free_mr(void *trans_private, int invalidate);
void rds_ib_flush_mrs(void);
+int rds_ib_fmr_init(void);
+void rds_ib_fmr_exit(void);
/* ib_recv.c */
int rds_ib_recv_init(void);
@@ -320,7 +322,7 @@ void rds_ib_recv_exit(void);
int rds_ib_recv(struct rds_connection *conn);
int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic);
void rds_ib_recv_free_caches(struct rds_ib_connection *ic);
-void rds_ib_recv_refill(struct rds_connection *conn, int prefill);
+void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp);
void rds_ib_inc_free(struct rds_incoming *inc);
int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to);
void rds_ib_recv_cq_comp_handler(struct ib_cq *cq, void *context);
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index f40d8f52b753..d150bb4aa3cb 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -135,7 +135,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
rds_ib_recv_init_ring(ic);
/* Post receive buffers - as a side effect, this will update
* the posted credit count. */
- rds_ib_recv_refill(conn, 1);
+ rds_ib_recv_refill(conn, 1, GFP_KERNEL);
/* Tune RNR behavior */
rds_ib_tune_rnr(ic, &qp_attr);
@@ -640,6 +640,15 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
(atomic_read(&ic->i_signaled_sends) == 0));
tasklet_kill(&ic->i_recv_tasklet);
+ /* first destroy the ib state that generates callbacks */
+ if (ic->i_cm_id->qp)
+ rdma_destroy_qp(ic->i_cm_id);
+ if (ic->i_send_cq)
+ ib_destroy_cq(ic->i_send_cq);
+ if (ic->i_recv_cq)
+ ib_destroy_cq(ic->i_recv_cq);
+
+ /* then free the resources that ib callbacks use */
if (ic->i_send_hdrs)
ib_dma_free_coherent(dev,
ic->i_send_ring.w_nr *
@@ -663,12 +672,6 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
if (ic->i_recvs)
rds_ib_recv_clear_ring(ic);
- if (ic->i_cm_id->qp)
- rdma_destroy_qp(ic->i_cm_id);
- if (ic->i_send_cq)
- ib_destroy_cq(ic->i_send_cq);
- if (ic->i_recv_cq)
- ib_destroy_cq(ic->i_recv_cq);
rdma_destroy_id(ic->i_cm_id);
/*
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 657ba9f5d308..251d1ce0b7c7 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -83,6 +83,25 @@ struct rds_ib_mr_pool {
struct ib_fmr_attr fmr_attr;
};
+struct workqueue_struct *rds_ib_fmr_wq;
+
+int rds_ib_fmr_init(void)
+{
+ rds_ib_fmr_wq = create_workqueue("rds_fmr_flushd");
+ if (!rds_ib_fmr_wq)
+ return -ENOMEM;
+ return 0;
+}
+
+/* By the time this is called all the IB devices should have been torn down and
+ * had their pools freed. As each pool is freed its work struct is waited on,
+ * so the pool flushing work queue should be idle by the time we get here.
+ */
+void rds_ib_fmr_exit(void)
+{
+ destroy_workqueue(rds_ib_fmr_wq);
+}
+
static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all, struct rds_ib_mr **);
static void rds_ib_teardown_mr(struct rds_ib_mr *ibmr);
static void rds_ib_mr_pool_flush_worker(struct work_struct *work);
@@ -151,12 +170,17 @@ int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
struct rds_ib_device *rds_ibdev_old;
rds_ibdev_old = rds_ib_get_device(ipaddr);
- if (rds_ibdev_old) {
+ if (!rds_ibdev_old)
+ return rds_ib_add_ipaddr(rds_ibdev, ipaddr);
+
+ if (rds_ibdev_old != rds_ibdev) {
rds_ib_remove_ipaddr(rds_ibdev_old, ipaddr);
rds_ib_dev_put(rds_ibdev_old);
+ return rds_ib_add_ipaddr(rds_ibdev, ipaddr);
}
+ rds_ib_dev_put(rds_ibdev_old);
- return rds_ib_add_ipaddr(rds_ibdev, ipaddr);
+ return 0;
}
void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn)
@@ -336,8 +360,6 @@ static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev)
goto out_no_cigar;
}
- memset(ibmr, 0, sizeof(*ibmr));
-
ibmr->fmr = ib_alloc_fmr(rds_ibdev->pd,
(IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_READ |
@@ -485,7 +507,7 @@ static void __rds_ib_teardown_mr(struct rds_ib_mr *ibmr)
/* FIXME we need a way to tell a r/w MR
* from a r/o MR */
- BUG_ON(irqs_disabled());
+ WARN_ON(!page->mapping && irqs_disabled());
set_page_dirty(page);
put_page(page);
}
@@ -523,11 +545,13 @@ static inline unsigned int rds_ib_flush_goal(struct rds_ib_mr_pool *pool, int fr
/*
* given an llist of mrs, put them all into the list_head for more processing
*/
-static void llist_append_to_list(struct llist_head *llist, struct list_head *list)
+static unsigned int llist_append_to_list(struct llist_head *llist,
+ struct list_head *list)
{
struct rds_ib_mr *ibmr;
struct llist_node *node;
struct llist_node *next;
+ unsigned int count = 0;
node = llist_del_all(llist);
while (node) {
@@ -535,7 +559,9 @@ static void llist_append_to_list(struct llist_head *llist, struct list_head *lis
ibmr = llist_entry(node, struct rds_ib_mr, llnode);
list_add_tail(&ibmr->unmap_list, list);
node = next;
+ count++;
}
+ return count;
}
/*
@@ -576,7 +602,7 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
LIST_HEAD(unmap_list);
LIST_HEAD(fmr_list);
unsigned long unpinned = 0;
- unsigned int nfreed = 0, ncleaned = 0, free_goal;
+ unsigned int nfreed = 0, dirty_to_clean = 0, free_goal;
int ret = 0;
rds_ib_stats_inc(s_ib_rdma_mr_pool_flush);
@@ -618,8 +644,8 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
/* Get the list of all MRs to be dropped. Ordering matters -
* we want to put drop_list ahead of free_list.
*/
- llist_append_to_list(&pool->drop_list, &unmap_list);
- llist_append_to_list(&pool->free_list, &unmap_list);
+ dirty_to_clean = llist_append_to_list(&pool->drop_list, &unmap_list);
+ dirty_to_clean += llist_append_to_list(&pool->free_list, &unmap_list);
if (free_all)
llist_append_to_list(&pool->clean_list, &unmap_list);
@@ -647,7 +673,6 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
kfree(ibmr);
nfreed++;
}
- ncleaned++;
}
if (!list_empty(&unmap_list)) {
@@ -673,7 +698,7 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
}
atomic_sub(unpinned, &pool->free_pinned);
- atomic_sub(ncleaned, &pool->dirty_count);
+ atomic_sub(dirty_to_clean, &pool->dirty_count);
atomic_sub(nfreed, &pool->item_count);
out:
@@ -710,16 +735,18 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
/* If we've pinned too many pages, request a flush */
if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned ||
- atomic_read(&pool->dirty_count) >= pool->max_items / 10)
- schedule_delayed_work(&pool->flush_worker, 10);
+ atomic_read(&pool->dirty_count) >= pool->max_items / 5)
+ queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10);
if (invalidate) {
if (likely(!in_interrupt())) {
rds_ib_flush_mr_pool(pool, 0, NULL);
} else {
/* We get here if the user created a MR marked
- * as use_once and invalidate at the same time. */
- schedule_delayed_work(&pool->flush_worker, 10);
+ * as use_once and invalidate at the same time.
+ */
+ queue_delayed_work(rds_ib_fmr_wq,
+ &pool->flush_worker, 10);
}
}
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index cac5b4506ee3..6bbe62060060 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -297,7 +297,7 @@ static struct rds_page_frag *rds_ib_refill_one_frag(struct rds_ib_connection *ic
}
static int rds_ib_recv_refill_one(struct rds_connection *conn,
- struct rds_ib_recv_work *recv, int prefill)
+ struct rds_ib_recv_work *recv, gfp_t gfp)
{
struct rds_ib_connection *ic = conn->c_transport_data;
struct ib_sge *sge;
@@ -305,7 +305,7 @@ static int rds_ib_recv_refill_one(struct rds_connection *conn,
gfp_t slab_mask = GFP_NOWAIT;
gfp_t page_mask = GFP_NOWAIT;
- if (prefill) {
+ if (gfp & __GFP_WAIT) {
slab_mask = GFP_KERNEL;
page_mask = GFP_HIGHUSER;
}
@@ -347,6 +347,24 @@ out:
return ret;
}
+static int acquire_refill(struct rds_connection *conn)
+{
+ return test_and_set_bit(RDS_RECV_REFILL, &conn->c_flags) == 0;
+}
+
+static void release_refill(struct rds_connection *conn)
+{
+ clear_bit(RDS_RECV_REFILL, &conn->c_flags);
+
+ /* We don't use wait_on_bit()/wake_up_bit() because our waking is in a
+ * hot path and finding waiters is very rare. We don't want to walk
+ * the system-wide hashed waitqueue buckets in the fast path only to
+ * almost never find waiters.
+ */
+ if (waitqueue_active(&conn->c_waitq))
+ wake_up_all(&conn->c_waitq);
+}
+
/*
* This tries to allocate and post unused work requests after making sure that
* they have all the allocations they need to queue received fragments into
@@ -354,15 +372,23 @@ out:
*
* -1 is returned if posting fails due to temporary resource exhaustion.
*/
-void rds_ib_recv_refill(struct rds_connection *conn, int prefill)
+void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
{
struct rds_ib_connection *ic = conn->c_transport_data;
struct rds_ib_recv_work *recv;
struct ib_recv_wr *failed_wr;
unsigned int posted = 0;
int ret = 0;
+ bool can_wait = !!(gfp & __GFP_WAIT);
u32 pos;
+ /* the goal here is to just make sure that someone, somewhere
+ * is posting buffers. If we can't get the refill lock,
+ * let them do their thing
+ */
+ if (!acquire_refill(conn))
+ return;
+
while ((prefill || rds_conn_up(conn)) &&
rds_ib_ring_alloc(&ic->i_recv_ring, 1, &pos)) {
if (pos >= ic->i_recv_ring.w_nr) {
@@ -372,7 +398,7 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill)
}
recv = &ic->i_recvs[pos];
- ret = rds_ib_recv_refill_one(conn, recv, prefill);
+ ret = rds_ib_recv_refill_one(conn, recv, gfp);
if (ret) {
break;
}
@@ -402,6 +428,24 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill)
if (ret)
rds_ib_ring_unalloc(&ic->i_recv_ring, 1);
+
+ release_refill(conn);
+
+ /* if we're called from the softirq handler, we'll be GFP_NOWAIT.
+ * in this case the ring being low is going to lead to more interrupts
+ * and we can safely let the softirq code take care of it unless the
+ * ring is completely empty.
+ *
+ * if we're called from krdsd, we'll be GFP_KERNEL. In this case
+ * we might have raced with the softirq code while we had the refill
+ * lock held. Use rds_ib_ring_low() instead of ring_empty to decide
+ * if we should requeue.
+ */
+ if (rds_conn_up(conn) &&
+ ((can_wait && rds_ib_ring_low(&ic->i_recv_ring)) ||
+ rds_ib_ring_empty(&ic->i_recv_ring))) {
+ queue_delayed_work(rds_wq, &conn->c_recv_w, 1);
+ }
}
/*
@@ -982,10 +1026,17 @@ static inline void rds_poll_cq(struct rds_ib_connection *ic,
}
/*
- * It's very important that we only free this ring entry if we've truly
- * freed the resources allocated to the entry. The refilling path can
- * leak if we don't.
+ * rds_ib_process_recv() doesn't always consume the frag, and
+ * we might not have called it at all if the wc didn't indicate
+ * success. We already unmapped the frag's pages, though, and
+ * the following rds_ib_ring_free() call tells the refill path
+ * that it will not find an allocated frag here. Make sure we
+ * keep that promise by freeing a frag that's still on the ring.
*/
+ if (recv->r_frag) {
+ rds_ib_frag_free(ic, recv->r_frag);
+ recv->r_frag = NULL;
+ }
rds_ib_ring_free(&ic->i_recv_ring, 1);
}
}
@@ -1016,7 +1067,7 @@ void rds_ib_recv_tasklet_fn(unsigned long data)
rds_ib_stats_inc(s_ib_rx_ring_empty);
if (rds_ib_ring_low(&ic->i_recv_ring))
- rds_ib_recv_refill(conn, 0);
+ rds_ib_recv_refill(conn, 0, GFP_NOWAIT);
}
int rds_ib_recv(struct rds_connection *conn)
@@ -1025,8 +1076,10 @@ int rds_ib_recv(struct rds_connection *conn)
int ret = 0;
rdsdebug("conn %p\n", conn);
- if (rds_conn_up(conn))
+ if (rds_conn_up(conn)) {
rds_ib_attempt_ack(ic);
+ rds_ib_recv_refill(conn, 0, GFP_KERNEL);
+ }
return ret;
}
@@ -1049,9 +1102,10 @@ int rds_ib_recv_init(void)
rds_ib_frag_slab = kmem_cache_create("rds_ib_frag",
sizeof(struct rds_page_frag),
0, SLAB_HWCACHE_ALIGN, NULL);
- if (!rds_ib_frag_slab)
+ if (!rds_ib_frag_slab) {
kmem_cache_destroy(rds_ib_incoming_slab);
- else
+ rds_ib_incoming_slab = NULL;
+ } else
ret = 0;
out:
return ret;
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 5d0a704fa039..c576ebeb4115 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -709,6 +709,11 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
if (scat == &rm->data.op_sg[rm->data.op_count]) {
prev->s_op = ic->i_data_op;
prev->s_wr.send_flags |= IB_SEND_SOLICITED;
+ if (!(prev->s_wr.send_flags & IB_SEND_SIGNALED)) {
+ ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
+ prev->s_wr.send_flags |= IB_SEND_SIGNALED;
+ nr_sig++;
+ }
ic->i_data_op = NULL;
}
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 40084d843e9f..4c93badeabf2 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -435,9 +435,10 @@ void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force)
/* If the MR was marked as invalidate, this will
* trigger an async flush. */
- if (zot_me)
+ if (zot_me) {
rds_destroy_mr(mr);
- rds_mr_put(mr);
+ rds_mr_put(mr);
+ }
}
void rds_rdma_free_op(struct rm_rdma_op *ro)
@@ -451,7 +452,7 @@ void rds_rdma_free_op(struct rm_rdma_op *ro)
* is the case for a RDMA_READ which copies from remote
* to local memory */
if (!ro->op_write) {
- BUG_ON(irqs_disabled());
+ WARN_ON(!page->mapping && irqs_disabled());
set_page_dirty(page);
}
put_page(page);
@@ -658,6 +659,8 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
ret = rds_pin_pages(iov->addr, nr, pages, !op->op_write);
if (ret < 0)
goto out;
+ else
+ ret = 0;
rdsdebug("RDS: nr_bytes %u nr %u iov->bytes %llu iov->addr %llx\n",
nr_bytes, nr, iov->bytes, iov->addr);
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index 208240836043..b9b40af5345b 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -34,6 +34,7 @@
#include <rdma/rdma_cm.h>
#include "rdma_transport.h"
+#include "ib.h"
static struct rdma_cm_id *rds_rdma_listen_id;
@@ -82,8 +83,18 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
break;
case RDMA_CM_EVENT_ROUTE_RESOLVED:
- /* XXX worry about racing with listen acceptance */
- ret = trans->cm_initiate_connect(cm_id);
+ /* Connection could have been dropped so make sure the
+ * cm_id is valid before proceeding
+ */
+ if (conn) {
+ struct rds_ib_connection *ibic;
+
+ ibic = conn->c_transport_data;
+ if (ibic && ibic->i_cm_id == cm_id)
+ ret = trans->cm_initiate_connect(cm_id);
+ else
+ rds_conn_drop(conn);
+ }
break;
case RDMA_CM_EVENT_ESTABLISHED:
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 9005fb0586f6..afb4048d0cfd 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -80,6 +80,7 @@ enum {
#define RDS_LL_SEND_FULL 0
#define RDS_RECONNECT_PENDING 1
#define RDS_IN_XMIT 2
+#define RDS_RECV_REFILL 3
struct rds_connection {
struct hlist_node c_hash_node;
diff --git a/net/rds/send.c b/net/rds/send.c
index 2581b8e3dbe7..4df61a515b83 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -282,26 +282,34 @@ restart:
/* The transport either sends the whole rdma or none of it */
if (rm->rdma.op_active && !conn->c_xmit_rdma_sent) {
rm->m_final_op = &rm->rdma;
+ /* The transport owns the mapped memory for now.
+ * You can't unmap it while it's on the send queue
+ */
+ set_bit(RDS_MSG_MAPPED, &rm->m_flags);
ret = conn->c_trans->xmit_rdma(conn, &rm->rdma);
- if (ret)
+ if (ret) {
+ clear_bit(RDS_MSG_MAPPED, &rm->m_flags);
+ wake_up_interruptible(&rm->m_flush_wait);
break;
+ }
conn->c_xmit_rdma_sent = 1;
- /* The transport owns the mapped memory for now.
- * You can't unmap it while it's on the send queue */
- set_bit(RDS_MSG_MAPPED, &rm->m_flags);
}
if (rm->atomic.op_active && !conn->c_xmit_atomic_sent) {
rm->m_final_op = &rm->atomic;
+ /* The transport owns the mapped memory for now.
+ * You can't unmap it while it's on the send queue
+ */
+ set_bit(RDS_MSG_MAPPED, &rm->m_flags);
ret = conn->c_trans->xmit_atomic(conn, &rm->atomic);
- if (ret)
+ if (ret) {
+ clear_bit(RDS_MSG_MAPPED, &rm->m_flags);
+ wake_up_interruptible(&rm->m_flush_wait);
break;
+ }
conn->c_xmit_atomic_sent = 1;
- /* The transport owns the mapped memory for now.
- * You can't unmap it while it's on the send queue */
- set_bit(RDS_MSG_MAPPED, &rm->m_flags);
}
/*
@@ -411,7 +419,8 @@ over_batch:
*/
if (ret == 0) {
smp_mb();
- if (!list_empty(&conn->c_send_queue) &&
+ if ((test_bit(0, &conn->c_map_queued) ||
+ !list_empty(&conn->c_send_queue)) &&
send_gen == conn->c_send_gen) {
rds_stats_inc(s_send_lock_queue_raced);
goto restart;
@@ -769,8 +778,22 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
while (!list_empty(&list)) {
rm = list_entry(list.next, struct rds_message, m_sock_item);
list_del_init(&rm->m_sock_item);
-
rds_message_wait(rm);
+
+ /* Just in case the code above skipped this message
+ * because RDS_MSG_ON_CONN wasn't set, run it again here.
+ * Taking m_rs_lock is the only thing that keeps us
+ * from racing with ack processing.
+ */
+ spin_lock_irqsave(&rm->m_rs_lock, flags);
+
+ spin_lock(&rs->rs_lock);
+ __rds_send_complete(rs, rm, RDS_RDMA_CANCELED);
+ spin_unlock(&rs->rs_lock);
+
+ rm->m_rs = NULL;
+ spin_unlock_irqrestore(&rm->m_rs_lock, flags);
+
rds_message_put(rm);
}
}
@@ -992,6 +1015,11 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
goto out;
}
+ if (payload_len > rds_sk_sndbuf(rs)) {
+ ret = -EMSGSIZE;
+ goto out;
+ }
+
/* size of rm including all sgs */
ret = rds_rm_size(msg, payload_len);
if (ret < 0)
@@ -1064,11 +1092,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
while (!rds_send_queue_rm(rs, conn, rm, rs->rs_bound_port,
dport, &queued)) {
rds_stats_inc(s_send_queue_full);
- /* XXX make sure this is reasonable */
- if (payload_len > rds_sk_sndbuf(rs)) {
- ret = -EMSGSIZE;
- goto out;
- }
+
if (nonblock) {
ret = -EAGAIN;
goto out;
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index b087087ccfa9..06e7c4a37245 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -36,7 +36,7 @@ static void free_tcf(struct rcu_head *head)
kfree(p);
}
-void tcf_hash_destroy(struct tc_action *a)
+static void tcf_hash_destroy(struct tc_action *a)
{
struct tcf_common *p = a->priv;
struct tcf_hashinfo *hinfo = a->ops->hinfo;
@@ -52,7 +52,6 @@ void tcf_hash_destroy(struct tc_action *a)
*/
call_rcu(&p->tcfc_rcu, free_tcf);
}
-EXPORT_SYMBOL(tcf_hash_destroy);
int __tcf_hash_release(struct tc_action *a, bool bind, bool strict)
{
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 1b97dabc621a..559bfa011bda 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -37,25 +37,24 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
struct tcf_result *res)
{
struct tcf_bpf *prog = act->priv;
+ struct bpf_prog *filter;
int action, filter_res;
bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS;
if (unlikely(!skb_mac_header_was_set(skb)))
return TC_ACT_UNSPEC;
- spin_lock(&prog->tcf_lock);
-
- prog->tcf_tm.lastuse = jiffies;
- bstats_update(&prog->tcf_bstats, skb);
+ tcf_lastuse_update(&prog->tcf_tm);
+ bstats_cpu_update(this_cpu_ptr(prog->common.cpu_bstats), skb);
- /* Needed here for accessing maps. */
rcu_read_lock();
+ filter = rcu_dereference(prog->filter);
if (at_ingress) {
__skb_push(skb, skb->mac_len);
- filter_res = BPF_PROG_RUN(prog->filter, skb);
+ filter_res = BPF_PROG_RUN(filter, skb);
__skb_pull(skb, skb->mac_len);
} else {
- filter_res = BPF_PROG_RUN(prog->filter, skb);
+ filter_res = BPF_PROG_RUN(filter, skb);
}
rcu_read_unlock();
@@ -77,7 +76,7 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
break;
case TC_ACT_SHOT:
action = filter_res;
- prog->tcf_qstats.drops++;
+ qstats_drop_inc(this_cpu_ptr(prog->common.cpu_qstats));
break;
case TC_ACT_UNSPEC:
action = prog->tcf_action;
@@ -87,7 +86,6 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
break;
}
- spin_unlock(&prog->tcf_lock);
return action;
}
@@ -263,7 +261,10 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
struct tcf_bpf_cfg *cfg)
{
cfg->is_ebpf = tcf_bpf_is_ebpf(prog);
- cfg->filter = prog->filter;
+ /* updates to prog->filter are prevented, since it's called either
+ * with rtnl lock or during final cleanup in rcu callback
+ */
+ cfg->filter = rcu_dereference_protected(prog->filter, 1);
cfg->bpf_ops = prog->bpf_ops;
cfg->bpf_name = prog->bpf_name;
@@ -294,7 +295,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
if (!tcf_hash_check(parm->index, act, bind)) {
ret = tcf_hash_create(parm->index, est, act,
- sizeof(*prog), bind, false);
+ sizeof(*prog), bind, true);
if (ret < 0)
return ret;
@@ -325,9 +326,9 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
goto out;
prog = to_bpf(act);
- spin_lock_bh(&prog->tcf_lock);
+ ASSERT_RTNL();
- if (ret != ACT_P_CREATED)
+ if (res != ACT_P_CREATED)
tcf_bpf_prog_fill_cfg(prog, &old);
prog->bpf_ops = cfg.bpf_ops;
@@ -339,14 +340,15 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
prog->bpf_fd = cfg.bpf_fd;
prog->tcf_action = parm->action;
- prog->filter = cfg.filter;
-
- spin_unlock_bh(&prog->tcf_lock);
+ rcu_assign_pointer(prog->filter, cfg.filter);
- if (res == ACT_P_CREATED)
+ if (res == ACT_P_CREATED) {
tcf_hash_insert(act);
- else
+ } else {
+ /* make sure the program being replaced is no longer executing */
+ synchronize_rcu();
tcf_bpf_cfg_cleanup(&old);
+ }
return res;
out:
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index f2b540220ad0..5019a47b9270 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -37,6 +37,7 @@ static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a,
struct nf_conntrack_tuple tuple;
enum ip_conntrack_info ctinfo;
struct tcf_connmark_info *ca = a->priv;
+ struct nf_conntrack_zone zone;
struct nf_conn *c;
int proto;
@@ -70,7 +71,10 @@ static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a,
proto, &tuple))
goto out;
- thash = nf_conntrack_find_get(dev_net(skb->dev), ca->zone, &tuple);
+ zone.id = ca->zone;
+ zone.dir = NF_CT_DEFAULT_ZONE_DIR;
+
+ thash = nf_conntrack_find_get(dev_net(skb->dev), &zone, &tuple);
if (!thash)
goto out;
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 5be0b3c1c5b0..b7c4ead8b5a8 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -162,7 +162,8 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
goto drop;
tcph = (void *)(skb_network_header(skb) + ihl);
- inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, 1);
+ inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr,
+ true);
break;
}
case IPPROTO_UDP:
@@ -178,7 +179,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
udph = (void *)(skb_network_header(skb) + ihl);
if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
inet_proto_csum_replace4(&udph->check, skb, addr,
- new_addr, 1);
+ new_addr, true);
if (!udph->check)
udph->check = CSUM_MANGLED_0;
}
@@ -231,7 +232,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
iph->saddr = new_addr;
inet_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr,
- 0);
+ false);
break;
}
default:
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 02fa82792dab..f9c9fc075fe6 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -283,12 +283,22 @@ static int rsvp_init(struct tcf_proto *tp)
return -ENOBUFS;
}
-static void
-rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
+static void rsvp_delete_filter_rcu(struct rcu_head *head)
{
- tcf_unbind_filter(tp, &f->res);
+ struct rsvp_filter *f = container_of(head, struct rsvp_filter, rcu);
+
tcf_exts_destroy(&f->exts);
- kfree_rcu(f, rcu);
+ kfree(f);
+}
+
+static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
+{
+ tcf_unbind_filter(tp, &f->res);
+ /* all classifiers are required to call tcf_exts_destroy() after rcu
+ * grace period, since converted-to-rcu actions are relying on that
+ * in cleanup() callback
+ */
+ call_rcu(&f->rcu, rsvp_delete_filter_rcu);
}
static bool rsvp_destroy(struct tcf_proto *tp, bool force)
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index a557dbaf5afe..944c8ff45055 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -27,6 +27,7 @@
struct tcindex_filter_result {
struct tcf_exts exts;
struct tcf_result res;
+ struct rcu_head rcu;
};
struct tcindex_filter {
@@ -133,8 +134,23 @@ static int tcindex_init(struct tcf_proto *tp)
return 0;
}
-static int
-tcindex_delete(struct tcf_proto *tp, unsigned long arg)
+static void tcindex_destroy_rexts(struct rcu_head *head)
+{
+ struct tcindex_filter_result *r;
+
+ r = container_of(head, struct tcindex_filter_result, rcu);
+ tcf_exts_destroy(&r->exts);
+}
+
+static void tcindex_destroy_fexts(struct rcu_head *head)
+{
+ struct tcindex_filter *f = container_of(head, struct tcindex_filter, rcu);
+
+ tcf_exts_destroy(&f->result.exts);
+ kfree(f);
+}
+
+static int tcindex_delete(struct tcf_proto *tp, unsigned long arg)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg;
@@ -162,9 +178,14 @@ found:
rcu_assign_pointer(*walk, rtnl_dereference(f->next));
}
tcf_unbind_filter(tp, &r->res);
- tcf_exts_destroy(&r->exts);
+ /* all classifiers are required to call tcf_exts_destroy() after rcu
+ * grace period, since converted-to-rcu actions are relying on that
+ * in cleanup() callback
+ */
if (f)
- kfree_rcu(f, rcu);
+ call_rcu(&f->rcu, tcindex_destroy_fexts);
+ else
+ call_rcu(&r->rcu, tcindex_destroy_rexts);
return 0;
}
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index cab9e9b43967..4fbb67430ce4 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -490,6 +490,19 @@ static bool u32_destroy(struct tcf_proto *tp, bool force)
return false;
}
}
+
+ if (tp_c->refcnt > 1)
+ return false;
+
+ if (tp_c->refcnt == 1) {
+ struct tc_u_hnode *ht;
+
+ for (ht = rtnl_dereference(tp_c->hlist);
+ ht;
+ ht = rtnl_dereference(ht->next))
+ if (!ht_empty(ht))
+ return false;
+ }
}
if (root_ht && --root_ht->refcnt == 0)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index f06aa01d60fd..f43c8f33f09e 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1806,51 +1806,45 @@ done:
* to this qdisc, (optionally) tests for protocol and asks
* specific classifiers.
*/
-int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res)
+int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+ struct tcf_result *res, bool compat_mode)
{
__be16 protocol = tc_skb_protocol(skb);
- int err;
+#ifdef CONFIG_NET_CLS_ACT
+ const struct tcf_proto *old_tp = tp;
+ int limit = 0;
+reclassify:
+#endif
for (; tp; tp = rcu_dereference_bh(tp->next)) {
+ int err;
+
if (tp->protocol != protocol &&
tp->protocol != htons(ETH_P_ALL))
continue;
- err = tp->classify(skb, tp, res);
+ err = tp->classify(skb, tp, res);
+#ifdef CONFIG_NET_CLS_ACT
+ if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode))
+ goto reset;
+#endif
if (err >= 0)
return err;
}
- return -1;
-}
-EXPORT_SYMBOL(tc_classify_compat);
-int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res)
-{
- int err = 0;
-#ifdef CONFIG_NET_CLS_ACT
- const struct tcf_proto *otp = tp;
- int limit = 0;
-reclassify:
-#endif
-
- err = tc_classify_compat(skb, tp, res);
+ return -1;
#ifdef CONFIG_NET_CLS_ACT
- if (err == TC_ACT_RECLASSIFY) {
- tp = otp;
-
- if (unlikely(limit++ >= MAX_REC_LOOP)) {
- net_notice_ratelimited("%s: packet reclassify loop rule prio %u protocol %02x\n",
- tp->q->ops->id,
- tp->prio & 0xffff,
- ntohs(tp->protocol));
- return TC_ACT_SHOT;
- }
- goto reclassify;
+reset:
+ if (unlikely(limit++ >= MAX_REC_LOOP)) {
+ net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n",
+ tp->q->ops->id, tp->prio & 0xffff,
+ ntohs(tp->protocol));
+ return TC_ACT_SHOT;
}
+
+ tp = old_tp;
+ goto reclassify;
#endif
- return err;
}
EXPORT_SYMBOL(tc_classify);
@@ -1947,6 +1941,7 @@ static int __init pktsched_init(void)
register_qdisc(&bfifo_qdisc_ops);
register_qdisc(&pfifo_head_drop_qdisc_ops);
register_qdisc(&mq_qdisc_ops);
+ register_qdisc(&noqueue_qdisc_ops);
rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL);
rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL);
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index e3e2cc5fd068..1911af3ca7c0 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -375,7 +375,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
list_for_each_entry(flow, &p->flows, list) {
fl = rcu_dereference_bh(flow->filter_list);
if (fl) {
- result = tc_classify_compat(skb, fl, &res);
+ result = tc_classify(skb, fl, &res, true);
if (result < 0)
continue;
flow = (struct atm_flow_data *)res.class;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index beeb75f80fdb..c538d9e4a8f6 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -240,7 +240,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
/*
* Step 2+n. Apply classifier.
*/
- result = tc_classify_compat(skb, fl, &res);
+ result = tc_classify(skb, fl, &res, true);
if (!fl || result < 0)
goto fallback;
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 6a783afe4960..665bde07916b 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -201,7 +201,7 @@ static bool choke_classify(struct sk_buff *skb,
int result;
fl = rcu_dereference_bh(q->filter_list);
- result = tc_classify(skb, fl, &res);
+ result = tc_classify(skb, fl, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 338706092c27..f26bdea875c1 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -331,7 +331,7 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch,
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
fl = rcu_dereference_bh(q->filter_list);
- result = tc_classify(skb, fl, &res);
+ result = tc_classify(skb, fl, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 66700a6116aa..c4d45fd8c551 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -230,7 +230,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
else {
struct tcf_result res;
struct tcf_proto *fl = rcu_dereference_bh(p->filter_list);
- int result = tc_classify(skb, fl, &res);
+ int result = tc_classify(skb, fl, &res, false);
pr_debug("result %d class 0x%04x\n", result, res.classid);
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 2e2398cfc694..2177eac0a61e 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -54,7 +54,7 @@ static int fifo_init(struct Qdisc *sch, struct nlattr *opt)
bool is_bfifo = sch->ops == &bfifo_qdisc_ops;
if (opt == NULL) {
- u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1;
+ u32 limit = qdisc_dev(sch)->tx_queue_len;
if (is_bfifo)
limit *= psched_mtu(qdisc_dev(sch));
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index a9ba030435a2..4c834e93dafb 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -92,7 +92,7 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
return fq_codel_hash(q, skb) + 1;
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- result = tc_classify(skb, filter, &res);
+ result = tc_classify(skb, filter, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 942fea8405a4..cb5d4ad32946 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -416,33 +416,25 @@ struct Qdisc noop_qdisc = {
};
EXPORT_SYMBOL(noop_qdisc);
-static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
+static int noqueue_init(struct Qdisc *qdisc, struct nlattr *opt)
+{
+ /* register_qdisc() assigns a default of noop_enqueue if unset,
+ * but __dev_queue_xmit() treats noqueue only as such
+ * if this is NULL - so clear it here. */
+ qdisc->enqueue = NULL;
+ return 0;
+}
+
+struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
.id = "noqueue",
.priv_size = 0,
+ .init = noqueue_init,
.enqueue = noop_enqueue,
.dequeue = noop_dequeue,
.peek = noop_dequeue,
.owner = THIS_MODULE,
};
-static struct Qdisc noqueue_qdisc;
-static struct netdev_queue noqueue_netdev_queue = {
- .qdisc = &noqueue_qdisc,
- .qdisc_sleeping = &noqueue_qdisc,
-};
-
-static struct Qdisc noqueue_qdisc = {
- .enqueue = NULL,
- .dequeue = noop_dequeue,
- .flags = TCQ_F_BUILTIN,
- .ops = &noqueue_qdisc_ops,
- .list = LIST_HEAD_INIT(noqueue_qdisc.list),
- .q.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
- .dev_queue = &noqueue_netdev_queue,
- .busylock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.busylock),
-};
-
-
static const u8 prio2band[TC_PRIO_MAX + 1] = {
1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1
};
@@ -733,18 +725,19 @@ static void attach_one_default_qdisc(struct net_device *dev,
struct netdev_queue *dev_queue,
void *_unused)
{
- struct Qdisc *qdisc = &noqueue_qdisc;
+ struct Qdisc *qdisc;
+ const struct Qdisc_ops *ops = default_qdisc_ops;
- if (dev->tx_queue_len && !(dev->priv_flags & IFF_NO_QUEUE)) {
- qdisc = qdisc_create_dflt(dev_queue,
- default_qdisc_ops, TC_H_ROOT);
- if (!qdisc) {
- netdev_info(dev, "activation failed\n");
- return;
- }
- if (!netif_is_multiqueue(dev))
- qdisc->flags |= TCQ_F_ONETXQUEUE;
+ if (dev->priv_flags & IFF_NO_QUEUE)
+ ops = &noqueue_qdisc_ops;
+
+ qdisc = qdisc_create_dflt(dev_queue, ops, TC_H_ROOT);
+ if (!qdisc) {
+ netdev_info(dev, "activation failed\n");
+ return;
}
+ if (!netif_is_multiqueue(dev))
+ qdisc->flags |= TCQ_F_ONETXQUEUE;
dev_queue->qdisc_sleeping = qdisc;
}
@@ -756,7 +749,6 @@ static void attach_default_qdiscs(struct net_device *dev)
txq = netdev_get_tx_queue(dev, 0);
if (!netif_is_multiqueue(dev) ||
- dev->tx_queue_len == 0 ||
dev->priv_flags & IFF_NO_QUEUE) {
netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
dev->qdisc = txq->qdisc_sleeping;
@@ -781,7 +773,7 @@ static void transition_one_qdisc(struct net_device *dev,
clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state);
rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
- if (need_watchdog_p && new_qdisc != &noqueue_qdisc) {
+ if (need_watchdog_p) {
dev_queue->trans_start = 0;
*need_watchdog_p = 1;
}
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index abb9f2fec28f..80105109f756 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -512,11 +512,9 @@ static int gred_init(struct Qdisc *sch, struct nlattr *opt)
if (tb[TCA_GRED_LIMIT])
sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]);
- else {
- u32 qlen = qdisc_dev(sch)->tx_queue_len ? : 1;
-
- sch->limit = qlen * psched_mtu(qdisc_dev(sch));
- }
+ else
+ sch->limit = qdisc_dev(sch)->tx_queue_len
+ * psched_mtu(qdisc_dev(sch));
return gred_change_table_def(sch, tb[TCA_GRED_DPS]);
}
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index e6c7416d0332..b7ebe2c87586 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1165,7 +1165,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
head = &q->root;
tcf = rcu_dereference_bh(q->root.filter_list);
- while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
+ while (tcf && (result = tc_classify(skb, tcf, &res, false)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
case TC_ACT_QUEUED:
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index f1acb0f60dc3..15ccd7f8fb2a 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -229,7 +229,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
}
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
+ while (tcf && (result = tc_classify(skb, tcf, &res, false)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
case TC_ACT_QUEUED:
@@ -1048,11 +1048,9 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
if (tb[TCA_HTB_DIRECT_QLEN])
q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
- else {
+ else
q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
- if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
- q->direct_qlen = 2;
- }
+
if ((q->rate2quantum = gopt->rate2quantum) < 1)
q->rate2quantum = 1;
q->defcls = gopt->defcls;
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 42dd218871e0..4e904ca0af9d 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -46,7 +46,7 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
int err;
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- err = tc_classify(skb, fl, &res);
+ err = tc_classify(skb, fl, &res, false);
#ifdef CONFIG_NET_CLS_ACT
switch (err) {
case TC_ACT_STOLEN:
diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c
index ade9445a55ab..5abfe44678d4 100644
--- a/net/sched/sch_plug.c
+++ b/net/sched/sch_plug.c
@@ -130,12 +130,8 @@ static int plug_init(struct Qdisc *sch, struct nlattr *opt)
q->unplug_indefinite = false;
if (opt == NULL) {
- /* We will set a default limit of 100 pkts (~150kB)
- * in case tx_queue_len is not available. The
- * default value is completely arbitrary.
- */
- u32 pkt_limit = qdisc_dev(sch)->tx_queue_len ? : 100;
- q->limit = pkt_limit * psched_mtu(qdisc_dev(sch));
+ q->limit = qdisc_dev(sch)->tx_queue_len
+ * psched_mtu(qdisc_dev(sch));
} else {
struct tc_plug_qopt *ctl = nla_data(opt);
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 8e5cd34aaa74..ba6487f2741f 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -42,7 +42,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
if (TC_H_MAJ(skb->priority) != sch->handle) {
fl = rcu_dereference_bh(q->filter_list);
- err = tc_classify(skb, fl, &res);
+ err = tc_classify(skb, fl, &res, false);
#ifdef CONFIG_NET_CLS_ACT
switch (err) {
case TC_ACT_STOLEN:
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index ffaeea63d473..3dc3a6e56052 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -717,7 +717,7 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
fl = rcu_dereference_bh(q->filter_list);
- result = tc_classify(skb, fl, &res);
+ result = tc_classify(skb, fl, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 4b815193326c..5bbb6332ec57 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -258,7 +258,7 @@ static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl,
struct tcf_result res;
int result;
- result = tc_classify(skb, fl, &res);
+ result = tc_classify(skb, fl, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
@@ -502,7 +502,7 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt)
limit = ctl->limit;
if (limit == 0)
- limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1);
+ limit = qdisc_dev(sch)->tx_queue_len;
child = fifo_create_dflt(sch, &pfifo_qdisc_ops, limit);
if (IS_ERR(child))
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 52f75a5473e1..3abab534eb5c 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -179,7 +179,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
return sfq_hash(q, skb) + 1;
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- result = tc_classify(skb, fl, &res);
+ result = tc_classify(skb, fl, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 06320c8c1c86..a655ddc3f353 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -3132,11 +3132,18 @@ bool sctp_verify_asconf(const struct sctp_association *asoc,
case SCTP_PARAM_IPV4_ADDRESS:
if (length != sizeof(sctp_ipv4addr_param_t))
return false;
+ /* ensure there is only one addr param and it's in the
+ * beginning of addip_hdr params, or we reject it.
+ */
+ if (param.v != addip->addip_hdr.params)
+ return false;
addr_param_seen = true;
break;
case SCTP_PARAM_IPV6_ADDRESS:
if (length != sizeof(sctp_ipv6addr_param_t))
return false;
+ if (param.v != addip->addip_hdr.params)
+ return false;
addr_param_seen = true;
break;
case SCTP_PARAM_ADD_IP:
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index fef2acdf4a2e..85e6f03aeb70 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -702,7 +702,7 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds,
* outstanding data and rely on the retransmission limit be reached
* to shutdown the association.
*/
- if (t->asoc->state != SCTP_STATE_SHUTDOWN_PENDING)
+ if (t->asoc->state < SCTP_STATE_SHUTDOWN_PENDING)
t->asoc->overall_error_count = 0;
/* Clear the hb_sent flag to signal that we had a good
diff --git a/net/tipc/link.c b/net/tipc/link.c
index f067e5425560..75db07c78a69 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -351,11 +351,11 @@ int tipc_link_fsm_evt(struct tipc_link *l, int evt)
l->state = LINK_RESET;
break;
case LINK_ESTABLISH_EVT:
+ case LINK_SYNCH_END_EVT:
break;
case LINK_SYNCH_BEGIN_EVT:
l->state = LINK_SYNCHING;
break;
- case LINK_SYNCH_END_EVT:
case LINK_FAILOVER_BEGIN_EVT:
case LINK_FAILOVER_END_EVT:
default:
@@ -1330,6 +1330,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
u16 peers_snd_nxt = msg_next_sent(hdr);
u16 peers_tol = msg_link_tolerance(hdr);
u16 peers_prio = msg_linkprio(hdr);
+ u16 rcv_nxt = l->rcv_nxt;
char *if_name;
int rc = 0;
@@ -1393,7 +1394,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
break;
/* Send NACK if peer has sent pkts we haven't received yet */
- if (more(peers_snd_nxt, l->rcv_nxt))
+ if (more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l))
rcvgap = peers_snd_nxt - l->rcv_nxt;
if (rcvgap || (msg_probe(hdr)))
tipc_link_build_proto_msg(l, STATE_MSG, 0, rcvgap,
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 7c191641b44f..703875fd6cde 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -423,6 +423,8 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
/* There is still a working link => initiate failover */
tnl = node_active_link(n, 0);
+ tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT);
+ tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT);
n->sync_point = tnl->rcv_nxt + (U16_MAX / 2 - 1);
tipc_link_tnl_prepare(l, tnl, FAILOVER_MSG, xmitq);
tipc_link_reset(l);
@@ -565,6 +567,8 @@ void tipc_node_check_dest(struct net *net, u32 onode,
goto exit;
}
tipc_link_reset(l);
+ if (n->state == NODE_FAILINGOVER)
+ tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT);
le->link = l;
n->link_cnt++;
tipc_node_calculate_timer(n, l);
@@ -1075,7 +1079,7 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
u16 exp_pkts = msg_msgcnt(hdr);
u16 rcv_nxt, syncpt, dlv_nxt;
int state = n->state;
- struct tipc_link *l, *pl = NULL;
+ struct tipc_link *l, *tnl, *pl = NULL;
struct tipc_media_addr *maddr;
int i, pb_id;
@@ -1129,7 +1133,7 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
}
/* Open parallel link when tunnel link reaches synch point */
- if ((n->state == NODE_FAILINGOVER) && !tipc_link_is_failingover(l)) {
+ if ((n->state == NODE_FAILINGOVER) && tipc_link_is_up(l)) {
if (!more(rcv_nxt, n->sync_point))
return true;
tipc_node_fsm_evt(n, NODE_FAILOVER_END_EVT);
@@ -1138,6 +1142,10 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
return true;
}
+ /* No synching needed if only one link */
+ if (!pl || !tipc_link_is_up(pl))
+ return true;
+
/* Initiate or update synch mode if applicable */
if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG)) {
syncpt = iseqno + exp_pkts - 1;
@@ -1156,13 +1164,20 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
/* Open tunnel link when parallel link reaches synch point */
if ((n->state == NODE_SYNCHING) && tipc_link_is_synching(l)) {
- if (pl)
- dlv_nxt = mod(pl->rcv_nxt - skb_queue_len(pl->inputq));
- if (!pl || more(dlv_nxt, n->sync_point)) {
- tipc_link_fsm_evt(l, LINK_SYNCH_END_EVT);
+ if (tipc_link_is_synching(l)) {
+ tnl = l;
+ } else {
+ tnl = pl;
+ pl = l;
+ }
+ dlv_nxt = pl->rcv_nxt - mod(skb_queue_len(pl->inputq));
+ if (more(dlv_nxt, n->sync_point)) {
+ tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT);
tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT);
return true;
}
+ if (l == pl)
+ return true;
if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG))
return true;
if (usr == LINK_PROTOCOL)