diff options
Diffstat (limited to 'net')
218 files changed, 5256 insertions, 2743 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 01d7ba840df8..fded86508117 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -791,10 +791,9 @@ void vlan_setup(struct net_device *dev) { ether_setup(dev); - dev->priv_flags |= IFF_802_1Q_VLAN; + dev->priv_flags |= IFF_802_1Q_VLAN | IFF_NO_QUEUE; dev->priv_flags &= ~IFF_TX_SKB_SHARING; netif_keep_dst(dev); - dev->tx_queue_len = 0; dev->netdev_ops = &vlan_netdev_ops; dev->destructor = vlan_dev_free; diff --git a/net/9p/client.c b/net/9p/client.c index 498454b3c06c..ea79ee9a7348 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1541,6 +1541,7 @@ p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err) struct p9_client *clnt = fid->clnt; struct p9_req_t *req; int total = 0; + *err = 0; p9_debug(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", fid->fid, (unsigned long long) offset, (int)iov_iter_count(to)); @@ -1620,6 +1621,7 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) struct p9_client *clnt = fid->clnt; struct p9_req_t *req; int total = 0; + *err = 0; p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %zd\n", fid->fid, (unsigned long long) offset, diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 753383c2215c..912d9c36fb1c 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -77,8 +77,7 @@ enum batadv_dup_status { * @lq_index: index to store the value at * @value: value to store in the ring buffer */ -static void batadv_ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index, - uint8_t value) +static void batadv_ring_buffer_set(u8 lq_recv[], u8 *lq_index, u8 value) { lq_recv[*lq_index] = value; *lq_index = (*lq_index + 1) % BATADV_TQ_GLOBAL_WINDOW_SIZE; @@ -91,12 +90,12 @@ static void batadv_ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index, * * Returns computed average value. */ -static uint8_t batadv_ring_buffer_avg(const uint8_t lq_recv[]) +static u8 batadv_ring_buffer_avg(const u8 lq_recv[]) { - const uint8_t *ptr; - uint16_t count = 0; - uint16_t i = 0; - uint16_t sum = 0; + const u8 *ptr; + u16 count = 0; + u16 i = 0; + u16 sum = 0; ptr = lq_recv; @@ -113,7 +112,7 @@ static uint8_t batadv_ring_buffer_avg(const uint8_t lq_recv[]) if (count == 0) return 0; - return (uint8_t)(sum / count); + return (u8)(sum / count); } /** @@ -155,14 +154,14 @@ static int batadv_iv_ogm_orig_add_if(struct batadv_orig_node *orig_node, kfree(orig_node->bat_iv.bcast_own); orig_node->bat_iv.bcast_own = data_ptr; - data_ptr = kmalloc_array(max_if_num, sizeof(uint8_t), GFP_ATOMIC); + data_ptr = kmalloc_array(max_if_num, sizeof(u8), GFP_ATOMIC); if (!data_ptr) { kfree(orig_node->bat_iv.bcast_own); goto unlock; } memcpy(data_ptr, orig_node->bat_iv.bcast_own_sum, - (max_if_num - 1) * sizeof(uint8_t)); + (max_if_num - 1) * sizeof(u8)); kfree(orig_node->bat_iv.bcast_own_sum); orig_node->bat_iv.bcast_own_sum = data_ptr; @@ -215,19 +214,19 @@ free_bcast_own: if (max_if_num == 0) goto free_own_sum; - data_ptr = kmalloc_array(max_if_num, sizeof(uint8_t), GFP_ATOMIC); + data_ptr = kmalloc_array(max_if_num, sizeof(u8), GFP_ATOMIC); if (!data_ptr) { kfree(orig_node->bat_iv.bcast_own); goto unlock; } memcpy(data_ptr, orig_node->bat_iv.bcast_own_sum, - del_if_num * sizeof(uint8_t)); + del_if_num * sizeof(u8)); - if_offset = (del_if_num + 1) * sizeof(uint8_t); - memcpy((char *)data_ptr + del_if_num * sizeof(uint8_t), + if_offset = (del_if_num + 1) * sizeof(u8); + memcpy((char *)data_ptr + del_if_num * sizeof(u8), orig_node->bat_iv.bcast_own_sum + if_offset, - (max_if_num - del_if_num) * sizeof(uint8_t)); + (max_if_num - del_if_num) * sizeof(u8)); free_own_sum: kfree(orig_node->bat_iv.bcast_own_sum); @@ -250,7 +249,7 @@ unlock: * If the object does not exists it is created an initialised. */ static struct batadv_orig_node * -batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const uint8_t *addr) +batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const u8 *addr) { struct batadv_orig_node *orig_node; int size, hash_added; @@ -270,7 +269,7 @@ batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const uint8_t *addr) if (!orig_node->bat_iv.bcast_own) goto free_orig_node; - size = bat_priv->num_ifaces * sizeof(uint8_t); + size = bat_priv->num_ifaces * sizeof(u8); orig_node->bat_iv.bcast_own_sum = kzalloc(size, GFP_ATOMIC); if (!orig_node->bat_iv.bcast_own_sum) goto free_orig_node; @@ -293,43 +292,17 @@ free_orig_node: static struct batadv_neigh_node * batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface, - const uint8_t *neigh_addr, + const u8 *neigh_addr, struct batadv_orig_node *orig_node, struct batadv_orig_node *orig_neigh) { - struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); - struct batadv_neigh_node *neigh_node, *tmp_neigh_node; + struct batadv_neigh_node *neigh_node; - neigh_node = batadv_neigh_node_new(hard_iface, neigh_addr, orig_node); + neigh_node = batadv_neigh_node_new(orig_node, hard_iface, neigh_addr); if (!neigh_node) goto out; - if (!atomic_inc_not_zero(&hard_iface->refcount)) { - kfree(neigh_node); - neigh_node = NULL; - goto out; - } - neigh_node->orig_node = orig_neigh; - neigh_node->if_incoming = hard_iface; - - spin_lock_bh(&orig_node->neigh_list_lock); - tmp_neigh_node = batadv_neigh_node_get(orig_node, hard_iface, - neigh_addr); - if (!tmp_neigh_node) { - hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list); - } else { - kfree(neigh_node); - batadv_hardif_free_ref(hard_iface); - neigh_node = tmp_neigh_node; - } - spin_unlock_bh(&orig_node->neigh_list_lock); - - if (!tmp_neigh_node) - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Creating new neighbor %pM for orig_node %pM on interface %s\n", - neigh_addr, orig_node->orig, - hard_iface->net_dev->name); out: return neigh_node; @@ -339,7 +312,7 @@ static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface) { struct batadv_ogm_packet *batadv_ogm_packet; unsigned char *ogm_buff; - uint32_t random_seqno; + u32 random_seqno; /* randomize initial seqno to avoid collision */ get_random_bytes(&random_seqno, sizeof(random_seqno)); @@ -411,8 +384,7 @@ static unsigned long batadv_iv_ogm_fwd_send_time(void) } /* apply hop penalty for a normal link */ -static uint8_t batadv_hop_penalty(uint8_t tq, - const struct batadv_priv *bat_priv) +static u8 batadv_hop_penalty(u8 tq, const struct batadv_priv *bat_priv) { int hop_penalty = atomic_read(&bat_priv->hop_penalty); int new_tq; @@ -442,11 +414,11 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet, { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); const char *fwd_str; - uint8_t packet_num; - int16_t buff_pos; + u8 packet_num; + s16 buff_pos; struct batadv_ogm_packet *batadv_ogm_packet; struct sk_buff *skb; - uint8_t *packet_pos; + u8 *packet_pos; if (hard_iface->if_status != BATADV_IF_ACTIVE) return; @@ -837,7 +809,7 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node, struct batadv_hard_iface *if_outgoing) { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); - uint16_t tvlv_len; + u16 tvlv_len; if (batadv_ogm_packet->ttl <= 1) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "ttl exceeded\n"); @@ -896,9 +868,9 @@ batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) struct hlist_head *head; struct batadv_orig_node *orig_node; unsigned long *word; - uint32_t i; + u32 i; size_t word_index; - uint8_t *w; + u8 *w; int if_num; for (i = 0; i < hash->size; i++) { @@ -927,8 +899,8 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) struct batadv_ogm_packet *batadv_ogm_packet; struct batadv_hard_iface *primary_if, *tmp_hard_iface; int *ogm_buff_len = &hard_iface->bat_iv.ogm_buff_len; - uint32_t seqno; - uint16_t tvlv_len = 0; + u32 seqno; + u16 tvlv_len = 0; unsigned long send_time; primary_if = batadv_primary_if_get_selected(bat_priv); @@ -947,7 +919,7 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) batadv_ogm_packet->tvlv_len = htons(tvlv_len); /* change sequence number to network order */ - seqno = (uint32_t)atomic_read(&hard_iface->bat_iv.ogm_seqno); + seqno = (u32)atomic_read(&hard_iface->bat_iv.ogm_seqno); batadv_ogm_packet->seqno = htonl(seqno); atomic_inc(&hard_iface->bat_iv.ogm_seqno); @@ -970,7 +942,7 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) rcu_read_lock(); list_for_each_entry_rcu(tmp_hard_iface, &batadv_hardif_list, list) { if (tmp_hard_iface->soft_iface != hard_iface->soft_iface) - continue; + continue; batadv_iv_ogm_queue_add(bat_priv, *ogm_buff, *ogm_buff_len, hard_iface, tmp_hard_iface, 1, send_time); @@ -1006,13 +978,14 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, { struct batadv_neigh_ifinfo *neigh_ifinfo = NULL; struct batadv_neigh_ifinfo *router_ifinfo = NULL; - struct batadv_neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL; + struct batadv_neigh_node *neigh_node = NULL; + struct batadv_neigh_node *tmp_neigh_node = NULL; struct batadv_neigh_node *router = NULL; struct batadv_orig_node *orig_node_tmp; int if_num; - uint8_t sum_orig, sum_neigh; - uint8_t *neigh_addr; - uint8_t tq_avg; + u8 sum_orig, sum_neigh; + u8 *neigh_addr; + u8 tq_avg; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "update_originator(): Searching and updating originator entry of received packet\n"); @@ -1164,8 +1137,8 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct batadv_neigh_node *neigh_node = NULL, *tmp_neigh_node; struct batadv_neigh_ifinfo *neigh_ifinfo; - uint8_t total_count; - uint8_t orig_eq_count, neigh_rq_count, neigh_rq_inv, tq_own; + u8 total_count; + u8 orig_eq_count, neigh_rq_count, neigh_rq_inv, tq_own; unsigned int neigh_rq_inv_cube, neigh_rq_max_cube; int tq_asym_penalty, inv_asym_penalty, if_num, ret = 0; unsigned int combined_tq; @@ -1311,13 +1284,13 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, struct batadv_neigh_node *neigh_node; struct batadv_neigh_ifinfo *neigh_ifinfo; int is_dup; - int32_t seq_diff; + s32 seq_diff; int need_update = 0; int set_mark; enum batadv_dup_status ret = BATADV_NO_DUP; - uint32_t seqno = ntohl(batadv_ogm_packet->seqno); - uint8_t *neigh_addr; - uint8_t packet_count; + u32 seqno = ntohl(batadv_ogm_packet->seqno); + u8 *neigh_addr; + u8 packet_count; unsigned long *bitmap; orig_node = batadv_iv_ogm_orig_get(bat_priv, batadv_ogm_packet->orig); @@ -1406,7 +1379,8 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset, struct batadv_hard_iface *if_outgoing) { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); - struct batadv_neigh_node *router = NULL, *router_router = NULL; + struct batadv_neigh_node *router = NULL; + struct batadv_neigh_node *router_router = NULL; struct batadv_orig_node *orig_neigh_node; struct batadv_orig_ifinfo *orig_ifinfo; struct batadv_neigh_node *orig_neigh_router = NULL; @@ -1418,7 +1392,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset, bool sameseq, similar_ttl; struct sk_buff *skb_priv; struct ethhdr *ethhdr; - uint8_t *prev_sender; + u8 *prev_sender; int is_bidirect; /* create a private copy of the skb, as some functions change tq value @@ -1600,7 +1574,7 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset, struct batadv_orig_node *orig_neigh_node, *orig_node; struct batadv_hard_iface *hard_iface; struct batadv_ogm_packet *ogm_packet; - uint32_t if_incoming_seqno; + u32 if_incoming_seqno; bool has_directlink_flag; struct ethhdr *ethhdr; bool is_my_oldorig = false; @@ -1673,9 +1647,9 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset, if (is_my_orig) { unsigned long *word; int offset; - int32_t bit_pos; - int16_t if_num; - uint8_t *weight; + s32 bit_pos; + s16 if_num; + u8 *weight; orig_neigh_node = batadv_iv_ogm_orig_get(bat_priv, ethhdr->h_source); @@ -1751,7 +1725,7 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct batadv_ogm_packet *ogm_packet; - uint8_t *packet_pos; + u8 *packet_pos; int ogm_offset; bool ret; @@ -1835,7 +1809,7 @@ static void batadv_iv_ogm_orig_print(struct batadv_priv *bat_priv, unsigned long last_seen_jiffies; struct hlist_head *head; int batman_count = 0; - uint32_t i; + u32 i; seq_printf(seq, " %-15s %s (%s/%i) %17s [%10s]: %20s ...\n", "Originator", "last-seen", "#", BATADV_TQ_MAX_VALUE, @@ -1903,7 +1877,7 @@ static int batadv_iv_ogm_neigh_cmp(struct batadv_neigh_node *neigh1, struct batadv_hard_iface *if_outgoing2) { struct batadv_neigh_ifinfo *neigh1_ifinfo, *neigh2_ifinfo; - uint8_t tq1, tq2; + u8 tq1, tq2; int diff; neigh1_ifinfo = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); @@ -1945,7 +1919,7 @@ batadv_iv_ogm_neigh_is_eob(struct batadv_neigh_node *neigh1, struct batadv_hard_iface *if_outgoing2) { struct batadv_neigh_ifinfo *neigh1_ifinfo, *neigh2_ifinfo; - uint8_t tq1, tq2; + u8 tq1, tq2; bool ret; neigh1_ifinfo = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c index cf68c328345e..25cbc36e997a 100644 --- a/net/batman-adv/bitarray.c +++ b/net/batman-adv/bitarray.c @@ -21,7 +21,7 @@ #include <linux/bitmap.h> /* shift the packet array by n places. */ -static void batadv_bitmap_shift_left(unsigned long *seq_bits, int32_t n) +static void batadv_bitmap_shift_left(unsigned long *seq_bits, s32 n) { if (n <= 0 || n >= BATADV_TQ_LOCAL_WINDOW_SIZE) return; @@ -35,8 +35,8 @@ static void batadv_bitmap_shift_left(unsigned long *seq_bits, int32_t n) * 1 if the window was moved (either new or very old) * 0 if the window was not moved/shifted. */ -int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, - int32_t seq_num_diff, int set_mark) +int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, s32 seq_num_diff, + int set_mark) { struct batadv_priv *bat_priv = priv; diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h index 0c2456225fae..0226b220fe5b 100644 --- a/net/batman-adv/bitarray.h +++ b/net/batman-adv/bitarray.h @@ -28,9 +28,9 @@ * and curr_seqno is within range of last_seqno. Otherwise returns 0. */ static inline int batadv_test_bit(const unsigned long *seq_bits, - uint32_t last_seqno, uint32_t curr_seqno) + u32 last_seqno, u32 curr_seqno) { - int32_t diff; + s32 diff; diff = last_seqno - curr_seqno; if (diff < 0 || diff >= BATADV_TQ_LOCAL_WINDOW_SIZE) @@ -39,7 +39,7 @@ static inline int batadv_test_bit(const unsigned long *seq_bits, } /* turn corresponding bit on, so we can remember that we got the packet */ -static inline void batadv_set_bit(unsigned long *seq_bits, int32_t n) +static inline void batadv_set_bit(unsigned long *seq_bits, s32 n) { /* if too old, just drop it */ if (n < 0 || n >= BATADV_TQ_LOCAL_WINDOW_SIZE) @@ -51,7 +51,7 @@ static inline void batadv_set_bit(unsigned long *seq_bits, int32_t n) /* receive and process one packet, returns 1 if received seq_num is considered * new, 0 if old */ -int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, - int32_t seq_num_diff, int set_mark); +int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, s32 seq_num_diff, + int set_mark); #endif /* _NET_BATMAN_ADV_BITARRAY_H_ */ diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index ba0609292ae7..191a70290dca 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -51,7 +51,7 @@ #include "packet.h" #include "translation-table.h" -static const uint8_t batadv_announce_mac[4] = {0x43, 0x05, 0x43, 0x05}; +static const u8 batadv_announce_mac[4] = {0x43, 0x05, 0x43, 0x05}; static void batadv_bla_periodic_work(struct work_struct *work); static void @@ -59,10 +59,10 @@ batadv_bla_send_announce(struct batadv_priv *bat_priv, struct batadv_bla_backbone_gw *backbone_gw); /* return the index of the claim */ -static inline uint32_t batadv_choose_claim(const void *data, uint32_t size) +static inline u32 batadv_choose_claim(const void *data, u32 size) { struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data; - uint32_t hash = 0; + u32 hash = 0; hash = jhash(&claim->addr, sizeof(claim->addr), hash); hash = jhash(&claim->vid, sizeof(claim->vid), hash); @@ -71,11 +71,10 @@ static inline uint32_t batadv_choose_claim(const void *data, uint32_t size) } /* return the index of the backbone gateway */ -static inline uint32_t batadv_choose_backbone_gw(const void *data, - uint32_t size) +static inline u32 batadv_choose_backbone_gw(const void *data, u32 size) { const struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data; - uint32_t hash = 0; + u32 hash = 0; hash = jhash(&claim->addr, sizeof(claim->addr), hash); hash = jhash(&claim->vid, sizeof(claim->vid), hash); @@ -89,7 +88,8 @@ static int batadv_compare_backbone_gw(const struct hlist_node *node, { const void *data1 = container_of(node, struct batadv_bla_backbone_gw, hash_entry); - const struct batadv_bla_backbone_gw *gw1 = data1, *gw2 = data2; + const struct batadv_bla_backbone_gw *gw1 = data1; + const struct batadv_bla_backbone_gw *gw2 = data2; if (!batadv_compare_eth(gw1->orig, gw2->orig)) return 0; @@ -106,7 +106,8 @@ static int batadv_compare_claim(const struct hlist_node *node, { const void *data1 = container_of(node, struct batadv_bla_claim, hash_entry); - const struct batadv_bla_claim *cl1 = data1, *cl2 = data2; + const struct batadv_bla_claim *cl1 = data1; + const struct batadv_bla_claim *cl2 = data2; if (!batadv_compare_eth(cl1->addr, cl2->addr)) return 0; @@ -192,8 +193,8 @@ static struct batadv_bla_claim * Returns claim if found or NULL otherwise. */ static struct batadv_bla_backbone_gw * -batadv_backbone_hash_find(struct batadv_priv *bat_priv, - uint8_t *addr, unsigned short vid) +batadv_backbone_hash_find(struct batadv_priv *bat_priv, u8 *addr, + unsigned short vid) { struct batadv_hashtable *hash = bat_priv->bla.backbone_hash; struct hlist_head *head; @@ -269,14 +270,14 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw) * @vid: the VLAN ID * @claimtype: the type of the claim (CLAIM, UNCLAIM, ANNOUNCE, ...) */ -static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, +static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac, unsigned short vid, int claimtype) { struct sk_buff *skb; struct ethhdr *ethhdr; struct batadv_hard_iface *primary_if; struct net_device *soft_iface; - uint8_t *hw_src; + u8 *hw_src; struct batadv_bla_claim_dst local_claim_dest; __be32 zeroip = 0; @@ -304,13 +305,13 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, * with XX = claim type * and YY:YY = group id */ - (uint8_t *)&local_claim_dest); + (u8 *)&local_claim_dest); if (!skb) goto out; ethhdr = (struct ethhdr *)skb->data; - hw_src = (uint8_t *)ethhdr + ETH_HLEN + sizeof(struct arphdr); + hw_src = (u8 *)ethhdr + ETH_HLEN + sizeof(struct arphdr); /* now we pretend that the client would have sent this ... */ switch (claimtype) { @@ -383,7 +384,7 @@ out: * be found. */ static struct batadv_bla_backbone_gw * -batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig, +batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig, unsigned short vid, bool own_backbone) { struct batadv_bla_backbone_gw *entry; @@ -552,7 +553,7 @@ static void batadv_bla_send_request(struct batadv_bla_backbone_gw *backbone_gw) static void batadv_bla_send_announce(struct batadv_priv *bat_priv, struct batadv_bla_backbone_gw *backbone_gw) { - uint8_t mac[ETH_ALEN]; + u8 mac[ETH_ALEN]; __be16 crc; memcpy(mac, batadv_announce_mac, 4); @@ -571,7 +572,7 @@ static void batadv_bla_send_announce(struct batadv_priv *bat_priv, * @backbone_gw: the backbone gateway which claims it */ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, - const uint8_t *mac, const unsigned short vid, + const u8 *mac, const unsigned short vid, struct batadv_bla_backbone_gw *backbone_gw) { struct batadv_bla_claim *claim; @@ -635,7 +636,7 @@ claim_free_ref: * given mac address and vid. */ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, - const uint8_t *mac, const unsigned short vid) + const u8 *mac, const unsigned short vid) { struct batadv_bla_claim search_claim, *claim; @@ -659,12 +660,11 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, } /* check for ANNOUNCE frame, return 1 if handled */ -static int batadv_handle_announce(struct batadv_priv *bat_priv, - uint8_t *an_addr, uint8_t *backbone_addr, - unsigned short vid) +static int batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr, + u8 *backbone_addr, unsigned short vid) { struct batadv_bla_backbone_gw *backbone_gw; - uint16_t crc; + u16 crc; if (memcmp(an_addr, batadv_announce_mac, 4) != 0) return 0; @@ -708,8 +708,8 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, /* check for REQUEST frame, return 1 if handled */ static int batadv_handle_request(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, - uint8_t *backbone_addr, - struct ethhdr *ethhdr, unsigned short vid) + u8 *backbone_addr, struct ethhdr *ethhdr, + unsigned short vid) { /* check for REQUEST frame */ if (!batadv_compare_eth(backbone_addr, ethhdr->h_dest)) @@ -732,8 +732,8 @@ static int batadv_handle_request(struct batadv_priv *bat_priv, /* check for UNCLAIM frame, return 1 if handled */ static int batadv_handle_unclaim(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, - uint8_t *backbone_addr, - uint8_t *claim_addr, unsigned short vid) + u8 *backbone_addr, u8 *claim_addr, + unsigned short vid) { struct batadv_bla_backbone_gw *backbone_gw; @@ -761,7 +761,7 @@ static int batadv_handle_unclaim(struct batadv_priv *bat_priv, /* check for CLAIM frame, return 1 if handled */ static int batadv_handle_claim(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, - uint8_t *backbone_addr, uint8_t *claim_addr, + u8 *backbone_addr, u8 *claim_addr, unsigned short vid) { struct batadv_bla_backbone_gw *backbone_gw; @@ -805,10 +805,10 @@ static int batadv_handle_claim(struct batadv_priv *bat_priv, */ static int batadv_check_claim_group(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, - uint8_t *hw_src, uint8_t *hw_dst, + u8 *hw_src, u8 *hw_dst, struct ethhdr *ethhdr) { - uint8_t *backbone_addr; + u8 *backbone_addr; struct batadv_orig_node *orig_node; struct batadv_bla_claim_dst *bla_dst, *bla_dst_own; @@ -877,7 +877,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, struct sk_buff *skb) { struct batadv_bla_claim_dst *bla_dst, *bla_dst_own; - uint8_t *hw_src, *hw_dst; + u8 *hw_src, *hw_dst; struct vlan_hdr *vhdr, vhdr_buf; struct ethhdr *ethhdr; struct arphdr *arphdr; @@ -923,7 +923,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, /* pskb_may_pull() may have modified the pointers, get ethhdr again */ ethhdr = eth_hdr(skb); - arphdr = (struct arphdr *)((uint8_t *)ethhdr + headlen); + arphdr = (struct arphdr *)((u8 *)ethhdr + headlen); /* Check whether the ARP frame carries a valid * IP information @@ -937,7 +937,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, if (arphdr->ar_pln != 4) return 0; - hw_src = (uint8_t *)arphdr + sizeof(struct arphdr); + hw_src = (u8 *)arphdr + sizeof(struct arphdr); hw_dst = hw_src + ETH_ALEN + 4; bla_dst = (struct batadv_bla_claim_dst *)hw_dst; bla_dst_own = &bat_priv->bla.claim_dest; @@ -1238,9 +1238,9 @@ static struct lock_class_key batadv_backbone_hash_lock_class_key; int batadv_bla_init(struct batadv_priv *bat_priv) { int i; - uint8_t claim_dest[ETH_ALEN] = {0xff, 0x43, 0x05, 0x00, 0x00, 0x00}; + u8 claim_dest[ETH_ALEN] = {0xff, 0x43, 0x05, 0x00, 0x00, 0x00}; struct batadv_hard_iface *primary_if; - uint16_t crc; + u16 crc; unsigned long entrytime; spin_lock_init(&bat_priv->bla.bcast_duplist_lock); @@ -1368,7 +1368,7 @@ out: * * Returns true if orig is a backbone for this vid, false otherwise. */ -bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig, +bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig, unsigned short vid) { struct batadv_hashtable *hash = bat_priv->bla.backbone_hash; @@ -1647,9 +1647,9 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) struct batadv_bla_claim *claim; struct batadv_hard_iface *primary_if; struct hlist_head *head; - uint32_t i; + u32 i; bool is_own; - uint8_t *primary_addr; + u8 *primary_addr; primary_if = batadv_seq_print_text_primary_if_get(seq); if (!primary_if) @@ -1692,9 +1692,9 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) struct batadv_hard_iface *primary_if; struct hlist_head *head; int secs, msecs; - uint32_t i; + u32 i; bool is_own; - uint8_t *primary_addr; + u8 *primary_addr; primary_if = batadv_seq_print_text_primary_if_get(seq); if (!primary_if) diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index 0282690389ac..025152b34282 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -22,9 +22,6 @@ #include <linux/types.h> -struct batadv_hard_iface; -struct batadv_orig_node; -struct batadv_priv; struct seq_file; struct sk_buff; @@ -38,7 +35,7 @@ int batadv_bla_is_backbone_gw(struct sk_buff *skb, int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset); int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset); -bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig, +bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig, unsigned short vid); int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, struct sk_buff *skb); @@ -84,8 +81,7 @@ static inline int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, } static inline bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, - uint8_t *orig, - unsigned short vid) + u8 *orig, unsigned short vid) { return false; } diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h index 187acdc85dfa..80ab8d6f0ab3 100644 --- a/net/batman-adv/debugfs.h +++ b/net/batman-adv/debugfs.h @@ -22,7 +22,6 @@ #include <linux/kconfig.h> -struct batadv_hard_iface; struct net_device; #define BATADV_DEBUGFS_SUBDIR "batman_adv" diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index cc7d87d64987..83bc1aaf5800 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -102,7 +102,7 @@ static void __batadv_dat_purge(struct batadv_priv *bat_priv, struct batadv_dat_entry *dat_entry; struct hlist_node *node_tmp; struct hlist_head *head; - uint32_t i; + u32 i; if (!bat_priv->dat.hash) return; @@ -168,11 +168,11 @@ static int batadv_compare_dat(const struct hlist_node *node, const void *data2) * * Returns the value of the hw_src field in the ARP packet. */ -static uint8_t *batadv_arp_hw_src(struct sk_buff *skb, int hdr_size) +static u8 *batadv_arp_hw_src(struct sk_buff *skb, int hdr_size) { - uint8_t *addr; + u8 *addr; - addr = (uint8_t *)(skb->data + hdr_size); + addr = (u8 *)(skb->data + hdr_size); addr += ETH_HLEN + sizeof(struct arphdr); return addr; @@ -197,7 +197,7 @@ static __be32 batadv_arp_ip_src(struct sk_buff *skb, int hdr_size) * * Returns the value of the hw_dst field in the ARP packet. */ -static uint8_t *batadv_arp_hw_dst(struct sk_buff *skb, int hdr_size) +static u8 *batadv_arp_hw_dst(struct sk_buff *skb, int hdr_size) { return batadv_arp_hw_src(skb, hdr_size) + ETH_ALEN + 4; } @@ -221,12 +221,12 @@ static __be32 batadv_arp_ip_dst(struct sk_buff *skb, int hdr_size) * * Returns the selected index in the hash table for the given data. */ -static uint32_t batadv_hash_dat(const void *data, uint32_t size) +static u32 batadv_hash_dat(const void *data, u32 size) { - uint32_t hash = 0; + u32 hash = 0; const struct batadv_dat_entry *dat = data; const unsigned char *key; - uint32_t i; + u32 i; key = (const unsigned char *)&dat->ip; for (i = 0; i < sizeof(dat->ip); i++) { @@ -265,7 +265,7 @@ batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip, struct hlist_head *head; struct batadv_dat_entry to_find, *dat_entry, *dat_entry_tmp = NULL; struct batadv_hashtable *hash = bat_priv->dat.hash; - uint32_t index; + u32 index; if (!hash) return NULL; @@ -300,7 +300,7 @@ batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip, * @vid: VLAN identifier */ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip, - uint8_t *mac_addr, unsigned short vid) + u8 *mac_addr, unsigned short vid) { struct batadv_dat_entry *dat_entry; int hash_added; @@ -357,11 +357,11 @@ out: * @msg: message to print together with the debugging information */ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, - uint16_t type, int hdr_size, char *msg) + u16 type, int hdr_size, char *msg) { struct batadv_unicast_4addr_packet *unicast_4addr_packet; struct batadv_bcast_packet *bcast_pkt; - uint8_t *orig_addr; + u8 *orig_addr; __be32 ip_src, ip_dst; if (msg) @@ -424,7 +424,7 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, #else static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, - uint16_t type, int hdr_size, char *msg) + u16 type, int hdr_size, char *msg) { } @@ -497,7 +497,8 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, int select, batadv_dat_addr_t ip_key, batadv_dat_addr_t *last_max) { - batadv_dat_addr_t max = 0, tmp_max = 0; + batadv_dat_addr_t max = 0; + batadv_dat_addr_t tmp_max = 0; struct batadv_orig_node *orig_node, *max_orig_node = NULL; struct batadv_hashtable *hash = bat_priv->orig_hash; struct hlist_head *head; @@ -709,9 +710,8 @@ void batadv_dat_status_update(struct net_device *net_dev) */ static void batadv_dat_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, struct batadv_orig_node *orig, - uint8_t flags, - void *tvlv_value, - uint16_t tvlv_value_len) + u8 flags, + void *tvlv_value, u16 tvlv_value_len) { if (flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND) clear_bit(BATADV_ORIG_CAPA_HAS_DAT, &orig->capabilities); @@ -787,7 +787,7 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset) struct hlist_head *head; unsigned long last_seen_jiffies; int last_seen_msecs, last_seen_secs, last_seen_mins; - uint32_t i; + u32 i; primary_if = batadv_seq_print_text_primary_if_get(seq); if (!primary_if) @@ -830,14 +830,14 @@ out: * * Returns the ARP type if the skb contains a valid ARP packet, 0 otherwise. */ -static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv, - struct sk_buff *skb, int hdr_size) +static u16 batadv_arp_get_type(struct batadv_priv *bat_priv, + struct sk_buff *skb, int hdr_size) { struct arphdr *arphdr; struct ethhdr *ethhdr; __be32 ip_src, ip_dst; - uint8_t *hw_src, *hw_dst; - uint16_t type = 0; + u8 *hw_src, *hw_dst; + u16 type = 0; /* pull the ethernet header */ if (unlikely(!pskb_may_pull(skb, hdr_size + ETH_HLEN))) @@ -934,9 +934,9 @@ static unsigned short batadv_dat_get_vid(struct sk_buff *skb, int *hdr_size) bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, struct sk_buff *skb) { - uint16_t type = 0; + u16 type = 0; __be32 ip_dst, ip_src; - uint8_t *hw_src; + u8 *hw_src; bool ret = false; struct batadv_dat_entry *dat_entry = NULL; struct sk_buff *skb_new; @@ -1022,9 +1022,9 @@ out: bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, struct sk_buff *skb, int hdr_size) { - uint16_t type; + u16 type; __be32 ip_src, ip_dst; - uint8_t *hw_src; + u8 *hw_src; struct sk_buff *skb_new; struct batadv_dat_entry *dat_entry = NULL; bool ret = false; @@ -1100,9 +1100,9 @@ out: void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, struct sk_buff *skb) { - uint16_t type; + u16 type; __be32 ip_src, ip_dst; - uint8_t *hw_src, *hw_dst; + u8 *hw_src, *hw_dst; int hdr_size = 0; unsigned short vid; @@ -1146,9 +1146,9 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, struct sk_buff *skb, int hdr_size) { - uint16_t type; + u16 type; __be32 ip_src, ip_dst; - uint8_t *hw_src, *hw_dst; + u8 *hw_src, *hw_dst; bool dropped = false; unsigned short vid; @@ -1202,7 +1202,7 @@ out: bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv, struct batadv_forw_packet *forw_packet) { - uint16_t type; + u16 type; __be32 ip_dst; struct batadv_dat_entry *dat_entry = NULL; bool ret = false; diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h index 3181507ebc14..26d4a525a798 100644 --- a/net/batman-adv/distributed-arp-table.h +++ b/net/batman-adv/distributed-arp-table.h @@ -54,7 +54,7 @@ bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv, static inline void batadv_dat_init_orig_node_addr(struct batadv_orig_node *orig_node) { - uint32_t addr; + u32 addr; addr = batadv_choose_orig(orig_node->orig, BATADV_DAT_ADDR_MAX); orig_node->dat_addr = (batadv_dat_addr_t)addr; @@ -69,7 +69,7 @@ static inline void batadv_dat_init_own_addr(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if) { - uint32_t addr; + u32 addr; addr = batadv_choose_orig(primary_if->net_dev->dev_addr, BATADV_DAT_ADDR_MAX); @@ -89,7 +89,7 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset); * Updates the ethtool statistics for the received packet if it is a DAT subtype */ static inline void batadv_dat_inc_counter(struct batadv_priv *bat_priv, - uint8_t subtype) + u8 subtype) { switch (subtype) { case BATADV_P_DAT_DHT_GET: @@ -169,7 +169,7 @@ static inline void batadv_dat_free(struct batadv_priv *bat_priv) } static inline void batadv_dat_inc_counter(struct batadv_priv *bat_priv, - uint8_t subtype) + u8 subtype) { } diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index c0f0d01ab244..700c96c82a15 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -25,6 +25,7 @@ #include <linux/if_ether.h> #include <linux/jiffies.h> #include <linux/kernel.h> +#include <linux/lockdep.h> #include <linux/netdevice.h> #include <linux/pkt_sched.h> #include <linux/skbuff.h> @@ -66,7 +67,7 @@ void batadv_frag_purge_orig(struct batadv_orig_node *orig_node, bool (*check_cb)(struct batadv_frag_table_entry *)) { struct batadv_frag_table_entry *chain; - uint8_t i; + u8 i; for (i = 0; i < BATADV_FRAG_BUFFER_COUNT; i++) { chain = &orig_node->fragments[i]; @@ -110,8 +111,10 @@ static int batadv_frag_size_limit(void) * without searching for the right position. */ static bool batadv_frag_init_chain(struct batadv_frag_table_entry *chain, - uint16_t seqno) + u16 seqno) { + lockdep_assert_held(&chain->lock); + if (chain->seqno == seqno) return false; @@ -145,8 +148,8 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node, struct batadv_frag_list_entry *frag_entry_new = NULL, *frag_entry_curr; struct batadv_frag_list_entry *frag_entry_last = NULL; struct batadv_frag_packet *frag_packet; - uint8_t bucket; - uint16_t seqno, hdr_size = sizeof(struct batadv_frag_packet); + u8 bucket; + u16 seqno, hdr_size = sizeof(struct batadv_frag_packet); bool ret = false; /* Linearize packet to avoid linearizing 16 packets in a row when doing @@ -351,7 +354,7 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb, struct batadv_orig_node *orig_node_dst = NULL; struct batadv_neigh_node *neigh_node = NULL; struct batadv_frag_packet *packet; - uint16_t total_size; + u16 total_size; bool ret = false; packet = (struct batadv_frag_packet *)skb->data; diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 6012e2b4af4f..e6c8382c79ba 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -27,7 +27,6 @@ #include <linux/in.h> #include <linux/ip.h> #include <linux/ipv6.h> -#include <linux/jiffies.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/netdevice.h> @@ -153,16 +152,14 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv) struct batadv_neigh_node *router; struct batadv_neigh_ifinfo *router_ifinfo; struct batadv_gw_node *gw_node, *curr_gw = NULL; - uint64_t max_gw_factor = 0, tmp_gw_factor = 0; - uint8_t max_tq = 0; - uint8_t tq_avg; + u64 max_gw_factor = 0; + u64 tmp_gw_factor = 0; + u8 max_tq = 0; + u8 tq_avg; struct batadv_orig_node *orig_node; rcu_read_lock(); hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { - if (gw_node->deleted) - continue; - orig_node = gw_node->orig_node; router = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT); if (!router) @@ -263,7 +260,8 @@ void batadv_gw_check_client_stop(struct batadv_priv *bat_priv) void batadv_gw_election(struct batadv_priv *bat_priv) { - struct batadv_gw_node *curr_gw = NULL, *next_gw = NULL; + struct batadv_gw_node *curr_gw = NULL; + struct batadv_gw_node *next_gw = NULL; struct batadv_neigh_node *router = NULL; struct batadv_neigh_ifinfo *router_ifinfo = NULL; char gw_addr[18] = { '\0' }; @@ -347,8 +345,9 @@ void batadv_gw_check_election(struct batadv_priv *bat_priv, struct batadv_neigh_ifinfo *router_orig_tq = NULL; struct batadv_neigh_ifinfo *router_gw_tq = NULL; struct batadv_orig_node *curr_gw_orig; - struct batadv_neigh_node *router_gw = NULL, *router_orig = NULL; - uint8_t gw_tq_avg, orig_tq_avg; + struct batadv_neigh_node *router_gw = NULL; + struct batadv_neigh_node *router_orig = NULL; + u8 gw_tq_avg, orig_tq_avg; curr_gw_orig = batadv_gw_get_selected_orig(bat_priv); if (!curr_gw_orig) @@ -470,9 +469,6 @@ batadv_gw_node_get(struct batadv_priv *bat_priv, if (gw_node_tmp->orig_node != orig_node) continue; - if (gw_node_tmp->deleted) - continue; - if (!atomic_inc_not_zero(&gw_node_tmp->refcount)) continue; @@ -522,9 +518,7 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv, gw_node->bandwidth_down = ntohl(gateway->bandwidth_down); gw_node->bandwidth_up = ntohl(gateway->bandwidth_up); - gw_node->deleted = 0; if (ntohl(gateway->bandwidth_down) == 0) { - gw_node->deleted = jiffies; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Gateway %pM removed from gateway list\n", orig_node->orig); @@ -532,14 +526,21 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv, /* Note: We don't need a NULL check here, since curr_gw never * gets dereferenced. */ + spin_lock_bh(&bat_priv->gw.list_lock); + hlist_del_init_rcu(&gw_node->list); + spin_unlock_bh(&bat_priv->gw.list_lock); + + batadv_gw_node_free_ref(gw_node); + curr_gw = batadv_gw_get_selected_gw_node(bat_priv); if (gw_node == curr_gw) batadv_gw_reselect(bat_priv); + + if (curr_gw) + batadv_gw_node_free_ref(curr_gw); } out: - if (curr_gw) - batadv_gw_node_free_ref(curr_gw); if (gw_node) batadv_gw_node_free_ref(gw_node); } @@ -555,39 +556,18 @@ void batadv_gw_node_delete(struct batadv_priv *bat_priv, batadv_gw_node_update(bat_priv, orig_node, &gateway); } -void batadv_gw_node_purge(struct batadv_priv *bat_priv) +void batadv_gw_node_free(struct batadv_priv *bat_priv) { - struct batadv_gw_node *gw_node, *curr_gw; + struct batadv_gw_node *gw_node; struct hlist_node *node_tmp; - unsigned long timeout = msecs_to_jiffies(2 * BATADV_PURGE_TIMEOUT); - int do_reselect = 0; - - curr_gw = batadv_gw_get_selected_gw_node(bat_priv); spin_lock_bh(&bat_priv->gw.list_lock); - hlist_for_each_entry_safe(gw_node, node_tmp, &bat_priv->gw.list, list) { - if (((!gw_node->deleted) || - (time_before(jiffies, gw_node->deleted + timeout))) && - atomic_read(&bat_priv->mesh_state) == BATADV_MESH_ACTIVE) - continue; - - if (curr_gw == gw_node) - do_reselect = 1; - - hlist_del_rcu(&gw_node->list); + hlist_del_init_rcu(&gw_node->list); batadv_gw_node_free_ref(gw_node); } - spin_unlock_bh(&bat_priv->gw.list_lock); - - /* gw_reselect() needs to acquire the gw_list_lock */ - if (do_reselect) - batadv_gw_reselect(bat_priv); - - if (curr_gw) - batadv_gw_node_free_ref(curr_gw); } /* fails if orig_node has no router */ @@ -651,9 +631,6 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset) rcu_read_lock(); hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { - if (gw_node->deleted) - continue; - /* fails if orig_node has no router */ if (batadv_write_buffer_text(bat_priv, seq, gw_node) < 0) continue; @@ -688,7 +665,7 @@ out: */ enum batadv_dhcp_recipient batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len, - uint8_t *chaddr) + u8 *chaddr) { enum batadv_dhcp_recipient ret = BATADV_DHCP_NO; struct ethhdr *ethhdr; @@ -698,7 +675,7 @@ batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len, struct vlan_ethhdr *vhdr; int chaddr_offset; __be16 proto; - uint8_t *p; + u8 *p; /* check for ethernet header */ if (!pskb_may_pull(skb, *header_len + ETH_HLEN)) @@ -808,13 +785,15 @@ batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len, bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, struct sk_buff *skb) { - struct batadv_neigh_node *neigh_curr = NULL, *neigh_old = NULL; + struct batadv_neigh_node *neigh_curr = NULL; + struct batadv_neigh_node *neigh_old = NULL; struct batadv_orig_node *orig_dst_node = NULL; - struct batadv_gw_node *gw_node = NULL, *curr_gw = NULL; + struct batadv_gw_node *gw_node = NULL; + struct batadv_gw_node *curr_gw = NULL; struct batadv_neigh_ifinfo *curr_ifinfo, *old_ifinfo; struct ethhdr *ethhdr = (struct ethhdr *)skb->data; bool out_of_range = false; - uint8_t curr_tq_avg; + u8 curr_tq_avg; unsigned short vid; vid = batadv_get_vid(skb, 0); diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h index 89565b451c18..fa9527785ed3 100644 --- a/net/batman-adv/gateway_client.h +++ b/net/batman-adv/gateway_client.h @@ -38,11 +38,11 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv, struct batadv_tvlv_gateway_data *gateway); void batadv_gw_node_delete(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node); -void batadv_gw_node_purge(struct batadv_priv *bat_priv); +void batadv_gw_node_free(struct batadv_priv *bat_priv); int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset); bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, struct sk_buff *skb); enum batadv_dhcp_recipient batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len, - uint8_t *chaddr); + u8 *chaddr); #endif /* _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ */ diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index 39cf44ccebd4..0cb5e6b6f6d4 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -19,8 +19,10 @@ #include "main.h" #include <linux/atomic.h> +#include <linux/errno.h> #include <linux/byteorder/generic.h> #include <linux/kernel.h> +#include <linux/math64.h> #include <linux/netdevice.h> #include <linux/stddef.h> #include <linux/string.h> @@ -39,11 +41,11 @@ * Returns false on parse error and true otherwise. */ static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff, - uint32_t *down, uint32_t *up) + u32 *down, u32 *up) { enum batadv_bandwidth_units bw_unit_type = BATADV_BW_UNIT_KBIT; char *slash_ptr, *tmp_ptr; - long ldown, lup; + u64 ldown, lup; int ret; slash_ptr = strchr(buff, '/'); @@ -61,7 +63,7 @@ static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff, *tmp_ptr = '\0'; } - ret = kstrtol(buff, 10, &ldown); + ret = kstrtou64(buff, 10, &ldown); if (ret) { batadv_err(net_dev, "Download speed of gateway mode invalid: %s\n", @@ -71,14 +73,31 @@ static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff, switch (bw_unit_type) { case BATADV_BW_UNIT_MBIT: - *down = ldown * 10; + /* prevent overflow */ + if (U64_MAX / 10 < ldown) { + batadv_err(net_dev, + "Download speed of gateway mode too large: %s\n", + buff); + return false; + } + + ldown *= 10; break; case BATADV_BW_UNIT_KBIT: default: - *down = ldown / 100; + ldown = div_u64(ldown, 100); break; } + if (U32_MAX < ldown) { + batadv_err(net_dev, + "Download speed of gateway mode too large: %s\n", + buff); + return false; + } + + *down = ldown; + /* we also got some upload info */ if (slash_ptr) { bw_unit_type = BATADV_BW_UNIT_KBIT; @@ -94,7 +113,7 @@ static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff, *tmp_ptr = '\0'; } - ret = kstrtol(slash_ptr + 1, 10, &lup); + ret = kstrtou64(slash_ptr + 1, 10, &lup); if (ret) { batadv_err(net_dev, "Upload speed of gateway mode invalid: %s\n", @@ -104,13 +123,30 @@ static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff, switch (bw_unit_type) { case BATADV_BW_UNIT_MBIT: - *up = lup * 10; + /* prevent overflow */ + if (U64_MAX / 10 < lup) { + batadv_err(net_dev, + "Upload speed of gateway mode too large: %s\n", + slash_ptr + 1); + return false; + } + + lup *= 10; break; case BATADV_BW_UNIT_KBIT: default: - *up = lup / 100; + lup = div_u64(lup, 100); break; } + + if (U32_MAX < lup) { + batadv_err(net_dev, + "Upload speed of gateway mode too large: %s\n", + slash_ptr + 1); + return false; + } + + *up = lup; } return true; @@ -124,7 +160,7 @@ static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff, void batadv_gw_tvlv_container_update(struct batadv_priv *bat_priv) { struct batadv_tvlv_gateway_data gw; - uint32_t down, up; + u32 down, up; char gw_mode; gw_mode = atomic_read(&bat_priv->gw_mode); @@ -149,7 +185,10 @@ ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff, size_t count) { struct batadv_priv *bat_priv = netdev_priv(net_dev); - uint32_t down_curr, up_curr, down_new = 0, up_new = 0; + u32 down_curr; + u32 up_curr; + u32 down_new = 0; + u32 up_new = 0; bool ret; down_curr = (unsigned int)atomic_read(&bat_priv->gw.bandwidth_down); @@ -157,7 +196,7 @@ ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff, ret = batadv_parse_gw_bandwidth(net_dev, buff, &down_new, &up_new); if (!ret) - goto end; + return -EINVAL; if (!down_new) down_new = 1; @@ -181,7 +220,6 @@ ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff, atomic_set(&bat_priv->gw.bandwidth_up, up_new); batadv_gw_tvlv_container_update(bat_priv); -end: return count; } @@ -195,9 +233,8 @@ end: */ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, struct batadv_orig_node *orig, - uint8_t flags, - void *tvlv_value, - uint16_t tvlv_value_len) + u8 flags, + void *tvlv_value, u16 tvlv_value_len) { struct batadv_tvlv_gateway_data gateway, *gateway_ptr; diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h index bd5c812cebf4..ab893e318229 100644 --- a/net/batman-adv/gateway_common.h +++ b/net/batman-adv/gateway_common.h @@ -22,7 +22,6 @@ #include <linux/types.h> -struct batadv_priv; struct net_device; enum batadv_gw_modes { diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index f4a15d2e5eaf..f11345e163d7 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -252,6 +252,44 @@ static void batadv_check_known_mac_addr(const struct net_device *net_dev) rcu_read_unlock(); } +/** + * batadv_hardif_recalc_extra_skbroom() - Recalculate skbuff extra head/tailroom + * @soft_iface: netdev struct of the mesh interface + */ +static void batadv_hardif_recalc_extra_skbroom(struct net_device *soft_iface) +{ + const struct batadv_hard_iface *hard_iface; + unsigned short lower_header_len = ETH_HLEN; + unsigned short lower_headroom = 0; + unsigned short lower_tailroom = 0; + unsigned short needed_headroom; + + rcu_read_lock(); + list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { + if (hard_iface->if_status == BATADV_IF_NOT_IN_USE) + continue; + + if (hard_iface->soft_iface != soft_iface) + continue; + + lower_header_len = max_t(unsigned short, lower_header_len, + hard_iface->net_dev->hard_header_len); + + lower_headroom = max_t(unsigned short, lower_headroom, + hard_iface->net_dev->needed_headroom); + + lower_tailroom = max_t(unsigned short, lower_tailroom, + hard_iface->net_dev->needed_tailroom); + } + rcu_read_unlock(); + + needed_headroom = lower_headroom + (lower_header_len - ETH_HLEN); + needed_headroom += batadv_max_header_len(); + + soft_iface->needed_headroom = needed_headroom; + soft_iface->needed_tailroom = lower_tailroom; +} + int batadv_hardif_min_mtu(struct net_device *soft_iface) { struct batadv_priv *bat_priv = netdev_priv(soft_iface); @@ -474,6 +512,8 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, "Not using interface %s (retrying later): interface not active\n", hard_iface->net_dev->name); + batadv_hardif_recalc_extra_skbroom(soft_iface); + /* begin scheduling originator messages on that interface */ batadv_schedule_bat_ogm(hard_iface); @@ -528,6 +568,9 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface, batadv_purge_outstanding_packets(bat_priv, hard_iface); dev_put(hard_iface->soft_iface); + netdev_upper_dev_unlink(hard_iface->net_dev, hard_iface->soft_iface); + batadv_hardif_recalc_extra_skbroom(hard_iface->soft_iface); + /* nobody uses this interface anymore */ if (!bat_priv->num_ifaces) { batadv_gw_check_client_stop(bat_priv); @@ -536,7 +579,6 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface, batadv_softif_destroy_sysfs(hard_iface->soft_iface); } - netdev_upper_dev_unlink(hard_iface->net_dev, hard_iface->soft_iface); hard_iface->soft_iface = NULL; batadv_hardif_free_ref(hard_iface); diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c index e89f3146b092..2ea6a18d793f 100644 --- a/net/batman-adv/hash.c +++ b/net/batman-adv/hash.c @@ -25,7 +25,7 @@ /* clears the hash */ static void batadv_hash_init(struct batadv_hashtable *hash) { - uint32_t i; + u32 i; for (i = 0; i < hash->size; i++) { INIT_HLIST_HEAD(&hash->table[i]); @@ -42,7 +42,7 @@ void batadv_hash_destroy(struct batadv_hashtable *hash) } /* allocates and clears the hash */ -struct batadv_hashtable *batadv_hash_new(uint32_t size) +struct batadv_hashtable *batadv_hash_new(u32 size) { struct batadv_hashtable *hash; @@ -73,7 +73,7 @@ free_hash: void batadv_hash_set_lock_class(struct batadv_hashtable *hash, struct lock_class_key *key) { - uint32_t i; + u32 i; for (i = 0; i < hash->size; i++) lockdep_set_class(&hash->list_locks[i], key); diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h index 5065f50c9c3c..377626250ac7 100644 --- a/net/batman-adv/hash.h +++ b/net/batman-adv/hash.h @@ -39,17 +39,17 @@ typedef int (*batadv_hashdata_compare_cb)(const struct hlist_node *, * based on the key in the data of the first * argument and the size the second */ -typedef uint32_t (*batadv_hashdata_choose_cb)(const void *, uint32_t); +typedef u32 (*batadv_hashdata_choose_cb)(const void *, u32); typedef void (*batadv_hashdata_free_cb)(struct hlist_node *, void *); struct batadv_hashtable { struct hlist_head *table; /* the hashtable itself with the buckets */ spinlock_t *list_locks; /* spinlock for each hash list entry */ - uint32_t size; /* size of hashtable */ + u32 size; /* size of hashtable */ }; /* allocates and clears the hash */ -struct batadv_hashtable *batadv_hash_new(uint32_t size); +struct batadv_hashtable *batadv_hash_new(u32 size); /* set class key for all locks */ void batadv_hash_set_lock_class(struct batadv_hashtable *hash, @@ -69,7 +69,7 @@ static inline void batadv_hash_delete(struct batadv_hashtable *hash, struct hlist_head *head; struct hlist_node *node, *node_tmp; spinlock_t *list_lock; /* spinlock to protect write access */ - uint32_t i; + u32 i; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -105,7 +105,7 @@ static inline int batadv_hash_add(struct batadv_hashtable *hash, const void *data, struct hlist_node *data_node) { - uint32_t index; + u32 index; int ret = -1; struct hlist_head *head; struct hlist_node *node; @@ -149,7 +149,7 @@ static inline void *batadv_hash_remove(struct batadv_hashtable *hash, batadv_hashdata_choose_cb choose, void *data) { - uint32_t index; + u32 index; struct hlist_node *node; struct hlist_head *head; void *data_save = NULL; diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index 07061bcbaa04..bcabb5e3f4d3 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -183,7 +183,7 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff, struct batadv_orig_node *orig_node = NULL; struct batadv_neigh_node *neigh_node = NULL; size_t packet_len = sizeof(struct batadv_icmp_packet); - uint8_t *addr; + u8 *addr; if (len < sizeof(struct batadv_icmp_header)) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, @@ -337,8 +337,8 @@ err: } /** - * batadv_socket_receive_packet - schedule an icmp packet to be sent to userspace - * on an icmp socket. + * batadv_socket_receive_packet - schedule an icmp packet to be sent to + * userspace on an icmp socket. * @socket_client: the socket this packet belongs to * @icmph: pointer to the header of the icmp packet * @icmp_len: total length of the icmp packet diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h index 7de7fce4b48c..e937143f0b10 100644 --- a/net/batman-adv/icmp_socket.h +++ b/net/batman-adv/icmp_socket.h @@ -23,7 +23,6 @@ #include <linux/types.h> struct batadv_icmp_header; -struct batadv_priv; #define BATADV_ICMP_SOCKET "socket" diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 8457097f1643..d7f17c1aa4a4 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -30,6 +30,7 @@ #include <linux/ipv6.h> #include <linux/kernel.h> #include <linux/list.h> +#include <linux/lockdep.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/netdevice.h> @@ -148,7 +149,7 @@ int batadv_mesh_init(struct net_device *soft_iface) INIT_HLIST_HEAD(&bat_priv->mcast.want_all_ipv6_list); #endif INIT_LIST_HEAD(&bat_priv->tt.changes_list); - INIT_LIST_HEAD(&bat_priv->tt.req_list); + INIT_HLIST_HEAD(&bat_priv->tt.req_list); INIT_LIST_HEAD(&bat_priv->tt.roam_list); #ifdef CONFIG_BATMAN_ADV_MCAST INIT_HLIST_HEAD(&bat_priv->mcast.mla_list); @@ -198,7 +199,7 @@ void batadv_mesh_free(struct net_device *soft_iface) batadv_purge_outstanding_packets(bat_priv, NULL); - batadv_gw_node_purge(bat_priv); + batadv_gw_node_free(bat_priv); batadv_nc_mesh_free(bat_priv); batadv_dat_free(bat_priv); batadv_bla_free(bat_priv); @@ -234,7 +235,7 @@ void batadv_mesh_free(struct net_device *soft_iface) * * Returns 'true' if the mac address was found, false otherwise. */ -bool batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr) +bool batadv_is_my_mac(struct batadv_priv *bat_priv, const u8 *addr) { const struct batadv_hard_iface *hard_iface; bool is_my_mac = false; @@ -387,7 +388,7 @@ int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev, struct batadv_priv *bat_priv; struct batadv_ogm_packet *batadv_ogm_packet; struct batadv_hard_iface *hard_iface; - uint8_t idx; + u8 idx; int ret; hard_iface = container_of(ptype, struct batadv_hard_iface, @@ -496,7 +497,7 @@ static void batadv_recv_handler_init(void) } int -batadv_recv_handler_register(uint8_t packet_type, +batadv_recv_handler_register(u8 packet_type, int (*recv_handler)(struct sk_buff *, struct batadv_hard_iface *)) { @@ -512,7 +513,7 @@ batadv_recv_handler_register(uint8_t packet_type, return 0; } -void batadv_recv_handler_unregister(uint8_t packet_type) +void batadv_recv_handler_unregister(u8 packet_type) { batadv_rx_handler[packet_type] = batadv_recv_unhandled_packet; } @@ -583,7 +584,7 @@ int batadv_algo_seq_print_text(struct seq_file *seq, void *offset) seq_puts(seq, "Available routing algorithms:\n"); hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) { - seq_printf(seq, "%s\n", bat_algo_ops->name); + seq_printf(seq, " * %s\n", bat_algo_ops->name); } return 0; @@ -642,8 +643,7 @@ batadv_tvlv_handler_free_ref(struct batadv_tvlv_handler *tvlv_handler) * Returns tvlv handler if found or NULL otherwise. */ static struct batadv_tvlv_handler -*batadv_tvlv_handler_get(struct batadv_priv *bat_priv, - uint8_t type, uint8_t version) +*batadv_tvlv_handler_get(struct batadv_priv *bat_priv, u8 type, u8 version) { struct batadv_tvlv_handler *tvlv_handler_tmp, *tvlv_handler = NULL; @@ -691,8 +691,7 @@ static void batadv_tvlv_container_free_ref(struct batadv_tvlv_container *tvlv) * Returns tvlv container if found or NULL otherwise. */ static struct batadv_tvlv_container -*batadv_tvlv_container_get(struct batadv_priv *bat_priv, - uint8_t type, uint8_t version) +*batadv_tvlv_container_get(struct batadv_priv *bat_priv, u8 type, u8 version) { struct batadv_tvlv_container *tvlv_tmp, *tvlv = NULL; @@ -723,10 +722,10 @@ static struct batadv_tvlv_container * * Returns size of all currently registered tvlv containers in bytes. */ -static uint16_t batadv_tvlv_container_list_size(struct batadv_priv *bat_priv) +static u16 batadv_tvlv_container_list_size(struct batadv_priv *bat_priv) { struct batadv_tvlv_container *tvlv; - uint16_t tvlv_len = 0; + u16 tvlv_len = 0; hlist_for_each_entry(tvlv, &bat_priv->tvlv.container_list, list) { tvlv_len += sizeof(struct batadv_tvlv_hdr); @@ -739,13 +738,17 @@ static uint16_t batadv_tvlv_container_list_size(struct batadv_priv *bat_priv) /** * batadv_tvlv_container_remove - remove tvlv container from the tvlv container * list + * @bat_priv: the bat priv with all the soft interface information * @tvlv: the to be removed tvlv container * * Has to be called with the appropriate locks being acquired * (tvlv.container_list_lock). */ -static void batadv_tvlv_container_remove(struct batadv_tvlv_container *tvlv) +static void batadv_tvlv_container_remove(struct batadv_priv *bat_priv, + struct batadv_tvlv_container *tvlv) { + lockdep_assert_held(&bat_priv->tvlv.handler_list_lock); + if (!tvlv) return; @@ -764,13 +767,13 @@ static void batadv_tvlv_container_remove(struct batadv_tvlv_container *tvlv) * @version: tvlv container type to unregister */ void batadv_tvlv_container_unregister(struct batadv_priv *bat_priv, - uint8_t type, uint8_t version) + u8 type, u8 version) { struct batadv_tvlv_container *tvlv; spin_lock_bh(&bat_priv->tvlv.container_list_lock); tvlv = batadv_tvlv_container_get(bat_priv, type, version); - batadv_tvlv_container_remove(tvlv); + batadv_tvlv_container_remove(bat_priv, tvlv); spin_unlock_bh(&bat_priv->tvlv.container_list_lock); } @@ -787,8 +790,8 @@ void batadv_tvlv_container_unregister(struct batadv_priv *bat_priv, * content is going to replace the old one. */ void batadv_tvlv_container_register(struct batadv_priv *bat_priv, - uint8_t type, uint8_t version, - void *tvlv_value, uint16_t tvlv_value_len) + u8 type, u8 version, + void *tvlv_value, u16 tvlv_value_len) { struct batadv_tvlv_container *tvlv_old, *tvlv_new; @@ -809,7 +812,7 @@ void batadv_tvlv_container_register(struct batadv_priv *bat_priv, spin_lock_bh(&bat_priv->tvlv.container_list_lock); tvlv_old = batadv_tvlv_container_get(bat_priv, type, version); - batadv_tvlv_container_remove(tvlv_old); + batadv_tvlv_container_remove(bat_priv, tvlv_old); hlist_add_head(&tvlv_new->list, &bat_priv->tvlv.container_list); spin_unlock_bh(&bat_priv->tvlv.container_list_lock); } @@ -861,14 +864,13 @@ static bool batadv_tvlv_realloc_packet_buff(unsigned char **packet_buff, * * Returns size of all appended tvlv containers in bytes. */ -uint16_t batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv, - unsigned char **packet_buff, - int *packet_buff_len, - int packet_min_len) +u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv, + unsigned char **packet_buff, + int *packet_buff_len, int packet_min_len) { struct batadv_tvlv_container *tvlv; struct batadv_tvlv_hdr *tvlv_hdr; - uint16_t tvlv_value_len; + u16 tvlv_value_len; void *tvlv_value; bool ret; @@ -893,7 +895,7 @@ uint16_t batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv, tvlv_hdr->len = tvlv->tvlv_hdr.len; tvlv_value = tvlv_hdr + 1; memcpy(tvlv_value, tvlv + 1, ntohs(tvlv->tvlv_hdr.len)); - tvlv_value = (uint8_t *)tvlv_value + ntohs(tvlv->tvlv_hdr.len); + tvlv_value = (u8 *)tvlv_value + ntohs(tvlv->tvlv_hdr.len); } end: @@ -920,8 +922,8 @@ static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv, struct batadv_tvlv_handler *tvlv_handler, bool ogm_source, struct batadv_orig_node *orig_node, - uint8_t *src, uint8_t *dst, - void *tvlv_value, uint16_t tvlv_value_len) + u8 *src, u8 *dst, + void *tvlv_value, u16 tvlv_value_len) { if (!tvlv_handler) return NET_RX_SUCCESS; @@ -972,13 +974,13 @@ static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv, int batadv_tvlv_containers_process(struct batadv_priv *bat_priv, bool ogm_source, struct batadv_orig_node *orig_node, - uint8_t *src, uint8_t *dst, - void *tvlv_value, uint16_t tvlv_value_len) + u8 *src, u8 *dst, + void *tvlv_value, u16 tvlv_value_len) { struct batadv_tvlv_handler *tvlv_handler; struct batadv_tvlv_hdr *tvlv_hdr; - uint16_t tvlv_value_cont_len; - uint8_t cifnotfound = BATADV_TVLV_HANDLER_OGM_CIFNOTFND; + u16 tvlv_value_cont_len; + u8 cifnotfound = BATADV_TVLV_HANDLER_OGM_CIFNOTFND; int ret = NET_RX_SUCCESS; while (tvlv_value_len >= sizeof(*tvlv_hdr)) { @@ -1000,7 +1002,7 @@ int batadv_tvlv_containers_process(struct batadv_priv *bat_priv, tvlv_value_cont_len); if (tvlv_handler) batadv_tvlv_handler_free_ref(tvlv_handler); - tvlv_value = (uint8_t *)tvlv_value + tvlv_value_cont_len; + tvlv_value = (u8 *)tvlv_value + tvlv_value_cont_len; tvlv_value_len -= tvlv_value_cont_len; } @@ -1034,7 +1036,7 @@ void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node) { void *tvlv_value; - uint16_t tvlv_value_len; + u16 tvlv_value_len; if (!batadv_ogm_packet) return; @@ -1066,14 +1068,14 @@ void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv, void batadv_tvlv_handler_register(struct batadv_priv *bat_priv, void (*optr)(struct batadv_priv *bat_priv, struct batadv_orig_node *orig, - uint8_t flags, + u8 flags, void *tvlv_value, - uint16_t tvlv_value_len), + u16 tvlv_value_len), int (*uptr)(struct batadv_priv *bat_priv, - uint8_t *src, uint8_t *dst, + u8 *src, u8 *dst, void *tvlv_value, - uint16_t tvlv_value_len), - uint8_t type, uint8_t version, uint8_t flags) + u16 tvlv_value_len), + u8 type, u8 version, u8 flags) { struct batadv_tvlv_handler *tvlv_handler; @@ -1108,7 +1110,7 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv, * @version: tvlv handler version to be unregistered */ void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv, - uint8_t type, uint8_t version) + u8 type, u8 version) { struct batadv_tvlv_handler *tvlv_handler; @@ -1134,9 +1136,9 @@ void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv, * @tvlv_value: tvlv content * @tvlv_value_len: tvlv content length */ -void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, uint8_t *src, - uint8_t *dst, uint8_t type, uint8_t version, - void *tvlv_value, uint16_t tvlv_value_len) +void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src, + u8 *dst, u8 type, u8 version, + void *tvlv_value, u16 tvlv_value_len) { struct batadv_unicast_tvlv_packet *unicast_tvlv_packet; struct batadv_tvlv_hdr *tvlv_hdr; diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 41d27c7872b9..ebd8af0a1eb0 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -24,7 +24,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2015.1" +#define BATADV_SOURCE_VERSION "2015.2" #endif /* B.A.T.M.A.N. parameters */ @@ -193,7 +193,7 @@ extern struct workqueue_struct *batadv_event_workqueue; int batadv_mesh_init(struct net_device *soft_iface); void batadv_mesh_free(struct net_device *soft_iface); -bool batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr); +bool batadv_is_my_mac(struct batadv_priv *bat_priv, const u8 *addr); struct batadv_hard_iface * batadv_seq_print_text_primary_if_get(struct seq_file *seq); int batadv_max_header_len(void); @@ -202,10 +202,10 @@ int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev); int -batadv_recv_handler_register(uint8_t packet_type, +batadv_recv_handler_register(u8 packet_type, int (*recv_handler)(struct sk_buff *, struct batadv_hard_iface *)); -void batadv_recv_handler_unregister(uint8_t packet_type); +void batadv_recv_handler_unregister(u8 packet_type); int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops); int batadv_algo_select(struct batadv_priv *bat_priv, char *name); int batadv_algo_seq_print_text(struct seq_file *seq, void *offset); @@ -304,7 +304,7 @@ static inline bool batadv_has_timed_out(unsigned long timestamp, * they handle overflows/underflows and can correctly check for a * predecessor/successor unless the variable sequence number has grown by * more then 2**(bitwidth(x)-1)-1. - * This means that for a uint8_t with the maximum value 255, it would think: + * This means that for a u8 with the maximum value 255, it would think: * - when adding nothing - it is neither a predecessor nor a successor * - before adding more than 127 to the starting value - it is a predecessor, * - when adding 128 - it is neither a predecessor nor a successor, @@ -327,10 +327,9 @@ static inline void batadv_add_counter(struct batadv_priv *bat_priv, size_t idx, #define batadv_inc_counter(b, i) batadv_add_counter(b, i, 1) /* Sum and return the cpu-local counters for index 'idx' */ -static inline uint64_t batadv_sum_counter(struct batadv_priv *bat_priv, - size_t idx) +static inline u64 batadv_sum_counter(struct batadv_priv *bat_priv, size_t idx) { - uint64_t *counters, sum = 0; + u64 *counters, sum = 0; int cpu; for_each_possible_cpu(cpu) { @@ -348,39 +347,38 @@ static inline uint64_t batadv_sum_counter(struct batadv_priv *bat_priv, #define BATADV_SKB_CB(__skb) ((struct batadv_skb_cb *)&((__skb)->cb[0])) void batadv_tvlv_container_register(struct batadv_priv *bat_priv, - uint8_t type, uint8_t version, - void *tvlv_value, uint16_t tvlv_value_len); -uint16_t batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv, - unsigned char **packet_buff, - int *packet_buff_len, - int packet_min_len); + u8 type, u8 version, + void *tvlv_value, u16 tvlv_value_len); +u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv, + unsigned char **packet_buff, + int *packet_buff_len, int packet_min_len); void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv, struct batadv_ogm_packet *batadv_ogm_packet, struct batadv_orig_node *orig_node); void batadv_tvlv_container_unregister(struct batadv_priv *bat_priv, - uint8_t type, uint8_t version); + u8 type, u8 version); void batadv_tvlv_handler_register(struct batadv_priv *bat_priv, void (*optr)(struct batadv_priv *bat_priv, struct batadv_orig_node *orig, - uint8_t flags, + u8 flags, void *tvlv_value, - uint16_t tvlv_value_len), + u16 tvlv_value_len), int (*uptr)(struct batadv_priv *bat_priv, - uint8_t *src, uint8_t *dst, + u8 *src, u8 *dst, void *tvlv_value, - uint16_t tvlv_value_len), - uint8_t type, uint8_t version, uint8_t flags); + u16 tvlv_value_len), + u8 type, u8 version, u8 flags); void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv, - uint8_t type, uint8_t version); + u8 type, u8 version); int batadv_tvlv_containers_process(struct batadv_priv *bat_priv, bool ogm_source, struct batadv_orig_node *orig_node, - uint8_t *src, uint8_t *dst, - void *tvlv_buff, uint16_t tvlv_buff_len); -void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, uint8_t *src, - uint8_t *dst, uint8_t type, uint8_t version, - void *tvlv_value, uint16_t tvlv_value_len); + u8 *src, u8 *dst, + void *tvlv_buff, u16 tvlv_buff_len); +void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src, + u8 *dst, u8 type, u8 version, + void *tvlv_value, u16 tvlv_value_len); unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len); bool batadv_vlan_ap_isola_get(struct batadv_priv *bat_priv, unsigned short vid); diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index 68a9554961eb..eb76386f8d4b 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -31,6 +31,7 @@ #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/list.h> +#include <linux/lockdep.h> #include <linux/netdevice.h> #include <linux/rculist.h> #include <linux/rcupdate.h> @@ -89,7 +90,7 @@ static int batadv_mcast_mla_softif_get(struct net_device *dev, * Returns true if the given address is already in the given list. * Otherwise returns false. */ -static bool batadv_mcast_mla_is_duplicate(uint8_t *mcast_addr, +static bool batadv_mcast_mla_is_duplicate(u8 *mcast_addr, struct hlist_head *mcast_list) { struct batadv_hw_addr *mcast_entry; @@ -103,15 +104,19 @@ static bool batadv_mcast_mla_is_duplicate(uint8_t *mcast_addr, /** * batadv_mcast_mla_list_free - free a list of multicast addresses + * @bat_priv: the bat priv with all the soft interface information * @mcast_list: the list to free * * Removes and frees all items in the given mcast_list. */ -static void batadv_mcast_mla_list_free(struct hlist_head *mcast_list) +static void batadv_mcast_mla_list_free(struct batadv_priv *bat_priv, + struct hlist_head *mcast_list) { struct batadv_hw_addr *mcast_entry; struct hlist_node *tmp; + lockdep_assert_held(&bat_priv->tt.commit_lock); + hlist_for_each_entry_safe(mcast_entry, tmp, mcast_list, list) { hlist_del(&mcast_entry->list); kfree(mcast_entry); @@ -134,6 +139,8 @@ static void batadv_mcast_mla_tt_retract(struct batadv_priv *bat_priv, struct batadv_hw_addr *mcast_entry; struct hlist_node *tmp; + lockdep_assert_held(&bat_priv->tt.commit_lock); + hlist_for_each_entry_safe(mcast_entry, tmp, &bat_priv->mcast.mla_list, list) { if (mcast_list && @@ -164,6 +171,8 @@ static void batadv_mcast_mla_tt_add(struct batadv_priv *bat_priv, struct batadv_hw_addr *mcast_entry; struct hlist_node *tmp; + lockdep_assert_held(&bat_priv->tt.commit_lock); + if (!mcast_list) return; @@ -268,7 +277,7 @@ update: batadv_mcast_mla_tt_add(bat_priv, &mcast_list); out: - batadv_mcast_mla_list_free(&mcast_list); + batadv_mcast_mla_list_free(bat_priv, &mcast_list); } /** @@ -595,11 +604,13 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, */ static void batadv_mcast_want_unsnoop_update(struct batadv_priv *bat_priv, struct batadv_orig_node *orig, - uint8_t mcast_flags) + u8 mcast_flags) { struct hlist_node *node = &orig->mcast_want_all_unsnoopables_node; struct hlist_head *head = &bat_priv->mcast.want_all_unsnoopables_list; + lockdep_assert_held(&orig->mcast_handler_lock); + /* switched from flag unset to set */ if (mcast_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES && !(orig->mcast_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES)) { @@ -638,11 +649,13 @@ static void batadv_mcast_want_unsnoop_update(struct batadv_priv *bat_priv, */ static void batadv_mcast_want_ipv4_update(struct batadv_priv *bat_priv, struct batadv_orig_node *orig, - uint8_t mcast_flags) + u8 mcast_flags) { struct hlist_node *node = &orig->mcast_want_all_ipv4_node; struct hlist_head *head = &bat_priv->mcast.want_all_ipv4_list; + lockdep_assert_held(&orig->mcast_handler_lock); + /* switched from flag unset to set */ if (mcast_flags & BATADV_MCAST_WANT_ALL_IPV4 && !(orig->mcast_flags & BATADV_MCAST_WANT_ALL_IPV4)) { @@ -681,11 +694,13 @@ static void batadv_mcast_want_ipv4_update(struct batadv_priv *bat_priv, */ static void batadv_mcast_want_ipv6_update(struct batadv_priv *bat_priv, struct batadv_orig_node *orig, - uint8_t mcast_flags) + u8 mcast_flags) { struct hlist_node *node = &orig->mcast_want_all_ipv6_node; struct hlist_head *head = &bat_priv->mcast.want_all_ipv6_list; + lockdep_assert_held(&orig->mcast_handler_lock); + /* switched from flag unset to set */ if (mcast_flags & BATADV_MCAST_WANT_ALL_IPV6 && !(orig->mcast_flags & BATADV_MCAST_WANT_ALL_IPV6)) { @@ -721,17 +736,17 @@ static void batadv_mcast_want_ipv6_update(struct batadv_priv *bat_priv, */ static void batadv_mcast_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, struct batadv_orig_node *orig, - uint8_t flags, + u8 flags, void *tvlv_value, - uint16_t tvlv_value_len) + u16 tvlv_value_len) { bool orig_mcast_enabled = !(flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND); - uint8_t mcast_flags = BATADV_NO_FLAGS; + u8 mcast_flags = BATADV_NO_FLAGS; bool orig_initialized; if (orig_mcast_enabled && tvlv_value && (tvlv_value_len >= sizeof(mcast_flags))) - mcast_flags = *(uint8_t *)tvlv_value; + mcast_flags = *(u8 *)tvlv_value; spin_lock_bh(&orig->mcast_handler_lock); orig_initialized = test_bit(BATADV_ORIG_CAPA_HAS_MCAST, diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h index beb6e56c624a..8f3cb04b9f13 100644 --- a/net/batman-adv/multicast.h +++ b/net/batman-adv/multicast.h @@ -20,8 +20,6 @@ #include "main.h" -struct batadv_orig_node; -struct batadv_priv; struct sk_buff; /** diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 46604010dcd4..f5276be2c77c 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -130,9 +130,8 @@ void batadv_nc_status_update(struct net_device *net_dev) */ static void batadv_nc_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, struct batadv_orig_node *orig, - uint8_t flags, - void *tvlv_value, - uint16_t tvlv_value_len) + u8 flags, + void *tvlv_value, u16 tvlv_value_len) { if (flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND) clear_bit(BATADV_ORIG_CAPA_HAS_NC, &orig->capabilities); @@ -382,7 +381,7 @@ static void batadv_nc_purge_orig_hash(struct batadv_priv *bat_priv) struct batadv_hashtable *hash = bat_priv->orig_hash; struct hlist_head *head; struct batadv_orig_node *orig_node; - uint32_t i; + u32 i; if (!hash) return; @@ -418,7 +417,7 @@ static void batadv_nc_purge_paths(struct batadv_priv *bat_priv, struct hlist_node *node_tmp; struct batadv_nc_path *nc_path; spinlock_t *lock; /* Protects lists in hash */ - uint32_t i; + u32 i; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -478,10 +477,10 @@ static void batadv_nc_hash_key_gen(struct batadv_nc_path *key, const char *src, * * Returns the selected index in the hash table for the given data. */ -static uint32_t batadv_nc_hash_choose(const void *data, uint32_t size) +static u32 batadv_nc_hash_choose(const void *data, u32 size) { const struct batadv_nc_path *nc_path = data; - uint32_t hash = 0; + u32 hash = 0; hash = jhash(&nc_path->prev_hop, sizeof(nc_path->prev_hop), hash); hash = jhash(&nc_path->next_hop, sizeof(nc_path->next_hop), hash); @@ -587,6 +586,8 @@ static bool batadv_nc_sniffed_purge(struct batadv_priv *bat_priv, unsigned long timeout = bat_priv->nc.max_buffer_time; bool res = false; + lockdep_assert_held(&nc_path->packet_list_lock); + /* Packets are added to tail, so the remaining packets did not time * out and we can stop processing the current queue */ @@ -623,6 +624,8 @@ static bool batadv_nc_fwd_flush(struct batadv_priv *bat_priv, { unsigned long timeout = bat_priv->nc.max_fwd_delay; + lockdep_assert_held(&nc_path->packet_list_lock); + /* Packets are added to tail, so the remaining packets did not time * out and we can stop processing the current queue */ @@ -744,8 +747,8 @@ static bool batadv_can_nc_with_orig(struct batadv_priv *bat_priv, struct batadv_ogm_packet *ogm_packet) { struct batadv_orig_ifinfo *orig_ifinfo; - uint32_t last_real_seqno; - uint8_t last_ttl; + u32 last_real_seqno; + u8 last_ttl; orig_ifinfo = batadv_orig_ifinfo_get(orig_node, BATADV_IF_DEFAULT); if (!orig_ifinfo) @@ -873,8 +876,8 @@ free: } /** - * batadv_nc_update_nc_node - updates stored incoming and outgoing nc node structs - * (best called on incoming OGMs) + * batadv_nc_update_nc_node - updates stored incoming and outgoing nc node + * structs (best called on incoming OGMs) * @bat_priv: the bat priv with all the soft interface information * @orig_node: orig node originating the ogm packet * @orig_neigh_node: neighboring orig node from which we received the ogm packet @@ -888,7 +891,8 @@ void batadv_nc_update_nc_node(struct batadv_priv *bat_priv, struct batadv_ogm_packet *ogm_packet, int is_single_hop_neigh) { - struct batadv_nc_node *in_nc_node = NULL, *out_nc_node = NULL; + struct batadv_nc_node *in_nc_node = NULL; + struct batadv_nc_node *out_nc_node = NULL; /* Check if network coding is enabled */ if (!atomic_read(&bat_priv->network_coding)) @@ -938,8 +942,8 @@ out: */ static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv, struct batadv_hashtable *hash, - uint8_t *src, - uint8_t *dst) + u8 *src, + u8 *dst) { int hash_added; struct batadv_nc_path *nc_path, nc_path_key; @@ -991,9 +995,9 @@ static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv, * selection of a receiver with slightly lower TQ than the other * @tq: to be weighted tq value */ -static uint8_t batadv_nc_random_weight_tq(uint8_t tq) +static u8 batadv_nc_random_weight_tq(u8 tq) { - uint8_t rand_val, rand_tq; + u8 rand_val, rand_tq; get_random_bytes(&rand_val, sizeof(rand_val)); @@ -1038,7 +1042,7 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv, struct batadv_nc_packet *nc_packet, struct batadv_neigh_node *neigh_node) { - uint8_t tq_weighted_neigh, tq_weighted_coding, tq_tmp; + u8 tq_weighted_neigh, tq_weighted_coding, tq_tmp; struct sk_buff *skb_dest, *skb_src; struct batadv_unicast_packet *packet1; struct batadv_unicast_packet *packet2; @@ -1047,7 +1051,7 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv, struct batadv_neigh_node *router_coding = NULL; struct batadv_neigh_ifinfo *router_neigh_ifinfo = NULL; struct batadv_neigh_ifinfo *router_coding_ifinfo = NULL; - uint8_t *first_source, *first_dest, *second_source, *second_dest; + u8 *first_source, *first_dest, *second_source, *second_dest; __be32 packet_id1, packet_id2; size_t count; bool res = false; @@ -1231,8 +1235,7 @@ out: * * Returns true if coding of a decoded packet is allowed. */ -static bool batadv_nc_skb_coding_possible(struct sk_buff *skb, - uint8_t *dst, uint8_t *src) +static bool batadv_nc_skb_coding_possible(struct sk_buff *skb, u8 *dst, u8 *src) { if (BATADV_SKB_CB(skb)->decoded && !batadv_compare_eth(dst, src)) return false; @@ -1255,7 +1258,7 @@ batadv_nc_path_search(struct batadv_priv *bat_priv, struct batadv_nc_node *in_nc_node, struct batadv_nc_node *out_nc_node, struct sk_buff *skb, - uint8_t *eth_dst) + u8 *eth_dst) { struct batadv_nc_path *nc_path, nc_path_key; struct batadv_nc_packet *nc_packet_out = NULL; @@ -1321,8 +1324,8 @@ batadv_nc_path_search(struct batadv_priv *bat_priv, static struct batadv_nc_packet * batadv_nc_skb_src_search(struct batadv_priv *bat_priv, struct sk_buff *skb, - uint8_t *eth_dst, - uint8_t *eth_src, + u8 *eth_dst, + u8 *eth_src, struct batadv_nc_node *in_nc_node) { struct batadv_orig_node *orig_node; @@ -1362,7 +1365,7 @@ batadv_nc_skb_src_search(struct batadv_priv *bat_priv, */ static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv, struct sk_buff *skb, - uint8_t *eth_dst_new) + u8 *eth_dst_new) { struct ethhdr *ethhdr; @@ -1638,7 +1641,7 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, struct batadv_unicast_packet *unicast_packet; struct batadv_coded_packet coded_packet_tmp; struct ethhdr *ethhdr, ethhdr_tmp; - uint8_t *orig_dest, ttl, ttvn; + u8 *orig_dest, ttl, ttvn; unsigned int coding_len; int err; @@ -1730,7 +1733,7 @@ batadv_nc_find_decoding_packet(struct batadv_priv *bat_priv, struct batadv_hashtable *hash = bat_priv->nc.decoding_hash; struct batadv_nc_packet *tmp_nc_packet, *nc_packet = NULL; struct batadv_nc_path *nc_path, nc_path_key; - uint8_t *dest, *source; + u8 *dest, *source; __be32 packet_id; int index; diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h index 5b79aa8c64c1..8f6d4ad8778a 100644 --- a/net/batman-adv/network-coding.h +++ b/net/batman-adv/network-coding.h @@ -22,11 +22,7 @@ #include <linux/types.h> -struct batadv_nc_node; -struct batadv_neigh_node; struct batadv_ogm_packet; -struct batadv_orig_node; -struct batadv_priv; struct net_device; struct seq_file; struct sk_buff; diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 32a0fcfab36d..7486df9ed48d 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -26,6 +26,7 @@ #include <linux/list.h> #include <linux/lockdep.h> #include <linux/netdevice.h> +#include <linux/rculist.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/spinlock.h> @@ -70,7 +71,7 @@ batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node, struct batadv_orig_node_vlan *vlan = NULL, *tmp; rcu_read_lock(); - list_for_each_entry_rcu(tmp, &orig_node->vlan_list, list) { + hlist_for_each_entry_rcu(tmp, &orig_node->vlan_list, list) { if (tmp->vid != vid) continue; @@ -118,7 +119,7 @@ batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node, atomic_set(&vlan->refcount, 2); vlan->vid = vid; - list_add_rcu(&vlan->list, &orig_node->vlan_list); + hlist_add_head_rcu(&vlan->list, &orig_node->vlan_list); out: spin_unlock_bh(&orig_node->vlan_list_lock); @@ -442,41 +443,6 @@ out: } /** - * batadv_neigh_node_new - create and init a new neigh_node object - * @hard_iface: the interface where the neighbour is connected to - * @neigh_addr: the mac address of the neighbour interface - * @orig_node: originator object representing the neighbour - * - * Allocates a new neigh_node object and initialises all the generic fields. - * Returns the new object or NULL on failure. - */ -struct batadv_neigh_node * -batadv_neigh_node_new(struct batadv_hard_iface *hard_iface, - const uint8_t *neigh_addr, - struct batadv_orig_node *orig_node) -{ - struct batadv_neigh_node *neigh_node; - - neigh_node = kzalloc(sizeof(*neigh_node), GFP_ATOMIC); - if (!neigh_node) - goto out; - - INIT_HLIST_NODE(&neigh_node->list); - INIT_HLIST_HEAD(&neigh_node->ifinfo_list); - spin_lock_init(&neigh_node->ifinfo_lock); - - ether_addr_copy(neigh_node->addr, neigh_addr); - neigh_node->if_incoming = hard_iface; - neigh_node->orig_node = orig_node; - - /* extra reference for return */ - atomic_set(&neigh_node->refcount, 2); - -out: - return neigh_node; -} - -/** * batadv_neigh_node_get - retrieve a neighbour from the list * @orig_node: originator which the neighbour belongs to * @hard_iface: the interface where this neighbour is connected to @@ -486,10 +452,10 @@ out: * which is connected through the provided hard interface. * Returns NULL if the neighbour is not found. */ -struct batadv_neigh_node * +static struct batadv_neigh_node * batadv_neigh_node_get(const struct batadv_orig_node *orig_node, const struct batadv_hard_iface *hard_iface, - const uint8_t *addr) + const u8 *addr) { struct batadv_neigh_node *tmp_neigh_node, *res = NULL; @@ -513,6 +479,59 @@ batadv_neigh_node_get(const struct batadv_orig_node *orig_node, } /** + * batadv_neigh_node_new - create and init a new neigh_node object + * @orig_node: originator object representing the neighbour + * @hard_iface: the interface where the neighbour is connected to + * @neigh_addr: the mac address of the neighbour interface + * + * Allocates a new neigh_node object and initialises all the generic fields. + * Returns the new object or NULL on failure. + */ +struct batadv_neigh_node * +batadv_neigh_node_new(struct batadv_orig_node *orig_node, + struct batadv_hard_iface *hard_iface, + const u8 *neigh_addr) +{ + struct batadv_neigh_node *neigh_node; + + neigh_node = batadv_neigh_node_get(orig_node, hard_iface, neigh_addr); + if (neigh_node) + goto out; + + neigh_node = kzalloc(sizeof(*neigh_node), GFP_ATOMIC); + if (!neigh_node) + goto out; + + if (!atomic_inc_not_zero(&hard_iface->refcount)) { + kfree(neigh_node); + neigh_node = NULL; + goto out; + } + + INIT_HLIST_NODE(&neigh_node->list); + INIT_HLIST_HEAD(&neigh_node->ifinfo_list); + spin_lock_init(&neigh_node->ifinfo_lock); + + ether_addr_copy(neigh_node->addr, neigh_addr); + neigh_node->if_incoming = hard_iface; + neigh_node->orig_node = orig_node; + + /* extra reference for return */ + atomic_set(&neigh_node->refcount, 2); + + spin_lock_bh(&orig_node->neigh_list_lock); + hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list); + spin_unlock_bh(&orig_node->neigh_list_lock); + + batadv_dbg(BATADV_DBG_BATMAN, orig_node->bat_priv, + "Creating new neighbor %pM for orig_node %pM on interface %s\n", + neigh_addr, orig_node->orig, hard_iface->net_dev->name); + +out: + return neigh_node; +} + +/** * batadv_orig_ifinfo_free_rcu - free the orig_ifinfo object * @rcu: rcu pointer of the orig_ifinfo object */ @@ -624,7 +643,7 @@ void batadv_originator_free(struct batadv_priv *bat_priv) struct hlist_head *head; spinlock_t *list_lock; /* spinlock to protect write access */ struct batadv_orig_node *orig_node; - uint32_t i; + u32 i; if (!hash) return; @@ -659,7 +678,7 @@ void batadv_originator_free(struct batadv_priv *bat_priv) * Returns the newly created object or NULL on failure. */ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, - const uint8_t *addr) + const u8 *addr) { struct batadv_orig_node *orig_node; struct batadv_orig_node_vlan *vlan; @@ -674,7 +693,7 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, return NULL; INIT_HLIST_HEAD(&orig_node->neigh_list); - INIT_LIST_HEAD(&orig_node->vlan_list); + INIT_HLIST_HEAD(&orig_node->vlan_list); INIT_HLIST_HEAD(&orig_node->ifinfo_list); spin_lock_init(&orig_node->bcast_seqno_lock); spin_lock_init(&orig_node->neigh_list_lock); @@ -981,7 +1000,7 @@ static void _batadv_purge_orig(struct batadv_priv *bat_priv) struct hlist_head *head; spinlock_t *list_lock; /* spinlock to protect write access */ struct batadv_orig_node *orig_node; - uint32_t i; + u32 i; if (!hash) return; @@ -1010,7 +1029,6 @@ static void _batadv_purge_orig(struct batadv_priv *bat_priv) spin_unlock_bh(list_lock); } - batadv_gw_node_purge(bat_priv); batadv_gw_election(bat_priv); } @@ -1115,7 +1133,7 @@ int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, struct batadv_hashtable *hash = bat_priv->orig_hash; struct hlist_head *head; struct batadv_orig_node *orig_node; - uint32_t i; + u32 i; int ret; /* resize all orig nodes because orig_node->bcast_own(_sum) depend on @@ -1152,7 +1170,7 @@ int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface, struct batadv_hard_iface *hard_iface_tmp; struct batadv_orig_node *orig_node; struct batadv_algo_ops *bao = bat_priv->bat_algo_ops; - uint32_t i; + u32 i; int ret; /* resize all orig nodes because orig_node->bcast_own(_sum) depend on diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index 79734d302010..fa18f9bf266b 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -40,15 +40,11 @@ void batadv_purge_orig_ref(struct batadv_priv *bat_priv); void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node); void batadv_orig_node_free_ref_now(struct batadv_orig_node *orig_node); struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, - const uint8_t *addr); + const u8 *addr); struct batadv_neigh_node * -batadv_neigh_node_get(const struct batadv_orig_node *orig_node, - const struct batadv_hard_iface *hard_iface, - const uint8_t *addr); -struct batadv_neigh_node * -batadv_neigh_node_new(struct batadv_hard_iface *hard_iface, - const uint8_t *neigh_addr, - struct batadv_orig_node *orig_node); +batadv_neigh_node_new(struct batadv_orig_node *orig_node, + struct batadv_hard_iface *hard_iface, + const u8 *neigh_addr); void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node); struct batadv_neigh_node * batadv_orig_router_get(struct batadv_orig_node *orig_node, @@ -86,9 +82,9 @@ void batadv_orig_node_vlan_free_ref(struct batadv_orig_node_vlan *orig_vlan); /* hashfunction to choose an entry in a hash table of given size * hash algorithm from http://en.wikipedia.org/wiki/Hash_table */ -static inline uint32_t batadv_choose_orig(const void *data, uint32_t size) +static inline u32 batadv_choose_orig(const void *data, u32 size) { - uint32_t hash = 0; + u32 hash = 0; hash = jhash(data, ETH_ALEN, hash); return hash % size; diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 9e747c08d0bc..11f996b39fef 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -197,8 +197,8 @@ enum batadv_tvlv_type { * transport the claim type and the group id */ struct batadv_bla_claim_dst { - uint8_t magic[3]; /* FF:43:05 */ - uint8_t type; /* bla_claimframe */ + u8 magic[3]; /* FF:43:05 */ + u8 type; /* bla_claimframe */ __be16 group; /* group id */ }; @@ -213,16 +213,16 @@ struct batadv_bla_claim_dst { * @tvlv_len: length of tvlv data following the ogm header */ struct batadv_ogm_packet { - uint8_t packet_type; - uint8_t version; - uint8_t ttl; - uint8_t flags; - __be32 seqno; - uint8_t orig[ETH_ALEN]; - uint8_t prev_sender[ETH_ALEN]; - uint8_t reserved; - uint8_t tq; - __be16 tvlv_len; + u8 packet_type; + u8 version; + u8 ttl; + u8 flags; + __be32 seqno; + u8 orig[ETH_ALEN]; + u8 prev_sender[ETH_ALEN]; + u8 reserved; + u8 tq; + __be16 tvlv_len; /* __packed is not needed as the struct size is divisible by 4, * and the largest data type in this struct has a size of 4. */ @@ -246,14 +246,14 @@ struct batadv_ogm_packet { * members are padded the same way as they are in real packets. */ struct batadv_icmp_header { - uint8_t packet_type; - uint8_t version; - uint8_t ttl; - uint8_t msg_type; /* see ICMP message types above */ - uint8_t dst[ETH_ALEN]; - uint8_t orig[ETH_ALEN]; - uint8_t uid; - uint8_t align[3]; + u8 packet_type; + u8 version; + u8 ttl; + u8 msg_type; /* see ICMP message types above */ + u8 dst[ETH_ALEN]; + u8 orig[ETH_ALEN]; + u8 uid; + u8 align[3]; }; /** @@ -269,15 +269,15 @@ struct batadv_icmp_header { * @seqno: ICMP sequence number */ struct batadv_icmp_packet { - uint8_t packet_type; - uint8_t version; - uint8_t ttl; - uint8_t msg_type; /* see ICMP message types above */ - uint8_t dst[ETH_ALEN]; - uint8_t orig[ETH_ALEN]; - uint8_t uid; - uint8_t reserved; - __be16 seqno; + u8 packet_type; + u8 version; + u8 ttl; + u8 msg_type; /* see ICMP message types above */ + u8 dst[ETH_ALEN]; + u8 orig[ETH_ALEN]; + u8 uid; + u8 reserved; + __be16 seqno; }; #define BATADV_RR_LEN 16 @@ -296,16 +296,16 @@ struct batadv_icmp_packet { * @rr: route record array */ struct batadv_icmp_packet_rr { - uint8_t packet_type; - uint8_t version; - uint8_t ttl; - uint8_t msg_type; /* see ICMP message types above */ - uint8_t dst[ETH_ALEN]; - uint8_t orig[ETH_ALEN]; - uint8_t uid; - uint8_t rr_cur; - __be16 seqno; - uint8_t rr[BATADV_RR_LEN][ETH_ALEN]; + u8 packet_type; + u8 version; + u8 ttl; + u8 msg_type; /* see ICMP message types above */ + u8 dst[ETH_ALEN]; + u8 orig[ETH_ALEN]; + u8 uid; + u8 rr_cur; + __be16 seqno; + u8 rr[BATADV_RR_LEN][ETH_ALEN]; }; #define BATADV_ICMP_MAX_PACKET_SIZE sizeof(struct batadv_icmp_packet_rr) @@ -331,11 +331,11 @@ struct batadv_icmp_packet_rr { * @dest: originator destination of the unicast packet */ struct batadv_unicast_packet { - uint8_t packet_type; - uint8_t version; - uint8_t ttl; - uint8_t ttvn; /* destination translation table version number */ - uint8_t dest[ETH_ALEN]; + u8 packet_type; + u8 version; + u8 ttl; + u8 ttvn; /* destination translation table version number */ + u8 dest[ETH_ALEN]; /* "4 bytes boundary + 2 bytes" long to make the payload after the * following ethernet header again 4 bytes boundary aligned */ @@ -349,9 +349,9 @@ struct batadv_unicast_packet { */ struct batadv_unicast_4addr_packet { struct batadv_unicast_packet u; - uint8_t src[ETH_ALEN]; - uint8_t subtype; - uint8_t reserved; + u8 src[ETH_ALEN]; + u8 subtype; + u8 reserved; /* "4 bytes boundary + 2 bytes" long to make the payload after the * following ethernet header again 4 bytes boundary aligned */ @@ -370,22 +370,22 @@ struct batadv_unicast_4addr_packet { * @total_size: size of the merged packet */ struct batadv_frag_packet { - uint8_t packet_type; - uint8_t version; /* batman version field */ - uint8_t ttl; + u8 packet_type; + u8 version; /* batman version field */ + u8 ttl; #if defined(__BIG_ENDIAN_BITFIELD) - uint8_t no:4; - uint8_t reserved:4; + u8 no:4; + u8 reserved:4; #elif defined(__LITTLE_ENDIAN_BITFIELD) - uint8_t reserved:4; - uint8_t no:4; + u8 reserved:4; + u8 no:4; #else #error "unknown bitfield endianness" #endif - uint8_t dest[ETH_ALEN]; - uint8_t orig[ETH_ALEN]; - __be16 seqno; - __be16 total_size; + u8 dest[ETH_ALEN]; + u8 orig[ETH_ALEN]; + __be16 seqno; + __be16 total_size; }; /** @@ -398,12 +398,12 @@ struct batadv_frag_packet { * @orig: originator of the broadcast packet */ struct batadv_bcast_packet { - uint8_t packet_type; - uint8_t version; /* batman version field */ - uint8_t ttl; - uint8_t reserved; - __be32 seqno; - uint8_t orig[ETH_ALEN]; + u8 packet_type; + u8 version; /* batman version field */ + u8 ttl; + u8 reserved; + __be32 seqno; + u8 orig[ETH_ALEN]; /* "4 bytes boundary + 2 bytes" long to make the payload after the * following ethernet header again 4 bytes boundary aligned */ @@ -428,21 +428,21 @@ struct batadv_bcast_packet { * @coded_len: length of network coded part of the payload */ struct batadv_coded_packet { - uint8_t packet_type; - uint8_t version; /* batman version field */ - uint8_t ttl; - uint8_t first_ttvn; - /* uint8_t first_dest[ETH_ALEN]; - saved in mac header destination */ - uint8_t first_source[ETH_ALEN]; - uint8_t first_orig_dest[ETH_ALEN]; - __be32 first_crc; - uint8_t second_ttl; - uint8_t second_ttvn; - uint8_t second_dest[ETH_ALEN]; - uint8_t second_source[ETH_ALEN]; - uint8_t second_orig_dest[ETH_ALEN]; - __be32 second_crc; - __be16 coded_len; + u8 packet_type; + u8 version; /* batman version field */ + u8 ttl; + u8 first_ttvn; + /* u8 first_dest[ETH_ALEN]; - saved in mac header destination */ + u8 first_source[ETH_ALEN]; + u8 first_orig_dest[ETH_ALEN]; + __be32 first_crc; + u8 second_ttl; + u8 second_ttvn; + u8 second_dest[ETH_ALEN]; + u8 second_source[ETH_ALEN]; + u8 second_orig_dest[ETH_ALEN]; + __be32 second_crc; + __be16 coded_len; }; #pragma pack() @@ -459,14 +459,14 @@ struct batadv_coded_packet { * @align: 2 bytes to align the header to a 4 byte boundary */ struct batadv_unicast_tvlv_packet { - uint8_t packet_type; - uint8_t version; /* batman version field */ - uint8_t ttl; - uint8_t reserved; - uint8_t dst[ETH_ALEN]; - uint8_t src[ETH_ALEN]; - __be16 tvlv_len; - uint16_t align; + u8 packet_type; + u8 version; /* batman version field */ + u8 ttl; + u8 reserved; + u8 dst[ETH_ALEN]; + u8 src[ETH_ALEN]; + __be16 tvlv_len; + u16 align; }; /** @@ -476,9 +476,9 @@ struct batadv_unicast_tvlv_packet { * @len: tvlv container length */ struct batadv_tvlv_hdr { - uint8_t type; - uint8_t version; - __be16 len; + u8 type; + u8 version; + __be16 len; }; /** @@ -500,9 +500,9 @@ struct batadv_tvlv_gateway_data { * one batadv_tvlv_tt_vlan_data object per announced vlan */ struct batadv_tvlv_tt_data { - uint8_t flags; - uint8_t ttvn; - __be16 num_vlan; + u8 flags; + u8 ttvn; + __be16 num_vlan; }; /** @@ -513,9 +513,9 @@ struct batadv_tvlv_tt_data { * @reserved: unused, useful for alignment purposes */ struct batadv_tvlv_tt_vlan_data { - __be32 crc; - __be16 vid; - uint16_t reserved; + __be32 crc; + __be16 vid; + u16 reserved; }; /** @@ -527,9 +527,9 @@ struct batadv_tvlv_tt_vlan_data { * @vid: VLAN identifier */ struct batadv_tvlv_tt_change { - uint8_t flags; - uint8_t reserved[3]; - uint8_t addr[ETH_ALEN]; + u8 flags; + u8 reserved[3]; + u8 addr[ETH_ALEN]; __be16 vid; }; @@ -539,7 +539,7 @@ struct batadv_tvlv_tt_change { * @vid: VLAN identifier */ struct batadv_tvlv_roam_adv { - uint8_t client[ETH_ALEN]; + u8 client[ETH_ALEN]; __be16 vid; }; @@ -549,8 +549,8 @@ struct batadv_tvlv_roam_adv { * @reserved: reserved field */ struct batadv_tvlv_mcast_data { - uint8_t flags; - uint8_t reserved[3]; + u8 flags; + u8 reserved[3]; }; #endif /* _NET_BATMAN_ADV_PACKET_H_ */ diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index c360c0cd19c2..8d990b070a2e 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -145,7 +145,7 @@ out: * 0 if the packet is to be accepted * 1 if the packet is to be ignored. */ -int batadv_window_protected(struct batadv_priv *bat_priv, int32_t seq_num_diff, +int batadv_window_protected(struct batadv_priv *bat_priv, s32 seq_num_diff, unsigned long *last_reset) { if (seq_num_diff <= -BATADV_TQ_LOCAL_WINDOW_SIZE || @@ -653,19 +653,19 @@ out: static bool batadv_reroute_unicast_packet(struct batadv_priv *bat_priv, struct batadv_unicast_packet *unicast_packet, - uint8_t *dst_addr, unsigned short vid) + u8 *dst_addr, unsigned short vid) { struct batadv_orig_node *orig_node = NULL; struct batadv_hard_iface *primary_if = NULL; bool ret = false; - uint8_t *orig_addr, orig_ttvn; + u8 *orig_addr, orig_ttvn; if (batadv_is_my_client(bat_priv, dst_addr, vid)) { primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) goto out; orig_addr = primary_if->net_dev->dev_addr; - orig_ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn); + orig_ttvn = (u8)atomic_read(&bat_priv->tt.vn); } else { orig_node = batadv_transtable_search(bat_priv, NULL, dst_addr, vid); @@ -676,7 +676,7 @@ batadv_reroute_unicast_packet(struct batadv_priv *bat_priv, goto out; orig_addr = orig_node->orig; - orig_ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn); + orig_ttvn = (u8)atomic_read(&orig_node->last_ttvn); } /* update the packet header */ @@ -698,7 +698,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, struct batadv_unicast_packet *unicast_packet; struct batadv_hard_iface *primary_if; struct batadv_orig_node *orig_node; - uint8_t curr_ttvn, old_ttvn; + u8 curr_ttvn, old_ttvn; struct ethhdr *ethhdr; unsigned short vid; int is_old_ttvn; @@ -740,7 +740,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, * value is used later to check if the node which sent (or re-routed * last time) the packet had an updated information or not */ - curr_ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn); + curr_ttvn = (u8)atomic_read(&bat_priv->tt.vn); if (!batadv_is_my_mac(bat_priv, unicast_packet->dest)) { orig_node = batadv_orig_hash_find(bat_priv, unicast_packet->dest); @@ -751,7 +751,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, if (!orig_node) return 0; - curr_ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn); + curr_ttvn = (u8)atomic_read(&orig_node->last_ttvn); batadv_orig_node_free_ref(orig_node); } @@ -833,7 +833,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); struct batadv_unicast_packet *unicast_packet; struct batadv_unicast_4addr_packet *unicast_4addr_packet; - uint8_t *orig_addr; + u8 *orig_addr; struct batadv_orig_node *orig_node = NULL; int check, hdr_size = sizeof(*unicast_packet); bool is4addr; @@ -904,7 +904,7 @@ int batadv_recv_unicast_tvlv(struct sk_buff *skb, struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); struct batadv_unicast_tvlv_packet *unicast_tvlv_packet; unsigned char *tvlv_buff; - uint16_t tvlv_buff_len; + u16 tvlv_buff_len; int hdr_size = sizeof(*unicast_tvlv_packet); int ret = NET_RX_DROP; @@ -1007,8 +1007,8 @@ int batadv_recv_bcast_packet(struct sk_buff *skb, struct ethhdr *ethhdr; int hdr_size = sizeof(*bcast_packet); int ret = NET_RX_DROP; - int32_t seq_diff; - uint32_t seqno; + s32 seq_diff; + u32 seqno; /* drop packet if it has not necessary minimum size */ if (unlikely(!pskb_may_pull(skb, hdr_size))) diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index 6bc29d33abc1..204bbe4952a6 100644 --- a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -22,10 +22,6 @@ #include <linux/types.h> -struct batadv_hard_iface; -struct batadv_neigh_node; -struct batadv_orig_node; -struct batadv_priv; struct sk_buff; bool batadv_check_management_packet(struct sk_buff *skb, @@ -55,7 +51,7 @@ struct batadv_neigh_node * batadv_find_router(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, struct batadv_hard_iface *recv_if); -int batadv_window_protected(struct batadv_priv *bat_priv, int32_t seq_num_diff, +int batadv_window_protected(struct batadv_priv *bat_priv, s32 seq_num_diff, unsigned long *last_reset); #endif /* _NET_BATMAN_ADV_ROUTING_H_ */ diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 191076ef1eca..f664324805eb 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -54,7 +54,7 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work); */ int batadv_send_skb_packet(struct sk_buff *skb, struct batadv_hard_iface *hard_iface, - const uint8_t *dst_addr) + const u8 *dst_addr) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct ethhdr *ethhdr; @@ -172,7 +172,7 @@ batadv_send_skb_push_fill_unicast(struct sk_buff *skb, int hdr_size, struct batadv_orig_node *orig_node) { struct batadv_unicast_packet *unicast_packet; - uint8_t ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn); + u8 ttvn = (u8)atomic_read(&orig_node->last_ttvn); if (batadv_skb_head_push(skb, hdr_size) < 0) return false; @@ -343,12 +343,12 @@ out: */ int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv, struct sk_buff *skb, int packet_type, - int packet_subtype, uint8_t *dst_hint, + int packet_subtype, u8 *dst_hint, unsigned short vid) { struct ethhdr *ethhdr = (struct ethhdr *)skb->data; struct batadv_orig_node *orig_node; - uint8_t *src, *dst; + u8 *src, *dst; src = ethhdr->h_source; dst = ethhdr->h_dest; diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index 0536835fe503..82059f259e46 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -25,15 +25,12 @@ #include "packet.h" -struct batadv_hard_iface; -struct batadv_orig_node; -struct batadv_priv; struct sk_buff; struct work_struct; int batadv_send_skb_packet(struct sk_buff *skb, struct batadv_hard_iface *hard_iface, - const uint8_t *dst_addr); + const u8 *dst_addr); int batadv_send_skb_to_orig(struct sk_buff *skb, struct batadv_orig_node *orig_node, struct batadv_hard_iface *recv_if); @@ -56,7 +53,7 @@ int batadv_send_skb_unicast(struct batadv_priv *bat_priv, unsigned short vid); int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv, struct sk_buff *skb, int packet_type, - int packet_subtype, uint8_t *dst_hint, + int packet_subtype, u8 *dst_hint, unsigned short vid); int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid); @@ -75,7 +72,7 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb, * Returns NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. */ static inline int batadv_send_skb_via_tt(struct batadv_priv *bat_priv, - struct sk_buff *skb, uint8_t *dst_hint, + struct sk_buff *skb, u8 *dst_hint, unsigned short vid) { return batadv_send_skb_via_tt_generic(bat_priv, skb, BATADV_UNICAST, 0, @@ -100,7 +97,7 @@ static inline int batadv_send_skb_via_tt(struct batadv_priv *bat_priv, static inline int batadv_send_skb_via_tt_4addr(struct batadv_priv *bat_priv, struct sk_buff *skb, int packet_subtype, - uint8_t *dst_hint, + u8 *dst_hint, unsigned short vid) { return batadv_send_skb_via_tt_generic(bat_priv, skb, diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 51cda3a7c51d..ac4d08de5df4 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -131,7 +131,7 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p) struct batadv_priv *bat_priv = netdev_priv(dev); struct batadv_softif_vlan *vlan; struct sockaddr *addr = p; - uint8_t old_addr[ETH_ALEN]; + u8 old_addr[ETH_ALEN]; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; @@ -186,19 +186,19 @@ static int batadv_interface_tx(struct sk_buff *skb, struct batadv_hard_iface *primary_if = NULL; struct batadv_bcast_packet *bcast_packet; __be16 ethertype = htons(ETH_P_BATMAN); - static const uint8_t stp_addr[ETH_ALEN] = {0x01, 0x80, 0xC2, 0x00, - 0x00, 0x00}; - static const uint8_t ectp_addr[ETH_ALEN] = {0xCF, 0x00, 0x00, 0x00, - 0x00, 0x00}; + static const u8 stp_addr[ETH_ALEN] = {0x01, 0x80, 0xC2, 0x00, + 0x00, 0x00}; + static const u8 ectp_addr[ETH_ALEN] = {0xCF, 0x00, 0x00, 0x00, + 0x00, 0x00}; enum batadv_dhcp_recipient dhcp_rcp = BATADV_DHCP_NO; - uint8_t *dst_hint = NULL, chaddr[ETH_ALEN]; + u8 *dst_hint = NULL, chaddr[ETH_ALEN]; struct vlan_ethhdr *vhdr; unsigned int header_len = 0; int data_len = skb->len, ret; unsigned long brd_delay = 1; bool do_bcast = false, client_added; unsigned short vid; - uint32_t seqno; + u32 seqno; int gw_mode; enum batadv_forw_mode forw_mode; struct batadv_orig_node *mcast_single_orig = NULL; @@ -750,9 +750,9 @@ static void batadv_softif_destroy_finish(struct work_struct *work) static int batadv_softif_init_late(struct net_device *dev) { struct batadv_priv *bat_priv; - uint32_t random_seqno; + u32 random_seqno; int ret; - size_t cnt_len = sizeof(uint64_t) * BATADV_CNT_NUM; + size_t cnt_len = sizeof(u64) * BATADV_CNT_NUM; batadv_set_lockdep_class(dev); @@ -763,7 +763,7 @@ static int batadv_softif_init_late(struct net_device *dev) /* batadv_interface_stats() needs to be available as soon as * register_netdevice() has been called */ - bat_priv->bat_counters = __alloc_percpu(cnt_len, __alignof__(uint64_t)); + bat_priv->bat_counters = __alloc_percpu(cnt_len, __alignof__(u64)); if (!bat_priv->bat_counters) return -ENOMEM; @@ -941,14 +941,12 @@ static void batadv_softif_init_early(struct net_device *dev) dev->netdev_ops = &batadv_netdev_ops; dev->destructor = batadv_softif_free; dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; - dev->tx_queue_len = 0; + dev->priv_flags |= IFF_NO_QUEUE; /* can't call min_mtu, because the needed variables * have not been initialized yet */ dev->mtu = ETH_DATA_LEN; - /* reserve more space in the skbuff for our header */ - dev->hard_header_len = batadv_max_header_len(); /* generate random address */ eth_hw_addr_random(dev); @@ -1117,8 +1115,7 @@ static const struct { #endif }; -static void batadv_get_strings(struct net_device *dev, uint32_t stringset, - uint8_t *data) +static void batadv_get_strings(struct net_device *dev, u32 stringset, u8 *data) { if (stringset == ETH_SS_STATS) memcpy(data, batadv_counters_strings, @@ -1126,8 +1123,7 @@ static void batadv_get_strings(struct net_device *dev, uint32_t stringset, } static void batadv_get_ethtool_stats(struct net_device *dev, - struct ethtool_stats *stats, - uint64_t *data) + struct ethtool_stats *stats, u64 *data) { struct batadv_priv *bat_priv = netdev_priv(dev); int i; diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h index 578e8a663c30..8e82176f40b1 100644 --- a/net/batman-adv/soft-interface.h +++ b/net/batman-adv/soft-interface.h @@ -22,10 +22,6 @@ #include <net/rtnetlink.h> -struct batadv_hard_iface; -struct batadv_orig_node; -struct batadv_priv; -struct batadv_softif_vlan; struct net_device; struct sk_buff; diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index d6a312a82c03..9de3c8804ff4 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -457,7 +457,7 @@ static ssize_t batadv_show_gw_bwidth(struct kobject *kobj, struct attribute *attr, char *buff) { struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); - uint32_t down, up; + u32 down, up; down = atomic_read(&bat_priv->gw.bandwidth_down); up = atomic_read(&bat_priv->gw.bandwidth_up); @@ -512,7 +512,7 @@ static ssize_t batadv_store_isolation_mark(struct kobject *kobj, { struct net_device *net_dev = batadv_kobj_to_netdev(kobj); struct batadv_priv *bat_priv = netdev_priv(net_dev); - uint32_t mark, mask; + u32 mark, mask; char *mask_ptr; /* parse the mask if it has been specified, otherwise assume the mask is diff --git a/net/batman-adv/sysfs.h b/net/batman-adv/sysfs.h index 2294583f7cf9..61974428a7af 100644 --- a/net/batman-adv/sysfs.h +++ b/net/batman-adv/sysfs.h @@ -23,8 +23,6 @@ #include <linux/sysfs.h> #include <linux/types.h> -struct batadv_priv; -struct batadv_softif_vlan; struct kobject; struct net_device; diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index db06de20d996..4228b10c47ea 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -56,7 +56,7 @@ static struct lock_class_key batadv_tt_local_hash_lock_class_key; static struct lock_class_key batadv_tt_global_hash_lock_class_key; -static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client, +static void batadv_send_roam_adv(struct batadv_priv *bat_priv, u8 *client, unsigned short vid, struct batadv_orig_node *orig_node); static void batadv_tt_purge(struct work_struct *work); @@ -85,10 +85,10 @@ static int batadv_compare_tt(const struct hlist_node *node, const void *data2) * Returns the hash index where the object represented by 'data' should be * stored at. */ -static inline uint32_t batadv_choose_tt(const void *data, uint32_t size) +static inline u32 batadv_choose_tt(const void *data, u32 size) { struct batadv_tt_common_entry *tt; - uint32_t hash = 0; + u32 hash = 0; tt = (struct batadv_tt_common_entry *)data; hash = jhash(&tt->addr, ETH_ALEN, hash); @@ -107,12 +107,12 @@ static inline uint32_t batadv_choose_tt(const void *data, uint32_t size) * found, NULL otherwise. */ static struct batadv_tt_common_entry * -batadv_tt_hash_find(struct batadv_hashtable *hash, const uint8_t *addr, +batadv_tt_hash_find(struct batadv_hashtable *hash, const u8 *addr, unsigned short vid) { struct hlist_head *head; struct batadv_tt_common_entry to_search, *tt, *tt_tmp = NULL; - uint32_t index; + u32 index; if (!hash) return NULL; @@ -152,7 +152,7 @@ batadv_tt_hash_find(struct batadv_hashtable *hash, const uint8_t *addr, * found, NULL otherwise. */ static struct batadv_tt_local_entry * -batadv_tt_local_hash_find(struct batadv_priv *bat_priv, const uint8_t *addr, +batadv_tt_local_hash_find(struct batadv_priv *bat_priv, const u8 *addr, unsigned short vid) { struct batadv_tt_common_entry *tt_common_entry; @@ -177,7 +177,7 @@ batadv_tt_local_hash_find(struct batadv_priv *bat_priv, const uint8_t *addr, * is found, NULL otherwise. */ static struct batadv_tt_global_entry * -batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const uint8_t *addr, +batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const u8 *addr, unsigned short vid) { struct batadv_tt_common_entry *tt_common_entry; @@ -223,7 +223,7 @@ batadv_tt_global_entry_free_ref(struct batadv_tt_global_entry *tt_global_entry) * (excluding ourself). */ int batadv_tt_global_hash_count(struct batadv_priv *bat_priv, - const uint8_t *addr, unsigned short vid) + const u8 *addr, unsigned short vid) { struct batadv_tt_global_entry *tt_global_entry; int count; @@ -315,7 +315,7 @@ static void batadv_tt_global_size_mod(struct batadv_orig_node *orig_node, if (atomic_add_return(v, &vlan->tt.num_entries) == 0) { spin_lock_bh(&orig_node->vlan_list_lock); - list_del_rcu(&vlan->list); + hlist_del_init_rcu(&vlan->list); spin_unlock_bh(&orig_node->vlan_list_lock); batadv_orig_node_vlan_free_ref(vlan); } @@ -364,11 +364,11 @@ batadv_tt_orig_list_entry_free_ref(struct batadv_tt_orig_list_entry *orig_entry) */ static void batadv_tt_local_event(struct batadv_priv *bat_priv, struct batadv_tt_local_entry *tt_local_entry, - uint8_t event_flags) + u8 event_flags) { struct batadv_tt_change_node *tt_change_node, *entry, *safe; struct batadv_tt_common_entry *common = &tt_local_entry->common; - uint8_t flags = common->flags | event_flags; + u8 flags = common->flags | event_flags; bool event_removed = false; bool del_op_requested, del_op_entry; @@ -448,7 +448,7 @@ static int batadv_tt_len(int changes_num) * * Returns the number of entries. */ -static uint16_t batadv_tt_entries(uint16_t tt_len) +static u16 batadv_tt_entries(u16 tt_len) { return tt_len / batadv_tt_len(1); } @@ -462,7 +462,8 @@ static uint16_t batadv_tt_entries(uint16_t tt_len) */ static int batadv_tt_local_table_transmit_size(struct batadv_priv *bat_priv) { - uint16_t num_vlan = 0, tt_local_entries = 0; + u16 num_vlan = 0; + u16 tt_local_entries = 0; struct batadv_softif_vlan *vlan; int hdr_size; @@ -525,8 +526,8 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv, * * Returns true if the client was successfully added, false otherwise. */ -bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, - unsigned short vid, int ifindex, uint32_t mark) +bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, + unsigned short vid, int ifindex, u32 mark) { struct batadv_priv *bat_priv = netdev_priv(soft_iface); struct batadv_tt_local_entry *tt_local; @@ -536,9 +537,10 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, struct hlist_head *head; struct batadv_tt_orig_list_entry *orig_entry; int hash_added, table_size, packet_size_max; - bool ret = false, roamed_back = false; - uint8_t remote_flags; - uint32_t match_mark; + bool ret = false; + bool roamed_back = false; + u8 remote_flags; + u32 match_mark; if (ifindex != BATADV_NULL_IFINDEX) in_dev = dev_get_by_index(&init_net, ifindex); @@ -596,13 +598,16 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, /* increase the refcounter of the related vlan */ vlan = batadv_softif_vlan_get(bat_priv, vid); if (WARN(!vlan, "adding TT local entry %pM to non-existent VLAN %d", - addr, BATADV_PRINT_VID(vid))) + addr, BATADV_PRINT_VID(vid))) { + kfree(tt_local); + tt_local = NULL; goto out; + } batadv_dbg(BATADV_DBG_TT, bat_priv, "Creating new local tt entry: %pM (vid: %d, ttvn: %d)\n", addr, BATADV_PRINT_VID(vid), - (uint8_t)atomic_read(&bat_priv->tt.vn)); + (u8)atomic_read(&bat_priv->tt.vn)); ether_addr_copy(tt_local->common.addr, addr); /* The local entry has to be marked as NEW to avoid to send it in @@ -721,19 +726,22 @@ out: * * Return the size of the allocated buffer or 0 in case of failure. */ -static uint16_t +static u16 batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node, struct batadv_tvlv_tt_data **tt_data, struct batadv_tvlv_tt_change **tt_change, - int32_t *tt_len) + s32 *tt_len) { - uint16_t num_vlan = 0, num_entries = 0, change_offset, tvlv_len; + u16 num_vlan = 0; + u16 num_entries = 0; + u16 change_offset; + u16 tvlv_len; struct batadv_tvlv_tt_vlan_data *tt_vlan; struct batadv_orig_node_vlan *vlan; - uint8_t *tt_change_ptr; + u8 *tt_change_ptr; rcu_read_lock(); - list_for_each_entry_rcu(vlan, &orig_node->vlan_list, list) { + hlist_for_each_entry_rcu(vlan, &orig_node->vlan_list, list) { num_vlan++; num_entries += atomic_read(&vlan->tt.num_entries); } @@ -759,14 +767,14 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node, (*tt_data)->num_vlan = htons(num_vlan); tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(*tt_data + 1); - list_for_each_entry_rcu(vlan, &orig_node->vlan_list, list) { + hlist_for_each_entry_rcu(vlan, &orig_node->vlan_list, list) { tt_vlan->vid = htons(vlan->vid); tt_vlan->crc = htonl(vlan->tt.crc); tt_vlan++; } - tt_change_ptr = (uint8_t *)*tt_data + change_offset; + tt_change_ptr = (u8 *)*tt_data + change_offset; *tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr; out: @@ -792,16 +800,18 @@ out: * * Return the size of the allocated buffer or 0 in case of failure. */ -static uint16_t +static u16 batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, struct batadv_tvlv_tt_data **tt_data, struct batadv_tvlv_tt_change **tt_change, - int32_t *tt_len) + s32 *tt_len) { struct batadv_tvlv_tt_vlan_data *tt_vlan; struct batadv_softif_vlan *vlan; - uint16_t num_vlan = 0, num_entries = 0, tvlv_len; - uint8_t *tt_change_ptr; + u16 num_vlan = 0; + u16 num_entries = 0; + u16 tvlv_len; + u8 *tt_change_ptr; int change_offset; rcu_read_lock(); @@ -838,7 +848,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, tt_vlan++; } - tt_change_ptr = (uint8_t *)*tt_data + change_offset; + tt_change_ptr = (u8 *)*tt_data + change_offset; *tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr; out: @@ -857,8 +867,9 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv) struct batadv_tvlv_tt_data *tt_data; struct batadv_tvlv_tt_change *tt_change; int tt_diff_len, tt_change_len = 0; - int tt_diff_entries_num = 0, tt_diff_entries_count = 0; - uint16_t tvlv_len; + int tt_diff_entries_num = 0; + int tt_diff_entries_count = 0; + u16 tvlv_len; tt_diff_entries_num = atomic_read(&bat_priv->tt.local_changes); tt_diff_len = batadv_tt_len(tt_diff_entries_num); @@ -932,12 +943,12 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) struct batadv_softif_vlan *vlan; struct hlist_head *head; unsigned short vid; - uint32_t i; + u32 i; int last_seen_secs; int last_seen_msecs; unsigned long last_seen_jiffies; bool no_purge; - uint16_t np_flag = BATADV_TT_CLIENT_NOPURGE; + u16 np_flag = BATADV_TT_CLIENT_NOPURGE; primary_if = batadv_seq_print_text_primary_if_get(seq); if (!primary_if) @@ -945,7 +956,7 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, "Locally retrieved addresses (from %s) announced via TT (TTVN: %u):\n", - net_dev->name, (uint8_t)atomic_read(&bat_priv->tt.vn)); + net_dev->name, (u8)atomic_read(&bat_priv->tt.vn)); seq_printf(seq, " %-13s %s %-8s %-9s (%-10s)\n", "Client", "VID", "Flags", "Last seen", "CRC"); @@ -1005,7 +1016,7 @@ out: static void batadv_tt_local_set_pending(struct batadv_priv *bat_priv, struct batadv_tt_local_entry *tt_local_entry, - uint16_t flags, const char *message) + u16 flags, const char *message) { batadv_tt_local_event(bat_priv, tt_local_entry, flags); @@ -1031,12 +1042,12 @@ batadv_tt_local_set_pending(struct batadv_priv *bat_priv, * * Returns the flags assigned to the local entry before being deleted */ -uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv, - const uint8_t *addr, unsigned short vid, - const char *message, bool roaming) +u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr, + unsigned short vid, const char *message, + bool roaming) { struct batadv_tt_local_entry *tt_local_entry; - uint16_t flags, curr_flags = BATADV_NO_FLAGS; + u16 flags, curr_flags = BATADV_NO_FLAGS; struct batadv_softif_vlan *vlan; void *tt_entry_exists; @@ -1139,7 +1150,7 @@ static void batadv_tt_local_purge(struct batadv_priv *bat_priv, struct batadv_hashtable *hash = bat_priv->tt.local_hash; struct hlist_head *head; spinlock_t *list_lock; /* protects write access to the hash lists */ - uint32_t i; + u32 i; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -1160,7 +1171,7 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv) struct batadv_softif_vlan *vlan; struct hlist_node *node_tmp; struct hlist_head *head; - uint32_t i; + u32 i; if (!bat_priv->tt.local_hash) return; @@ -1335,15 +1346,14 @@ out: static bool batadv_tt_global_add(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, const unsigned char *tt_addr, - unsigned short vid, uint16_t flags, - uint8_t ttvn) + unsigned short vid, u16 flags, u8 ttvn) { struct batadv_tt_global_entry *tt_global_entry; struct batadv_tt_local_entry *tt_local_entry; bool ret = false; int hash_added; struct batadv_tt_common_entry *common; - uint16_t local_flags; + u16 local_flags; /* ignore global entries from backbone nodes */ if (batadv_bla_is_backbone_gw_orig(bat_priv, orig_node->orig, vid)) @@ -1540,8 +1550,8 @@ batadv_tt_global_print_entry(struct batadv_priv *bat_priv, struct batadv_tt_common_entry *tt_common_entry; struct batadv_orig_node_vlan *vlan; struct hlist_head *head; - uint8_t last_ttvn; - uint16_t flags; + u8 last_ttvn; + u16 flags; tt_common_entry = &tt_global_entry->common; flags = tt_common_entry->flags; @@ -1615,7 +1625,7 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset) struct batadv_tt_global_entry *tt_global; struct batadv_hard_iface *primary_if; struct hlist_head *head; - uint32_t i; + u32 i; primary_if = batadv_seq_print_text_primary_if_get(seq); if (!primary_if) @@ -1648,20 +1658,28 @@ out: } /** - * batadv_tt_global_del_orig_entry - remove and free an orig_entry + * _batadv_tt_global_del_orig_entry - remove and free an orig_entry * @tt_global_entry: the global entry to remove the orig_entry from * @orig_entry: the orig entry to remove and free * * Remove an orig_entry from its list in the given tt_global_entry and * free this orig_entry afterwards. + * + * Caller must hold tt_global_entry->list_lock and ensure orig_entry->list is + * part of a list. */ static void -batadv_tt_global_del_orig_entry(struct batadv_tt_global_entry *tt_global_entry, - struct batadv_tt_orig_list_entry *orig_entry) +_batadv_tt_global_del_orig_entry(struct batadv_tt_global_entry *tt_global_entry, + struct batadv_tt_orig_list_entry *orig_entry) { + lockdep_assert_held(&tt_global_entry->list_lock); + batadv_tt_global_size_dec(orig_entry->orig_node, tt_global_entry->common.vid); atomic_dec(&tt_global_entry->orig_list_count); + /* requires holding tt_global_entry->list_lock and orig_entry->list + * being part of a list + */ hlist_del_rcu(&orig_entry->list); batadv_tt_orig_list_entry_free_ref(orig_entry); } @@ -1677,7 +1695,7 @@ batadv_tt_global_del_orig_list(struct batadv_tt_global_entry *tt_global_entry) spin_lock_bh(&tt_global_entry->list_lock); head = &tt_global_entry->orig_list; hlist_for_each_entry_safe(orig_entry, safe, head, list) - batadv_tt_global_del_orig_entry(tt_global_entry, orig_entry); + _batadv_tt_global_del_orig_entry(tt_global_entry, orig_entry); spin_unlock_bh(&tt_global_entry->list_lock); } @@ -1712,8 +1730,8 @@ batadv_tt_global_del_orig_node(struct batadv_priv *bat_priv, orig_node->orig, tt_global_entry->common.addr, BATADV_PRINT_VID(vid), message); - batadv_tt_global_del_orig_entry(tt_global_entry, - orig_entry); + _batadv_tt_global_del_orig_entry(tt_global_entry, + orig_entry); } } spin_unlock_bh(&tt_global_entry->list_lock); @@ -1835,12 +1853,12 @@ out: */ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, - int32_t match_vid, + s32 match_vid, const char *message) { struct batadv_tt_global_entry *tt_global; struct batadv_tt_common_entry *tt_common_entry; - uint32_t i; + u32 i; struct batadv_hashtable *hash = bat_priv->tt.global_hash; struct hlist_node *safe; struct hlist_head *head; @@ -1911,7 +1929,7 @@ static void batadv_tt_global_purge(struct batadv_priv *bat_priv) struct hlist_head *head; struct hlist_node *node_tmp; spinlock_t *list_lock; /* protects write access to the hash lists */ - uint32_t i; + u32 i; char *msg = NULL; struct batadv_tt_common_entry *tt_common; struct batadv_tt_global_entry *tt_global; @@ -1952,7 +1970,7 @@ static void batadv_tt_global_table_free(struct batadv_priv *bat_priv) struct batadv_tt_global_entry *tt_global; struct hlist_node *node_tmp; struct hlist_head *head; - uint32_t i; + u32 i; if (!bat_priv->tt.global_hash) return; @@ -2013,8 +2031,8 @@ _batadv_is_ap_isolated(struct batadv_tt_local_entry *tt_local_entry, * If the two clients are AP isolated the function returns NULL. */ struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv, - const uint8_t *src, - const uint8_t *addr, + const u8 *src, + const u8 *addr, unsigned short vid) { struct batadv_tt_local_entry *tt_local_entry = NULL; @@ -2082,16 +2100,16 @@ out: * * Returns the checksum of the global table of a given originator. */ -static uint32_t batadv_tt_global_crc(struct batadv_priv *bat_priv, - struct batadv_orig_node *orig_node, - unsigned short vid) +static u32 batadv_tt_global_crc(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + unsigned short vid) { struct batadv_hashtable *hash = bat_priv->tt.global_hash; struct batadv_tt_common_entry *tt_common; struct batadv_tt_global_entry *tt_global; struct hlist_head *head; - uint32_t i, crc_tmp, crc = 0; - uint8_t flags; + u32 i, crc_tmp, crc = 0; + u8 flags; __be16 tmp_vid; for (i = 0; i < hash->size; i++) { @@ -2159,14 +2177,14 @@ static uint32_t batadv_tt_global_crc(struct batadv_priv *bat_priv, * * Returns the checksum of the local table */ -static uint32_t batadv_tt_local_crc(struct batadv_priv *bat_priv, - unsigned short vid) +static u32 batadv_tt_local_crc(struct batadv_priv *bat_priv, + unsigned short vid) { struct batadv_hashtable *hash = bat_priv->tt.local_hash; struct batadv_tt_common_entry *tt_common; struct hlist_head *head; - uint32_t i, crc_tmp, crc = 0; - uint8_t flags; + u32 i, crc_tmp, crc = 0; + u8 flags; __be16 tmp_vid; for (i = 0; i < hash->size; i++) { @@ -2208,12 +2226,13 @@ static uint32_t batadv_tt_local_crc(struct batadv_priv *bat_priv, static void batadv_tt_req_list_free(struct batadv_priv *bat_priv) { - struct batadv_tt_req_node *node, *safe; + struct batadv_tt_req_node *node; + struct hlist_node *safe; spin_lock_bh(&bat_priv->tt.req_list_lock); - list_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) { - list_del_init(&node->list); + hlist_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) { + hlist_del_init(&node->list); kfree(node); } @@ -2223,7 +2242,7 @@ static void batadv_tt_req_list_free(struct batadv_priv *bat_priv) static void batadv_tt_save_orig_buffer(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, const void *tt_buff, - uint16_t tt_buff_len) + u16 tt_buff_len) { /* Replace the old buffer only if I received something in the * last OGM (the OGM could carry no changes) @@ -2243,30 +2262,36 @@ static void batadv_tt_save_orig_buffer(struct batadv_priv *bat_priv, static void batadv_tt_req_purge(struct batadv_priv *bat_priv) { - struct batadv_tt_req_node *node, *safe; + struct batadv_tt_req_node *node; + struct hlist_node *safe; spin_lock_bh(&bat_priv->tt.req_list_lock); - list_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) { + hlist_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) { if (batadv_has_timed_out(node->issued_at, BATADV_TT_REQUEST_TIMEOUT)) { - list_del_init(&node->list); + hlist_del_init(&node->list); kfree(node); } } spin_unlock_bh(&bat_priv->tt.req_list_lock); } -/* returns the pointer to the new tt_req_node struct if no request - * has already been issued for this orig_node, NULL otherwise +/** + * batadv_tt_req_node_new - search and possibly create a tt_req_node object + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: orig node this request is being issued for + * + * Returns the pointer to the new tt_req_node struct if no request + * has already been issued for this orig_node, NULL otherwise. */ static struct batadv_tt_req_node * -batadv_new_tt_req_node(struct batadv_priv *bat_priv, +batadv_tt_req_node_new(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node) { struct batadv_tt_req_node *tt_req_node_tmp, *tt_req_node = NULL; spin_lock_bh(&bat_priv->tt.req_list_lock); - list_for_each_entry(tt_req_node_tmp, &bat_priv->tt.req_list, list) { + hlist_for_each_entry(tt_req_node_tmp, &bat_priv->tt.req_list, list) { if (batadv_compare_eth(tt_req_node_tmp, orig_node) && !batadv_has_timed_out(tt_req_node_tmp->issued_at, BATADV_TT_REQUEST_TIMEOUT)) @@ -2280,7 +2305,7 @@ batadv_new_tt_req_node(struct batadv_priv *bat_priv, ether_addr_copy(tt_req_node->addr, orig_node->orig); tt_req_node->issued_at = jiffies; - list_add(&tt_req_node->list, &bat_priv->tt.req_list); + hlist_add_head(&tt_req_node->list, &bat_priv->tt.req_list); unlock: spin_unlock_bh(&bat_priv->tt.req_list_lock); return tt_req_node; @@ -2332,15 +2357,15 @@ static int batadv_tt_global_valid(const void *entry_ptr, */ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, struct batadv_hashtable *hash, - void *tvlv_buff, uint16_t tt_len, + void *tvlv_buff, u16 tt_len, int (*valid_cb)(const void *, const void *), void *cb_data) { struct batadv_tt_common_entry *tt_common_entry; struct batadv_tvlv_tt_change *tt_change; struct hlist_head *head; - uint16_t tt_tot, tt_num_entries = 0; - uint32_t i; + u16 tt_tot, tt_num_entries = 0; + u32 i; tt_tot = batadv_tt_entries(tt_len); tt_change = (struct batadv_tvlv_tt_change *)tvlv_buff; @@ -2382,11 +2407,11 @@ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, */ static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node, struct batadv_tvlv_tt_vlan_data *tt_vlan, - uint16_t num_vlan) + u16 num_vlan) { struct batadv_tvlv_tt_vlan_data *tt_vlan_tmp; struct batadv_orig_node_vlan *vlan; - uint32_t crc; + u32 crc; int i; /* check if each received CRC matches the locally stored one */ @@ -2441,11 +2466,11 @@ static void batadv_tt_global_update_crc(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node) { struct batadv_orig_node_vlan *vlan; - uint32_t crc; + u32 crc; /* recompute the global CRC for each VLAN */ rcu_read_lock(); - list_for_each_entry_rcu(vlan, &orig_node->vlan_list, list) { + hlist_for_each_entry_rcu(vlan, &orig_node->vlan_list, list) { /* if orig_node is a backbone node for this VLAN, don't compute * the CRC as we ignore all the global entries over it */ @@ -2471,9 +2496,9 @@ static void batadv_tt_global_update_crc(struct batadv_priv *bat_priv, */ static int batadv_send_tt_request(struct batadv_priv *bat_priv, struct batadv_orig_node *dst_orig_node, - uint8_t ttvn, + u8 ttvn, struct batadv_tvlv_tt_vlan_data *tt_vlan, - uint16_t num_vlan, bool full_table) + u16 num_vlan, bool full_table) { struct batadv_tvlv_tt_data *tvlv_tt_data = NULL; struct batadv_tt_req_node *tt_req_node = NULL; @@ -2489,7 +2514,7 @@ static int batadv_send_tt_request(struct batadv_priv *bat_priv, /* The new tt_req will be issued only if I'm not waiting for a * reply from the same orig_node yet */ - tt_req_node = batadv_new_tt_req_node(bat_priv, dst_orig_node); + tt_req_node = batadv_tt_req_node_new(bat_priv, dst_orig_node); if (!tt_req_node) goto out; @@ -2531,8 +2556,8 @@ out: batadv_hardif_free_ref(primary_if); if (ret && tt_req_node) { spin_lock_bh(&bat_priv->tt.req_list_lock); - /* list_del_init() verifies tt_req_node still is in the list */ - list_del_init(&tt_req_node->list); + /* hlist_del_init() verifies tt_req_node still is in the list */ + hlist_del_init(&tt_req_node->list); spin_unlock_bh(&bat_priv->tt.req_list_lock); kfree(tt_req_node); } @@ -2552,7 +2577,7 @@ out: */ static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv, struct batadv_tvlv_tt_data *tt_data, - uint8_t *req_src, uint8_t *req_dst) + u8 *req_src, u8 *req_dst) { struct batadv_orig_node *req_dst_orig_node; struct batadv_orig_node *res_dst_orig_node = NULL; @@ -2560,9 +2585,9 @@ static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv, struct batadv_tvlv_tt_data *tvlv_tt_data = NULL; struct batadv_tvlv_tt_vlan_data *tt_vlan; bool ret = false, full_table; - uint8_t orig_ttvn, req_ttvn; - uint16_t tvlv_len; - int32_t tt_len; + u8 orig_ttvn, req_ttvn; + u16 tvlv_len; + s32 tt_len; batadv_dbg(BATADV_DBG_TT, bat_priv, "Received TT_REQUEST from %pM for ttvn: %u (%pM) [%c]\n", @@ -2578,7 +2603,7 @@ static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv, if (!res_dst_orig_node) goto out; - orig_ttvn = (uint8_t)atomic_read(&req_dst_orig_node->last_ttvn); + orig_ttvn = (u8)atomic_read(&req_dst_orig_node->last_ttvn); req_ttvn = tt_data->ttvn; tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(tt_data + 1); @@ -2684,16 +2709,16 @@ out: */ static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv, struct batadv_tvlv_tt_data *tt_data, - uint8_t *req_src) + u8 *req_src) { struct batadv_tvlv_tt_data *tvlv_tt_data = NULL; struct batadv_hard_iface *primary_if = NULL; struct batadv_tvlv_tt_change *tt_change; struct batadv_orig_node *orig_node; - uint8_t my_ttvn, req_ttvn; - uint16_t tvlv_len; + u8 my_ttvn, req_ttvn; + u16 tvlv_len; bool full_table; - int32_t tt_len; + s32 tt_len; batadv_dbg(BATADV_DBG_TT, bat_priv, "Received TT_REQUEST from %pM for ttvn: %u (me) [%c]\n", @@ -2702,7 +2727,7 @@ static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv, spin_lock_bh(&bat_priv->tt.commit_lock); - my_ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn); + my_ttvn = (u8)atomic_read(&bat_priv->tt.vn); req_ttvn = tt_data->ttvn; orig_node = batadv_orig_hash_find(bat_priv, req_src); @@ -2741,7 +2766,7 @@ static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv, bat_priv->tt.last_changeset_len); spin_unlock_bh(&bat_priv->tt.last_changeset_lock); } else { - req_ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn); + req_ttvn = (u8)atomic_read(&bat_priv->tt.vn); /* allocate the tvlv, put the tt_data and all the tt_vlan_data * in the initial part @@ -2802,7 +2827,7 @@ out: */ static bool batadv_send_tt_response(struct batadv_priv *bat_priv, struct batadv_tvlv_tt_data *tt_data, - uint8_t *req_src, uint8_t *req_dst) + u8 *req_src, u8 *req_dst) { if (batadv_is_my_mac(bat_priv, req_dst)) return batadv_send_my_tt_response(bat_priv, tt_data, req_src); @@ -2813,7 +2838,7 @@ static bool batadv_send_tt_response(struct batadv_priv *bat_priv, static void _batadv_tt_update_changes(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, struct batadv_tvlv_tt_change *tt_change, - uint16_t tt_num_changes, uint8_t ttvn) + u16 tt_num_changes, u8 ttvn) { int i; int roams; @@ -2845,8 +2870,8 @@ static void _batadv_tt_update_changes(struct batadv_priv *bat_priv, static void batadv_tt_fill_gtable(struct batadv_priv *bat_priv, struct batadv_tvlv_tt_change *tt_change, - uint8_t ttvn, uint8_t *resp_src, - uint16_t num_entries) + u8 ttvn, u8 *resp_src, + u16 num_entries) { struct batadv_orig_node *orig_node; @@ -2876,7 +2901,7 @@ out: static void batadv_tt_update_changes(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, - uint16_t tt_num_changes, uint8_t ttvn, + u16 tt_num_changes, u8 ttvn, struct batadv_tvlv_tt_change *tt_change) { _batadv_tt_update_changes(bat_priv, orig_node, tt_change, @@ -2895,7 +2920,7 @@ static void batadv_tt_update_changes(struct batadv_priv *bat_priv, * * Returns true if the client is served by this node, false otherwise. */ -bool batadv_is_my_client(struct batadv_priv *bat_priv, const uint8_t *addr, +bool batadv_is_my_client(struct batadv_priv *bat_priv, const u8 *addr, unsigned short vid) { struct batadv_tt_local_entry *tt_local_entry; @@ -2926,13 +2951,14 @@ out: */ static void batadv_handle_tt_response(struct batadv_priv *bat_priv, struct batadv_tvlv_tt_data *tt_data, - uint8_t *resp_src, uint16_t num_entries) + u8 *resp_src, u16 num_entries) { - struct batadv_tt_req_node *node, *safe; + struct batadv_tt_req_node *node; + struct hlist_node *safe; struct batadv_orig_node *orig_node = NULL; struct batadv_tvlv_tt_change *tt_change; - uint8_t *tvlv_ptr = (uint8_t *)tt_data; - uint16_t change_offset; + u8 *tvlv_ptr = (u8 *)tt_data; + u16 change_offset; batadv_dbg(BATADV_DBG_TT, bat_priv, "Received TT_RESPONSE from %pM for ttvn %d t_size: %d [%c]\n", @@ -2966,10 +2992,10 @@ static void batadv_handle_tt_response(struct batadv_priv *bat_priv, /* Delete the tt_req_node from pending tt_requests list */ spin_lock_bh(&bat_priv->tt.req_list_lock); - list_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) { + hlist_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) { if (!batadv_compare_eth(node->addr, resp_src)) continue; - list_del_init(&node->list); + hlist_del_init(&node->list); kfree(node); } @@ -3015,8 +3041,7 @@ static void batadv_tt_roam_purge(struct batadv_priv *bat_priv) * * returns true if the ROAMING_ADV can be sent, false otherwise */ -static bool batadv_tt_check_roam_count(struct batadv_priv *bat_priv, - uint8_t *client) +static bool batadv_tt_check_roam_count(struct batadv_priv *bat_priv, u8 *client) { struct batadv_tt_roam_node *tt_roam_node; bool ret = false; @@ -3071,7 +3096,7 @@ unlock: * for this particular roamed client has to be forwarded to the sender of the * roaming message. */ -static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client, +static void batadv_send_roam_adv(struct batadv_priv *bat_priv, u8 *client, unsigned short vid, struct batadv_orig_node *orig_node) { @@ -3149,14 +3174,14 @@ void batadv_tt_free(struct batadv_priv *bat_priv) * @enable: whether to set or unset the flag * @count: whether to increase the TT size by the number of changed entries */ -static void batadv_tt_local_set_flags(struct batadv_priv *bat_priv, - uint16_t flags, bool enable, bool count) +static void batadv_tt_local_set_flags(struct batadv_priv *bat_priv, u16 flags, + bool enable, bool count) { struct batadv_hashtable *hash = bat_priv->tt.local_hash; struct batadv_tt_common_entry *tt_common_entry; - uint16_t changed_num = 0; + u16 changed_num = 0; struct hlist_head *head; - uint32_t i; + u32 i; if (!hash) return; @@ -3198,7 +3223,7 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) struct hlist_node *node_tmp; struct hlist_head *head; spinlock_t *list_lock; /* protects write access to the hash lists */ - uint32_t i; + u32 i; if (!hash) return; @@ -3246,6 +3271,8 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) */ static void batadv_tt_local_commit_changes_nolock(struct batadv_priv *bat_priv) { + lockdep_assert_held(&bat_priv->tt.commit_lock); + /* Update multicast addresses in local translation table */ batadv_mcast_mla_update(bat_priv); @@ -3264,7 +3291,7 @@ static void batadv_tt_local_commit_changes_nolock(struct batadv_priv *bat_priv) atomic_inc(&bat_priv->tt.vn); batadv_dbg(BATADV_DBG_TT, bat_priv, "Local changes committed, updating to ttvn %u\n", - (uint8_t)atomic_read(&bat_priv->tt.vn)); + (u8)atomic_read(&bat_priv->tt.vn)); /* reset the sending counter */ atomic_set(&bat_priv->tt.ogm_append_cnt, BATADV_TT_OGM_APPEND_MAX); @@ -3283,8 +3310,8 @@ void batadv_tt_local_commit_changes(struct batadv_priv *bat_priv) spin_unlock_bh(&bat_priv->tt.commit_lock); } -bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, uint8_t *src, - uint8_t *dst, unsigned short vid) +bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, u8 *src, u8 *dst, + unsigned short vid) { struct batadv_tt_local_entry *tt_local_entry = NULL; struct batadv_tt_global_entry *tt_global_entry = NULL; @@ -3332,11 +3359,11 @@ out: */ static void batadv_tt_update_orig(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, - const void *tt_buff, uint16_t tt_num_vlan, + const void *tt_buff, u16 tt_num_vlan, struct batadv_tvlv_tt_change *tt_change, - uint16_t tt_num_changes, uint8_t ttvn) + u16 tt_num_changes, u8 ttvn) { - uint8_t orig_ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn); + u8 orig_ttvn = (u8)atomic_read(&orig_node->last_ttvn); struct batadv_tvlv_tt_vlan_data *tt_vlan; bool full_table = true; bool has_tt_init; @@ -3415,7 +3442,7 @@ request_table: * deleted later by a DEL or because of timeout */ bool batadv_tt_global_client_is_roaming(struct batadv_priv *bat_priv, - uint8_t *addr, unsigned short vid) + u8 *addr, unsigned short vid) { struct batadv_tt_global_entry *tt_global_entry; bool ret = false; @@ -3441,7 +3468,7 @@ out: * to keep the latter consistent with the node TTVN */ bool batadv_tt_local_client_is_roaming(struct batadv_priv *bat_priv, - uint8_t *addr, unsigned short vid) + u8 *addr, unsigned short vid) { struct batadv_tt_local_entry *tt_local_entry; bool ret = false; @@ -3527,13 +3554,13 @@ void batadv_tt_local_resize_to_mtu(struct net_device *soft_iface) */ static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, struct batadv_orig_node *orig, - uint8_t flags, void *tvlv_value, - uint16_t tvlv_value_len) + u8 flags, void *tvlv_value, + u16 tvlv_value_len) { struct batadv_tvlv_tt_vlan_data *tt_vlan; struct batadv_tvlv_tt_change *tt_change; struct batadv_tvlv_tt_data *tt_data; - uint16_t num_entries, num_vlan; + u16 num_entries, num_vlan; if (tvlv_value_len < sizeof(*tt_data)) return; @@ -3569,12 +3596,12 @@ static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, * otherwise. */ static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv, - uint8_t *src, uint8_t *dst, + u8 *src, u8 *dst, void *tvlv_value, - uint16_t tvlv_value_len) + u16 tvlv_value_len) { struct batadv_tvlv_tt_data *tt_data; - uint16_t tt_vlan_len, tt_num_entries; + u16 tt_vlan_len, tt_num_entries; char tt_flag; bool ret; @@ -3650,9 +3677,9 @@ static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv, * otherwise. */ static int batadv_roam_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv, - uint8_t *src, uint8_t *dst, + u8 *src, u8 *dst, void *tvlv_value, - uint16_t tvlv_value_len) + u16 tvlv_value_len) { struct batadv_tvlv_roam_adv *roaming_adv; struct batadv_orig_node *orig_node = NULL; @@ -3734,7 +3761,7 @@ int batadv_tt_init(struct batadv_priv *bat_priv) * otherwise */ bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv, - const uint8_t *addr, unsigned short vid) + const u8 *addr, unsigned short vid) { struct batadv_tt_global_entry *tt; bool ret; diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index 6acc25d3a925..abd8e116e5fb 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -22,44 +22,41 @@ #include <linux/types.h> -struct batadv_orig_node; -struct batadv_priv; struct net_device; struct seq_file; int batadv_tt_init(struct batadv_priv *bat_priv); -bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, - unsigned short vid, int ifindex, uint32_t mark); -uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv, - const uint8_t *addr, unsigned short vid, - const char *message, bool roaming); +bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, + unsigned short vid, int ifindex, u32 mark); +u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, + const u8 *addr, unsigned short vid, + const char *message, bool roaming); int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset); int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset); void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, - int32_t match_vid, const char *message); + s32 match_vid, const char *message); int batadv_tt_global_hash_count(struct batadv_priv *bat_priv, - const uint8_t *addr, unsigned short vid); + const u8 *addr, unsigned short vid); struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv, - const uint8_t *src, - const uint8_t *addr, + const u8 *src, const u8 *addr, unsigned short vid); void batadv_tt_free(struct batadv_priv *bat_priv); -bool batadv_is_my_client(struct batadv_priv *bat_priv, const uint8_t *addr, +bool batadv_is_my_client(struct batadv_priv *bat_priv, const u8 *addr, unsigned short vid); -bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, uint8_t *src, - uint8_t *dst, unsigned short vid); +bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, u8 *src, u8 *dst, + unsigned short vid); void batadv_tt_local_commit_changes(struct batadv_priv *bat_priv); bool batadv_tt_global_client_is_roaming(struct batadv_priv *bat_priv, - uint8_t *addr, unsigned short vid); + u8 *addr, unsigned short vid); bool batadv_tt_local_client_is_roaming(struct batadv_priv *bat_priv, - uint8_t *addr, unsigned short vid); + u8 *addr, unsigned short vid); void batadv_tt_local_resize_to_mtu(struct net_device *soft_iface); bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, const unsigned char *addr, unsigned short vid); bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv, - const uint8_t *addr, unsigned short vid); + const u8 *addr, unsigned short vid); #endif /* _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ */ diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 55610a805b53..d260efd70499 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -44,7 +44,7 @@ struct seq_file; * * *Please be careful: batadv_dat_addr_t must be UNSIGNED* */ -#define batadv_dat_addr_t uint16_t +#define batadv_dat_addr_t u16 #endif /* CONFIG_BATMAN_ADV_DAT */ @@ -103,10 +103,10 @@ struct batadv_hard_iface_bat_iv { */ struct batadv_hard_iface { struct list_head list; - int16_t if_num; + s16 if_num; char if_status; struct net_device *net_dev; - uint8_t num_bcasts; + u8 num_bcasts; struct kobject *hardif_obj; atomic_t refcount; struct packet_type batman_adv_ptype; @@ -132,8 +132,8 @@ struct batadv_orig_ifinfo { struct hlist_node list; struct batadv_hard_iface *if_outgoing; struct batadv_neigh_node __rcu *router; /* rcu protected pointer */ - uint32_t last_real_seqno; - uint8_t last_ttl; + u32 last_real_seqno; + u8 last_ttl; unsigned long batman_seqno_reset; atomic_t refcount; struct rcu_head rcu; @@ -152,9 +152,9 @@ struct batadv_frag_table_entry { struct hlist_head head; spinlock_t lock; /* protects head */ unsigned long timestamp; - uint16_t seqno; - uint16_t size; - uint16_t total_size; + u16 seqno; + u16 size; + u16 total_size; }; /** @@ -166,7 +166,7 @@ struct batadv_frag_table_entry { struct batadv_frag_list_entry { struct hlist_node list; struct sk_buff *skb; - uint8_t no; + u8 no; }; /** @@ -175,7 +175,7 @@ struct batadv_frag_list_entry { * @num_entries: number of TT entries for this VLAN */ struct batadv_vlan_tt { - uint32_t crc; + u32 crc; atomic_t num_entries; }; @@ -190,7 +190,7 @@ struct batadv_vlan_tt { struct batadv_orig_node_vlan { unsigned short vid; struct batadv_vlan_tt tt; - struct list_head list; + struct hlist_node list; atomic_t refcount; struct rcu_head rcu; }; @@ -206,7 +206,7 @@ struct batadv_orig_node_vlan { */ struct batadv_orig_bat_iv { unsigned long *bcast_own; - uint8_t *bcast_own_sum; + u8 *bcast_own_sum; /* ogm_cnt_lock protects: bcast_own, bcast_own_sum, * neigh_node->bat_iv.real_bits & neigh_node->bat_iv.real_packet_count */ @@ -260,7 +260,7 @@ struct batadv_orig_bat_iv { * @bat_iv: B.A.T.M.A.N. IV private structure */ struct batadv_orig_node { - uint8_t orig[ETH_ALEN]; + u8 orig[ETH_ALEN]; struct hlist_head ifinfo_list; struct batadv_orig_ifinfo *last_bonding_candidate; #ifdef CONFIG_BATMAN_ADV_DAT @@ -271,7 +271,7 @@ struct batadv_orig_node { #ifdef CONFIG_BATMAN_ADV_MCAST /* synchronizes mcast tvlv specific orig changes */ spinlock_t mcast_handler_lock; - uint8_t mcast_flags; + u8 mcast_flags; struct hlist_node mcast_want_all_unsnoopables_node; struct hlist_node mcast_want_all_ipv4_node; struct hlist_node mcast_want_all_ipv6_node; @@ -280,12 +280,12 @@ struct batadv_orig_node { unsigned long capa_initialized; atomic_t last_ttvn; unsigned char *tt_buff; - int16_t tt_buff_len; + s16 tt_buff_len; spinlock_t tt_buff_lock; /* protects tt_buff & tt_buff_len */ /* prevents from changing the table while reading it */ spinlock_t tt_lock; DECLARE_BITMAP(bcast_bits, BATADV_TQ_LOCAL_WINDOW_SIZE); - uint32_t last_bcast_seqno; + u32 last_bcast_seqno; struct hlist_head neigh_list; /* neigh_list_lock protects: neigh_list and router */ spinlock_t neigh_list_lock; @@ -302,7 +302,7 @@ struct batadv_orig_node { spinlock_t out_coding_list_lock; /* Protects out_coding_list */ #endif struct batadv_frag_table_entry fragments[BATADV_FRAG_BUFFER_COUNT]; - struct list_head vlan_list; + struct hlist_head vlan_list; spinlock_t vlan_list_lock; /* protects vlan_list */ struct batadv_orig_bat_iv bat_iv; }; @@ -328,16 +328,14 @@ enum batadv_orig_capabilities { * @orig_node: pointer to corresponding orig node * @bandwidth_down: advertised uplink download bandwidth * @bandwidth_up: advertised uplink upload bandwidth - * @deleted: this struct is scheduled for deletion * @refcount: number of contexts the object is used * @rcu: struct used for freeing in an RCU-safe manner */ struct batadv_gw_node { struct hlist_node list; struct batadv_orig_node *orig_node; - uint32_t bandwidth_down; - uint32_t bandwidth_up; - unsigned long deleted; + u32 bandwidth_down; + u32 bandwidth_up; atomic_t refcount; struct rcu_head rcu; }; @@ -358,7 +356,7 @@ struct batadv_gw_node { struct batadv_neigh_node { struct hlist_node list; struct batadv_orig_node *orig_node; - uint8_t addr[ETH_ALEN]; + u8 addr[ETH_ALEN]; struct hlist_head ifinfo_list; spinlock_t ifinfo_lock; /* protects ifinfo_list and its members */ struct batadv_hard_iface *if_incoming; @@ -378,11 +376,11 @@ struct batadv_neigh_node { * @real_packet_count: counted result of real_bits */ struct batadv_neigh_ifinfo_bat_iv { - uint8_t tq_recv[BATADV_TQ_GLOBAL_WINDOW_SIZE]; - uint8_t tq_index; - uint8_t tq_avg; + u8 tq_recv[BATADV_TQ_GLOBAL_WINDOW_SIZE]; + u8 tq_index; + u8 tq_avg; DECLARE_BITMAP(real_bits, BATADV_TQ_LOCAL_WINDOW_SIZE); - uint8_t real_packet_count; + u8 real_packet_count; }; /** @@ -398,7 +396,7 @@ struct batadv_neigh_ifinfo { struct hlist_node list; struct batadv_hard_iface *if_outgoing; struct batadv_neigh_ifinfo_bat_iv bat_iv; - uint8_t last_ttl; + u8 last_ttl; atomic_t refcount; struct rcu_head rcu; }; @@ -411,7 +409,7 @@ struct batadv_neigh_ifinfo { */ #ifdef CONFIG_BATMAN_ADV_BLA struct batadv_bcast_duplist_entry { - uint8_t orig[ETH_ALEN]; + u8 orig[ETH_ALEN]; __be32 crc; unsigned long entrytime; }; @@ -537,13 +535,13 @@ struct batadv_priv_tt { struct list_head changes_list; struct batadv_hashtable *local_hash; struct batadv_hashtable *global_hash; - struct list_head req_list; + struct hlist_head req_list; struct list_head roam_list; spinlock_t changes_list_lock; /* protects changes */ spinlock_t req_list_lock; /* protects req_list */ spinlock_t roam_list_lock; /* protects roam_list */ unsigned char *last_changeset; - int16_t last_changeset_len; + s16 last_changeset_len; /* protects last_changeset & last_changeset_len */ spinlock_t last_changeset_lock; /* prevents from executing a commit while reading the table */ @@ -663,7 +661,7 @@ struct batadv_priv_mcast { struct hlist_head want_all_unsnoopables_list; struct hlist_head want_all_ipv4_list; struct hlist_head want_all_ipv6_list; - uint8_t flags; + u8 flags; bool enabled; atomic_t num_disabled; atomic_t num_want_all_unsnoopables; @@ -781,7 +779,7 @@ struct batadv_priv { atomic_t mesh_state; struct net_device *soft_iface; struct net_device_stats stats; - uint64_t __percpu *bat_counters; /* Per cpu counters */ + u64 __percpu *bat_counters; /* Per cpu counters */ atomic_t aggregated_ogms; atomic_t bonding; atomic_t fragmentation; @@ -803,8 +801,8 @@ struct batadv_priv { #ifdef CONFIG_BATMAN_ADV_DEBUG atomic_t log_level; #endif - uint32_t isolation_mark; - uint32_t isolation_mark_mask; + u32 isolation_mark; + u32 isolation_mark_mask; atomic_t bcast_seqno; atomic_t bcast_queue_left; atomic_t batman_queue_left; @@ -870,7 +868,7 @@ struct batadv_socket_client { struct batadv_socket_packet { struct list_head list; size_t icmp_len; - uint8_t icmp_packet[BATADV_ICMP_MAX_PACKET_SIZE]; + u8 icmp_packet[BATADV_ICMP_MAX_PACKET_SIZE]; }; /** @@ -891,14 +889,14 @@ struct batadv_socket_packet { */ #ifdef CONFIG_BATMAN_ADV_BLA struct batadv_bla_backbone_gw { - uint8_t orig[ETH_ALEN]; + u8 orig[ETH_ALEN]; unsigned short vid; struct hlist_node hash_entry; struct batadv_priv *bat_priv; unsigned long lasttime; atomic_t wait_periods; atomic_t request_sent; - uint16_t crc; + u16 crc; atomic_t refcount; struct rcu_head rcu; }; @@ -914,7 +912,7 @@ struct batadv_bla_backbone_gw { * @rcu: struct used for freeing in an RCU-safe manner */ struct batadv_bla_claim { - uint8_t addr[ETH_ALEN]; + u8 addr[ETH_ALEN]; unsigned short vid; struct batadv_bla_backbone_gw *backbone_gw; unsigned long lasttime; @@ -936,10 +934,10 @@ struct batadv_bla_claim { * @rcu: struct used for freeing in an RCU-safe manner */ struct batadv_tt_common_entry { - uint8_t addr[ETH_ALEN]; + u8 addr[ETH_ALEN]; unsigned short vid; struct hlist_node hash_entry; - uint16_t flags; + u16 flags; unsigned long added_at; atomic_t refcount; struct rcu_head rcu; @@ -981,7 +979,7 @@ struct batadv_tt_global_entry { */ struct batadv_tt_orig_list_entry { struct batadv_orig_node *orig_node; - uint8_t ttvn; + u8 ttvn; struct hlist_node list; atomic_t refcount; struct rcu_head rcu; @@ -1004,9 +1002,9 @@ struct batadv_tt_change_node { * @list: list node for batadv_priv_tt::req_list */ struct batadv_tt_req_node { - uint8_t addr[ETH_ALEN]; + u8 addr[ETH_ALEN]; unsigned long issued_at; - struct list_head list; + struct hlist_node list; }; /** @@ -1018,7 +1016,7 @@ struct batadv_tt_req_node { * @list: list node for batadv_priv_tt::roam_list */ struct batadv_tt_roam_node { - uint8_t addr[ETH_ALEN]; + u8 addr[ETH_ALEN]; atomic_t counter; unsigned long first_time; struct list_head list; @@ -1035,7 +1033,7 @@ struct batadv_tt_roam_node { */ struct batadv_nc_node { struct list_head list; - uint8_t addr[ETH_ALEN]; + u8 addr[ETH_ALEN]; atomic_t refcount; struct rcu_head rcu; struct batadv_orig_node *orig_node; @@ -1059,8 +1057,8 @@ struct batadv_nc_path { atomic_t refcount; struct list_head packet_list; spinlock_t packet_list_lock; /* Protects packet_list */ - uint8_t next_hop[ETH_ALEN]; - uint8_t prev_hop[ETH_ALEN]; + u8 next_hop[ETH_ALEN]; + u8 prev_hop[ETH_ALEN]; unsigned long last_valid; }; @@ -1112,11 +1110,11 @@ struct batadv_skb_cb { struct batadv_forw_packet { struct hlist_node list; unsigned long send_time; - uint8_t own; + u8 own; struct sk_buff *skb; - uint16_t packet_len; - uint32_t direct_link_flags; - uint8_t num_packets; + u16 packet_len; + u32 direct_link_flags; + u8 num_packets; struct delayed_work delayed_work; struct batadv_hard_iface *if_incoming; struct batadv_hard_iface *if_outgoing; @@ -1191,7 +1189,7 @@ struct batadv_algo_ops { */ struct batadv_dat_entry { __be32 ip; - uint8_t mac_addr[ETH_ALEN]; + u8 mac_addr[ETH_ALEN]; unsigned short vid; unsigned long last_update; struct hlist_node hash_entry; @@ -1253,14 +1251,13 @@ struct batadv_tvlv_handler { struct hlist_node list; void (*ogm_handler)(struct batadv_priv *bat_priv, struct batadv_orig_node *orig, - uint8_t flags, - void *tvlv_value, uint16_t tvlv_value_len); + u8 flags, void *tvlv_value, u16 tvlv_value_len); int (*unicast_handler)(struct batadv_priv *bat_priv, - uint8_t *src, uint8_t *dst, - void *tvlv_value, uint16_t tvlv_value_len); - uint8_t type; - uint8_t version; - uint8_t flags; + u8 *src, u8 *dst, + void *tvlv_value, u16 tvlv_value_len); + u8 type; + u8 version; + u8 flags; atomic_t refcount; struct rcu_head rcu; }; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 0aa8f5cf46a1..6ed2feb51e3c 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -365,8 +365,7 @@ void br_dev_setup(struct net_device *dev) dev->destructor = br_dev_free; dev->ethtool_ops = &br_ethtool_ops; SET_NETDEV_DEVTYPE(dev, &br_type); - dev->tx_queue_len = 0; - dev->priv_flags = IFF_EBRIDGE; + dev->priv_flags = IFF_EBRIDGE | IFF_NO_QUEUE; dev->features = COMMON_FEATURES | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 0752796fe0ba..66efdc21f548 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1608,7 +1608,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, break; } - if (skb_trimmed) + if (skb_trimmed && skb_trimmed != skb) kfree_skb(skb_trimmed); return err; @@ -1653,7 +1653,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, break; } - if (skb_trimmed) + if (skb_trimmed && skb_trimmed != skb) kfree_skb(skb_trimmed); return err; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 0f2408f6cdfe..af5e187553fd 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -673,6 +673,21 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[]) return -EADDRNOTAVAIL; } + if (!data) + return 0; + +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + if (data[IFLA_BR_VLAN_PROTOCOL]) { + switch (nla_get_be16(data[IFLA_BR_VLAN_PROTOCOL])) { + case htons(ETH_P_8021Q): + case htons(ETH_P_8021AD): + break; + default: + return -EPROTONOSUPPORT; + } + } +#endif + return 0; } @@ -729,6 +744,7 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = { [IFLA_BR_STP_STATE] = { .type = NLA_U32 }, [IFLA_BR_PRIORITY] = { .type = NLA_U16 }, [IFLA_BR_VLAN_FILTERING] = { .type = NLA_U8 }, + [IFLA_BR_VLAN_PROTOCOL] = { .type = NLA_U16 }, }; static int br_changelink(struct net_device *brdev, struct nlattr *tb[], @@ -784,6 +800,16 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], return err; } +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + if (data[IFLA_BR_VLAN_PROTOCOL]) { + __be16 vlan_proto = nla_get_be16(data[IFLA_BR_VLAN_PROTOCOL]); + + err = __br_vlan_set_proto(br, vlan_proto); + if (err) + return err; + } +#endif + return 0; } @@ -796,6 +822,9 @@ static size_t br_get_size(const struct net_device *brdev) nla_total_size(sizeof(u32)) + /* IFLA_BR_STP_STATE */ nla_total_size(sizeof(u16)) + /* IFLA_BR_PRIORITY */ nla_total_size(sizeof(u8)) + /* IFLA_BR_VLAN_FILTERING */ +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + nla_total_size(sizeof(__be16)) + /* IFLA_BR_VLAN_PROTOCOL */ +#endif 0; } @@ -819,6 +848,11 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) nla_put_u8(skb, IFLA_BR_VLAN_FILTERING, vlan_enabled)) return -EMSGSIZE; +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + if (nla_put_be16(skb, IFLA_BR_VLAN_PROTOCOL, br->vlan_proto)) + return -EMSGSIZE; +#endif + return 0; } @@ -849,7 +883,7 @@ struct rtnl_link_ops br_link_ops __read_mostly = { .kind = "bridge", .priv_size = sizeof(struct net_bridge), .setup = br_dev_setup, - .maxtype = IFLA_BRPORT_MAX, + .maxtype = IFLA_BR_MAX, .policy = br_policy, .validate = br_validate, .newlink = br_dev_newlink, diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 3d95647039d0..213baf7aaa93 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -95,15 +95,15 @@ struct net_bridge_fdb_entry struct hlist_node hlist; struct net_bridge_port *dst; - struct rcu_head rcu; unsigned long updated; unsigned long used; mac_addr addr; + __u16 vlan_id; unsigned char is_local:1, is_static:1, added_by_user:1, added_by_external_learn:1; - __u16 vlan_id; + struct rcu_head rcu; }; struct net_bridge_port_group { @@ -616,6 +616,7 @@ bool br_vlan_find(struct net_bridge *br, u16 vid); void br_recalculate_fwd_mask(struct net_bridge *br); int __br_vlan_filter_toggle(struct net_bridge *br, unsigned long val); int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val); +int __br_vlan_set_proto(struct net_bridge *br, __be16 proto); int br_vlan_set_proto(struct net_bridge *br, unsigned long val); int br_vlan_init(struct net_bridge *br); int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val); diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 3cef6892c0bb..3cd8cc9e804b 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -492,23 +492,16 @@ int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val) return 0; } -int br_vlan_set_proto(struct net_bridge *br, unsigned long val) +int __br_vlan_set_proto(struct net_bridge *br, __be16 proto) { int err = 0; struct net_bridge_port *p; struct net_port_vlans *pv; - __be16 proto, oldproto; + __be16 oldproto; u16 vid, errvid; - if (val != ETH_P_8021Q && val != ETH_P_8021AD) - return -EPROTONOSUPPORT; - - if (!rtnl_trylock()) - return restart_syscall(); - - proto = htons(val); if (br->vlan_proto == proto) - goto unlock; + return 0; /* Add VLANs for the new proto to the device filter. */ list_for_each_entry(p, &br->port_list, list) { @@ -539,9 +532,7 @@ int br_vlan_set_proto(struct net_bridge *br, unsigned long val) vlan_vid_del(p->dev, oldproto, vid); } -unlock: - rtnl_unlock(); - return err; + return 0; err_filt: errvid = vid; @@ -557,7 +548,23 @@ err_filt: vlan_vid_del(p->dev, proto, vid); } - goto unlock; + return err; +} + +int br_vlan_set_proto(struct net_bridge *br, unsigned long val) +{ + int err; + + if (val != ETH_P_8021Q && val != ETH_P_8021AD) + return -EPROTONOSUPPORT; + + if (!rtnl_trylock()) + return restart_syscall(); + + err = __br_vlan_set_proto(br, htons(val)); + rtnl_unlock(); + + return err; } static bool vlan_default_pvid(struct net_port_vlans *pv, u16 vid) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 18ca4b24c418..48b6b01295de 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -176,7 +176,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, return 0; } -static inline __pure +static inline struct ebt_entry *ebt_next_entry(const struct ebt_entry *entry) { return (void *)entry + entry->next_offset; diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index edbca468fa73..d730a0f68f46 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -177,7 +177,7 @@ static int transmit(struct cflayer *layer, struct cfpkt *pkt) skb->protocol = htons(ETH_P_CAIF); /* Check if we need to handle xoff */ - if (likely(caifd->netdev->tx_queue_len == 0)) + if (likely(caifd->netdev->priv_flags & IFF_NO_QUEUE)) goto noxoff; if (unlikely(caifd->xoff)) diff --git a/net/core/dev.c b/net/core/dev.c index 4870c3556a5a..877c84834d81 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3657,7 +3657,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); qdisc_bstats_cpu_update(cl->q, skb); - switch (tc_classify(skb, cl, &cl_res)) { + switch (tc_classify(skb, cl, &cl_res, false)) { case TC_ACT_OK: case TC_ACT_RECLASSIFY: skb->tc_index = TC_H_MIN(cl_res.classid); @@ -5311,6 +5311,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, bool master, void *private) { + struct netdev_notifier_changeupper_info changeupper_info; struct netdev_adjacent *i, *j, *to_i, *to_j; int ret = 0; @@ -5329,6 +5330,10 @@ static int __netdev_upper_dev_link(struct net_device *dev, if (master && netdev_master_upper_dev_get(dev)) return -EBUSY; + changeupper_info.upper_dev = upper_dev; + changeupper_info.master = master; + changeupper_info.linking = true; + ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private, master); if (ret) @@ -5367,7 +5372,8 @@ static int __netdev_upper_dev_link(struct net_device *dev, goto rollback_lower_mesh; } - call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); + call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, + &changeupper_info.info); return 0; rollback_lower_mesh: @@ -5462,9 +5468,14 @@ EXPORT_SYMBOL(netdev_master_upper_dev_link_private); void netdev_upper_dev_unlink(struct net_device *dev, struct net_device *upper_dev) { + struct netdev_notifier_changeupper_info changeupper_info; struct netdev_adjacent *i, *j; ASSERT_RTNL(); + changeupper_info.upper_dev = upper_dev; + changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev; + changeupper_info.linking = false; + __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); /* Here is the tricky part. We must remove all dev's lower @@ -5484,7 +5495,8 @@ void netdev_upper_dev_unlink(struct net_device *dev, list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) __netdev_adjacent_dev_unlink(dev, i->dev); - call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); + call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, + &changeupper_info.info); } EXPORT_SYMBOL(netdev_upper_dev_unlink); @@ -6997,6 +7009,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; setup(dev); + if (!dev->tx_queue_len) + dev->priv_flags |= IFF_NO_QUEUE; + dev->num_tx_queues = txqs; dev->real_num_tx_queues = txqs; if (netif_alloc_netdev_queues(dev)) diff --git a/net/core/dst.c b/net/core/dst.c index f8694d1b8702..477035ed7903 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -20,6 +20,7 @@ #include <net/net_namespace.h> #include <linux/sched.h> #include <linux/prefetch.h> +#include <net/lwtunnel.h> #include <net/dst.h> #include <net/dst_metadata.h> @@ -184,6 +185,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops, #ifdef CONFIG_IP_ROUTE_CLASSID dst->tclassid = 0; #endif + dst->lwtstate = NULL; atomic_set(&dst->__refcnt, initial_ref); dst->__use = 0; dst->lastuse = jiffies; @@ -260,6 +262,8 @@ again: if (dst->dev) dev_put(dst->dev); + lwtstate_put(dst->lwtstate); + if (dst->flags & DST_METADATA) kfree(dst); else diff --git a/net/core/filter.c b/net/core/filter.c index f8184222465e..13079f03902e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1349,7 +1349,7 @@ const struct bpf_func_proto bpf_l3_csum_replace_proto = { static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) { struct sk_buff *skb = (struct sk_buff *) (long) r1; - u32 is_pseudo = BPF_IS_PSEUDO_HEADER(flags); + bool is_pseudo = !!BPF_IS_PSEUDO_HEADER(flags); int offset = (int) r2; __sum16 sum, *ptr; @@ -1489,13 +1489,15 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) { struct sk_buff *skb = (struct sk_buff *) (long) r1; struct bpf_tunnel_key *to = (struct bpf_tunnel_key *) (long) r2; - struct ip_tunnel_info *info = skb_tunnel_info(skb, AF_INET); + struct ip_tunnel_info *info = skb_tunnel_info(skb); if (unlikely(size != sizeof(struct bpf_tunnel_key) || flags || !info)) return -EINVAL; + if (ip_tunnel_info_af(info) != AF_INET) + return -EINVAL; to->tunnel_id = be64_to_cpu(info->key.tun_id); - to->remote_ipv4 = be32_to_cpu(info->key.ipv4_src); + to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src); return 0; } @@ -1528,8 +1530,9 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) info = &md->u.tun_info; info->mode = IP_TUNNEL_INFO_TX; + info->key.tun_flags = TUNNEL_KEY; info->key.tun_id = cpu_to_be64(from->tunnel_id); - info->key.ipv4_dst = cpu_to_be32(from->remote_ipv4); + info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4); return 0; } diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 5d6d8e3d450a..dfb1a9ca0835 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -72,7 +72,8 @@ int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops, EXPORT_SYMBOL(lwtunnel_encap_del_ops); int lwtunnel_build_state(struct net_device *dev, u16 encap_type, - struct nlattr *encap, struct lwtunnel_state **lws) + struct nlattr *encap, unsigned int family, + const void *cfg, struct lwtunnel_state **lws) { const struct lwtunnel_encap_ops *ops; int ret = -EINVAL; @@ -85,7 +86,7 @@ int lwtunnel_build_state(struct net_device *dev, u16 encap_type, rcu_read_lock(); ops = rcu_dereference(lwtun_encaps[encap_type]); if (likely(ops && ops->build_state)) - ret = ops->build_state(dev, encap, lws); + ret = ops->build_state(dev, encap, family, cfg, lws); rcu_read_unlock(); return ret; @@ -179,14 +180,16 @@ int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b) } EXPORT_SYMBOL(lwtunnel_cmp_encap); -int __lwtunnel_output(struct sock *sk, struct sk_buff *skb, - struct lwtunnel_state *lwtstate) +int lwtunnel_output(struct sock *sk, struct sk_buff *skb) { + struct dst_entry *dst = skb_dst(skb); const struct lwtunnel_encap_ops *ops; + struct lwtunnel_state *lwtstate; int ret = -EINVAL; - if (!lwtstate) + if (!dst) goto drop; + lwtstate = dst->lwtstate; if (lwtstate->type == LWTUNNEL_ENCAP_NONE || lwtstate->type > LWTUNNEL_ENCAP_MAX) @@ -209,35 +212,38 @@ drop: return ret; } +EXPORT_SYMBOL(lwtunnel_output); -int lwtunnel_output6(struct sock *sk, struct sk_buff *skb) +int lwtunnel_input(struct sk_buff *skb) { - struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); - struct lwtunnel_state *lwtstate = NULL; + struct dst_entry *dst = skb_dst(skb); + const struct lwtunnel_encap_ops *ops; + struct lwtunnel_state *lwtstate; + int ret = -EINVAL; - if (rt) { - lwtstate = rt->rt6i_lwtstate; - skb->dev = rt->dst.dev; - } + if (!dst) + goto drop; + lwtstate = dst->lwtstate; - skb->protocol = htons(ETH_P_IPV6); + if (lwtstate->type == LWTUNNEL_ENCAP_NONE || + lwtstate->type > LWTUNNEL_ENCAP_MAX) + return 0; - return __lwtunnel_output(sk, skb, lwtstate); -} -EXPORT_SYMBOL(lwtunnel_output6); + ret = -EOPNOTSUPP; + rcu_read_lock(); + ops = rcu_dereference(lwtun_encaps[lwtstate->type]); + if (likely(ops && ops->input)) + ret = ops->input(skb); + rcu_read_unlock(); -int lwtunnel_output(struct sock *sk, struct sk_buff *skb) -{ - struct rtable *rt = (struct rtable *)skb_dst(skb); - struct lwtunnel_state *lwtstate = NULL; + if (ret == -EOPNOTSUPP) + goto drop; - if (rt) { - lwtstate = rt->rt_lwtstate; - skb->dev = rt->dst.dev; - } + return ret; - skb->protocol = htons(ETH_P_IP); +drop: + kfree_skb(skb); - return __lwtunnel_output(sk, skb, lwtstate); + return ret; } -EXPORT_SYMBOL(lwtunnel_output); +EXPORT_SYMBOL(lwtunnel_input); diff --git a/net/core/net-traces.c b/net/core/net-traces.c index ba3c0120786c..adef015b2f41 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c @@ -31,6 +31,7 @@ #include <trace/events/napi.h> #include <trace/events/sock.h> #include <trace/events/udp.h> +#include <trace/events/fib.h> EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index c126a878c47c..6aa3db8dfc3b 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -380,6 +380,8 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) static atomic_t ip_ident; struct ipv6hdr *ip6h; + WARN_ON_ONCE(!irqs_disabled()); + udp_len = len + sizeof(*udph); if (np->ipv6) ip_len = udp_len + sizeof(*ip6h); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b6a19ca0f99e..dad4dd37e2aa 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -340,7 +340,7 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) if (skb && frag_size) { skb->head_frag = 1; - if (virt_to_head_page(data)->pfmemalloc) + if (page_is_pfmemalloc(virt_to_head_page(data))) skb->pfmemalloc = 1; } return skb; @@ -392,7 +392,7 @@ EXPORT_SYMBOL(napi_alloc_frag); /** * __netdev_alloc_skb - allocate an skbuff for rx on a specific device * @dev: network device to receive on - * @length: length to allocate + * @len: length to allocate * @gfp_mask: get_free_pages mask, passed to alloc_skb * * Allocate a new &sk_buff and assign it a usage count of one. The @@ -461,7 +461,7 @@ EXPORT_SYMBOL(__netdev_alloc_skb); /** * __napi_alloc_skb - allocate skbuff for rx in a specific NAPI instance * @napi: napi instance this buffer was allocated for - * @length: length to allocate + * @len: length to allocate * @gfp_mask: get_free_pages mask, passed to alloc_skb and alloc_pages * * Allocate a new sk_buff for use in NAPI receive. This buffer will @@ -4022,8 +4022,8 @@ EXPORT_SYMBOL(skb_checksum_setup); * Otherwise returns the provided skb. Returns NULL in error cases * (e.g. transport_len exceeds skb length or out-of-memory). * - * Caller needs to set the skb transport header and release the returned skb. - * Provided skb is consumed. + * Caller needs to set the skb transport header and free any returned skb if it + * differs from the provided skb. */ static struct sk_buff *skb_checksum_maybe_trim(struct sk_buff *skb, unsigned int transport_len) @@ -4032,16 +4032,12 @@ static struct sk_buff *skb_checksum_maybe_trim(struct sk_buff *skb, unsigned int len = skb_transport_offset(skb) + transport_len; int ret; - if (skb->len < len) { - kfree_skb(skb); + if (skb->len < len) return NULL; - } else if (skb->len == len) { + else if (skb->len == len) return skb; - } skb_chk = skb_clone(skb, GFP_ATOMIC); - kfree_skb(skb); - if (!skb_chk) return NULL; @@ -4066,8 +4062,8 @@ static struct sk_buff *skb_checksum_maybe_trim(struct sk_buff *skb, * If the skb has data beyond the given transport length, then a * trimmed & cloned skb is checked and returned. * - * Caller needs to set the skb transport header and release the returned skb. - * Provided skb is consumed. + * Caller needs to set the skb transport header and free any returned skb if it + * differs from the provided skb. */ struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb, unsigned int transport_len, @@ -4079,23 +4075,26 @@ struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb, skb_chk = skb_checksum_maybe_trim(skb, transport_len); if (!skb_chk) - return NULL; + goto err; - if (!pskb_may_pull(skb_chk, offset)) { - kfree_skb(skb_chk); - return NULL; - } + if (!pskb_may_pull(skb_chk, offset)) + goto err; __skb_pull(skb_chk, offset); ret = skb_chkf(skb_chk); __skb_push(skb_chk, offset); - if (ret) { - kfree_skb(skb_chk); - return NULL; - } + if (ret) + goto err; return skb_chk; + +err: + if (skb_chk && skb_chk != skb) + kfree_skb(skb_chk); + + return NULL; + } EXPORT_SYMBOL(skb_checksum_trimmed); diff --git a/net/core/sock.c b/net/core/sock.c index 193901d09757..ca2984afe16e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2078,7 +2078,7 @@ suppress_allocation: EXPORT_SYMBOL(__sk_mem_schedule); /** - * __sk_reclaim - reclaim memory_allocated + * __sk_mem_reclaim - reclaim memory_allocated * @sk: socket * @amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple) */ diff --git a/net/core/utils.c b/net/core/utils.c index a7732a068043..3dffce953c39 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -301,7 +301,7 @@ out: EXPORT_SYMBOL(in6_pton); void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, - __be32 from, __be32 to, int pseudohdr) + __be32 from, __be32 to, bool pseudohdr) { if (skb->ip_summed != CHECKSUM_PARTIAL) { csum_replace4(sum, from, to); @@ -318,7 +318,7 @@ EXPORT_SYMBOL(inet_proto_csum_replace4); void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, const __be32 *from, const __be32 *to, - int pseudohdr) + bool pseudohdr) { __be32 diff[] = { ~from[0], ~from[1], ~from[2], ~from[3], @@ -336,6 +336,19 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, } EXPORT_SYMBOL(inet_proto_csum_replace16); +void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, + __wsum diff, bool pseudohdr) +{ + if (skb->ip_summed != CHECKSUM_PARTIAL) { + *sum = csum_fold(csum_add(diff, ~csum_unfold(*sum))); + if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) + skb->csum = ~csum_add(diff, ~skb->csum); + } else if (pseudohdr) { + *sum = ~csum_fold(csum_add(diff, csum_unfold(*sum))); + } +} +EXPORT_SYMBOL(inet_proto_csum_replace_by_diff); + struct __net_random_once_work { struct work_struct work; struct static_key *key; diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 78d4ac97aae3..053eb2b8e682 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -554,6 +554,31 @@ static int dsa_of_setup_routing_table(struct dsa_platform_data *pd, return 0; } +static int dsa_of_probe_links(struct dsa_platform_data *pd, + struct dsa_chip_data *cd, + int chip_index, int port_index, + struct device_node *port, + const char *port_name) +{ + struct device_node *link; + int link_index; + int ret; + + for (link_index = 0;; link_index++) { + link = of_parse_phandle(port, "link", link_index); + if (!link) + break; + + if (!strcmp(port_name, "dsa") && pd->nr_chips > 1) { + ret = dsa_of_setup_routing_table(pd, cd, chip_index, + port_index, link); + if (ret) + return ret; + } + } + return 0; +} + static void dsa_of_free_platform_data(struct dsa_platform_data *pd) { int i; @@ -573,7 +598,7 @@ static void dsa_of_free_platform_data(struct dsa_platform_data *pd) static int dsa_of_probe(struct device *dev) { struct device_node *np = dev->of_node; - struct device_node *child, *mdio, *ethernet, *port, *link; + struct device_node *child, *mdio, *ethernet, *port; struct mii_bus *mdio_bus, *mdio_bus_switch; struct net_device *ethernet_dev; struct dsa_platform_data *pd; @@ -668,15 +693,10 @@ static int dsa_of_probe(struct device *dev) goto out_free_chip; } - link = of_parse_phandle(port, "link", 0); - - if (!strcmp(port_name, "dsa") && link && - pd->nr_chips > 1) { - ret = dsa_of_setup_routing_table(pd, cd, - chip_index, port_index, link); - if (ret) - goto out_free_chip; - } + ret = dsa_of_probe_links(pd, cd, chip_index, + port_index, port, port_name); + if (ret) + goto out_free_chip; } } diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 373ff315030d..cce97385f743 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1147,7 +1147,7 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent, slave_dev->features = master->vlan_features; slave_dev->ethtool_ops = &dsa_slave_ethtool_ops; eth_hw_addr_inherit(slave_dev, master); - slave_dev->tx_queue_len = 0; + slave_dev->priv_flags |= IFF_NO_QUEUE; slave_dev->netdev_ops = &dsa_slave_netdev_ops; slave_dev->switchdev_ops = &dsa_slave_switchdev_ops; diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 44d27469ae55..35a9788bb3ae 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -392,7 +392,7 @@ void hsr_dev_setup(struct net_device *dev) dev->header_ops = &hsr_header_ops; dev->netdev_ops = &hsr_device_ops; SET_NETDEV_DEVTYPE(dev, &hsr_type); - dev->tx_queue_len = 0; + dev->priv_flags |= IFF_NO_QUEUE; dev->destructor = hsr_dev_destroy; diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index 27c25ad935b4..953b1c49f5d1 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -90,7 +90,7 @@ static void lowpan_setup(struct net_device *dev) dev->hard_header_len = 2 + 1 + 20 + 14; dev->needed_tailroom = 2; /* FCS */ dev->mtu = IPV6_MIN_MTU; - dev->tx_queue_len = 0; + dev->priv_flags |= IFF_NO_QUEUE; dev->flags = IFF_BROADCAST | IFF_MULTICAST; dev->watchdog_timeo = 0; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 6fb3c90ad726..416dfa004cfb 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -331,20 +331,6 @@ config NET_FOU_IP_TUNNELS When this option is enabled IP tunnels can be configured to use FOU or GUE encapsulation. -config GENEVE_CORE - tristate "Generic Network Virtualization Encapsulation library" - depends on INET - select NET_UDP_TUNNEL - ---help--- - This allows one to create Geneve virtual interfaces that provide - Layer 2 Networks over Layer 3 Networks. Geneve is often used - to tunnel virtual network infrastructure in virtualized environments. - For more information see: - http://tools.ietf.org/html/draft-gross-geneve-01 - - To compile this driver as a module, choose M here: the module - - config INET_AH tristate "IP: AH transformation" select XFRM_ALGO diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index efc43f300b8c..89aacb630a53 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -57,7 +57,6 @@ obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o obj-$(CONFIG_NETLABEL) += cipso_ipv4.o -obj-$(CONFIG_GENEVE_CORE) += geneve_core.o obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ xfrm4_output.o xfrm4_protocol.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index c8b855882fa5..675e88cac2b4 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -450,15 +450,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; } - if (sk->sk_bound_dev_if) { - struct net_device *dev; - - rcu_read_lock(); - dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); - if (dev) - tb_id = vrf_dev_table_rcu(dev) ? : tb_id; - rcu_read_unlock(); - } + tb_id = vrf_dev_table_ifindex(net, sk->sk_bound_dev_if) ? : tb_id; chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id); /* Not specified by any standard per-se, however it breaks too diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index ac9a32ec3ee4..f2a71025a770 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -360,8 +360,10 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) work_iph = ah_alloc_tmp(ahash, nfrags + sglists, ihl + ahp->icv_trunc_len + seqhi_len); - if (!work_iph) + if (!work_iph) { + err = -ENOMEM; goto out; + } seqhi = (__be32 *)((char *)work_iph + ihl); auth_data = ah_tmp_auth(seqhi, seqhi_len); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 7fa277176c33..4036c94dfbe1 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -46,6 +46,7 @@ #include <net/rtnetlink.h> #include <net/xfrm.h> #include <net/vrf.h> +#include <trace/events/fib.h> #ifndef CONFIG_IP_MULTIPLE_TABLES @@ -344,6 +345,8 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, fl4.flowi4_mark = IN_DEV_SRC_VMARK(idev) ? skb->mark : 0; + trace_fib_validate_source(dev, &fl4); + net = dev_net(dev); if (fib_lookup(net, &fl4, &res, 0)) goto last_resort; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index c8025851dac7..1b2d01170a4d 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -511,7 +511,8 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, dev = __dev_get_by_index(net, cfg->fc_oif); ret = lwtunnel_build_state(dev, nla_get_u16( nla_entype), - nla, &lwtstate); + nla, AF_INET, cfg, + &lwtstate); if (ret) goto errout; nexthop_nh->nh_lwtstate = @@ -533,25 +534,28 @@ errout: #endif -int fib_encap_match(struct net *net, u16 encap_type, - struct nlattr *encap, - int oif, const struct fib_nh *nh) +static int fib_encap_match(struct net *net, u16 encap_type, + struct nlattr *encap, + int oif, const struct fib_nh *nh, + const struct fib_config *cfg) { struct lwtunnel_state *lwtstate; struct net_device *dev = NULL; - int ret; + int ret, result = 0; if (encap_type == LWTUNNEL_ENCAP_NONE) return 0; if (oif) dev = __dev_get_by_index(net, oif); - ret = lwtunnel_build_state(dev, encap_type, - encap, &lwtstate); - if (!ret) - return lwtunnel_cmp_encap(lwtstate, nh->nh_lwtstate); + ret = lwtunnel_build_state(dev, encap_type, encap, + AF_INET, cfg, &lwtstate); + if (!ret) { + result = lwtunnel_cmp_encap(lwtstate, nh->nh_lwtstate); + lwtstate_free(lwtstate); + } - return 0; + return result; } int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) @@ -569,7 +573,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) if (cfg->fc_encap) { if (fib_encap_match(net, cfg->fc_encap_type, cfg->fc_encap, cfg->fc_oif, - fi->fib_nh)) + fi->fib_nh, cfg)) return 1; } if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) && @@ -661,7 +665,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, struct fib_nh *nh) { - int err; + int err = 0; struct net *net; struct net_device *dev; @@ -710,9 +714,16 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, err = fib_table_lookup(tbl, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE | FIB_LOOKUP_NOREF); - else + + /* on error or if no table given do full lookup. This + * is needed for example when nexthops are in the local + * table rather than the given table + */ + if (!tbl || err) { err = fib_lookup(net, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE); + } + if (err) { rcu_read_unlock(); return err; @@ -996,7 +1007,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg) if (cfg->fc_oif) dev = __dev_get_by_index(net, cfg->fc_oif); err = lwtunnel_build_state(dev, cfg->fc_encap_type, - cfg->fc_encap, &lwtstate); + cfg->fc_encap, AF_INET, cfg, + &lwtstate); if (err) goto failure; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 1243c79cb5b0..26d6ffb6d23c 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -81,6 +81,7 @@ #include <net/sock.h> #include <net/ip_fib.h> #include <net/switchdev.h> +#include <trace/events/fib.h> #include "fib_lookup.h" #define MAX_STAT_DEPTH 32 @@ -1278,6 +1279,8 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, unsigned long index; t_key cindex; + trace_fib_table_lookup(tb->tb_id, flp); + pn = t->kv; cindex = 0; @@ -1442,6 +1445,8 @@ found: #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(stats->semantic_match_passed); #endif + trace_fib_table_lookup_nh(nh); + return err; } } @@ -2468,7 +2473,7 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter, key = l->key + 1; iter->pos++; - if (pos-- <= 0) + if (--pos <= 0) break; l = NULL; diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 34968cd5c146..e0fcbbbcfe54 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -79,7 +79,11 @@ static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr, __be16 *pd = data; size_t start = ntohs(pd[0]); size_t offset = ntohs(pd[1]); - size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start); + size_t plen = sizeof(struct udphdr) + hdrlen + + max_t(size_t, offset + sizeof(u16), start); + + if (skb->remcsum_offload) + return guehdr; if (!pskb_may_pull(skb, plen)) return NULL; @@ -221,29 +225,21 @@ out_unlock: static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off, struct guehdr *guehdr, void *data, - size_t hdrlen, u8 ipproto, - struct gro_remcsum *grc, bool nopartial) + size_t hdrlen, struct gro_remcsum *grc, + bool nopartial) { __be16 *pd = data; size_t start = ntohs(pd[0]); size_t offset = ntohs(pd[1]); - size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start); if (skb->remcsum_offload) - return NULL; + return guehdr; if (!NAPI_GRO_CB(skb)->csum_valid) return NULL; - /* Pull checksum that will be written */ - if (skb_gro_header_hard(skb, off + plen)) { - guehdr = skb_gro_header_slow(skb, off + plen, off); - if (!guehdr) - return NULL; - } - - skb_gro_remcsum_process(skb, (void *)guehdr + hdrlen, - start, offset, grc, nopartial); + guehdr = skb_gro_remcsum_process(skb, (void *)guehdr, off, hdrlen, + start, offset, grc, nopartial); skb->remcsum_offload = 1; @@ -307,10 +303,10 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head, if (flags & GUE_PFLAG_REMCSUM) { guehdr = gue_gro_remcsum(skb, off, guehdr, - data + doffset, hdrlen, - guehdr->proto_ctype, &grc, + data + doffset, hdrlen, &grc, !!(fou->flags & FOU_F_REMCSUM_NOPARTIAL)); + if (!guehdr) goto out; @@ -351,7 +347,7 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head, rcu_read_lock(); offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[guehdr->proto_ctype]); - if (WARN_ON(!ops || !ops->callbacks.gro_receive)) + if (WARN_ON_ONCE(!ops || !ops->callbacks.gro_receive)) goto out_unlock; pp = ops->callbacks.gro_receive(head, skb); @@ -570,7 +566,7 @@ static int parse_nl_config(struct genl_info *info, if (info->attrs[FOU_ATTR_AF]) { u8 family = nla_get_u8(info->attrs[FOU_ATTR_AF]); - if (family != AF_INET && family != AF_INET6) + if (family != AF_INET) return -EINVAL; cfg->udp_config.family = family; diff --git a/net/ipv4/geneve_core.c b/net/ipv4/geneve_core.c deleted file mode 100644 index 311a4ba6950a..000000000000 --- a/net/ipv4/geneve_core.c +++ /dev/null @@ -1,447 +0,0 @@ -/* - * Geneve: Generic Network Virtualization Encapsulation - * - * Copyright (c) 2014 Nicira, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/slab.h> -#include <linux/skbuff.h> -#include <linux/list.h> -#include <linux/netdevice.h> -#include <linux/in.h> -#include <linux/ip.h> -#include <linux/udp.h> -#include <linux/igmp.h> -#include <linux/etherdevice.h> -#include <linux/if_ether.h> -#include <linux/if_vlan.h> -#include <linux/ethtool.h> -#include <linux/mutex.h> -#include <net/arp.h> -#include <net/ndisc.h> -#include <net/ip.h> -#include <net/ip_tunnels.h> -#include <net/icmp.h> -#include <net/udp.h> -#include <net/rtnetlink.h> -#include <net/route.h> -#include <net/dsfield.h> -#include <net/inet_ecn.h> -#include <net/net_namespace.h> -#include <net/netns/generic.h> -#include <net/geneve.h> -#include <net/protocol.h> -#include <net/udp_tunnel.h> -#if IS_ENABLED(CONFIG_IPV6) -#include <net/ipv6.h> -#include <net/addrconf.h> -#include <net/ip6_tunnel.h> -#include <net/ip6_checksum.h> -#endif - -/* Protects sock_list and refcounts. */ -static DEFINE_MUTEX(geneve_mutex); - -/* per-network namespace private data for this module */ -struct geneve_net { - struct list_head sock_list; -}; - -static int geneve_net_id; - -static struct geneve_sock *geneve_find_sock(struct net *net, - sa_family_t family, __be16 port) -{ - struct geneve_net *gn = net_generic(net, geneve_net_id); - struct geneve_sock *gs; - - list_for_each_entry(gs, &gn->sock_list, list) { - if (inet_sk(gs->sock->sk)->inet_sport == port && - inet_sk(gs->sock->sk)->sk.sk_family == family) - return gs; - } - - return NULL; -} - -static void geneve_build_header(struct genevehdr *geneveh, - __be16 tun_flags, u8 vni[3], - u8 options_len, u8 *options) -{ - geneveh->ver = GENEVE_VER; - geneveh->opt_len = options_len / 4; - geneveh->oam = !!(tun_flags & TUNNEL_OAM); - geneveh->critical = !!(tun_flags & TUNNEL_CRIT_OPT); - geneveh->rsvd1 = 0; - memcpy(geneveh->vni, vni, 3); - geneveh->proto_type = htons(ETH_P_TEB); - geneveh->rsvd2 = 0; - - memcpy(geneveh->options, options, options_len); -} - -/* Transmit a fully formatted Geneve frame. - * - * When calling this function. The skb->data should point - * to the geneve header which is fully formed. - * - * This function will add other UDP tunnel headers. - */ -int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, - struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, - __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, - __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt, - bool csum, bool xnet) -{ - struct genevehdr *gnvh; - int min_headroom; - int err; - - min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len - + GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr) - + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); - - err = skb_cow_head(skb, min_headroom); - if (unlikely(err)) { - kfree_skb(skb); - return err; - } - - skb = vlan_hwaccel_push_inside(skb); - if (unlikely(!skb)) - return -ENOMEM; - - skb = udp_tunnel_handle_offloads(skb, csum); - if (IS_ERR(skb)) - return PTR_ERR(skb); - - gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len); - geneve_build_header(gnvh, tun_flags, vni, opt_len, opt); - - skb_set_inner_protocol(skb, htons(ETH_P_TEB)); - - return udp_tunnel_xmit_skb(rt, gs->sock->sk, skb, src, dst, - tos, ttl, df, src_port, dst_port, xnet, - !csum); -} -EXPORT_SYMBOL_GPL(geneve_xmit_skb); - -static int geneve_hlen(struct genevehdr *gh) -{ - return sizeof(*gh) + gh->opt_len * 4; -} - -static struct sk_buff **geneve_gro_receive(struct sk_buff **head, - struct sk_buff *skb, - struct udp_offload *uoff) -{ - struct sk_buff *p, **pp = NULL; - struct genevehdr *gh, *gh2; - unsigned int hlen, gh_len, off_gnv; - const struct packet_offload *ptype; - __be16 type; - int flush = 1; - - off_gnv = skb_gro_offset(skb); - hlen = off_gnv + sizeof(*gh); - gh = skb_gro_header_fast(skb, off_gnv); - if (skb_gro_header_hard(skb, hlen)) { - gh = skb_gro_header_slow(skb, hlen, off_gnv); - if (unlikely(!gh)) - goto out; - } - - if (gh->ver != GENEVE_VER || gh->oam) - goto out; - gh_len = geneve_hlen(gh); - - hlen = off_gnv + gh_len; - if (skb_gro_header_hard(skb, hlen)) { - gh = skb_gro_header_slow(skb, hlen, off_gnv); - if (unlikely(!gh)) - goto out; - } - - flush = 0; - - for (p = *head; p; p = p->next) { - if (!NAPI_GRO_CB(p)->same_flow) - continue; - - gh2 = (struct genevehdr *)(p->data + off_gnv); - if (gh->opt_len != gh2->opt_len || - memcmp(gh, gh2, gh_len)) { - NAPI_GRO_CB(p)->same_flow = 0; - continue; - } - } - - type = gh->proto_type; - - rcu_read_lock(); - ptype = gro_find_receive_by_type(type); - if (!ptype) { - flush = 1; - goto out_unlock; - } - - skb_gro_pull(skb, gh_len); - skb_gro_postpull_rcsum(skb, gh, gh_len); - pp = ptype->callbacks.gro_receive(head, skb); - -out_unlock: - rcu_read_unlock(); -out: - NAPI_GRO_CB(skb)->flush |= flush; - - return pp; -} - -static int geneve_gro_complete(struct sk_buff *skb, int nhoff, - struct udp_offload *uoff) -{ - struct genevehdr *gh; - struct packet_offload *ptype; - __be16 type; - int gh_len; - int err = -ENOSYS; - - udp_tunnel_gro_complete(skb, nhoff); - - gh = (struct genevehdr *)(skb->data + nhoff); - gh_len = geneve_hlen(gh); - type = gh->proto_type; - - rcu_read_lock(); - ptype = gro_find_complete_by_type(type); - if (ptype) - err = ptype->callbacks.gro_complete(skb, nhoff + gh_len); - - rcu_read_unlock(); - return err; -} - -static void geneve_notify_add_rx_port(struct geneve_sock *gs) -{ - struct sock *sk = gs->sock->sk; - sa_family_t sa_family = sk->sk_family; - int err; - - if (sa_family == AF_INET) { - err = udp_add_offload(&gs->udp_offloads); - if (err) - pr_warn("geneve: udp_add_offload failed with status %d\n", - err); - } -} - -static void geneve_notify_del_rx_port(struct geneve_sock *gs) -{ - struct sock *sk = gs->sock->sk; - sa_family_t sa_family = sk->sk_family; - - if (sa_family == AF_INET) - udp_del_offload(&gs->udp_offloads); -} - -/* Callback from net/ipv4/udp.c to receive packets */ -static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb) -{ - struct genevehdr *geneveh; - struct geneve_sock *gs; - int opts_len; - - /* Need Geneve and inner Ethernet header to be present */ - if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN))) - goto error; - - /* Return packets with reserved bits set */ - geneveh = geneve_hdr(skb); - - if (unlikely(geneveh->ver != GENEVE_VER)) - goto error; - - if (unlikely(geneveh->proto_type != htons(ETH_P_TEB))) - goto error; - - opts_len = geneveh->opt_len * 4; - if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, - htons(ETH_P_TEB))) - goto drop; - - gs = rcu_dereference_sk_user_data(sk); - if (!gs) - goto drop; - - gs->rcv(gs, skb); - return 0; - -drop: - /* Consume bad packet */ - kfree_skb(skb); - return 0; - -error: - /* Let the UDP layer deal with the skb */ - return 1; -} - -static struct socket *geneve_create_sock(struct net *net, bool ipv6, - __be16 port) -{ - struct socket *sock; - struct udp_port_cfg udp_conf; - int err; - - memset(&udp_conf, 0, sizeof(udp_conf)); - - if (ipv6) { - udp_conf.family = AF_INET6; - } else { - udp_conf.family = AF_INET; - udp_conf.local_ip.s_addr = htonl(INADDR_ANY); - } - - udp_conf.local_udp_port = port; - - /* Open UDP socket */ - err = udp_sock_create(net, &udp_conf, &sock); - if (err < 0) - return ERR_PTR(err); - - return sock; -} - -/* Create new listen socket if needed */ -static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, - geneve_rcv_t *rcv, void *data, - bool ipv6) -{ - struct geneve_net *gn = net_generic(net, geneve_net_id); - struct geneve_sock *gs; - struct socket *sock; - struct udp_tunnel_sock_cfg tunnel_cfg; - - gs = kzalloc(sizeof(*gs), GFP_KERNEL); - if (!gs) - return ERR_PTR(-ENOMEM); - - sock = geneve_create_sock(net, ipv6, port); - if (IS_ERR(sock)) { - kfree(gs); - return ERR_CAST(sock); - } - - gs->sock = sock; - gs->refcnt = 1; - gs->rcv = rcv; - gs->rcv_data = data; - - /* Initialize the geneve udp offloads structure */ - gs->udp_offloads.port = port; - gs->udp_offloads.callbacks.gro_receive = geneve_gro_receive; - gs->udp_offloads.callbacks.gro_complete = geneve_gro_complete; - geneve_notify_add_rx_port(gs); - - /* Mark socket as an encapsulation socket */ - tunnel_cfg.sk_user_data = gs; - tunnel_cfg.encap_type = 1; - tunnel_cfg.encap_rcv = geneve_udp_encap_recv; - tunnel_cfg.encap_destroy = NULL; - setup_udp_tunnel_sock(net, sock, &tunnel_cfg); - - list_add(&gs->list, &gn->sock_list); - - return gs; -} - -struct geneve_sock *geneve_sock_add(struct net *net, __be16 port, - geneve_rcv_t *rcv, void *data, - bool no_share, bool ipv6) -{ - struct geneve_sock *gs; - - mutex_lock(&geneve_mutex); - - gs = geneve_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port); - if (gs) { - if (!no_share && gs->rcv == rcv) - gs->refcnt++; - else - gs = ERR_PTR(-EBUSY); - } else { - gs = geneve_socket_create(net, port, rcv, data, ipv6); - } - - mutex_unlock(&geneve_mutex); - - return gs; -} -EXPORT_SYMBOL_GPL(geneve_sock_add); - -void geneve_sock_release(struct geneve_sock *gs) -{ - mutex_lock(&geneve_mutex); - - if (--gs->refcnt) - goto unlock; - - list_del(&gs->list); - geneve_notify_del_rx_port(gs); - udp_tunnel_sock_release(gs->sock); - kfree_rcu(gs, rcu); - -unlock: - mutex_unlock(&geneve_mutex); -} -EXPORT_SYMBOL_GPL(geneve_sock_release); - -static __net_init int geneve_init_net(struct net *net) -{ - struct geneve_net *gn = net_generic(net, geneve_net_id); - - INIT_LIST_HEAD(&gn->sock_list); - - return 0; -} - -static struct pernet_operations geneve_net_ops = { - .init = geneve_init_net, - .id = &geneve_net_id, - .size = sizeof(struct geneve_net), -}; - -static int __init geneve_init_module(void) -{ - int rc; - - rc = register_pernet_subsys(&geneve_net_ops); - if (rc) - return rc; - - pr_info("Geneve core logic\n"); - - return 0; -} -module_init(geneve_init_module); - -static void __exit geneve_cleanup_module(void) -{ - unregister_pernet_subsys(&geneve_net_ops); -} -module_exit(geneve_cleanup_module); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jesse Gross <jesse@nicira.com>"); -MODULE_DESCRIPTION("Driver library for GENEVE encapsulated traffic"); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index c6f1ce149ffb..79fe05befcae 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -309,9 +309,10 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, rc = false; if (icmp_global_allow()) { + int vif = vrf_master_ifindex(dst->dev); struct inet_peer *peer; - peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, 1); + peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif, 1); rc = inet_peer_xrlim_allow(peer, net->ipv4.sysctl_icmp_ratelimit); if (peer) @@ -426,7 +427,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) fl4.flowi4_mark = mark; fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); fl4.flowi4_proto = IPPROTO_ICMP; - fl4.flowi4_oif = vrf_master_ifindex_rcu(skb->dev) ? : skb->dev->ifindex; + fl4.flowi4_oif = vrf_master_ifindex(skb->dev) ? : skb->dev->ifindex; security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) @@ -460,7 +461,7 @@ static struct rtable *icmp_route_lookup(struct net *net, fl4->flowi4_proto = IPPROTO_ICMP; fl4->fl4_icmp_type = type; fl4->fl4_icmp_code = code; - fl4->flowi4_oif = vrf_master_ifindex_rcu(skb_in->dev) ? : skb_in->dev->ifindex; + fl4->flowi4_oif = vrf_master_ifindex(skb_in->dev) ? : skb_in->dev->ifindex; security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4)); rt = __ip_route_output_key(net, fl4); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 651cdf648ec4..d38b8b61eaee 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -110,6 +110,9 @@ #define IP_MAX_MEMBERSHIPS 20 #define IP_MAX_MSF 10 +/* IGMP reports for link-local multicast groups are enabled by default */ +int sysctl_igmp_llm_reports __read_mostly = 1; + #ifdef CONFIG_IP_MULTICAST /* Parameter names and values are taken from igmp-v2-06 draft */ @@ -437,6 +440,8 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, if (pmc->multiaddr == IGMP_ALL_HOSTS) return skb; + if (ipv4_is_local_multicast(pmc->multiaddr) && !sysctl_igmp_llm_reports) + return skb; isquery = type == IGMPV3_MODE_IS_INCLUDE || type == IGMPV3_MODE_IS_EXCLUDE; @@ -545,6 +550,9 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc) for_each_pmc_rcu(in_dev, pmc) { if (pmc->multiaddr == IGMP_ALL_HOSTS) continue; + if (ipv4_is_local_multicast(pmc->multiaddr) && + !sysctl_igmp_llm_reports) + continue; spin_lock_bh(&pmc->lock); if (pmc->sfcount[MCAST_EXCLUDE]) type = IGMPV3_MODE_IS_EXCLUDE; @@ -678,7 +686,11 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, if (type == IGMPV3_HOST_MEMBERSHIP_REPORT) return igmpv3_send_report(in_dev, pmc); - else if (type == IGMP_HOST_LEAVE_MESSAGE) + + if (ipv4_is_local_multicast(group) && !sysctl_igmp_llm_reports) + return 0; + + if (type == IGMP_HOST_LEAVE_MESSAGE) dst = IGMP_ALL_ROUTER; else dst = group; @@ -851,6 +863,8 @@ static bool igmp_heard_report(struct in_device *in_dev, __be32 group) if (group == IGMP_ALL_HOSTS) return false; + if (ipv4_is_local_multicast(group) && !sysctl_igmp_llm_reports) + return false; rcu_read_lock(); for_each_pmc_rcu(in_dev, im) { @@ -957,6 +971,9 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, continue; if (im->multiaddr == IGMP_ALL_HOSTS) continue; + if (ipv4_is_local_multicast(im->multiaddr) && + !sysctl_igmp_llm_reports) + continue; spin_lock_bh(&im->lock); if (im->tm_running) im->gsquery = im->gsquery && mark; @@ -1181,6 +1198,8 @@ static void igmp_group_dropped(struct ip_mc_list *im) #ifdef CONFIG_IP_MULTICAST if (im->multiaddr == IGMP_ALL_HOSTS) return; + if (ipv4_is_local_multicast(im->multiaddr) && !sysctl_igmp_llm_reports) + return; reporter = im->reporter; igmp_stop_timer(im); @@ -1213,6 +1232,8 @@ static void igmp_group_added(struct ip_mc_list *im) #ifdef CONFIG_IP_MULTICAST if (im->multiaddr == IGMP_ALL_HOSTS) return; + if (ipv4_is_local_multicast(im->multiaddr) && !sysctl_igmp_llm_reports) + return; if (in_dev->dead) return; @@ -1435,33 +1456,35 @@ static int __ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed) struct sk_buff *skb_chk; unsigned int transport_len; unsigned int len = skb_transport_offset(skb) + sizeof(struct igmphdr); - int ret; + int ret = -EINVAL; transport_len = ntohs(ip_hdr(skb)->tot_len) - ip_hdrlen(skb); - skb_get(skb); skb_chk = skb_checksum_trimmed(skb, transport_len, ip_mc_validate_checksum); if (!skb_chk) - return -EINVAL; + goto err; - if (!pskb_may_pull(skb_chk, len)) { - kfree_skb(skb_chk); - return -EINVAL; - } + if (!pskb_may_pull(skb_chk, len)) + goto err; ret = ip_mc_check_igmp_msg(skb_chk); - if (ret) { - kfree_skb(skb_chk); - return ret; - } + if (ret) + goto err; if (skb_trimmed) *skb_trimmed = skb_chk; - else + /* free now unneeded clone */ + else if (skb_chk != skb) kfree_skb(skb_chk); - return 0; + ret = 0; + +err: + if (ret && skb_chk && skb_chk != skb) + kfree_skb(skb_chk); + + return ret; } /** @@ -1470,7 +1493,7 @@ static int __ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed) * @skb_trimmed: to store an skb pointer trimmed to IPv4 packet tail (optional) * * Checks whether an IPv4 packet is a valid IGMP packet. If so sets - * skb network and transport headers accordingly and returns zero. + * skb transport header accordingly and returns zero. * * -EINVAL: A broken packet was detected, i.e. it violates some internet * standard @@ -1485,7 +1508,8 @@ static int __ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed) * to leave the original skb and its full frame unchanged (which might be * desirable for layer 2 frame jugglers). * - * The caller needs to release a reference count from any returned skb_trimmed. + * Caller needs to set the skb network header and free any returned skb if it + * differs from the provided skb. */ int ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed) { @@ -1515,6 +1539,9 @@ static void ip_mc_rejoin_groups(struct in_device *in_dev) for_each_pmc_rtnl(in_dev, im) { if (im->multiaddr == IGMP_ALL_HOSTS) continue; + if (ipv4_is_local_multicast(im->multiaddr) && + !sysctl_igmp_llm_reports) + continue; /* a failover is happening and switches * must be notified immediately diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 05e3145f7dc3..134957159c27 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -593,7 +593,7 @@ static bool reqsk_queue_unlink(struct request_sock_queue *queue, } spin_unlock(&queue->syn_wait_lock); - if (del_timer_sync(&req->rsk_timer)) + if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer)) reqsk_put(req); return found; } diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 241afd743d2c..86fa45809540 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -157,22 +157,6 @@ void __init inet_initpeers(void) INIT_DEFERRABLE_WORK(&gc_work, inetpeer_gc_worker); } -static int addr_compare(const struct inetpeer_addr *a, - const struct inetpeer_addr *b) -{ - int i, n = (a->family == AF_INET ? 1 : 4); - - for (i = 0; i < n; i++) { - if (a->addr.a6[i] == b->addr.a6[i]) - continue; - if ((__force u32)a->addr.a6[i] < (__force u32)b->addr.a6[i]) - return -1; - return 1; - } - - return 0; -} - #define rcu_deref_locked(X, BASE) \ rcu_dereference_protected(X, lockdep_is_held(&(BASE)->lock.lock)) @@ -188,7 +172,7 @@ static int addr_compare(const struct inetpeer_addr *a, *stackptr++ = &_base->root; \ for (u = rcu_deref_locked(_base->root, _base); \ u != peer_avl_empty;) { \ - int cmp = addr_compare(_daddr, &u->daddr); \ + int cmp = inetpeer_addr_cmp(_daddr, &u->daddr); \ if (cmp == 0) \ break; \ if (cmp == -1) \ @@ -215,7 +199,7 @@ static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr, int count = 0; while (u != peer_avl_empty) { - int cmp = addr_compare(daddr, &u->daddr); + int cmp = inetpeer_addr_cmp(daddr, &u->daddr); if (cmp == 0) { /* Before taking a reference, check if this entry was * deleted (refcnt=-1) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 15762e758861..fa7f15305f9a 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -151,7 +151,8 @@ static void ip4_frag_init(struct inet_frag_queue *q, const void *a) qp->vif = arg->vif; qp->user = arg->user; qp->peer = sysctl_ipfrag_max_dist ? - inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, 1) : NULL; + inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, arg->vif, 1) : + NULL; } static void ip4_frag_free(struct inet_frag_queue *q) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index fb44d693796e..bd0679d90519 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -400,25 +400,14 @@ static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) if (tunnel) { skb_pop_mac_header(skb); if (tunnel->collect_md) { - struct ip_tunnel_info *info; + __be16 flags; + __be64 tun_id; - tun_dst = metadata_dst_alloc(0, GFP_ATOMIC); + flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY); + tun_id = key_to_tunnel_id(tpi->key); + tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0); if (!tun_dst) return PACKET_REJECT; - - info = &tun_dst->u.tun_info; - info->key.ipv4_src = iph->saddr; - info->key.ipv4_dst = iph->daddr; - info->key.ipv4_tos = iph->tos; - info->key.ipv4_ttl = iph->ttl; - - info->mode = IP_TUNNEL_INFO_RX; - info->key.tun_flags = tpi->flags & - (TUNNEL_CSUM | TUNNEL_KEY); - info->key.tun_id = key_to_tunnel_id(tpi->key); - - info->key.tp_src = 0; - info->key.tp_dst = 0; } ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error); @@ -521,15 +510,16 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev) __be16 df, flags; int err; - tun_info = skb_tunnel_info(skb, AF_INET); - if (unlikely(!tun_info || tun_info->mode != IP_TUNNEL_INFO_TX)) + tun_info = skb_tunnel_info(skb); + if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || + ip_tunnel_info_af(tun_info) != AF_INET)) goto err_free_skb; key = &tun_info->key; memset(&fl, 0, sizeof(fl)); - fl.daddr = key->ipv4_dst; - fl.saddr = key->ipv4_src; - fl.flowi4_tos = RT_TOS(key->ipv4_tos); + fl.daddr = key->u.ipv4.dst; + fl.saddr = key->u.ipv4.src; + fl.flowi4_tos = RT_TOS(key->tos); fl.flowi4_mark = skb->mark; fl.flowi4_proto = IPPROTO_GRE; @@ -564,8 +554,8 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev) df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; err = iptunnel_xmit(skb->sk, rt, skb, fl.saddr, - key->ipv4_dst, IPPROTO_GRE, - key->ipv4_tos, key->ipv4_ttl, df, false); + key->u.ipv4.dst, IPPROTO_GRE, + key->tos, key->ttl, df, false); iptunnel_xmit_stats(err, &dev->stats, dev->tstats); return; diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index fd6319681c50..0c756ade1cf7 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -204,6 +204,7 @@ static const struct nla_policy ip_tun_policy[LWTUNNEL_IP_MAX + 1] = { }; static int ip_tun_build_state(struct net_device *dev, struct nlattr *attr, + unsigned int family, const void *cfg, struct lwtunnel_state **ts) { struct ip_tunnel_info *tun_info; @@ -227,16 +228,16 @@ static int ip_tun_build_state(struct net_device *dev, struct nlattr *attr, tun_info->key.tun_id = nla_get_u64(tb[LWTUNNEL_IP_ID]); if (tb[LWTUNNEL_IP_DST]) - tun_info->key.ipv4_dst = nla_get_be32(tb[LWTUNNEL_IP_DST]); + tun_info->key.u.ipv4.dst = nla_get_be32(tb[LWTUNNEL_IP_DST]); if (tb[LWTUNNEL_IP_SRC]) - tun_info->key.ipv4_src = nla_get_be32(tb[LWTUNNEL_IP_SRC]); + tun_info->key.u.ipv4.src = nla_get_be32(tb[LWTUNNEL_IP_SRC]); if (tb[LWTUNNEL_IP_TTL]) - tun_info->key.ipv4_ttl = nla_get_u8(tb[LWTUNNEL_IP_TTL]); + tun_info->key.ttl = nla_get_u8(tb[LWTUNNEL_IP_TTL]); if (tb[LWTUNNEL_IP_TOS]) - tun_info->key.ipv4_tos = nla_get_u8(tb[LWTUNNEL_IP_TOS]); + tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP_TOS]); if (tb[LWTUNNEL_IP_SPORT]) tun_info->key.tp_src = nla_get_be16(tb[LWTUNNEL_IP_SPORT]); @@ -262,10 +263,10 @@ static int ip_tun_fill_encap_info(struct sk_buff *skb, struct ip_tunnel_info *tun_info = lwt_tun_info(lwtstate); if (nla_put_u64(skb, LWTUNNEL_IP_ID, tun_info->key.tun_id) || - nla_put_be32(skb, LWTUNNEL_IP_DST, tun_info->key.ipv4_dst) || - nla_put_be32(skb, LWTUNNEL_IP_SRC, tun_info->key.ipv4_src) || - nla_put_u8(skb, LWTUNNEL_IP_TOS, tun_info->key.ipv4_tos) || - nla_put_u8(skb, LWTUNNEL_IP_TTL, tun_info->key.ipv4_ttl) || + nla_put_be32(skb, LWTUNNEL_IP_DST, tun_info->key.u.ipv4.dst) || + nla_put_be32(skb, LWTUNNEL_IP_SRC, tun_info->key.u.ipv4.src) || + nla_put_u8(skb, LWTUNNEL_IP_TOS, tun_info->key.tos) || + nla_put_u8(skb, LWTUNNEL_IP_TTL, tun_info->key.ttl) || nla_put_u16(skb, LWTUNNEL_IP_SPORT, tun_info->key.tp_src) || nla_put_u16(skb, LWTUNNEL_IP_DPORT, tun_info->key.tp_dst) || nla_put_u16(skb, LWTUNNEL_IP_FLAGS, tun_info->key.tun_flags)) @@ -286,15 +287,125 @@ static int ip_tun_encap_nlsize(struct lwtunnel_state *lwtstate) + nla_total_size(2); /* LWTUNNEL_IP_FLAGS */ } +static int ip_tun_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b) +{ + return memcmp(lwt_tun_info(a), lwt_tun_info(b), + sizeof(struct ip_tunnel_info)); +} + static const struct lwtunnel_encap_ops ip_tun_lwt_ops = { .build_state = ip_tun_build_state, .fill_encap = ip_tun_fill_encap_info, .get_encap_size = ip_tun_encap_nlsize, + .cmp_encap = ip_tun_cmp_encap, +}; + +static const struct nla_policy ip6_tun_policy[LWTUNNEL_IP6_MAX + 1] = { + [LWTUNNEL_IP6_ID] = { .type = NLA_U64 }, + [LWTUNNEL_IP6_DST] = { .len = sizeof(struct in6_addr) }, + [LWTUNNEL_IP6_SRC] = { .len = sizeof(struct in6_addr) }, + [LWTUNNEL_IP6_HOPLIMIT] = { .type = NLA_U8 }, + [LWTUNNEL_IP6_TC] = { .type = NLA_U8 }, + [LWTUNNEL_IP6_SPORT] = { .type = NLA_U16 }, + [LWTUNNEL_IP6_DPORT] = { .type = NLA_U16 }, + [LWTUNNEL_IP6_FLAGS] = { .type = NLA_U16 }, +}; + +static int ip6_tun_build_state(struct net_device *dev, struct nlattr *attr, + unsigned int family, const void *cfg, + struct lwtunnel_state **ts) +{ + struct ip_tunnel_info *tun_info; + struct lwtunnel_state *new_state; + struct nlattr *tb[LWTUNNEL_IP6_MAX + 1]; + int err; + + err = nla_parse_nested(tb, LWTUNNEL_IP6_MAX, attr, ip6_tun_policy); + if (err < 0) + return err; + + new_state = lwtunnel_state_alloc(sizeof(*tun_info)); + if (!new_state) + return -ENOMEM; + + new_state->type = LWTUNNEL_ENCAP_IP6; + + tun_info = lwt_tun_info(new_state); + + if (tb[LWTUNNEL_IP6_ID]) + tun_info->key.tun_id = nla_get_u64(tb[LWTUNNEL_IP6_ID]); + + if (tb[LWTUNNEL_IP6_DST]) + tun_info->key.u.ipv6.dst = nla_get_in6_addr(tb[LWTUNNEL_IP6_DST]); + + if (tb[LWTUNNEL_IP6_SRC]) + tun_info->key.u.ipv6.src = nla_get_in6_addr(tb[LWTUNNEL_IP6_SRC]); + + if (tb[LWTUNNEL_IP6_HOPLIMIT]) + tun_info->key.ttl = nla_get_u8(tb[LWTUNNEL_IP6_HOPLIMIT]); + + if (tb[LWTUNNEL_IP6_TC]) + tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP6_TC]); + + if (tb[LWTUNNEL_IP6_SPORT]) + tun_info->key.tp_src = nla_get_be16(tb[LWTUNNEL_IP6_SPORT]); + + if (tb[LWTUNNEL_IP6_DPORT]) + tun_info->key.tp_dst = nla_get_be16(tb[LWTUNNEL_IP6_DPORT]); + + if (tb[LWTUNNEL_IP6_FLAGS]) + tun_info->key.tun_flags = nla_get_u16(tb[LWTUNNEL_IP6_FLAGS]); + + tun_info->mode = IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_IPV6; + tun_info->options = NULL; + tun_info->options_len = 0; + + *ts = new_state; + + return 0; +} + +static int ip6_tun_fill_encap_info(struct sk_buff *skb, + struct lwtunnel_state *lwtstate) +{ + struct ip_tunnel_info *tun_info = lwt_tun_info(lwtstate); + + if (nla_put_u64(skb, LWTUNNEL_IP6_ID, tun_info->key.tun_id) || + nla_put_in6_addr(skb, LWTUNNEL_IP6_DST, &tun_info->key.u.ipv6.dst) || + nla_put_in6_addr(skb, LWTUNNEL_IP6_SRC, &tun_info->key.u.ipv6.src) || + nla_put_u8(skb, LWTUNNEL_IP6_HOPLIMIT, tun_info->key.tos) || + nla_put_u8(skb, LWTUNNEL_IP6_TC, tun_info->key.ttl) || + nla_put_u16(skb, LWTUNNEL_IP6_SPORT, tun_info->key.tp_src) || + nla_put_u16(skb, LWTUNNEL_IP6_DPORT, tun_info->key.tp_dst) || + nla_put_u16(skb, LWTUNNEL_IP6_FLAGS, tun_info->key.tun_flags)) + return -ENOMEM; + + return 0; +} + +static int ip6_tun_encap_nlsize(struct lwtunnel_state *lwtstate) +{ + return nla_total_size(8) /* LWTUNNEL_IP6_ID */ + + nla_total_size(16) /* LWTUNNEL_IP6_DST */ + + nla_total_size(16) /* LWTUNNEL_IP6_SRC */ + + nla_total_size(1) /* LWTUNNEL_IP6_HOPLIMIT */ + + nla_total_size(1) /* LWTUNNEL_IP6_TC */ + + nla_total_size(2) /* LWTUNNEL_IP6_SPORT */ + + nla_total_size(2) /* LWTUNNEL_IP6_DPORT */ + + nla_total_size(2); /* LWTUNNEL_IP6_FLAGS */ +} + +static const struct lwtunnel_encap_ops ip6_tun_lwt_ops = { + .build_state = ip6_tun_build_state, + .fill_encap = ip6_tun_fill_encap_info, + .get_encap_size = ip6_tun_encap_nlsize, + .cmp_encap = ip_tun_cmp_encap, }; void __init ip_tunnel_core_init(void) { lwtunnel_encap_add_ops(&ip_tun_lwt_ops, LWTUNNEL_ENCAP_IP); + lwtunnel_encap_add_ops(&ip6_tun_lwt_ops, LWTUNNEL_ENCAP_IP6); } struct static_key ip_tunnel_metadata_cnt = STATIC_KEY_INIT_FALSE; diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 2199a5db25e6..690d27d3f2f9 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -58,6 +58,12 @@ config NFT_REJECT_IPV4 default NFT_REJECT tristate +config NFT_DUP_IPV4 + tristate "IPv4 nf_tables packet duplication support" + select NF_DUP_IPV4 + help + This module enables IPv4 packet duplication support for nf_tables. + endif # NF_TABLES_IPV4 config NF_TABLES_ARP @@ -67,6 +73,12 @@ config NF_TABLES_ARP endif # NF_TABLES +config NF_DUP_IPV4 + tristate "Netfilter IPv4 packet duplication to alternate destination" + help + This option enables the nf_dup_ipv4 core, which duplicates an IPv4 + packet to be rerouted to another destination. + config NF_LOG_ARP tristate "ARP packet logging" default m if NETFILTER_ADVANCED=n diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 7fe6c703528f..87b073da14c9 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o obj-$(CONFIG_NFT_MASQ_IPV4) += nft_masq_ipv4.o obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o +obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o # generic IP tables @@ -70,3 +71,5 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o # just filtering instance of ARP tables for now obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o + +obj-$(CONFIG_NF_DUP_IPV4) += nf_dup_ipv4.o diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index c416cb355cb0..8f87fc38ccde 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -367,13 +367,10 @@ static inline bool unconditional(const struct arpt_arp *arp) /* Figures out from what hook each rule can be called: returns 0 if * there are loops. Puts hook bitmask in comefrom. - * - * Keeps track of largest call depth seen and stores it in newinfo->stacksize. */ -static int mark_source_chains(struct xt_table_info *newinfo, +static int mark_source_chains(const struct xt_table_info *newinfo, unsigned int valid_hooks, void *entry0) { - unsigned int calldepth, max_calldepth = 0; unsigned int hook; /* No recursion; use packet counter to save back ptrs (reset @@ -389,7 +386,6 @@ static int mark_source_chains(struct xt_table_info *newinfo, /* Set initial back pointer. */ e->counters.pcnt = pos; - calldepth = 0; for (;;) { const struct xt_standard_target *t @@ -444,8 +440,6 @@ static int mark_source_chains(struct xt_table_info *newinfo, (entry0 + pos + size); e->counters.pcnt = pos; pos += size; - if (calldepth > 0) - --calldepth; } else { int newpos = t->verdict; @@ -460,10 +454,6 @@ static int mark_source_chains(struct xt_table_info *newinfo, return 0; } - if (entry0 + newpos != arpt_next_entry(e) && - ++calldepth > max_calldepth) - max_calldepth = calldepth; - /* This a jump; chase it. */ duprintf("Jump rule %u -> %u\n", pos, newpos); @@ -480,7 +470,6 @@ static int mark_source_chains(struct xt_table_info *newinfo, next: duprintf("Finished chain %u\n", hook); } - newinfo->stacksize = max_calldepth; return 1; } @@ -670,6 +659,9 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, if (ret != 0) break; ++i; + if (strcmp(arpt_get_target(iter)->u.user.name, + XT_ERROR_TARGET) == 0) + ++newinfo->stacksize; } duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret); if (ret != 0) @@ -1442,6 +1434,9 @@ static int translate_compat_table(const char *name, break; } ++i; + if (strcmp(arpt_get_target(iter1)->u.user.name, + XT_ERROR_TARGET) == 0) + ++newinfo->stacksize; } if (ret) { /* diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 787f99ed55e2..b0a86e73451c 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -443,15 +443,11 @@ ipt_do_table(struct sk_buff *skb, } /* Figures out from what hook each rule can be called: returns 0 if - * there are loops. Puts hook bitmask in comefrom. - * - * Keeps track of largest call depth seen and stores it in newinfo->stacksize. - */ + there are loops. Puts hook bitmask in comefrom. */ static int -mark_source_chains(struct xt_table_info *newinfo, +mark_source_chains(const struct xt_table_info *newinfo, unsigned int valid_hooks, void *entry0) { - unsigned int calldepth, max_calldepth = 0; unsigned int hook; /* No recursion; use packet counter to save back ptrs (reset @@ -465,7 +461,6 @@ mark_source_chains(struct xt_table_info *newinfo, /* Set initial back pointer. */ e->counters.pcnt = pos; - calldepth = 0; for (;;) { const struct xt_standard_target *t @@ -527,9 +522,6 @@ mark_source_chains(struct xt_table_info *newinfo, (entry0 + pos + size); e->counters.pcnt = pos; pos += size; - WARN_ON_ONCE(calldepth == 0); - if (calldepth > 0) - --calldepth; } else { int newpos = t->verdict; @@ -543,14 +535,9 @@ mark_source_chains(struct xt_table_info *newinfo, newpos); return 0; } - if (entry0 + newpos != ipt_next_entry(e) && - !(e->ip.flags & IPT_F_GOTO) && - ++calldepth > max_calldepth) - max_calldepth = calldepth; - /* This a jump; chase it. */ - duprintf("Jump rule %u -> %u, calldepth %d\n", - pos, newpos, calldepth); + duprintf("Jump rule %u -> %u\n", + pos, newpos); } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; @@ -564,7 +551,6 @@ mark_source_chains(struct xt_table_info *newinfo, next: duprintf("Finished chain %u\n", hook); } - newinfo->stacksize = max_calldepth; return 1; } @@ -844,6 +830,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, if (ret != 0) return ret; ++i; + if (strcmp(ipt_get_target(iter)->u.user.name, + XT_ERROR_TARGET) == 0) + ++newinfo->stacksize; } if (i != repl->num_entries) { @@ -1759,6 +1748,9 @@ translate_compat_table(struct net *net, if (ret != 0) break; ++i; + if (strcmp(ipt_get_target(iter1)->u.user.name, + XT_ERROR_TARGET) == 0) + ++newinfo->stacksize; } if (ret) { /* diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 4bf3dc49ad1e..270765236f5e 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -72,7 +72,7 @@ set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo) tcph->cwr = einfo->proto.tcp.cwr; inet_proto_csum_replace2(&tcph->check, skb, - oldval, ((__be16 *)tcph)[6], 0); + oldval, ((__be16 *)tcph)[6], false); return true; } diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 30ad9554b5e9..8a2caaf3940b 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -280,7 +280,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) return -EINVAL; } - h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple); + h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple); if (h) { struct sockaddr_in sin; struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 80d5554b9a88..cdde3ec496e9 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -134,9 +134,11 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, struct nf_conntrack_tuple innertuple, origtuple; const struct nf_conntrack_l4proto *innerproto; const struct nf_conntrack_tuple_hash *h; - u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; + const struct nf_conntrack_zone *zone; + struct nf_conntrack_zone tmp; NF_CT_ASSERT(skb->nfct == NULL); + zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); /* Are they talking about one of our connections? */ if (!nf_ct_get_tuplepr(skb, diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index b69e82bda215..9306ec4fab41 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -43,19 +43,22 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, struct sk_buff *skb) { - u16 zone = NF_CT_DEFAULT_ZONE; - + u16 zone_id = NF_CT_DEFAULT_ZONE_ID; #if IS_ENABLED(CONFIG_NF_CONNTRACK) - if (skb->nfct) - zone = nf_ct_zone((struct nf_conn *)skb->nfct); + if (skb->nfct) { + enum ip_conntrack_info ctinfo; + const struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + + zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo)); + } #endif if (nf_bridge_in_prerouting(skb)) - return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone; + return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id; if (hooknum == NF_INET_PRE_ROUTING) - return IP_DEFRAG_CONNTRACK_IN + zone; + return IP_DEFRAG_CONNTRACK_IN + zone_id; else - return IP_DEFRAG_CONNTRACK_OUT + zone; + return IP_DEFRAG_CONNTRACK_OUT + zone_id; } static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops, diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c new file mode 100644 index 000000000000..b5bb37564b0e --- /dev/null +++ b/net/ipv4/netfilter/nf_dup_ipv4.c @@ -0,0 +1,120 @@ +/* + * (C) 2007 by Sebastian Claßen <sebastian.classen@freenet.ag> + * (C) 2007-2010 by Jan Engelhardt <jengelh@medozas.de> + * + * Extracted from xt_TEE.c + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 or later, as + * published by the Free Software Foundation. + */ +#include <linux/ip.h> +#include <linux/module.h> +#include <linux/percpu.h> +#include <linux/route.h> +#include <linux/skbuff.h> +#include <net/checksum.h> +#include <net/icmp.h> +#include <net/ip.h> +#include <net/route.h> +#include <net/netfilter/ipv4/nf_dup_ipv4.h> +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#include <net/netfilter/nf_conntrack.h> +#endif + +static struct net *pick_net(struct sk_buff *skb) +{ +#ifdef CONFIG_NET_NS + const struct dst_entry *dst; + + if (skb->dev != NULL) + return dev_net(skb->dev); + dst = skb_dst(skb); + if (dst != NULL && dst->dev != NULL) + return dev_net(dst->dev); +#endif + return &init_net; +} + +static bool nf_dup_ipv4_route(struct sk_buff *skb, const struct in_addr *gw, + int oif) +{ + const struct iphdr *iph = ip_hdr(skb); + struct net *net = pick_net(skb); + struct rtable *rt; + struct flowi4 fl4; + + memset(&fl4, 0, sizeof(fl4)); + if (oif != -1) + fl4.flowi4_oif = oif; + + fl4.daddr = gw->s_addr; + fl4.flowi4_tos = RT_TOS(iph->tos); + fl4.flowi4_scope = RT_SCOPE_UNIVERSE; + fl4.flowi4_flags = FLOWI_FLAG_KNOWN_NH; + rt = ip_route_output_key(net, &fl4); + if (IS_ERR(rt)) + return false; + + skb_dst_drop(skb); + skb_dst_set(skb, &rt->dst); + skb->dev = rt->dst.dev; + skb->protocol = htons(ETH_P_IP); + + return true; +} + +void nf_dup_ipv4(struct sk_buff *skb, unsigned int hooknum, + const struct in_addr *gw, int oif) +{ + struct iphdr *iph; + + if (this_cpu_read(nf_skb_duplicated)) + return; + /* + * Copy the skb, and route the copy. Will later return %XT_CONTINUE for + * the original skb, which should continue on its way as if nothing has + * happened. The copy should be independently delivered to the gateway. + */ + skb = pskb_copy(skb, GFP_ATOMIC); + if (skb == NULL) + return; + +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + /* Avoid counting cloned packets towards the original connection. */ + nf_conntrack_put(skb->nfct); + skb->nfct = &nf_ct_untracked_get()->ct_general; + skb->nfctinfo = IP_CT_NEW; + nf_conntrack_get(skb->nfct); +#endif + /* + * If we are in PREROUTING/INPUT, the checksum must be recalculated + * since the length could have changed as a result of defragmentation. + * + * We also decrease the TTL to mitigate potential loops between two + * hosts. + * + * Set %IP_DF so that the original source is notified of a potentially + * decreased MTU on the clone route. IPv6 does this too. + */ + iph = ip_hdr(skb); + iph->frag_off |= htons(IP_DF); + if (hooknum == NF_INET_PRE_ROUTING || + hooknum == NF_INET_LOCAL_IN) + --iph->ttl; + ip_send_check(iph); + + if (nf_dup_ipv4_route(skb, gw, oif)) { + __this_cpu_write(nf_skb_duplicated, true); + ip_local_out(skb); + __this_cpu_write(nf_skb_duplicated, false); + } else { + kfree_skb(skb); + } +} +EXPORT_SYMBOL_GPL(nf_dup_ipv4); + +MODULE_AUTHOR("Sebastian Claßen <sebastian.classen@freenet.ag>"); +MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>"); +MODULE_DESCRIPTION("nf_dup_ipv4: Duplicate IPv4 packet"); +MODULE_LICENSE("GPL"); diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index e59cc05c09e9..22f4579b0c2a 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -120,7 +120,7 @@ static void nf_nat_ipv4_csum_update(struct sk_buff *skb, oldip = iph->daddr; newip = t->dst.u3.ip; } - inet_proto_csum_replace4(check, skb, oldip, newip, 1); + inet_proto_csum_replace4(check, skb, oldip, newip, true); } static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb, @@ -151,7 +151,7 @@ static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb, } } else inet_proto_csum_replace2(check, skb, - htons(oldlen), htons(datalen), 1); + htons(oldlen), htons(datalen), true); } #if IS_ENABLED(CONFIG_NF_CT_NETLINK) diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c index 4557b4ab8342..7b98baa13ede 100644 --- a/net/ipv4/netfilter/nf_nat_proto_icmp.c +++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c @@ -67,7 +67,7 @@ icmp_manip_pkt(struct sk_buff *skb, hdr = (struct icmphdr *)(skb->data + hdroff); inet_proto_csum_replace2(&hdr->checksum, skb, - hdr->un.echo.id, tuple->src.u.icmp.id, 0); + hdr->un.echo.id, tuple->src.u.icmp.id, false); hdr->un.echo.id = tuple->src.u.icmp.id; return true; } diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c new file mode 100644 index 000000000000..b45932d43b69 --- /dev/null +++ b/net/ipv4/netfilter/nft_dup_ipv4.c @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2015 Pablo Neira Ayuso <pablo@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/ipv4/nf_dup_ipv4.h> + +struct nft_dup_ipv4 { + enum nft_registers sreg_addr:8; + enum nft_registers sreg_dev:8; +}; + +static void nft_dup_ipv4_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_dup_ipv4 *priv = nft_expr_priv(expr); + struct in_addr gw = { + .s_addr = (__force __be32)regs->data[priv->sreg_addr], + }; + int oif = regs->data[priv->sreg_dev]; + + nf_dup_ipv4(pkt->skb, pkt->ops->hooknum, &gw, oif); +} + +static int nft_dup_ipv4_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_dup_ipv4 *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_DUP_SREG_ADDR] == NULL) + return -EINVAL; + + priv->sreg_addr = nft_parse_register(tb[NFTA_DUP_SREG_ADDR]); + err = nft_validate_register_load(priv->sreg_addr, sizeof(struct in_addr)); + if (err < 0) + return err; + + if (tb[NFTA_DUP_SREG_DEV] != NULL) { + priv->sreg_dev = nft_parse_register(tb[NFTA_DUP_SREG_DEV]); + return nft_validate_register_load(priv->sreg_dev, sizeof(int)); + } + return 0; +} + +static int nft_dup_ipv4_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + struct nft_dup_ipv4 *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr) || + nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_dup_ipv4_type; +static const struct nft_expr_ops nft_dup_ipv4_ops = { + .type = &nft_dup_ipv4_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_dup_ipv4)), + .eval = nft_dup_ipv4_eval, + .init = nft_dup_ipv4_init, + .dump = nft_dup_ipv4_dump, +}; + +static const struct nla_policy nft_dup_ipv4_policy[NFTA_DUP_MAX + 1] = { + [NFTA_DUP_SREG_ADDR] = { .type = NLA_U32 }, + [NFTA_DUP_SREG_DEV] = { .type = NLA_U32 }, +}; + +static struct nft_expr_type nft_dup_ipv4_type __read_mostly = { + .family = NFPROTO_IPV4, + .name = "dup", + .ops = &nft_dup_ipv4_ops, + .policy = nft_dup_ipv4_policy, + .maxattr = NFTA_DUP_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_dup_ipv4_module_init(void) +{ + return nft_register_expr(&nft_dup_ipv4_type); +} + +static void __exit nft_dup_ipv4_module_exit(void) +{ + nft_unregister_expr(&nft_dup_ipv4_type); +} + +module_init(nft_dup_ipv4_module_init); +module_exit(nft_dup_ipv4_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "dup"); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 2c89d294b669..5f4a5565ad8b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -838,6 +838,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) struct inet_peer *peer; struct net *net; int log_martians; + int vif; rcu_read_lock(); in_dev = __in_dev_get_rcu(rt->dst.dev); @@ -846,10 +847,11 @@ void ip_rt_send_redirect(struct sk_buff *skb) return; } log_martians = IN_DEV_LOG_MARTIANS(in_dev); + vif = vrf_master_ifindex_rcu(rt->dst.dev); rcu_read_unlock(); net = dev_net(rt->dst.dev); - peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1); + peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif, 1); if (!peer) { icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt_nexthop(rt, ip_hdr(skb)->daddr)); @@ -938,7 +940,8 @@ static int ip_error(struct sk_buff *skb) break; } - peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1); + peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, + vrf_master_ifindex(skb->dev), 1); send = true; if (peer) { @@ -1359,7 +1362,6 @@ static void ipv4_dst_destroy(struct dst_entry *dst) list_del(&rt->rt_uncached); spin_unlock_bh(&ul->lock); } - lwtstate_put(rt->rt_lwtstate); } void rt_flush_dev(struct net_device *dev) @@ -1408,7 +1410,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, #ifdef CONFIG_IP_ROUTE_CLASSID rt->dst.tclassid = nh->nh_tclassid; #endif - rt->rt_lwtstate = lwtstate_get(nh->nh_lwtstate); + rt->dst.lwtstate = lwtstate_get(nh->nh_lwtstate); if (unlikely(fnhe)) cached = rt_bind_exception(rt, fnhe, daddr); else if (!(rt->dst.flags & DST_NOCACHE)) @@ -1494,7 +1496,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_gateway = 0; rth->rt_uses_gateway = 0; INIT_LIST_HEAD(&rth->rt_uncached); - rth->rt_lwtstate = NULL; if (our) { rth->dst.input= ip_local_deliver; rth->rt_flags |= RTCF_LOCAL; @@ -1624,15 +1625,20 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_gateway = 0; rth->rt_uses_gateway = 0; INIT_LIST_HEAD(&rth->rt_uncached); - rth->rt_lwtstate = NULL; RT_CACHE_STAT_INC(in_slow_tot); rth->dst.input = ip_forward; rth->dst.output = ip_output; rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag); - if (lwtunnel_output_redirect(rth->rt_lwtstate)) + if (lwtunnel_output_redirect(rth->dst.lwtstate)) { + rth->dst.lwtstate->orig_output = rth->dst.output; rth->dst.output = lwtunnel_output; + } + if (lwtunnel_input_redirect(rth->dst.lwtstate)) { + rth->dst.lwtstate->orig_input = rth->dst.input; + rth->dst.input = lwtunnel_input; + } skb_dst_set(skb, &rth->dst); out: err = 0; @@ -1689,8 +1695,8 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, by fib_lookup. */ - tun_info = skb_tunnel_info(skb, AF_INET); - if (tun_info && tun_info->mode == IP_TUNNEL_INFO_RX) + tun_info = skb_tunnel_info(skb); + if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX)) fl4.flowi4_tun_key.tun_id = tun_info->key.tun_id; else fl4.flowi4_tun_key.tun_id = 0; @@ -1809,7 +1815,6 @@ local_input: rth->rt_gateway = 0; rth->rt_uses_gateway = 0; INIT_LIST_HEAD(&rth->rt_uncached); - rth->rt_lwtstate = NULL; RT_CACHE_STAT_INC(in_slow_tot); if (res.type == RTN_UNREACHABLE) { @@ -2000,7 +2005,6 @@ add: rth->rt_gateway = 0; rth->rt_uses_gateway = 0; INIT_LIST_HEAD(&rth->rt_uncached); - rth->rt_lwtstate = NULL; RT_CACHE_STAT_INC(out_slow_tot); if (flags & RTCF_LOCAL) @@ -2023,7 +2027,7 @@ add: } rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0); - if (lwtunnel_output_redirect(rth->rt_lwtstate)) + if (lwtunnel_output_redirect(rth->dst.lwtstate)) rth->dst.output = lwtunnel_output; return rth; @@ -2287,7 +2291,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_uses_gateway = ort->rt_uses_gateway; INIT_LIST_HEAD(&rt->rt_uncached); - rt->rt_lwtstate = NULL; dst_free(new); } diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 433231ccfb17..894da3a70aff 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -29,6 +29,7 @@ static int zero; static int one = 1; static int four = 4; +static int thousand = 1000; static int gso_max_segs = GSO_MAX_SEGS; static int tcp_retr1_max = 255; static int ip_local_port_range_min[] = { 1, 1 }; @@ -41,8 +42,6 @@ static int tcp_syn_retries_min = 1; static int tcp_syn_retries_max = MAX_TCP_SYNCNT; static int ip_ping_group_range_min[] = { 0, 0 }; static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; -static int min_sndbuf = SOCK_MIN_SNDBUF; -static int min_rcvbuf = SOCK_MIN_RCVBUF; /* Update system visible IP port range */ static void set_local_port_range(struct net *net, int range[2]) @@ -530,7 +529,7 @@ static struct ctl_table ipv4_table[] = { .maxlen = sizeof(sysctl_tcp_wmem), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &min_sndbuf, + .extra1 = &one, }, { .procname = "tcp_notsent_lowat", @@ -545,7 +544,7 @@ static struct ctl_table ipv4_table[] = { .maxlen = sizeof(sysctl_tcp_rmem), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &min_rcvbuf, + .extra1 = &one, }, { .procname = "tcp_app_win", @@ -714,6 +713,24 @@ static struct ctl_table ipv4_table[] = { .extra2 = &gso_max_segs, }, { + .procname = "tcp_pacing_ss_ratio", + .data = &sysctl_tcp_pacing_ss_ratio, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &thousand, + }, + { + .procname = "tcp_pacing_ca_ratio", + .data = &sysctl_tcp_pacing_ca_ratio, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &thousand, + }, + { .procname = "tcp_autocorking", .data = &sysctl_tcp_autocorking, .maxlen = sizeof(int), @@ -758,7 +775,7 @@ static struct ctl_table ipv4_table[] = { .maxlen = sizeof(sysctl_udp_rmem_min), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &min_rcvbuf, + .extra1 = &one }, { .procname = "udp_wmem_min", @@ -766,7 +783,7 @@ static struct ctl_table ipv4_table[] = { .maxlen = sizeof(sysctl_udp_wmem_min), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &min_sndbuf, + .extra1 = &one }, { } }; @@ -912,6 +929,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "igmp_link_local_mcast_reports", + .data = &sysctl_igmp_llm_reports, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 45534a5ab430..b8b8fa184f75 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -627,6 +627,8 @@ static void skb_entail(struct sock *sk, struct sk_buff *skb) sk_mem_charge(sk, skb->truesize); if (tp->nonagle & TCP_NAGLE_PUSH) tp->nonagle &= ~TCP_NAGLE_PUSH; + + tcp_slow_start_after_idle_check(sk); } static inline void tcp_mark_urg(struct tcp_sock *tp, int flags) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 4e4d6bcd0ca9..dc08e2352665 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -753,13 +753,29 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us) * TCP pacing, to smooth the burst on large writes when packets * in flight is significantly lower than cwnd (or rwin) */ +int sysctl_tcp_pacing_ss_ratio __read_mostly = 200; +int sysctl_tcp_pacing_ca_ratio __read_mostly = 120; + static void tcp_update_pacing_rate(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); u64 rate; /* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */ - rate = (u64)tp->mss_cache * 2 * (USEC_PER_SEC << 3); + rate = (u64)tp->mss_cache * ((USEC_PER_SEC / 100) << 3); + + /* current rate is (cwnd * mss) / srtt + * In Slow Start [1], set sk_pacing_rate to 200 % the current rate. + * In Congestion Avoidance phase, set it to 120 % the current rate. + * + * [1] : Normal Slow Start condition is (tp->snd_cwnd < tp->snd_ssthresh) + * If snd_cwnd >= (tp->snd_ssthresh / 2), we are approaching + * end of slow start and should slow down. + */ + if (tp->snd_cwnd < tp->snd_ssthresh / 2) + rate *= sysctl_tcp_pacing_ss_ratio; + else + rate *= sysctl_tcp_pacing_ca_ratio; rate *= max(tp->snd_cwnd, tp->packets_out); @@ -3332,6 +3348,9 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 tp->pred_flags = 0; tcp_fast_path_check(sk); + if (tcp_send_head(sk)) + tcp_slow_start_after_idle_check(sk); + if (nwin > tp->max_window) { tp->max_window = nwin; tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie); diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index b3d64f61d922..c8cbc2b4b792 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -81,11 +81,7 @@ static void tcp_metric_set(struct tcp_metrics_block *tm, static bool addr_same(const struct inetpeer_addr *a, const struct inetpeer_addr *b) { - if (a->family != b->family) - return false; - if (a->family == AF_INET) - return a->addr.a4 == b->addr.a4; - return ipv6_addr_equal(&a->addr.in6, &b->addr.in6); + return inetpeer_addr_cmp(a, b) == 0; } struct tcpm_hash_bucket { @@ -247,14 +243,14 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, daddr.family = req->rsk_ops->family; switch (daddr.family) { case AF_INET: - saddr.addr.a4 = inet_rsk(req)->ir_loc_addr; - daddr.addr.a4 = inet_rsk(req)->ir_rmt_addr; - hash = (__force unsigned int) daddr.addr.a4; + inetpeer_set_addr_v4(&saddr, inet_rsk(req)->ir_loc_addr); + inetpeer_set_addr_v4(&daddr, inet_rsk(req)->ir_rmt_addr); + hash = ipv4_addr_hash(inet_rsk(req)->ir_rmt_addr); break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: - saddr.addr.in6 = inet_rsk(req)->ir_v6_loc_addr; - daddr.addr.in6 = inet_rsk(req)->ir_v6_rmt_addr; + inetpeer_set_addr_v6(&saddr, &inet_rsk(req)->ir_v6_loc_addr); + inetpeer_set_addr_v6(&daddr, &inet_rsk(req)->ir_v6_rmt_addr); hash = ipv6_addr_hash(&inet_rsk(req)->ir_v6_rmt_addr); break; #endif @@ -285,25 +281,19 @@ static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock struct net *net; if (tw->tw_family == AF_INET) { - saddr.family = AF_INET; - saddr.addr.a4 = tw->tw_rcv_saddr; - daddr.family = AF_INET; - daddr.addr.a4 = tw->tw_daddr; - hash = (__force unsigned int) daddr.addr.a4; + inetpeer_set_addr_v4(&saddr, tw->tw_rcv_saddr); + inetpeer_set_addr_v4(&daddr, tw->tw_daddr); + hash = ipv4_addr_hash(tw->tw_daddr); } #if IS_ENABLED(CONFIG_IPV6) else if (tw->tw_family == AF_INET6) { if (ipv6_addr_v4mapped(&tw->tw_v6_daddr)) { - saddr.family = AF_INET; - saddr.addr.a4 = tw->tw_rcv_saddr; - daddr.family = AF_INET; - daddr.addr.a4 = tw->tw_daddr; - hash = (__force unsigned int) daddr.addr.a4; + inetpeer_set_addr_v4(&saddr, tw->tw_rcv_saddr); + inetpeer_set_addr_v4(&daddr, tw->tw_daddr); + hash = ipv4_addr_hash(tw->tw_daddr); } else { - saddr.family = AF_INET6; - saddr.addr.in6 = tw->tw_v6_rcv_saddr; - daddr.family = AF_INET6; - daddr.addr.in6 = tw->tw_v6_daddr; + inetpeer_set_addr_v6(&saddr, &tw->tw_v6_rcv_saddr); + inetpeer_set_addr_v6(&daddr, &tw->tw_v6_daddr); hash = ipv6_addr_hash(&tw->tw_v6_daddr); } } @@ -335,25 +325,19 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, struct net *net; if (sk->sk_family == AF_INET) { - saddr.family = AF_INET; - saddr.addr.a4 = inet_sk(sk)->inet_saddr; - daddr.family = AF_INET; - daddr.addr.a4 = inet_sk(sk)->inet_daddr; - hash = (__force unsigned int) daddr.addr.a4; + inetpeer_set_addr_v4(&saddr, inet_sk(sk)->inet_saddr); + inetpeer_set_addr_v4(&daddr, inet_sk(sk)->inet_daddr); + hash = ipv4_addr_hash(inet_sk(sk)->inet_daddr); } #if IS_ENABLED(CONFIG_IPV6) else if (sk->sk_family == AF_INET6) { if (ipv6_addr_v4mapped(&sk->sk_v6_daddr)) { - saddr.family = AF_INET; - saddr.addr.a4 = inet_sk(sk)->inet_saddr; - daddr.family = AF_INET; - daddr.addr.a4 = inet_sk(sk)->inet_daddr; - hash = (__force unsigned int) daddr.addr.a4; + inetpeer_set_addr_v4(&saddr, inet_sk(sk)->inet_saddr); + inetpeer_set_addr_v4(&daddr, inet_sk(sk)->inet_daddr); + hash = ipv4_addr_hash(inet_sk(sk)->inet_daddr); } else { - saddr.family = AF_INET6; - saddr.addr.in6 = sk->sk_v6_rcv_saddr; - daddr.family = AF_INET6; - daddr.addr.in6 = sk->sk_v6_daddr; + inetpeer_set_addr_v6(&saddr, &sk->sk_v6_rcv_saddr); + inetpeer_set_addr_v6(&daddr, &sk->sk_v6_daddr); hash = ipv6_addr_hash(&sk->sk_v6_daddr); } } @@ -796,18 +780,18 @@ static int tcp_metrics_fill_info(struct sk_buff *msg, switch (tm->tcpm_daddr.family) { case AF_INET: if (nla_put_in_addr(msg, TCP_METRICS_ATTR_ADDR_IPV4, - tm->tcpm_daddr.addr.a4) < 0) + inetpeer_get_addr_v4(&tm->tcpm_daddr)) < 0) goto nla_put_failure; if (nla_put_in_addr(msg, TCP_METRICS_ATTR_SADDR_IPV4, - tm->tcpm_saddr.addr.a4) < 0) + inetpeer_get_addr_v4(&tm->tcpm_saddr)) < 0) goto nla_put_failure; break; case AF_INET6: if (nla_put_in6_addr(msg, TCP_METRICS_ATTR_ADDR_IPV6, - &tm->tcpm_daddr.addr.in6) < 0) + inetpeer_get_addr_v6(&tm->tcpm_daddr)) < 0) goto nla_put_failure; if (nla_put_in6_addr(msg, TCP_METRICS_ATTR_SADDR_IPV6, - &tm->tcpm_saddr.addr.in6) < 0) + inetpeer_get_addr_v6(&tm->tcpm_saddr)) < 0) goto nla_put_failure; break; default: @@ -956,20 +940,21 @@ static int __parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr, a = info->attrs[v4]; if (a) { - addr->family = AF_INET; - addr->addr.a4 = nla_get_in_addr(a); + inetpeer_set_addr_v4(addr, nla_get_in_addr(a)); if (hash) - *hash = (__force unsigned int) addr->addr.a4; + *hash = ipv4_addr_hash(inetpeer_get_addr_v4(addr)); return 0; } a = info->attrs[v6]; if (a) { + struct in6_addr in6; + if (nla_len(a) != sizeof(struct in6_addr)) return -EINVAL; - addr->family = AF_INET6; - addr->addr.in6 = nla_get_in6_addr(a); + in6 = nla_get_in6_addr(a); + inetpeer_set_addr_v6(addr, &in6); if (hash) - *hash = ipv6_addr_hash(&addr->addr.in6); + *hash = ipv6_addr_hash(inetpeer_get_addr_v6(addr)); return 0; } return optional ? 1 : -EAFNOSUPPORT; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 444ab5beecbd..1188e4fcf23b 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -137,12 +137,12 @@ static __u16 tcp_advertise_mss(struct sock *sk) } /* RFC2861. Reset CWND after idle period longer RTO to "restart window". - * This is the first part of cwnd validation mechanism. */ -static void tcp_cwnd_restart(struct sock *sk, const struct dst_entry *dst) + * This is the first part of cwnd validation mechanism. + */ +void tcp_cwnd_restart(struct sock *sk, s32 delta) { struct tcp_sock *tp = tcp_sk(sk); - s32 delta = tcp_time_stamp - tp->lsndtime; - u32 restart_cwnd = tcp_init_cwnd(tp, dst); + u32 restart_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk)); u32 cwnd = tp->snd_cwnd; tcp_ca_event(sk, CA_EVENT_CWND_RESTART); @@ -164,10 +164,6 @@ static void tcp_event_data_sent(struct tcp_sock *tp, struct inet_connection_sock *icsk = inet_csk(sk); const u32 now = tcp_time_stamp; - if (sysctl_tcp_slow_start_after_idle && - (!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto)) - tcp_cwnd_restart(sk, __sk_dst_get(sk)); - tp->lsndtime = now; /* If it is a reply for ato after last received diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index 933ea903f7b8..aba428626b52 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -4,9 +4,10 @@ #include <linux/udp.h> #include <linux/types.h> #include <linux/kernel.h> +#include <net/dst_metadata.h> +#include <net/net_namespace.h> #include <net/udp.h> #include <net/udp_tunnel.h> -#include <net/net_namespace.h> int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg, struct socket **sockp) @@ -103,4 +104,26 @@ void udp_tunnel_sock_release(struct socket *sock) } EXPORT_SYMBOL_GPL(udp_tunnel_sock_release); +struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family, + __be16 flags, __be64 tunnel_id, int md_size) +{ + struct metadata_dst *tun_dst; + struct ip_tunnel_info *info; + + if (family == AF_INET) + tun_dst = ip_tun_rx_dst(skb, flags, tunnel_id, md_size); + else + tun_dst = ipv6_tun_rx_dst(skb, flags, tunnel_id, md_size); + if (!tun_dst) + return NULL; + + info = &tun_dst->u.tun_info; + info->key.tp_src = udp_hdr(skb)->source; + info->key.tp_dst = udp_hdr(skb)->dest; + if (udp_hdr(skb)->check) + info->key.tun_flags |= TUNNEL_CSUM; + return tun_dst; +} +EXPORT_SYMBOL_GPL(udp_tun_rx_dst); + MODULE_LICENSE("GPL"); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 55b3c0f4dde5..bb919b28619f 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -15,6 +15,7 @@ #include <net/dst.h> #include <net/xfrm.h> #include <net/ip.h> +#include <net/vrf.h> static struct xfrm_policy_afinfo xfrm4_policy_afinfo; @@ -107,8 +108,10 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) struct flowi4 *fl4 = &fl->u.ip4; int oif = 0; - if (skb_dst(skb)) - oif = skb_dst(skb)->dev->ifindex; + if (skb_dst(skb)) { + oif = vrf_master_ifindex(skb_dst(skb)->dev) ? + : skb_dst(skb)->dev->ifindex; + } memset(fl4, 0, sizeof(struct flowi4)); fl4->flowi4_mark = skb->mark; diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 643f61339e7b..983bb999738c 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -92,6 +92,25 @@ config IPV6_MIP6 If unsure, say N. +config IPV6_ILA + tristate "IPv6: Identifier Locator Addressing (ILA)" + select LWTUNNEL + ---help--- + Support for IPv6 Identifier Locator Addressing (ILA). + + ILA is a mechanism to do network virtualization without + encapsulation. The basic concept of ILA is that we split an + IPv6 address into a 64 bit locator and 64 bit identifier. The + identifier is the identity of an entity in communication + ("who") and the locator expresses the location of the + entity ("where"). + + ILA can be configured using the "encap ila" option with + "ip -6 route" command. ILA is described in + https://tools.ietf.org/html/draft-herbert-nvo3-ila-00. + + If unsure, say N. + config INET6_XFRM_TUNNEL tristate select INET6_TUNNEL diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 0f3f1999719a..2c900c7b7eb1 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o obj-$(CONFIG_IPV6_MIP6) += mip6.o +obj-$(CONFIG_IPV6_ILA) += ila.o obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_IPV6_VTI) += ip6_vti.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 59242399b0b5..0f08d3b9e238 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3656,7 +3656,7 @@ static void addrconf_dad_work(struct work_struct *w) /* send a neighbour solicitation for our addr */ addrconf_addr_solict_mult(&ifp->addr, &mcaddr); - ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any); + ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any, NULL); out: in6_ifa_put(ifp); rtnl_unlock(); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index ed7d4e3f9c10..0630a4d5daaa 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -577,8 +577,10 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) work_iph = ah_alloc_tmp(ahash, nfrags + sglists, hdr_len + ahp->icv_trunc_len + seqhi_len); - if (!work_iph) + if (!work_iph) { + err = -ENOMEM; goto out; + } auth_data = ah_tmp_auth((u8 *)work_iph, hdr_len); seqhi = (__be32 *)(auth_data + ahp->icv_trunc_len); diff --git a/net/ipv6/ila.c b/net/ipv6/ila.c new file mode 100644 index 000000000000..678d2df4b8d9 --- /dev/null +++ b/net/ipv6/ila.c @@ -0,0 +1,229 @@ +#include <linux/errno.h> +#include <linux/ip.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/socket.h> +#include <linux/types.h> +#include <net/checksum.h> +#include <net/ip.h> +#include <net/ip6_fib.h> +#include <net/lwtunnel.h> +#include <net/protocol.h> +#include <uapi/linux/ila.h> + +struct ila_params { + __be64 locator; + __be64 locator_match; + __wsum csum_diff; +}; + +static inline struct ila_params *ila_params_lwtunnel( + struct lwtunnel_state *lwstate) +{ + return (struct ila_params *)lwstate->data; +} + +static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to) +{ + __be32 diff[] = { + ~from[0], ~from[1], to[0], to[1], + }; + + return csum_partial(diff, sizeof(diff), 0); +} + +static inline __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p) +{ + if (*(__be64 *)&ip6h->daddr == p->locator_match) + return p->csum_diff; + else + return compute_csum_diff8((__be32 *)&ip6h->daddr, + (__be32 *)&p->locator); +} + +static void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p) +{ + __wsum diff; + struct ipv6hdr *ip6h = ipv6_hdr(skb); + size_t nhoff = sizeof(struct ipv6hdr); + + /* First update checksum */ + switch (ip6h->nexthdr) { + case NEXTHDR_TCP: + if (likely(pskb_may_pull(skb, nhoff + sizeof(struct tcphdr)))) { + struct tcphdr *th = (struct tcphdr *) + (skb_network_header(skb) + nhoff); + + diff = get_csum_diff(ip6h, p); + inet_proto_csum_replace_by_diff(&th->check, skb, + diff, true); + } + break; + case NEXTHDR_UDP: + if (likely(pskb_may_pull(skb, nhoff + sizeof(struct udphdr)))) { + struct udphdr *uh = (struct udphdr *) + (skb_network_header(skb) + nhoff); + + if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) { + diff = get_csum_diff(ip6h, p); + inet_proto_csum_replace_by_diff(&uh->check, skb, + diff, true); + if (!uh->check) + uh->check = CSUM_MANGLED_0; + } + } + break; + case NEXTHDR_ICMP: + if (likely(pskb_may_pull(skb, + nhoff + sizeof(struct icmp6hdr)))) { + struct icmp6hdr *ih = (struct icmp6hdr *) + (skb_network_header(skb) + nhoff); + + diff = get_csum_diff(ip6h, p); + inet_proto_csum_replace_by_diff(&ih->icmp6_cksum, skb, + diff, true); + } + break; + } + + /* Now change destination address */ + *(__be64 *)&ip6h->daddr = p->locator; +} + +static int ila_output(struct sock *sk, struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + + if (skb->protocol != htons(ETH_P_IPV6)) + goto drop; + + update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate)); + + return dst->lwtstate->orig_output(sk, skb); + +drop: + kfree_skb(skb); + return -EINVAL; +} + +static int ila_input(struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + + if (skb->protocol != htons(ETH_P_IPV6)) + goto drop; + + update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate)); + + return dst->lwtstate->orig_input(skb); + +drop: + kfree_skb(skb); + return -EINVAL; +} + +static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { + [ILA_ATTR_LOCATOR] = { .type = NLA_U64, }, +}; + +static int ila_build_state(struct net_device *dev, struct nlattr *nla, + unsigned int family, const void *cfg, + struct lwtunnel_state **ts) +{ + struct ila_params *p; + struct nlattr *tb[ILA_ATTR_MAX + 1]; + size_t encap_len = sizeof(*p); + struct lwtunnel_state *newts; + const struct fib6_config *cfg6 = cfg; + int ret; + + if (family != AF_INET6) + return -EINVAL; + + ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, + ila_nl_policy); + if (ret < 0) + return ret; + + if (!tb[ILA_ATTR_LOCATOR]) + return -EINVAL; + + newts = lwtunnel_state_alloc(encap_len); + if (!newts) + return -ENOMEM; + + newts->len = encap_len; + p = ila_params_lwtunnel(newts); + + p->locator = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]); + + if (cfg6->fc_dst_len > sizeof(__be64)) { + /* Precompute checksum difference for translation since we + * know both the old locator and the new one. + */ + p->locator_match = *(__be64 *)&cfg6->fc_dst; + p->csum_diff = compute_csum_diff8( + (__be32 *)&p->locator_match, (__be32 *)&p->locator); + } + + newts->type = LWTUNNEL_ENCAP_ILA; + newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT | + LWTUNNEL_STATE_INPUT_REDIRECT; + + *ts = newts; + + return 0; +} + +static int ila_fill_encap_info(struct sk_buff *skb, + struct lwtunnel_state *lwtstate) +{ + struct ila_params *p = ila_params_lwtunnel(lwtstate); + + if (nla_put_u64(skb, ILA_ATTR_LOCATOR, (__force u64)p->locator)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static int ila_encap_nlsize(struct lwtunnel_state *lwtstate) +{ + /* No encapsulation overhead */ + return 0; +} + +static int ila_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) +{ + struct ila_params *a_p = ila_params_lwtunnel(a); + struct ila_params *b_p = ila_params_lwtunnel(b); + + return (a_p->locator != b_p->locator); +} + +static const struct lwtunnel_encap_ops ila_encap_ops = { + .build_state = ila_build_state, + .output = ila_output, + .input = ila_input, + .fill_encap = ila_fill_encap_info, + .get_encap_size = ila_encap_nlsize, + .cmp_encap = ila_encap_cmp, +}; + +static int __init ila_init(void) +{ + return lwtunnel_encap_add_ops(&ila_encap_ops, LWTUNNEL_ENCAP_ILA); +} + +static void __exit ila_fini(void) +{ + lwtunnel_encap_del_ops(&ila_encap_ops, LWTUNNEL_ENCAP_ILA); +} + +module_init(ila_init); +module_exit(ila_fini); +MODULE_AUTHOR("Tom Herbert <tom@herbertland.com>"); +MODULE_LICENSE("GPL"); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 5693b5eb8482..418d9823692b 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -173,12 +173,13 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) *ppcpu_rt = NULL; } } + + non_pcpu_rt->rt6i_pcpu = NULL; } static void rt6_release(struct rt6_info *rt) { if (atomic_dec_and_test(&rt->rt6i_ref)) { - lwtstate_put(rt->rt6i_lwtstate); rt6_free_pcpu(rt); dst_free(&rt->dst); } diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 34f121812a14..4038c694ec03 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -361,6 +361,7 @@ static void ip6gre_tunnel_uninit(struct net_device *dev) struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id); ip6gre_tunnel_unlink(ign, t); + ip6_tnl_dst_reset(t); dev_put(dev); } diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index e1a1136bda7c..14dacf1df529 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -23,6 +23,15 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, if (err < 0) goto error; + if (cfg->ipv6_v6only) { + int val = 1; + + err = kernel_setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY, + (char *) &val, sizeof(val)); + if (err < 0) + goto error; + } + udp6_addr.sin6_family = AF_INET6; memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6, sizeof(udp6_addr.sin6_addr)); diff --git a/net/ipv6/mcast_snoop.c b/net/ipv6/mcast_snoop.c index df8afe5ab31e..9405b04eecc6 100644 --- a/net/ipv6/mcast_snoop.c +++ b/net/ipv6/mcast_snoop.c @@ -143,34 +143,36 @@ static int __ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff *skb_chk = NULL; unsigned int transport_len; unsigned int len = skb_transport_offset(skb) + sizeof(struct mld_msg); - int ret; + int ret = -EINVAL; transport_len = ntohs(ipv6_hdr(skb)->payload_len); transport_len -= skb_transport_offset(skb) - sizeof(struct ipv6hdr); - skb_get(skb); skb_chk = skb_checksum_trimmed(skb, transport_len, ipv6_mc_validate_checksum); if (!skb_chk) - return -EINVAL; + goto err; - if (!pskb_may_pull(skb_chk, len)) { - kfree_skb(skb_chk); - return -EINVAL; - } + if (!pskb_may_pull(skb_chk, len)) + goto err; ret = ipv6_mc_check_mld_msg(skb_chk); - if (ret) { - kfree_skb(skb_chk); - return ret; - } + if (ret) + goto err; if (skb_trimmed) *skb_trimmed = skb_chk; - else + /* free now unneeded clone */ + else if (skb_chk != skb) kfree_skb(skb_chk); - return 0; + ret = 0; + +err: + if (ret && skb_chk && skb_chk != skb) + kfree_skb(skb_chk); + + return ret; } /** @@ -179,7 +181,7 @@ static int __ipv6_mc_check_mld(struct sk_buff *skb, * @skb_trimmed: to store an skb pointer trimmed to IPv6 packet tail (optional) * * Checks whether an IPv6 packet is a valid MLD packet. If so sets - * skb network and transport headers accordingly and returns zero. + * skb transport header accordingly and returns zero. * * -EINVAL: A broken packet was detected, i.e. it violates some internet * standard @@ -194,7 +196,8 @@ static int __ipv6_mc_check_mld(struct sk_buff *skb, * to leave the original skb and its full frame unchanged (which might be * desirable for layer 2 frame jugglers). * - * The caller needs to release a reference count from any returned skb_trimmed. + * Caller needs to set the skb network header and free any returned skb if it + * differs from the provided skb. */ int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed) { diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index b3054611f88a..13d3c2beb93e 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -553,7 +553,8 @@ static void ndisc_send_unsol_na(struct net_device *dev) void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, const struct in6_addr *solicit, - const struct in6_addr *daddr, const struct in6_addr *saddr) + const struct in6_addr *daddr, const struct in6_addr *saddr, + struct sk_buff *oskb) { struct sk_buff *skb; struct in6_addr addr_buf; @@ -589,6 +590,9 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr); + if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE) && oskb) + skb_dst_copy(skb, oskb); + ndisc_send_skb(skb, daddr, saddr); } @@ -675,12 +679,12 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) "%s: trying to ucast probe in NUD_INVALID: %pI6\n", __func__, target); } - ndisc_send_ns(dev, neigh, target, target, saddr); + ndisc_send_ns(dev, neigh, target, target, saddr, skb); } else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) { neigh_app_ns(neigh); } else { addrconf_addr_solict_mult(target, &mcaddr); - ndisc_send_ns(dev, NULL, target, &mcaddr, saddr); + ndisc_send_ns(dev, NULL, target, &mcaddr, saddr, skb); } } diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index b552cf0d6198..96833e4b3193 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -47,9 +47,21 @@ config NFT_REJECT_IPV6 default NFT_REJECT tristate +config NFT_DUP_IPV6 + tristate "IPv6 nf_tables packet duplication support" + select NF_DUP_IPV6 + help + This module enables IPv6 packet duplication support for nf_tables. + endif # NF_TABLES_IPV6 endif # NF_TABLES +config NF_DUP_IPV6 + tristate "Netfilter IPv6 packet duplication to alternate destination" + help + This option enables the nf_dup_ipv6 core, which duplicates an IPv6 + packet to be rerouted to another destination. + config NF_REJECT_IPV6 tristate "IPv6 packet rejection" default m if NETFILTER_ADVANCED=n diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index c36e0a5490de..b4f7d0b4e2af 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -30,6 +30,8 @@ obj-$(CONFIG_NF_LOG_IPV6) += nf_log_ipv6.o # reject obj-$(CONFIG_NF_REJECT_IPV6) += nf_reject_ipv6.o +obj-$(CONFIG_NF_DUP_IPV6) += nf_dup_ipv6.o + # nf_tables obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o @@ -37,6 +39,7 @@ obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o obj-$(CONFIG_NFT_MASQ_IPV6) += nft_masq_ipv6.o obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o +obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o # matches obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 4e21f80228be..0771991ed812 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -455,15 +455,11 @@ ip6t_do_table(struct sk_buff *skb, } /* Figures out from what hook each rule can be called: returns 0 if - * there are loops. Puts hook bitmask in comefrom. - * - * Keeps track of largest call depth seen and stores it in newinfo->stacksize. - */ + there are loops. Puts hook bitmask in comefrom. */ static int -mark_source_chains(struct xt_table_info *newinfo, +mark_source_chains(const struct xt_table_info *newinfo, unsigned int valid_hooks, void *entry0) { - unsigned int calldepth, max_calldepth = 0; unsigned int hook; /* No recursion; use packet counter to save back ptrs (reset @@ -477,7 +473,6 @@ mark_source_chains(struct xt_table_info *newinfo, /* Set initial back pointer. */ e->counters.pcnt = pos; - calldepth = 0; for (;;) { const struct xt_standard_target *t @@ -539,8 +534,6 @@ mark_source_chains(struct xt_table_info *newinfo, (entry0 + pos + size); e->counters.pcnt = pos; pos += size; - if (calldepth > 0) - --calldepth; } else { int newpos = t->verdict; @@ -554,11 +547,6 @@ mark_source_chains(struct xt_table_info *newinfo, newpos); return 0; } - if (entry0 + newpos != ip6t_next_entry(e) && - !(e->ipv6.flags & IP6T_F_GOTO) && - ++calldepth > max_calldepth) - max_calldepth = calldepth; - /* This a jump; chase it. */ duprintf("Jump rule %u -> %u\n", pos, newpos); @@ -575,7 +563,6 @@ mark_source_chains(struct xt_table_info *newinfo, next: duprintf("Finished chain %u\n", hook); } - newinfo->stacksize = max_calldepth; return 1; } @@ -855,6 +842,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, if (ret != 0) return ret; ++i; + if (strcmp(ip6t_get_target(iter)->u.user.name, + XT_ERROR_TARGET) == 0) + ++newinfo->stacksize; } if (i != repl->num_entries) { @@ -1767,6 +1757,9 @@ translate_compat_table(struct net *net, if (ret != 0) break; ++i; + if (strcmp(ip6t_get_target(iter1)->u.user.name, + XT_ERROR_TARGET) == 0) + ++newinfo->stacksize; } if (ret) { /* diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 567367a75172..0ed841a3fa33 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -63,6 +63,12 @@ reject_tg6(struct sk_buff *skb, const struct xt_action_param *par) case IP6T_TCP_RESET: nf_send_reset6(net, skb, par->hooknum); break; + case IP6T_ICMP6_POLICY_FAIL: + nf_send_unreach6(net, skb, ICMPV6_POLICY_FAIL, par->hooknum); + break; + case IP6T_ICMP6_REJECT_ROUTE: + nf_send_unreach6(net, skb, ICMPV6_REJECT_ROUTE, par->hooknum); + break; } return NF_DROP; diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index ebbb754c2111..1e4bf99ed16e 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -237,7 +237,7 @@ synproxy_send_client_ack(const struct synproxy_net *snet, nth->ack_seq = th->ack_seq; tcp_flag_word(nth) = TCP_FLAG_ACK; nth->doff = tcp_hdr_size / 4; - nth->window = ntohs(htons(th->window) >> opts->wscale); + nth->window = htons(ntohs(th->window) >> opts->wscale); nth->check = 0; nth->urg_ptr = 0; diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 4ba0c34c627b..7302900c321a 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -251,7 +251,7 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len) if (*len < 0 || (unsigned int) *len < sizeof(sin6)) return -EINVAL; - h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple); + h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple); if (!h) { pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n", &tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port), diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 90388d606483..0e6fae103d33 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -150,7 +150,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl, struct nf_conntrack_tuple intuple, origtuple; const struct nf_conntrack_tuple_hash *h; const struct nf_conntrack_l4proto *inproto; - u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; + struct nf_conntrack_zone tmp; NF_CT_ASSERT(skb->nfct == NULL); @@ -177,7 +177,8 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl, *ctinfo = IP_CT_RELATED; - h = nf_conntrack_find_get(net, zone, &intuple); + h = nf_conntrack_find_get(net, nf_ct_zone_tmpl(tmpl, skb, &tmp), + &intuple); if (!h) { pr_debug("icmpv6_error: no match\n"); return -NF_ACCEPT; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 6d02498172c1..701cd2bae0a9 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -633,6 +633,7 @@ ret_orig: kfree_skb(clone); return skb; } +EXPORT_SYMBOL_GPL(nf_ct_frag6_gather); void nf_ct_frag6_consume_orig(struct sk_buff *skb) { diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index 267fb8d5876e..6d9c0b3d5b8c 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -33,20 +33,22 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, struct sk_buff *skb) { - u16 zone = NF_CT_DEFAULT_ZONE; - + u16 zone_id = NF_CT_DEFAULT_ZONE_ID; #if IS_ENABLED(CONFIG_NF_CONNTRACK) - if (skb->nfct) - zone = nf_ct_zone((struct nf_conn *)skb->nfct); + if (skb->nfct) { + enum ip_conntrack_info ctinfo; + const struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + + zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo)); + } #endif if (nf_bridge_in_prerouting(skb)) - return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone; + return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id; if (hooknum == NF_INET_PRE_ROUTING) - return IP6_DEFRAG_CONNTRACK_IN + zone; + return IP6_DEFRAG_CONNTRACK_IN + zone_id; else - return IP6_DEFRAG_CONNTRACK_OUT + zone; - + return IP6_DEFRAG_CONNTRACK_OUT + zone_id; } static unsigned int ipv6_defrag(const struct nf_hook_ops *ops, diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c new file mode 100644 index 000000000000..c5c87e921ccd --- /dev/null +++ b/net/ipv6/netfilter/nf_dup_ipv6.c @@ -0,0 +1,96 @@ +/* + * (C) 2007 by Sebastian Claßen <sebastian.classen@freenet.ag> + * (C) 2007-2010 by Jan Engelhardt <jengelh@medozas.de> + * + * Extracted from xt_TEE.c + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 or later, as + * published by the Free Software Foundation. + */ +#include <linux/module.h> +#include <linux/percpu.h> +#include <linux/skbuff.h> +#include <net/ipv6.h> +#include <net/ip6_route.h> +#include <net/netfilter/ipv6/nf_dup_ipv6.h> +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#include <net/netfilter/nf_conntrack.h> +#endif + +static struct net *pick_net(struct sk_buff *skb) +{ +#ifdef CONFIG_NET_NS + const struct dst_entry *dst; + + if (skb->dev != NULL) + return dev_net(skb->dev); + dst = skb_dst(skb); + if (dst != NULL && dst->dev != NULL) + return dev_net(dst->dev); +#endif + return &init_net; +} + +static bool nf_dup_ipv6_route(struct sk_buff *skb, const struct in6_addr *gw, + int oif) +{ + const struct ipv6hdr *iph = ipv6_hdr(skb); + struct net *net = pick_net(skb); + struct dst_entry *dst; + struct flowi6 fl6; + + memset(&fl6, 0, sizeof(fl6)); + if (oif != -1) + fl6.flowi6_oif = oif; + + fl6.daddr = *gw; + fl6.flowlabel = (__force __be32)(((iph->flow_lbl[0] & 0xF) << 16) | + (iph->flow_lbl[1] << 8) | iph->flow_lbl[2]); + dst = ip6_route_output(net, NULL, &fl6); + if (dst->error) { + dst_release(dst); + return false; + } + skb_dst_drop(skb); + skb_dst_set(skb, dst); + skb->dev = dst->dev; + skb->protocol = htons(ETH_P_IPV6); + + return true; +} + +void nf_dup_ipv6(struct sk_buff *skb, unsigned int hooknum, + const struct in6_addr *gw, int oif) +{ + if (this_cpu_read(nf_skb_duplicated)) + return; + skb = pskb_copy(skb, GFP_ATOMIC); + if (skb == NULL) + return; + +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + nf_conntrack_put(skb->nfct); + skb->nfct = &nf_ct_untracked_get()->ct_general; + skb->nfctinfo = IP_CT_NEW; + nf_conntrack_get(skb->nfct); +#endif + if (hooknum == NF_INET_PRE_ROUTING || + hooknum == NF_INET_LOCAL_IN) { + struct ipv6hdr *iph = ipv6_hdr(skb); + --iph->hop_limit; + } + if (nf_dup_ipv6_route(skb, gw, oif)) { + __this_cpu_write(nf_skb_duplicated, true); + ip6_local_out(skb); + __this_cpu_write(nf_skb_duplicated, false); + } else { + kfree_skb(skb); + } +} +EXPORT_SYMBOL_GPL(nf_dup_ipv6); + +MODULE_AUTHOR("Sebastian Claßen <sebastian.classen@freenet.ag>"); +MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>"); +MODULE_DESCRIPTION("nf_dup_ipv6: IPv6 packet duplication"); +MODULE_LICENSE("GPL"); diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index e76900e0aa92..70fbaed49edb 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -124,7 +124,7 @@ static void nf_nat_ipv6_csum_update(struct sk_buff *skb, newip = &t->dst.u3.in6; } inet_proto_csum_replace16(check, skb, oldip->s6_addr32, - newip->s6_addr32, 1); + newip->s6_addr32, true); } static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb, @@ -155,7 +155,7 @@ static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb, } } else inet_proto_csum_replace2(check, skb, - htons(oldlen), htons(datalen), 1); + htons(oldlen), htons(datalen), true); } #if IS_ENABLED(CONFIG_NF_CT_NETLINK) diff --git a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c index 2205e8eeeacf..57593b00c5b4 100644 --- a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c @@ -73,7 +73,7 @@ icmpv6_manip_pkt(struct sk_buff *skb, hdr->icmp6_type == ICMPV6_ECHO_REPLY) { inet_proto_csum_replace2(&hdr->icmp6_cksum, skb, hdr->icmp6_identifier, - tuple->src.u.icmp.id, 0); + tuple->src.u.icmp.id, false); hdr->icmp6_identifier = tuple->src.u.icmp.id; } return true; diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c new file mode 100644 index 000000000000..0eaa4f65fdea --- /dev/null +++ b/net/ipv6/netfilter/nft_dup_ipv6.c @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2015 Pablo Neira Ayuso <pablo@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/ipv6/nf_dup_ipv6.h> + +struct nft_dup_ipv6 { + enum nft_registers sreg_addr:8; + enum nft_registers sreg_dev:8; +}; + +static void nft_dup_ipv6_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_dup_ipv6 *priv = nft_expr_priv(expr); + struct in6_addr *gw = (struct in6_addr *)®s->data[priv->sreg_addr]; + int oif = regs->data[priv->sreg_dev]; + + nf_dup_ipv6(pkt->skb, pkt->ops->hooknum, gw, oif); +} + +static int nft_dup_ipv6_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_dup_ipv6 *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_DUP_SREG_ADDR] == NULL) + return -EINVAL; + + priv->sreg_addr = nft_parse_register(tb[NFTA_DUP_SREG_ADDR]); + err = nft_validate_register_load(priv->sreg_addr, sizeof(struct in6_addr)); + if (err < 0) + return err; + + if (tb[NFTA_DUP_SREG_DEV] != NULL) { + priv->sreg_dev = nft_parse_register(tb[NFTA_DUP_SREG_DEV]); + return nft_validate_register_load(priv->sreg_dev, sizeof(int)); + } + return 0; +} + +static int nft_dup_ipv6_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + struct nft_dup_ipv6 *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr) || + nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_dup_ipv6_type; +static const struct nft_expr_ops nft_dup_ipv6_ops = { + .type = &nft_dup_ipv6_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_dup_ipv6)), + .eval = nft_dup_ipv6_eval, + .init = nft_dup_ipv6_init, + .dump = nft_dup_ipv6_dump, +}; + +static const struct nla_policy nft_dup_ipv6_policy[NFTA_DUP_MAX + 1] = { + [NFTA_DUP_SREG_ADDR] = { .type = NLA_U32 }, + [NFTA_DUP_SREG_DEV] = { .type = NLA_U32 }, +}; + +static struct nft_expr_type nft_dup_ipv6_type __read_mostly = { + .family = NFPROTO_IPV6, + .name = "dup", + .ops = &nft_dup_ipv6_ops, + .policy = nft_dup_ipv6_policy, + .maxattr = NFTA_DUP_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_dup_ipv6_module_init(void) +{ + return nft_register_expr(&nft_dup_ipv6_type); +} + +static void __exit nft_dup_ipv6_module_exit(void) +{ + nft_unregister_expr(&nft_dup_ipv6_type); +} + +module_init(nft_dup_ipv6_module_init); +module_exit(nft_dup_ipv6_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "dup"); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 1c0217e61357..308dd5f9158f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -54,11 +54,13 @@ #include <net/tcp.h> #include <linux/rtnetlink.h> #include <net/dst.h> +#include <net/dst_metadata.h> #include <net/xfrm.h> #include <net/netevent.h> #include <net/netlink.h> #include <net/nexthop.h> #include <net/lwtunnel.h> +#include <net/ip_tunnels.h> #include <asm/uaccess.h> @@ -319,8 +321,7 @@ static const struct rt6_info ip6_blk_hole_entry_template = { /* allocate dst with ip6_dst_ops */ static struct rt6_info *__ip6_dst_alloc(struct net *net, struct net_device *dev, - int flags, - struct fib6_table *table) + int flags) { struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0, DST_OBSOLETE_FORCE_CHK, flags); @@ -337,10 +338,9 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net, static struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev, - int flags, - struct fib6_table *table) + int flags) { - struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags, table); + struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags); if (rt) { rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC); @@ -538,7 +538,7 @@ static void rt6_probe_deferred(struct work_struct *w) container_of(w, struct __rt6_probe_work, work); addrconf_addr_solict_mult(&work->target, &mcaddr); - ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL); + ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL, NULL); dev_put(work->dev); kfree(work); } @@ -957,8 +957,7 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort, if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)) ort = (struct rt6_info *)ort->dst.from; - rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, - 0, ort->rt6i_table); + rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0); if (!rt) return NULL; @@ -990,8 +989,7 @@ static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt) struct rt6_info *pcpu_rt; pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev), - rt->dst.dev, rt->dst.flags, - rt->rt6i_table); + rt->dst.dev, rt->dst.flags); if (!pcpu_rt) return NULL; @@ -1004,32 +1002,53 @@ static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt) /* It should be called with read_lock_bh(&tb6_lock) acquired */ static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt) { - struct rt6_info *pcpu_rt, *prev, **p; + struct rt6_info *pcpu_rt, **p; p = this_cpu_ptr(rt->rt6i_pcpu); pcpu_rt = *p; - if (pcpu_rt) - goto done; + if (pcpu_rt) { + dst_hold(&pcpu_rt->dst); + rt6_dst_from_metrics_check(pcpu_rt); + } + return pcpu_rt; +} + +static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt) +{ + struct fib6_table *table = rt->rt6i_table; + struct rt6_info *pcpu_rt, *prev, **p; pcpu_rt = ip6_rt_pcpu_alloc(rt); if (!pcpu_rt) { struct net *net = dev_net(rt->dst.dev); - pcpu_rt = net->ipv6.ip6_null_entry; - goto done; + dst_hold(&net->ipv6.ip6_null_entry->dst); + return net->ipv6.ip6_null_entry; } - prev = cmpxchg(p, NULL, pcpu_rt); - if (prev) { - /* If someone did it before us, return prev instead */ + read_lock_bh(&table->tb6_lock); + if (rt->rt6i_pcpu) { + p = this_cpu_ptr(rt->rt6i_pcpu); + prev = cmpxchg(p, NULL, pcpu_rt); + if (prev) { + /* If someone did it before us, return prev instead */ + dst_destroy(&pcpu_rt->dst); + pcpu_rt = prev; + } + } else { + /* rt has been removed from the fib6 tree + * before we have a chance to acquire the read_lock. + * In this case, don't brother to create a pcpu rt + * since rt is going away anyway. The next + * dst_check() will trigger a re-lookup. + */ dst_destroy(&pcpu_rt->dst); - pcpu_rt = prev; + pcpu_rt = rt; } - -done: dst_hold(&pcpu_rt->dst); rt6_dst_from_metrics_check(pcpu_rt); + read_unlock_bh(&table->tb6_lock); return pcpu_rt; } @@ -1104,9 +1123,22 @@ redo_rt6_select: rt->dst.lastuse = jiffies; rt->dst.__use++; pcpu_rt = rt6_get_pcpu_route(rt); - read_unlock_bh(&table->tb6_lock); + + if (pcpu_rt) { + read_unlock_bh(&table->tb6_lock); + } else { + /* We have to do the read_unlock first + * because rt6_make_pcpu_route() may trigger + * ip6_dst_gc() which will take the write_lock. + */ + dst_hold(&rt->dst); + read_unlock_bh(&table->tb6_lock); + pcpu_rt = rt6_make_pcpu_route(rt); + dst_release(&rt->dst); + } return pcpu_rt; + } } @@ -1131,6 +1163,7 @@ void ip6_route_input(struct sk_buff *skb) const struct ipv6hdr *iph = ipv6_hdr(skb); struct net *net = dev_net(skb->dev); int flags = RT6_LOOKUP_F_HAS_SADDR; + struct ip_tunnel_info *tun_info; struct flowi6 fl6 = { .flowi6_iif = skb->dev->ifindex, .daddr = iph->daddr, @@ -1140,6 +1173,10 @@ void ip6_route_input(struct sk_buff *skb) .flowi6_proto = iph->nexthdr, }; + tun_info = skb_tunnel_info(skb); + if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX)) + fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id; + skb_dst_drop(skb); skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags)); } @@ -1562,7 +1599,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, if (unlikely(!idev)) return ERR_PTR(-ENODEV); - rt = ip6_dst_alloc(net, dev, 0, NULL); + rt = ip6_dst_alloc(net, dev, 0); if (unlikely(!rt)) { in6_dev_put(idev); dst = ERR_PTR(-ENOMEM); @@ -1749,7 +1786,8 @@ int ip6_route_add(struct fib6_config *cfg) if (!table) goto out; - rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table); + rt = ip6_dst_alloc(net, NULL, + (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT); if (!rt) { err = -ENOMEM; @@ -1781,12 +1819,19 @@ int ip6_route_add(struct fib6_config *cfg) struct lwtunnel_state *lwtstate; err = lwtunnel_build_state(dev, cfg->fc_encap_type, - cfg->fc_encap, &lwtstate); + cfg->fc_encap, AF_INET6, cfg, + &lwtstate); if (err) goto out; - rt->rt6i_lwtstate = lwtstate_get(lwtstate); - if (lwtunnel_output_redirect(rt->rt6i_lwtstate)) - rt->dst.output = lwtunnel_output6; + rt->dst.lwtstate = lwtstate_get(lwtstate); + if (lwtunnel_output_redirect(rt->dst.lwtstate)) { + rt->dst.lwtstate->orig_output = rt->dst.output; + rt->dst.output = lwtunnel_output; + } + if (lwtunnel_input_redirect(rt->dst.lwtstate)) { + rt->dst.lwtstate->orig_input = rt->dst.input; + rt->dst.input = lwtunnel_input; + } } ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); @@ -2168,7 +2213,7 @@ static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort) #endif rt->rt6i_prefsrc = ort->rt6i_prefsrc; rt->rt6i_table = ort->rt6i_table; - rt->rt6i_lwtstate = lwtstate_get(ort->rt6i_lwtstate); + rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate); } #ifdef CONFIG_IPV6_ROUTE_INFO @@ -2419,7 +2464,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, { struct net *net = dev_net(idev->dev); struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, - DST_NOCOUNT, NULL); + DST_NOCOUNT); if (!rt) return ERR_PTR(-ENOMEM); @@ -2832,7 +2877,7 @@ static inline size_t rt6_nlmsg_size(struct rt6_info *rt) + nla_total_size(sizeof(struct rta_cacheinfo)) + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */ + nla_total_size(1) /* RTA_PREF */ - + lwtunnel_get_encap_size(rt->rt6i_lwtstate); + + lwtunnel_get_encap_size(rt->dst.lwtstate); } static int rt6_fill_node(struct net *net, @@ -2985,7 +3030,7 @@ static int rt6_fill_node(struct net *net, if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags))) goto nla_put_failure; - lwtunnel_fill_encap(skb, rt->rt6i_lwtstate); + lwtunnel_fill_encap(skb, rt->dst.lwtstate); nlmsg_end(skb, nlh); return 0; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index a74013d3eceb..30caa289c5db 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -20,6 +20,7 @@ #include <net/ip.h> #include <net/ipv6.h> #include <net/ip6_route.h> +#include <net/vrf.h> #if IS_ENABLED(CONFIG_IPV6_MIP6) #include <net/mip6.h> #endif @@ -131,8 +132,10 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) nexthdr = nh[nhoff]; - if (skb_dst(skb)) - oif = skb_dst(skb)->dev->ifindex; + if (skb_dst(skb)) { + oif = vrf_master_ifindex(skb_dst(skb)->dev) ? + : skb_dst(skb)->dev->ifindex; + } memset(fl6, 0, sizeof(struct flowi6)); fl6->flowi6_mark = skb->mark; diff --git a/net/key/af_key.c b/net/key/af_key.c index b397f0aa9005..83a70688784b 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -219,7 +219,7 @@ static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2, #define BROADCAST_ONE 1 #define BROADCAST_REGISTERED 2 #define BROADCAST_PROMISC_ONLY 4 -static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, +static int pfkey_broadcast(struct sk_buff *skb, int broadcast_flags, struct sock *one_sk, struct net *net) { @@ -244,7 +244,7 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, * socket. */ if (pfk->promisc) - pfkey_broadcast_one(skb, &skb2, allocation, sk); + pfkey_broadcast_one(skb, &skb2, GFP_ATOMIC, sk); /* the exact target will be processed later */ if (sk == one_sk) @@ -259,7 +259,7 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, continue; } - err2 = pfkey_broadcast_one(skb, &skb2, allocation, sk); + err2 = pfkey_broadcast_one(skb, &skb2, GFP_ATOMIC, sk); /* Error is cleare after succecful sending to at least one * registered KM */ @@ -269,7 +269,7 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, rcu_read_unlock(); if (one_sk != NULL) - err = pfkey_broadcast_one(skb, &skb2, allocation, one_sk); + err = pfkey_broadcast_one(skb, &skb2, GFP_KERNEL, one_sk); kfree_skb(skb2); kfree_skb(skb); @@ -292,7 +292,7 @@ static int pfkey_do_dump(struct pfkey_sock *pfk) hdr = (struct sadb_msg *) pfk->dump.skb->data; hdr->sadb_msg_seq = 0; hdr->sadb_msg_errno = rc; - pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, + pfkey_broadcast(pfk->dump.skb, BROADCAST_ONE, &pfk->sk, sock_net(&pfk->sk)); pfk->dump.skb = NULL; } @@ -333,7 +333,7 @@ static int pfkey_error(const struct sadb_msg *orig, int err, struct sock *sk) hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); - pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ONE, sk, sock_net(sk)); + pfkey_broadcast(skb, BROADCAST_ONE, sk, sock_net(sk)); return 0; } @@ -1365,7 +1365,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_ xfrm_state_put(x); - pfkey_broadcast(resp_skb, GFP_KERNEL, BROADCAST_ONE, sk, net); + pfkey_broadcast(resp_skb, BROADCAST_ONE, sk, net); return 0; } @@ -1452,7 +1452,7 @@ static int key_notify_sa(struct xfrm_state *x, const struct km_event *c) hdr->sadb_msg_seq = c->seq; hdr->sadb_msg_pid = c->portid; - pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xs_net(x)); + pfkey_broadcast(skb, BROADCAST_ALL, NULL, xs_net(x)); return 0; } @@ -1565,7 +1565,7 @@ static int pfkey_get(struct sock *sk, struct sk_buff *skb, const struct sadb_msg out_hdr->sadb_msg_reserved = 0; out_hdr->sadb_msg_seq = hdr->sadb_msg_seq; out_hdr->sadb_msg_pid = hdr->sadb_msg_pid; - pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk, sock_net(sk)); + pfkey_broadcast(out_skb, BROADCAST_ONE, sk, sock_net(sk)); return 0; } @@ -1670,7 +1670,7 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sad return -ENOBUFS; } - pfkey_broadcast(supp_skb, GFP_KERNEL, BROADCAST_REGISTERED, sk, sock_net(sk)); + pfkey_broadcast(supp_skb, BROADCAST_REGISTERED, sk, sock_net(sk)); return 0; } @@ -1689,7 +1689,7 @@ static int unicast_flush_resp(struct sock *sk, const struct sadb_msg *ihdr) hdr->sadb_msg_errno = (uint8_t) 0; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); - return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ONE, sk, sock_net(sk)); + return pfkey_broadcast(skb, BROADCAST_ONE, sk, sock_net(sk)); } static int key_notify_sa_flush(const struct km_event *c) @@ -1710,7 +1710,7 @@ static int key_notify_sa_flush(const struct km_event *c) hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); hdr->sadb_msg_reserved = 0; - pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net); + pfkey_broadcast(skb, BROADCAST_ALL, NULL, c->net); return 0; } @@ -1767,7 +1767,7 @@ static int dump_sa(struct xfrm_state *x, int count, void *ptr) out_hdr->sadb_msg_pid = pfk->dump.msg_portid; if (pfk->dump.skb) - pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, + pfkey_broadcast(pfk->dump.skb, BROADCAST_ONE, &pfk->sk, sock_net(&pfk->sk)); pfk->dump.skb = out_skb; @@ -1847,7 +1847,7 @@ static int pfkey_promisc(struct sock *sk, struct sk_buff *skb, const struct sadb new_hdr->sadb_msg_errno = 0; } - pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ALL, NULL, sock_net(sk)); + pfkey_broadcast(skb, BROADCAST_ALL, NULL, sock_net(sk)); return 0; } @@ -2181,7 +2181,7 @@ static int key_notify_policy(struct xfrm_policy *xp, int dir, const struct km_ev out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_seq = c->seq; out_hdr->sadb_msg_pid = c->portid; - pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xp_net(xp)); + pfkey_broadcast(out_skb, BROADCAST_ALL, NULL, xp_net(xp)); return 0; } @@ -2401,7 +2401,7 @@ static int key_pol_get_resp(struct sock *sk, struct xfrm_policy *xp, const struc out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_seq = hdr->sadb_msg_seq; out_hdr->sadb_msg_pid = hdr->sadb_msg_pid; - pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk, xp_net(xp)); + pfkey_broadcast(out_skb, BROADCAST_ONE, sk, xp_net(xp)); err = 0; out: @@ -2655,7 +2655,7 @@ static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr) out_hdr->sadb_msg_pid = pfk->dump.msg_portid; if (pfk->dump.skb) - pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, + pfkey_broadcast(pfk->dump.skb, BROADCAST_ONE, &pfk->sk, sock_net(&pfk->sk)); pfk->dump.skb = out_skb; @@ -2708,7 +2708,7 @@ static int key_notify_policy_flush(const struct km_event *c) hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); hdr->sadb_msg_reserved = 0; - pfkey_broadcast(skb_out, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net); + pfkey_broadcast(skb_out, BROADCAST_ALL, NULL, c->net); return 0; } @@ -2770,7 +2770,7 @@ static int pfkey_process(struct sock *sk, struct sk_buff *skb, const struct sadb void *ext_hdrs[SADB_EXT_MAX]; int err; - pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, + pfkey_broadcast(skb_clone(skb, GFP_KERNEL), BROADCAST_PROMISC_ONLY, NULL, sock_net(sk)); memset(ext_hdrs, 0, sizeof(ext_hdrs)); @@ -2992,7 +2992,7 @@ static int key_notify_sa_expire(struct xfrm_state *x, const struct km_event *c) out_hdr->sadb_msg_seq = 0; out_hdr->sadb_msg_pid = 0; - pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, xs_net(x)); + pfkey_broadcast(out_skb, BROADCAST_REGISTERED, NULL, xs_net(x)); return 0; } @@ -3182,7 +3182,7 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_ctx->ctx_len); } - return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, xs_net(x)); + return pfkey_broadcast(skb, BROADCAST_REGISTERED, NULL, xs_net(x)); } static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, @@ -3380,7 +3380,7 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, n_port->sadb_x_nat_t_port_port = sport; n_port->sadb_x_nat_t_port_reserved = 0; - return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, xs_net(x)); + return pfkey_broadcast(skb, BROADCAST_REGISTERED, NULL, xs_net(x)); } #ifdef CONFIG_NET_KEY_MIGRATE @@ -3572,7 +3572,7 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, } /* broadcast migrate message to sockets */ - pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, &init_net); + pfkey_broadcast(skb, BROADCAST_ALL, NULL, &init_net); return 0; diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 247552a7f6c2..3ece7d1034c8 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -92,14 +92,15 @@ int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_ewma) static inline void minstrel_sort_best_tp_rates(struct minstrel_sta_info *mi, int i, u8 *tp_list) { - int j = MAX_THR_RATES; - struct minstrel_rate_stats *tmp_mrs = &mi->r[j - 1].stats; + int j; + struct minstrel_rate_stats *tmp_mrs; struct minstrel_rate_stats *cur_mrs = &mi->r[i].stats; - while (j > 0 && (minstrel_get_tp_avg(&mi->r[i], cur_mrs->prob_ewma) > - minstrel_get_tp_avg(&mi->r[tp_list[j - 1]], tmp_mrs->prob_ewma))) { - j--; + for (j = MAX_THR_RATES; j > 0; --j) { tmp_mrs = &mi->r[tp_list[j - 1]].stats; + if (minstrel_get_tp_avg(&mi->r[i], cur_mrs->prob_ewma) <= + minstrel_get_tp_avg(&mi->r[tp_list[j - 1]], tmp_mrs->prob_ewma)) + break; } if (j < MAX_THR_RATES - 1) diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index 276f8c992218..21e70bc9af98 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -48,7 +48,6 @@ int mpls_output(struct sock *sk, struct sk_buff *skb) struct dst_entry *dst = skb_dst(skb); struct rtable *rt = NULL; struct rt6_info *rt6 = NULL; - struct lwtunnel_state *lwtstate = NULL; int err = 0; bool bos; int i; @@ -58,11 +57,9 @@ int mpls_output(struct sock *sk, struct sk_buff *skb) if (skb->protocol == htons(ETH_P_IP)) { ttl = ip_hdr(skb)->ttl; rt = (struct rtable *)dst; - lwtstate = rt->rt_lwtstate; } else if (skb->protocol == htons(ETH_P_IPV6)) { ttl = ipv6_hdr(skb)->hop_limit; rt6 = (struct rt6_info *)dst; - lwtstate = rt6->rt6i_lwtstate; } else { goto drop; } @@ -72,12 +69,12 @@ int mpls_output(struct sock *sk, struct sk_buff *skb) /* Find the output device */ out_dev = dst->dev; if (!mpls_output_possible(out_dev) || - !lwtstate || skb_warn_if_lro(skb)) + !dst->lwtstate || skb_warn_if_lro(skb)) goto drop; skb_forward_csum(skb); - tun_encap_info = mpls_lwtunnel_encap(lwtstate); + tun_encap_info = mpls_lwtunnel_encap(dst->lwtstate); /* Verify the destination can hold the packet */ new_header_size = mpls_encap_size(tun_encap_info); @@ -126,6 +123,7 @@ drop: } static int mpls_build_state(struct net_device *dev, struct nlattr *nla, + unsigned int family, const void *cfg, struct lwtunnel_state **ts) { struct mpls_iptunnel_encap *tun_encap_info; diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 6eae69a698ed..3e1b4abf1897 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -867,6 +867,8 @@ config NETFILTER_XT_TARGET_TEE depends on NETFILTER_ADVANCED depends on IPV6 || IPV6=n depends on !NF_CONNTRACK || NF_CONNTRACK + select NF_DUP_IPV4 + select NF_DUP_IPV6 if IP6_NF_IPTABLES ---help--- This option adds a "TEE" target with which a packet can be cloned and this clone be rerouted to another nexthop. diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 2a5a0704245c..0b939b7ad724 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -388,9 +388,6 @@ EXPORT_SYMBOL(nf_conntrack_destroy); struct nfq_ct_hook __rcu *nfq_ct_hook __read_mostly; EXPORT_SYMBOL_GPL(nfq_ct_hook); -struct nfq_ct_nat_hook __rcu *nfq_ct_nat_hook __read_mostly; -EXPORT_SYMBOL_GPL(nfq_ct_nat_hook); - #endif /* CONFIG_NF_CONNTRACK */ #ifdef CONFIG_NF_NAT_NEEDED diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index 3b6929dec748..b32fb0dbe237 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -162,6 +162,17 @@ config IP_VS_FO If you want to compile it in kernel, say Y. To compile it as a module, choose M here. If unsure, say N. +config IP_VS_OVF + tristate "weighted overflow scheduling" + ---help--- + The weighted overflow scheduling algorithm directs network + connections to the server with the highest weight that is + currently available and overflows to the next when active + connections exceed the node's weight. + + If you want to compile it in kernel, say Y. To compile it as a + module, choose M here. If unsure, say N. + config IP_VS_LBLC tristate "locality-based least-connection scheduling" ---help--- diff --git a/net/netfilter/ipvs/Makefile b/net/netfilter/ipvs/Makefile index 38b2723b2e3d..67f3f4389602 100644 --- a/net/netfilter/ipvs/Makefile +++ b/net/netfilter/ipvs/Makefile @@ -27,6 +27,7 @@ obj-$(CONFIG_IP_VS_WRR) += ip_vs_wrr.o obj-$(CONFIG_IP_VS_LC) += ip_vs_lc.o obj-$(CONFIG_IP_VS_WLC) += ip_vs_wlc.o obj-$(CONFIG_IP_VS_FO) += ip_vs_fo.o +obj-$(CONFIG_IP_VS_OVF) += ip_vs_ovf.o obj-$(CONFIG_IP_VS_LBLC) += ip_vs_lblc.o obj-$(CONFIG_IP_VS_LBLCR) += ip_vs_lblcr.o obj-$(CONFIG_IP_VS_DH) += ip_vs_dh.o diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 24c554201a76..1a23e91d50d8 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2335,13 +2335,23 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) cmd == IP_VS_SO_SET_STOPDAEMON) { struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; - mutex_lock(&ipvs->sync_mutex); - if (cmd == IP_VS_SO_SET_STARTDAEMON) - ret = start_sync_thread(net, dm->state, dm->mcast_ifn, - dm->syncid); - else + if (cmd == IP_VS_SO_SET_STARTDAEMON) { + struct ipvs_sync_daemon_cfg cfg; + + memset(&cfg, 0, sizeof(cfg)); + strlcpy(cfg.mcast_ifn, dm->mcast_ifn, + sizeof(cfg.mcast_ifn)); + cfg.syncid = dm->syncid; + rtnl_lock(); + mutex_lock(&ipvs->sync_mutex); + ret = start_sync_thread(net, &cfg, dm->state); + mutex_unlock(&ipvs->sync_mutex); + rtnl_unlock(); + } else { + mutex_lock(&ipvs->sync_mutex); ret = stop_sync_thread(net, dm->state); - mutex_unlock(&ipvs->sync_mutex); + mutex_unlock(&ipvs->sync_mutex); + } goto out_dec; } @@ -2645,15 +2655,15 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) mutex_lock(&ipvs->sync_mutex); if (ipvs->sync_state & IP_VS_STATE_MASTER) { d[0].state = IP_VS_STATE_MASTER; - strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn, + strlcpy(d[0].mcast_ifn, ipvs->mcfg.mcast_ifn, sizeof(d[0].mcast_ifn)); - d[0].syncid = ipvs->master_syncid; + d[0].syncid = ipvs->mcfg.syncid; } if (ipvs->sync_state & IP_VS_STATE_BACKUP) { d[1].state = IP_VS_STATE_BACKUP; - strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn, + strlcpy(d[1].mcast_ifn, ipvs->bcfg.mcast_ifn, sizeof(d[1].mcast_ifn)); - d[1].syncid = ipvs->backup_syncid; + d[1].syncid = ipvs->bcfg.syncid; } if (copy_to_user(user, &d, sizeof(d)) != 0) ret = -EFAULT; @@ -2808,6 +2818,11 @@ static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = { [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING, .len = IP_VS_IFNAME_MAXLEN }, [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 }, + [IPVS_DAEMON_ATTR_SYNC_MAXLEN] = { .type = NLA_U16 }, + [IPVS_DAEMON_ATTR_MCAST_GROUP] = { .type = NLA_U32 }, + [IPVS_DAEMON_ATTR_MCAST_GROUP6] = { .len = sizeof(struct in6_addr) }, + [IPVS_DAEMON_ATTR_MCAST_PORT] = { .type = NLA_U16 }, + [IPVS_DAEMON_ATTR_MCAST_TTL] = { .type = NLA_U8 }, }; /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */ @@ -3266,7 +3281,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, } static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state, - const char *mcast_ifn, __u32 syncid) + struct ipvs_sync_daemon_cfg *c) { struct nlattr *nl_daemon; @@ -3275,9 +3290,23 @@ static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state, return -EMSGSIZE; if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) || - nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn) || - nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid)) + nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, c->mcast_ifn) || + nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, c->syncid) || + nla_put_u16(skb, IPVS_DAEMON_ATTR_SYNC_MAXLEN, c->sync_maxlen) || + nla_put_u16(skb, IPVS_DAEMON_ATTR_MCAST_PORT, c->mcast_port) || + nla_put_u8(skb, IPVS_DAEMON_ATTR_MCAST_TTL, c->mcast_ttl)) goto nla_put_failure; +#ifdef CONFIG_IP_VS_IPV6 + if (c->mcast_af == AF_INET6) { + if (nla_put_in6_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP6, + &c->mcast_group.in6)) + goto nla_put_failure; + } else +#endif + if (c->mcast_af == AF_INET && + nla_put_in_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP, + c->mcast_group.ip)) + goto nla_put_failure; nla_nest_end(skb, nl_daemon); return 0; @@ -3288,7 +3317,7 @@ nla_put_failure: } static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state, - const char *mcast_ifn, __u32 syncid, + struct ipvs_sync_daemon_cfg *c, struct netlink_callback *cb) { void *hdr; @@ -3298,7 +3327,7 @@ static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state, if (!hdr) return -EMSGSIZE; - if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid)) + if (ip_vs_genl_fill_daemon(skb, state, c)) goto nla_put_failure; genlmsg_end(skb, hdr); @@ -3318,8 +3347,7 @@ static int ip_vs_genl_dump_daemons(struct sk_buff *skb, mutex_lock(&ipvs->sync_mutex); if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) { if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER, - ipvs->master_mcast_ifn, - ipvs->master_syncid, cb) < 0) + &ipvs->mcfg, cb) < 0) goto nla_put_failure; cb->args[0] = 1; @@ -3327,8 +3355,7 @@ static int ip_vs_genl_dump_daemons(struct sk_buff *skb, if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) { if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP, - ipvs->backup_mcast_ifn, - ipvs->backup_syncid, cb) < 0) + &ipvs->bcfg, cb) < 0) goto nla_put_failure; cb->args[1] = 1; @@ -3342,30 +3369,83 @@ nla_put_failure: static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs) { + struct netns_ipvs *ipvs = net_ipvs(net); + struct ipvs_sync_daemon_cfg c; + struct nlattr *a; + int ret; + + memset(&c, 0, sizeof(c)); if (!(attrs[IPVS_DAEMON_ATTR_STATE] && attrs[IPVS_DAEMON_ATTR_MCAST_IFN] && attrs[IPVS_DAEMON_ATTR_SYNC_ID])) return -EINVAL; + strlcpy(c.mcast_ifn, nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]), + sizeof(c.mcast_ifn)); + c.syncid = nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]); + + a = attrs[IPVS_DAEMON_ATTR_SYNC_MAXLEN]; + if (a) + c.sync_maxlen = nla_get_u16(a); + + a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP]; + if (a) { + c.mcast_af = AF_INET; + c.mcast_group.ip = nla_get_in_addr(a); + if (!ipv4_is_multicast(c.mcast_group.ip)) + return -EINVAL; + } else { + a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP6]; + if (a) { +#ifdef CONFIG_IP_VS_IPV6 + int addr_type; + + c.mcast_af = AF_INET6; + c.mcast_group.in6 = nla_get_in6_addr(a); + addr_type = ipv6_addr_type(&c.mcast_group.in6); + if (!(addr_type & IPV6_ADDR_MULTICAST)) + return -EINVAL; +#else + return -EAFNOSUPPORT; +#endif + } + } + + a = attrs[IPVS_DAEMON_ATTR_MCAST_PORT]; + if (a) + c.mcast_port = nla_get_u16(a); + + a = attrs[IPVS_DAEMON_ATTR_MCAST_TTL]; + if (a) + c.mcast_ttl = nla_get_u8(a); /* The synchronization protocol is incompatible with mixed family * services */ - if (net_ipvs(net)->mixed_address_family_dests > 0) + if (ipvs->mixed_address_family_dests > 0) return -EINVAL; - return start_sync_thread(net, - nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]), - nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]), - nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID])); + rtnl_lock(); + mutex_lock(&ipvs->sync_mutex); + ret = start_sync_thread(net, &c, + nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); + mutex_unlock(&ipvs->sync_mutex); + rtnl_unlock(); + return ret; } static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs) { + struct netns_ipvs *ipvs = net_ipvs(net); + int ret; + if (!attrs[IPVS_DAEMON_ATTR_STATE]) return -EINVAL; - return stop_sync_thread(net, - nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); + mutex_lock(&ipvs->sync_mutex); + ret = stop_sync_thread(net, + nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); + mutex_unlock(&ipvs->sync_mutex); + return ret; } static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs) @@ -3389,7 +3469,7 @@ static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs) static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info) { - int ret = 0, cmd; + int ret = -EINVAL, cmd; struct net *net; struct netns_ipvs *ipvs; @@ -3400,22 +3480,19 @@ static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info) if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) { struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1]; - mutex_lock(&ipvs->sync_mutex); if (!info->attrs[IPVS_CMD_ATTR_DAEMON] || nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX, info->attrs[IPVS_CMD_ATTR_DAEMON], - ip_vs_daemon_policy)) { - ret = -EINVAL; + ip_vs_daemon_policy)) goto out; - } if (cmd == IPVS_CMD_NEW_DAEMON) ret = ip_vs_genl_new_daemon(net, daemon_attrs); else ret = ip_vs_genl_del_daemon(net, daemon_attrs); -out: - mutex_unlock(&ipvs->sync_mutex); } + +out: return ret; } diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c index 5882bbfd198c..136184572fc9 100644 --- a/net/netfilter/ipvs/ip_vs_nfct.c +++ b/net/netfilter/ipvs/ip_vs_nfct.c @@ -274,7 +274,7 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) " for conn " FMT_CONN "\n", __func__, ARG_TUPLE(&tuple), ARG_CONN(cp)); - h = nf_conntrack_find_get(ip_vs_conn_net(cp), NF_CT_DEFAULT_ZONE, + h = nf_conntrack_find_get(ip_vs_conn_net(cp), &nf_ct_zone_dflt, &tuple); if (h) { ct = nf_ct_tuplehash_to_ctrack(h); diff --git a/net/netfilter/ipvs/ip_vs_ovf.c b/net/netfilter/ipvs/ip_vs_ovf.c new file mode 100644 index 000000000000..f7d62c3b7329 --- /dev/null +++ b/net/netfilter/ipvs/ip_vs_ovf.c @@ -0,0 +1,86 @@ +/* + * IPVS: Overflow-Connection Scheduling module + * + * Authors: Raducu Deaconu <rhadoo_io@yahoo.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Scheduler implements "overflow" loadbalancing according to number of active + * connections , will keep all conections to the node with the highest weight + * and overflow to the next node if the number of connections exceeds the node's + * weight. + * Note that this scheduler might not be suitable for UDP because it only uses + * active connections + * + */ + +#define KMSG_COMPONENT "IPVS" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/module.h> +#include <linux/kernel.h> + +#include <net/ip_vs.h> + +/* OVF Connection scheduling */ +static struct ip_vs_dest * +ip_vs_ovf_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) +{ + struct ip_vs_dest *dest, *h = NULL; + int hw = 0, w; + + IP_VS_DBG(6, "ip_vs_ovf_schedule(): Scheduling...\n"); + /* select the node with highest weight, go to next in line if active + * connections exceed weight + */ + list_for_each_entry_rcu(dest, &svc->destinations, n_list) { + w = atomic_read(&dest->weight); + if ((dest->flags & IP_VS_DEST_F_OVERLOAD) || + atomic_read(&dest->activeconns) > w || + w == 0) + continue; + if (!h || w > hw) { + h = dest; + hw = w; + } + } + + if (h) { + IP_VS_DBG_BUF(6, "OVF: server %s:%u active %d w %d\n", + IP_VS_DBG_ADDR(h->af, &h->addr), + ntohs(h->port), + atomic_read(&h->activeconns), + atomic_read(&h->weight)); + return h; + } + + ip_vs_scheduler_err(svc, "no destination available"); + return NULL; +} + +static struct ip_vs_scheduler ip_vs_ovf_scheduler = { + .name = "ovf", + .refcnt = ATOMIC_INIT(0), + .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_ovf_scheduler.n_list), + .schedule = ip_vs_ovf_schedule, +}; + +static int __init ip_vs_ovf_init(void) +{ + return register_ip_vs_scheduler(&ip_vs_ovf_scheduler); +} + +static void __exit ip_vs_ovf_cleanup(void) +{ + unregister_ip_vs_scheduler(&ip_vs_ovf_scheduler); + synchronize_rcu(); +} + +module_init(ip_vs_ovf_init); +module_exit(ip_vs_ovf_cleanup); +MODULE_LICENSE("GPL"); diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index d99ad93eb855..43f140950075 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -262,6 +262,11 @@ struct ip_vs_sync_mesg { /* ip_vs_sync_conn entries start here */ }; +union ipvs_sockaddr { + struct sockaddr_in in; + struct sockaddr_in6 in6; +}; + struct ip_vs_sync_buff { struct list_head list; unsigned long firstuse; @@ -320,26 +325,28 @@ sb_dequeue(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms) * Create a new sync buffer for Version 1 proto. */ static inline struct ip_vs_sync_buff * -ip_vs_sync_buff_create(struct netns_ipvs *ipvs) +ip_vs_sync_buff_create(struct netns_ipvs *ipvs, unsigned int len) { struct ip_vs_sync_buff *sb; if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) return NULL; - sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC); + len = max_t(unsigned int, len + sizeof(struct ip_vs_sync_mesg), + ipvs->mcfg.sync_maxlen); + sb->mesg = kmalloc(len, GFP_ATOMIC); if (!sb->mesg) { kfree(sb); return NULL; } sb->mesg->reserved = 0; /* old nr_conns i.e. must be zero now */ sb->mesg->version = SYNC_PROTO_VER; - sb->mesg->syncid = ipvs->master_syncid; + sb->mesg->syncid = ipvs->mcfg.syncid; sb->mesg->size = htons(sizeof(struct ip_vs_sync_mesg)); sb->mesg->nr_conns = 0; sb->mesg->spare = 0; sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg); - sb->end = (unsigned char *)sb->mesg + ipvs->send_mesg_maxlen; + sb->end = (unsigned char *)sb->mesg + len; sb->firstuse = jiffies; return sb; @@ -402,7 +409,7 @@ select_master_thread_id(struct netns_ipvs *ipvs, struct ip_vs_conn *cp) * Create a new sync buffer for Version 0 proto. */ static inline struct ip_vs_sync_buff * -ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs) +ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs, unsigned int len) { struct ip_vs_sync_buff *sb; struct ip_vs_sync_mesg_v0 *mesg; @@ -410,17 +417,19 @@ ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs) if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) return NULL; - sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC); + len = max_t(unsigned int, len + sizeof(struct ip_vs_sync_mesg_v0), + ipvs->mcfg.sync_maxlen); + sb->mesg = kmalloc(len, GFP_ATOMIC); if (!sb->mesg) { kfree(sb); return NULL; } mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg; mesg->nr_conns = 0; - mesg->syncid = ipvs->master_syncid; + mesg->syncid = ipvs->mcfg.syncid; mesg->size = htons(sizeof(struct ip_vs_sync_mesg_v0)); sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0); - sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen; + sb->end = (unsigned char *)mesg + len; sb->firstuse = jiffies; return sb; } @@ -533,7 +542,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, struct ip_vs_sync_buff *buff; struct ipvs_master_sync_state *ms; int id; - int len; + unsigned int len; if (unlikely(cp->af != AF_INET)) return; @@ -553,17 +562,19 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, id = select_master_thread_id(ipvs, cp); ms = &ipvs->ms[id]; buff = ms->sync_buff; + len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE : + SIMPLE_CONN_SIZE; if (buff) { m = (struct ip_vs_sync_mesg_v0 *) buff->mesg; /* Send buffer if it is for v1 */ - if (!m->nr_conns) { + if (buff->head + len > buff->end || !m->nr_conns) { sb_queue_tail(ipvs, ms); ms->sync_buff = NULL; buff = NULL; } } if (!buff) { - buff = ip_vs_sync_buff_create_v0(ipvs); + buff = ip_vs_sync_buff_create_v0(ipvs, len); if (!buff) { spin_unlock_bh(&ipvs->sync_buff_lock); pr_err("ip_vs_sync_buff_create failed.\n"); @@ -572,8 +583,6 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, ms->sync_buff = buff; } - len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE : - SIMPLE_CONN_SIZE; m = (struct ip_vs_sync_mesg_v0 *) buff->mesg; s = (struct ip_vs_sync_conn_v0 *) buff->head; @@ -597,12 +606,6 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, m->nr_conns++; m->size = htons(ntohs(m->size) + len); buff->head += len; - - /* check if there is a space for next one */ - if (buff->head + FULL_CONN_SIZE > buff->end) { - sb_queue_tail(ipvs, ms); - ms->sync_buff = NULL; - } spin_unlock_bh(&ipvs->sync_buff_lock); /* synchronize its controller if it has */ @@ -694,7 +697,7 @@ sloop: } if (!buff) { - buff = ip_vs_sync_buff_create(ipvs); + buff = ip_vs_sync_buff_create(ipvs, len); if (!buff) { spin_unlock_bh(&ipvs->sync_buff_lock); pr_err("ip_vs_sync_buff_create failed.\n"); @@ -1219,7 +1222,7 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer, return; } /* SyncID sanity check */ - if (ipvs->backup_syncid != 0 && m2->syncid != ipvs->backup_syncid) { + if (ipvs->bcfg.syncid != 0 && m2->syncid != ipvs->bcfg.syncid) { IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid); return; } @@ -1303,6 +1306,14 @@ static void set_mcast_loop(struct sock *sk, u_char loop) /* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */ lock_sock(sk); inet->mc_loop = loop ? 1 : 0; +#ifdef CONFIG_IP_VS_IPV6 + if (sk->sk_family == AF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + + /* IPV6_MULTICAST_LOOP */ + np->mc_loop = loop ? 1 : 0; + } +#endif release_sock(sk); } @@ -1316,6 +1327,33 @@ static void set_mcast_ttl(struct sock *sk, u_char ttl) /* setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); */ lock_sock(sk); inet->mc_ttl = ttl; +#ifdef CONFIG_IP_VS_IPV6 + if (sk->sk_family == AF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + + /* IPV6_MULTICAST_HOPS */ + np->mcast_hops = ttl; + } +#endif + release_sock(sk); +} + +/* Control fragmentation of messages */ +static void set_mcast_pmtudisc(struct sock *sk, int val) +{ + struct inet_sock *inet = inet_sk(sk); + + /* setsockopt(sock, SOL_IP, IP_MTU_DISCOVER, &val, sizeof(val)); */ + lock_sock(sk); + inet->pmtudisc = val; +#ifdef CONFIG_IP_VS_IPV6 + if (sk->sk_family == AF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + + /* IPV6_MTU_DISCOVER */ + np->pmtudisc = val; + } +#endif release_sock(sk); } @@ -1338,44 +1376,15 @@ static int set_mcast_if(struct sock *sk, char *ifname) lock_sock(sk); inet->mc_index = dev->ifindex; /* inet->mc_addr = 0; */ - release_sock(sk); - - return 0; -} - +#ifdef CONFIG_IP_VS_IPV6 + if (sk->sk_family == AF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); -/* - * Set the maximum length of sync message according to the - * specified interface's MTU. - */ -static int set_sync_mesg_maxlen(struct net *net, int sync_state) -{ - struct netns_ipvs *ipvs = net_ipvs(net); - struct net_device *dev; - int num; - - if (sync_state == IP_VS_STATE_MASTER) { - dev = __dev_get_by_name(net, ipvs->master_mcast_ifn); - if (!dev) - return -ENODEV; - - num = (dev->mtu - sizeof(struct iphdr) - - sizeof(struct udphdr) - - SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE; - ipvs->send_mesg_maxlen = SYNC_MESG_HEADER_LEN + - SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF); - IP_VS_DBG(7, "setting the maximum length of sync sending " - "message %d.\n", ipvs->send_mesg_maxlen); - } else if (sync_state == IP_VS_STATE_BACKUP) { - dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn); - if (!dev) - return -ENODEV; - - ipvs->recv_mesg_maxlen = dev->mtu - - sizeof(struct iphdr) - sizeof(struct udphdr); - IP_VS_DBG(7, "setting the maximum length of sync receiving " - "message %d.\n", ipvs->recv_mesg_maxlen); + /* IPV6_MULTICAST_IF */ + np->mcast_oif = dev->ifindex; } +#endif + release_sock(sk); return 0; } @@ -1405,15 +1414,34 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) mreq.imr_ifindex = dev->ifindex; - rtnl_lock(); lock_sock(sk); ret = ip_mc_join_group(sk, &mreq); release_sock(sk); - rtnl_unlock(); return ret; } +#ifdef CONFIG_IP_VS_IPV6 +static int join_mcast_group6(struct sock *sk, struct in6_addr *addr, + char *ifname) +{ + struct net *net = sock_net(sk); + struct net_device *dev; + int ret; + + dev = __dev_get_by_name(net, ifname); + if (!dev) + return -ENODEV; + if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) + return -EINVAL; + + lock_sock(sk); + ret = ipv6_sock_mc_join(sk, dev->ifindex, addr); + release_sock(sk); + + return ret; +} +#endif static int bind_mcastif_addr(struct socket *sock, char *ifname) { @@ -1442,6 +1470,26 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname) return sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin)); } +static void get_mcast_sockaddr(union ipvs_sockaddr *sa, int *salen, + struct ipvs_sync_daemon_cfg *c, int id) +{ + if (AF_INET6 == c->mcast_af) { + sa->in6 = (struct sockaddr_in6) { + .sin6_family = AF_INET6, + .sin6_port = htons(c->mcast_port + id), + }; + sa->in6.sin6_addr = c->mcast_group.in6; + *salen = sizeof(sa->in6); + } else { + sa->in = (struct sockaddr_in) { + .sin_family = AF_INET, + .sin_port = htons(c->mcast_port + id), + }; + sa->in.sin_addr = c->mcast_group.in; + *salen = sizeof(sa->in); + } +} + /* * Set up sending multicast socket over UDP */ @@ -1449,40 +1497,43 @@ static struct socket *make_send_sock(struct net *net, int id) { struct netns_ipvs *ipvs = net_ipvs(net); /* multicast addr */ - struct sockaddr_in mcast_addr = { - .sin_family = AF_INET, - .sin_port = cpu_to_be16(IP_VS_SYNC_PORT + id), - .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP), - }; + union ipvs_sockaddr mcast_addr; struct socket *sock; - int result; + int result, salen; /* First create a socket */ - result = sock_create_kern(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); + result = sock_create_kern(net, ipvs->mcfg.mcast_af, SOCK_DGRAM, + IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); return ERR_PTR(result); } - result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn); + result = set_mcast_if(sock->sk, ipvs->mcfg.mcast_ifn); if (result < 0) { pr_err("Error setting outbound mcast interface\n"); goto error; } set_mcast_loop(sock->sk, 0); - set_mcast_ttl(sock->sk, 1); + set_mcast_ttl(sock->sk, ipvs->mcfg.mcast_ttl); + /* Allow fragmentation if MTU changes */ + set_mcast_pmtudisc(sock->sk, IP_PMTUDISC_DONT); result = sysctl_sync_sock_size(ipvs); if (result > 0) set_sock_size(sock->sk, 1, result); - result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn); + if (AF_INET == ipvs->mcfg.mcast_af) + result = bind_mcastif_addr(sock, ipvs->mcfg.mcast_ifn); + else + result = 0; if (result < 0) { pr_err("Error binding address of the mcast interface\n"); goto error; } + get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->mcfg, id); result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr, - sizeof(struct sockaddr), 0); + salen, 0); if (result < 0) { pr_err("Error connecting to the multicast addr\n"); goto error; @@ -1503,16 +1554,13 @@ static struct socket *make_receive_sock(struct net *net, int id) { struct netns_ipvs *ipvs = net_ipvs(net); /* multicast addr */ - struct sockaddr_in mcast_addr = { - .sin_family = AF_INET, - .sin_port = cpu_to_be16(IP_VS_SYNC_PORT + id), - .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP), - }; + union ipvs_sockaddr mcast_addr; struct socket *sock; - int result; + int result, salen; /* First create a socket */ - result = sock_create_kern(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); + result = sock_create_kern(net, ipvs->bcfg.mcast_af, SOCK_DGRAM, + IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); return ERR_PTR(result); @@ -1523,17 +1571,22 @@ static struct socket *make_receive_sock(struct net *net, int id) if (result > 0) set_sock_size(sock->sk, 0, result); - result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr, - sizeof(struct sockaddr)); + get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->bcfg, id); + result = sock->ops->bind(sock, (struct sockaddr *)&mcast_addr, salen); if (result < 0) { pr_err("Error binding to the multicast addr\n"); goto error; } /* join the multicast group */ - result = join_mcast_group(sock->sk, - (struct in_addr *) &mcast_addr.sin_addr, - ipvs->backup_mcast_ifn); +#ifdef CONFIG_IP_VS_IPV6 + if (ipvs->bcfg.mcast_af == AF_INET6) + result = join_mcast_group6(sock->sk, &mcast_addr.in6.sin6_addr, + ipvs->bcfg.mcast_ifn); + else +#endif + result = join_mcast_group(sock->sk, &mcast_addr.in.sin_addr, + ipvs->bcfg.mcast_ifn); if (result < 0) { pr_err("Error joining to the multicast group\n"); goto error; @@ -1641,7 +1694,7 @@ static int sync_thread_master(void *data) pr_info("sync thread started: state = MASTER, mcast_ifn = %s, " "syncid = %d, id = %d\n", - ipvs->master_mcast_ifn, ipvs->master_syncid, tinfo->id); + ipvs->mcfg.mcast_ifn, ipvs->mcfg.syncid, tinfo->id); for (;;) { sb = next_sync_buff(ipvs, ms); @@ -1695,7 +1748,7 @@ static int sync_thread_backup(void *data) pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, " "syncid = %d, id = %d\n", - ipvs->backup_mcast_ifn, ipvs->backup_syncid, tinfo->id); + ipvs->bcfg.mcast_ifn, ipvs->bcfg.syncid, tinfo->id); while (!kthread_should_stop()) { wait_event_interruptible(*sk_sleep(tinfo->sock->sk), @@ -1705,7 +1758,7 @@ static int sync_thread_backup(void *data) /* do we have data now? */ while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) { len = ip_vs_receive(tinfo->sock, tinfo->buf, - ipvs->recv_mesg_maxlen); + ipvs->bcfg.sync_maxlen); if (len <= 0) { if (len != -EAGAIN) pr_err("receiving message error\n"); @@ -1725,16 +1778,19 @@ static int sync_thread_backup(void *data) } -int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) +int start_sync_thread(struct net *net, struct ipvs_sync_daemon_cfg *c, + int state) { struct ip_vs_sync_thread_data *tinfo; struct task_struct **array = NULL, *task; struct socket *sock; struct netns_ipvs *ipvs = net_ipvs(net); + struct net_device *dev; char *name; int (*threadfn)(void *data); - int id, count; + int id, count, hlen; int result = -ENOMEM; + u16 mtu, min_mtu; IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n", @@ -1746,22 +1802,46 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) } else count = ipvs->threads_mask + 1; + if (c->mcast_af == AF_UNSPEC) { + c->mcast_af = AF_INET; + c->mcast_group.ip = cpu_to_be32(IP_VS_SYNC_GROUP); + } + if (!c->mcast_port) + c->mcast_port = IP_VS_SYNC_PORT; + if (!c->mcast_ttl) + c->mcast_ttl = 1; + + dev = __dev_get_by_name(net, c->mcast_ifn); + if (!dev) { + pr_err("Unknown mcast interface: %s\n", c->mcast_ifn); + return -ENODEV; + } + hlen = (AF_INET6 == c->mcast_af) ? + sizeof(struct ipv6hdr) + sizeof(struct udphdr) : + sizeof(struct iphdr) + sizeof(struct udphdr); + mtu = (state == IP_VS_STATE_BACKUP) ? + clamp(dev->mtu, 1500U, 65535U) : 1500U; + min_mtu = (state == IP_VS_STATE_BACKUP) ? 1024 : 1; + + if (c->sync_maxlen) + c->sync_maxlen = clamp_t(unsigned int, + c->sync_maxlen, min_mtu, + 65535 - hlen); + else + c->sync_maxlen = mtu - hlen; + if (state == IP_VS_STATE_MASTER) { if (ipvs->ms) return -EEXIST; - strlcpy(ipvs->master_mcast_ifn, mcast_ifn, - sizeof(ipvs->master_mcast_ifn)); - ipvs->master_syncid = syncid; + ipvs->mcfg = *c; name = "ipvs-m:%d:%d"; threadfn = sync_thread_master; } else if (state == IP_VS_STATE_BACKUP) { if (ipvs->backup_threads) return -EEXIST; - strlcpy(ipvs->backup_mcast_ifn, mcast_ifn, - sizeof(ipvs->backup_mcast_ifn)); - ipvs->backup_syncid = syncid; + ipvs->bcfg = *c; name = "ipvs-b:%d:%d"; threadfn = sync_thread_backup; } else { @@ -1789,7 +1869,6 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) if (!array) goto out; } - set_sync_mesg_maxlen(net, state); tinfo = NULL; for (id = 0; id < count; id++) { @@ -1807,7 +1886,7 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) tinfo->net = net; tinfo->sock = sock; if (state == IP_VS_STATE_BACKUP) { - tinfo->buf = kmalloc(ipvs->recv_mesg_maxlen, + tinfo->buf = kmalloc(ipvs->bcfg.sync_maxlen, GFP_KERNEL); if (!tinfo->buf) goto outtinfo; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 3c20d02aee73..ac3be9b0629b 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -126,7 +126,7 @@ EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked); unsigned int nf_conntrack_hash_rnd __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_hash_rnd); -static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone) +static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple) { unsigned int n; @@ -135,7 +135,7 @@ static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone) * three bytes manually. */ n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32); - return jhash2((u32 *)tuple, n, zone ^ nf_conntrack_hash_rnd ^ + return jhash2((u32 *)tuple, n, nf_conntrack_hash_rnd ^ (((__force __u16)tuple->dst.u.all << 16) | tuple->dst.protonum)); } @@ -151,15 +151,15 @@ static u32 hash_bucket(u32 hash, const struct net *net) } static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple, - u16 zone, unsigned int size) + unsigned int size) { - return __hash_bucket(hash_conntrack_raw(tuple, zone), size); + return __hash_bucket(hash_conntrack_raw(tuple), size); } -static inline u_int32_t hash_conntrack(const struct net *net, u16 zone, +static inline u_int32_t hash_conntrack(const struct net *net, const struct nf_conntrack_tuple *tuple) { - return __hash_conntrack(tuple, zone, net->ct.htable_size); + return __hash_conntrack(tuple, net->ct.htable_size); } bool @@ -288,7 +288,9 @@ static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct) } /* Released via destroy_conntrack() */ -struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags) +struct nf_conn *nf_ct_tmpl_alloc(struct net *net, + const struct nf_conntrack_zone *zone, + gfp_t flags) { struct nf_conn *tmpl; @@ -299,24 +301,15 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags) tmpl->status = IPS_TEMPLATE; write_pnet(&tmpl->ct_net, net); -#ifdef CONFIG_NF_CONNTRACK_ZONES - if (zone) { - struct nf_conntrack_zone *nf_ct_zone; + if (nf_ct_zone_add(tmpl, flags, zone) < 0) + goto out_free; - nf_ct_zone = nf_ct_ext_add(tmpl, NF_CT_EXT_ZONE, flags); - if (!nf_ct_zone) - goto out_free; - nf_ct_zone->id = zone; - } -#endif atomic_set(&tmpl->ct_general.use, 0); return tmpl; -#ifdef CONFIG_NF_CONNTRACK_ZONES out_free: kfree(tmpl); return NULL; -#endif } EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc); @@ -373,7 +366,6 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); unsigned int hash, reply_hash; - u16 zone = nf_ct_zone(ct); unsigned int sequence; nf_ct_helper_destroy(ct); @@ -381,9 +373,9 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct) local_bh_disable(); do { sequence = read_seqcount_begin(&net->ct.generation); - hash = hash_conntrack(net, zone, + hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - reply_hash = hash_conntrack(net, zone, + reply_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence)); @@ -431,8 +423,8 @@ static void death_by_timeout(unsigned long ul_conntrack) static inline bool nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, - const struct nf_conntrack_tuple *tuple, - u16 zone) + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_zone *zone) { struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); @@ -440,8 +432,8 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, * so we need to check that the conntrack is confirmed */ return nf_ct_tuple_equal(tuple, &h->tuple) && - nf_ct_zone(ct) == zone && - nf_ct_is_confirmed(ct); + nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h)) && + nf_ct_is_confirmed(ct); } /* @@ -450,7 +442,7 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, * and recheck nf_ct_tuple_equal(tuple, &h->tuple) */ static struct nf_conntrack_tuple_hash * -____nf_conntrack_find(struct net *net, u16 zone, +____nf_conntrack_find(struct net *net, const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple, u32 hash) { struct nf_conntrack_tuple_hash *h; @@ -486,7 +478,7 @@ begin: /* Find a connection corresponding to a tuple. */ static struct nf_conntrack_tuple_hash * -__nf_conntrack_find_get(struct net *net, u16 zone, +__nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple, u32 hash) { struct nf_conntrack_tuple_hash *h; @@ -513,11 +505,11 @@ begin: } struct nf_conntrack_tuple_hash * -nf_conntrack_find_get(struct net *net, u16 zone, +nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple) { return __nf_conntrack_find_get(net, zone, tuple, - hash_conntrack_raw(tuple, zone)); + hash_conntrack_raw(tuple)); } EXPORT_SYMBOL_GPL(nf_conntrack_find_get); @@ -536,11 +528,11 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct, int nf_conntrack_hash_check_insert(struct nf_conn *ct) { + const struct nf_conntrack_zone *zone; struct net *net = nf_ct_net(ct); unsigned int hash, reply_hash; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - u16 zone; unsigned int sequence; zone = nf_ct_zone(ct); @@ -548,9 +540,9 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) local_bh_disable(); do { sequence = read_seqcount_begin(&net->ct.generation); - hash = hash_conntrack(net, zone, + hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - reply_hash = hash_conntrack(net, zone, + reply_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence)); @@ -558,12 +550,14 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, &h->tuple) && - zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) + nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone, + NF_CT_DIRECTION(h))) goto out; hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, &h->tuple) && - zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) + nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone, + NF_CT_DIRECTION(h))) goto out; add_timer(&ct->timeout); @@ -588,6 +582,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert); int __nf_conntrack_confirm(struct sk_buff *skb) { + const struct nf_conntrack_zone *zone; unsigned int hash, reply_hash; struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; @@ -596,7 +591,6 @@ __nf_conntrack_confirm(struct sk_buff *skb) struct hlist_nulls_node *n; enum ip_conntrack_info ctinfo; struct net *net; - u16 zone; unsigned int sequence; ct = nf_ct_get(skb, &ctinfo); @@ -617,7 +611,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) /* reuse the hash saved before */ hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev; hash = hash_bucket(hash, net); - reply_hash = hash_conntrack(net, zone, + reply_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence)); @@ -649,12 +643,14 @@ __nf_conntrack_confirm(struct sk_buff *skb) hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, &h->tuple) && - zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) + nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone, + NF_CT_DIRECTION(h))) goto out; hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, &h->tuple) && - zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) + nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone, + NF_CT_DIRECTION(h))) goto out; /* Timer relative to confirmation time, not original @@ -707,11 +703,14 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack) { struct net *net = nf_ct_net(ignored_conntrack); + const struct nf_conntrack_zone *zone; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; struct nf_conn *ct; - u16 zone = nf_ct_zone(ignored_conntrack); - unsigned int hash = hash_conntrack(net, zone, tuple); + unsigned int hash; + + zone = nf_ct_zone(ignored_conntrack); + hash = hash_conntrack(net, tuple); /* Disable BHs the entire time since we need to disable them at * least once for the stats anyway. @@ -721,7 +720,7 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, ct = nf_ct_tuplehash_to_ctrack(h); if (ct != ignored_conntrack && nf_ct_tuple_equal(tuple, &h->tuple) && - nf_ct_zone(ct) == zone) { + nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h))) { NF_CT_STAT_INC(net, found); rcu_read_unlock_bh(); return 1; @@ -810,7 +809,8 @@ void init_nf_conntrack_hash_rnd(void) } static struct nf_conn * -__nf_conntrack_alloc(struct net *net, u16 zone, +__nf_conntrack_alloc(struct net *net, + const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *orig, const struct nf_conntrack_tuple *repl, gfp_t gfp, u32 hash) @@ -820,7 +820,7 @@ __nf_conntrack_alloc(struct net *net, u16 zone, if (unlikely(!nf_conntrack_hash_rnd)) { init_nf_conntrack_hash_rnd(); /* recompute the hash as nf_conntrack_hash_rnd is initialized */ - hash = hash_conntrack_raw(orig, zone); + hash = hash_conntrack_raw(orig); } /* We don't want any race condition at early drop stage */ @@ -840,10 +840,9 @@ __nf_conntrack_alloc(struct net *net, u16 zone, * SLAB_DESTROY_BY_RCU. */ ct = kmem_cache_alloc(net->ct.nf_conntrack_cachep, gfp); - if (ct == NULL) { - atomic_dec(&net->ct.count); - return ERR_PTR(-ENOMEM); - } + if (ct == NULL) + goto out; + spin_lock_init(&ct->lock); ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL; @@ -857,31 +856,24 @@ __nf_conntrack_alloc(struct net *net, u16 zone, memset(&ct->__nfct_init_offset[0], 0, offsetof(struct nf_conn, proto) - offsetof(struct nf_conn, __nfct_init_offset[0])); -#ifdef CONFIG_NF_CONNTRACK_ZONES - if (zone) { - struct nf_conntrack_zone *nf_ct_zone; - nf_ct_zone = nf_ct_ext_add(ct, NF_CT_EXT_ZONE, GFP_ATOMIC); - if (!nf_ct_zone) - goto out_free; - nf_ct_zone->id = zone; - } -#endif + if (zone && nf_ct_zone_add(ct, GFP_ATOMIC, zone) < 0) + goto out_free; + /* Because we use RCU lookups, we set ct_general.use to zero before * this is inserted in any list. */ atomic_set(&ct->ct_general.use, 0); return ct; - -#ifdef CONFIG_NF_CONNTRACK_ZONES out_free: - atomic_dec(&net->ct.count); kmem_cache_free(net->ct.nf_conntrack_cachep, ct); +out: + atomic_dec(&net->ct.count); return ERR_PTR(-ENOMEM); -#endif } -struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone, +struct nf_conn *nf_conntrack_alloc(struct net *net, + const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *orig, const struct nf_conntrack_tuple *repl, gfp_t gfp) @@ -923,8 +915,9 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, struct nf_conntrack_tuple repl_tuple; struct nf_conntrack_ecache *ecache; struct nf_conntrack_expect *exp = NULL; - u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; + const struct nf_conntrack_zone *zone; struct nf_conn_timeout *timeout_ext; + struct nf_conntrack_zone tmp; unsigned int *timeouts; if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) { @@ -932,6 +925,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, return NULL; } + zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC, hash); if (IS_ERR(ct)) @@ -1026,10 +1020,11 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, int *set_reply, enum ip_conntrack_info *ctinfo) { + const struct nf_conntrack_zone *zone; struct nf_conntrack_tuple tuple; struct nf_conntrack_tuple_hash *h; + struct nf_conntrack_zone tmp; struct nf_conn *ct; - u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; u32 hash; if (!nf_ct_get_tuple(skb, skb_network_offset(skb), @@ -1040,7 +1035,8 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, } /* look for tuple match */ - hash = hash_conntrack_raw(&tuple, zone); + zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); + hash = hash_conntrack_raw(&tuple); h = __nf_conntrack_find_get(net, zone, &tuple, hash); if (!h) { h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto, @@ -1290,6 +1286,13 @@ bool __nf_ct_kill_acct(struct nf_conn *ct, } EXPORT_SYMBOL_GPL(__nf_ct_kill_acct); +/* Built-in default zone used e.g. by modules. */ +const struct nf_conntrack_zone nf_ct_zone_dflt = { + .id = NF_CT_DEFAULT_ZONE_ID, + .dir = NF_CT_DEFAULT_ZONE_DIR, +}; +EXPORT_SYMBOL_GPL(nf_ct_zone_dflt); + #ifdef CONFIG_NF_CONNTRACK_ZONES static struct nf_ct_ext_type nf_ct_zone_extend __read_mostly = { .len = sizeof(struct nf_conntrack_zone), @@ -1596,8 +1599,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) struct nf_conntrack_tuple_hash, hnnode); ct = nf_ct_tuplehash_to_ctrack(h); hlist_nulls_del_rcu(&h->hnnode); - bucket = __hash_conntrack(&h->tuple, nf_ct_zone(ct), - hashsize); + bucket = __hash_conntrack(&h->tuple, hashsize); hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]); } } diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index b45a4223cb05..acf5c7b3f378 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -88,7 +88,8 @@ static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple } struct nf_conntrack_expect * -__nf_ct_expect_find(struct net *net, u16 zone, +__nf_ct_expect_find(struct net *net, + const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i; @@ -100,7 +101,7 @@ __nf_ct_expect_find(struct net *net, u16 zone, h = nf_ct_expect_dst_hash(tuple); hlist_for_each_entry_rcu(i, &net->ct.expect_hash[h], hnode) { if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && - nf_ct_zone(i->master) == zone) + nf_ct_zone_equal_any(i->master, zone)) return i; } return NULL; @@ -109,7 +110,8 @@ EXPORT_SYMBOL_GPL(__nf_ct_expect_find); /* Just find a expectation corresponding to a tuple. */ struct nf_conntrack_expect * -nf_ct_expect_find_get(struct net *net, u16 zone, +nf_ct_expect_find_get(struct net *net, + const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i; @@ -127,7 +129,8 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_find_get); /* If an expectation for this connection is found, it gets delete from * global list then returned. */ struct nf_conntrack_expect * -nf_ct_find_expectation(struct net *net, u16 zone, +nf_ct_find_expectation(struct net *net, + const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i, *exp = NULL; @@ -140,7 +143,7 @@ nf_ct_find_expectation(struct net *net, u16 zone, hlist_for_each_entry(i, &net->ct.expect_hash[h], hnode) { if (!(i->flags & NF_CT_EXPECT_INACTIVE) && nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && - nf_ct_zone(i->master) == zone) { + nf_ct_zone_equal_any(i->master, zone)) { exp = i; break; } @@ -220,16 +223,16 @@ static inline int expect_clash(const struct nf_conntrack_expect *a, } return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask) && - nf_ct_zone(a->master) == nf_ct_zone(b->master); + nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master)); } static inline int expect_matches(const struct nf_conntrack_expect *a, const struct nf_conntrack_expect *b) { return a->master == b->master && a->class == b->class && - nf_ct_tuple_equal(&a->tuple, &b->tuple) && - nf_ct_tuple_mask_equal(&a->mask, &b->mask) && - nf_ct_zone(a->master) == nf_ct_zone(b->master); + nf_ct_tuple_equal(&a->tuple, &b->tuple) && + nf_ct_tuple_mask_equal(&a->mask, &b->mask) && + nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master)); } /* Generally a bad idea to call this: could have matched already. */ diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c index bb53f120e79c..3ce5c314ea4b 100644 --- a/net/netfilter/nf_conntrack_labels.c +++ b/net/netfilter/nf_conntrack_labels.c @@ -14,6 +14,8 @@ #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_labels.h> +static spinlock_t nf_connlabels_lock; + static unsigned int label_bits(const struct nf_conn_labels *l) { unsigned int longs = l->words; @@ -48,7 +50,6 @@ int nf_connlabel_set(struct nf_conn *ct, u16 bit) } EXPORT_SYMBOL_GPL(nf_connlabel_set); -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) static void replace_u32(u32 *address, u32 mask, u32 new) { u32 old, tmp; @@ -89,7 +90,35 @@ int nf_connlabels_replace(struct nf_conn *ct, return 0; } EXPORT_SYMBOL_GPL(nf_connlabels_replace); -#endif + +int nf_connlabels_get(struct net *net, unsigned int n_bits) +{ + size_t words; + + if (n_bits > (NF_CT_LABELS_MAX_SIZE * BITS_PER_BYTE)) + return -ERANGE; + + words = BITS_TO_LONGS(n_bits); + + spin_lock(&nf_connlabels_lock); + net->ct.labels_used++; + if (words > net->ct.label_words) + net->ct.label_words = words; + spin_unlock(&nf_connlabels_lock); + + return 0; +} +EXPORT_SYMBOL_GPL(nf_connlabels_get); + +void nf_connlabels_put(struct net *net) +{ + spin_lock(&nf_connlabels_lock); + net->ct.labels_used--; + if (net->ct.labels_used == 0) + net->ct.label_words = 0; + spin_unlock(&nf_connlabels_lock); +} +EXPORT_SYMBOL_GPL(nf_connlabels_put); static struct nf_ct_ext_type labels_extend __read_mostly = { .len = sizeof(struct nf_conn_labels), @@ -99,6 +128,7 @@ static struct nf_ct_ext_type labels_extend __read_mostly = { int nf_conntrack_labels_init(void) { + spin_lock_init(&nf_connlabels_lock); return nf_ct_extend_register(&labels_extend); } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 6b8b0abbfab4..94a66541e0b7 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -128,6 +128,20 @@ ctnetlink_dump_tuples(struct sk_buff *skb, } static inline int +ctnetlink_dump_zone_id(struct sk_buff *skb, int attrtype, + const struct nf_conntrack_zone *zone, int dir) +{ + if (zone->id == NF_CT_DEFAULT_ZONE_ID || zone->dir != dir) + return 0; + if (nla_put_be16(skb, attrtype, htons(zone->id))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static inline int ctnetlink_dump_status(struct sk_buff *skb, const struct nf_conn *ct) { if (nla_put_be32(skb, CTA_STATUS, htonl(ct->status))) @@ -458,6 +472,7 @@ static int ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, struct nf_conn *ct) { + const struct nf_conntrack_zone *zone; struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; struct nlattr *nest_parms; @@ -473,11 +488,16 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, nfmsg->version = NFNETLINK_V0; nfmsg->res_id = 0; + zone = nf_ct_zone(ct); + nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED); if (!nest_parms) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) goto nla_put_failure; + if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone, + NF_CT_ZONE_DIR_ORIG) < 0) + goto nla_put_failure; nla_nest_end(skb, nest_parms); nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED); @@ -485,10 +505,13 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, goto nla_put_failure; if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0) goto nla_put_failure; + if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone, + NF_CT_ZONE_DIR_REPL) < 0) + goto nla_put_failure; nla_nest_end(skb, nest_parms); - if (nf_ct_zone(ct) && - nla_put_be16(skb, CTA_ZONE, htons(nf_ct_zone(ct)))) + if (ctnetlink_dump_zone_id(skb, CTA_ZONE, zone, + NF_CT_DEFAULT_ZONE_DIR) < 0) goto nla_put_failure; if (ctnetlink_dump_status(skb, ct) < 0 || @@ -598,7 +621,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct) + nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */ #endif #ifdef CONFIG_NF_CONNTRACK_ZONES - + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE */ + + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE|CTA_TUPLE_ZONE */ #endif + ctnetlink_proto_size(ct) + ctnetlink_label_size(ct) @@ -609,6 +632,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct) static int ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) { + const struct nf_conntrack_zone *zone; struct net *net; struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; @@ -655,11 +679,16 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) nfmsg->res_id = 0; rcu_read_lock(); + zone = nf_ct_zone(ct); + nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED); if (!nest_parms) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) goto nla_put_failure; + if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone, + NF_CT_ZONE_DIR_ORIG) < 0) + goto nla_put_failure; nla_nest_end(skb, nest_parms); nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED); @@ -667,10 +696,13 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0) goto nla_put_failure; + if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone, + NF_CT_ZONE_DIR_REPL) < 0) + goto nla_put_failure; nla_nest_end(skb, nest_parms); - if (nf_ct_zone(ct) && - nla_put_be16(skb, CTA_ZONE, htons(nf_ct_zone(ct)))) + if (ctnetlink_dump_zone_id(skb, CTA_ZONE, zone, + NF_CT_DEFAULT_ZONE_DIR) < 0) goto nla_put_failure; if (ctnetlink_dump_id(skb, ct) < 0) @@ -920,15 +952,54 @@ ctnetlink_parse_tuple_proto(struct nlattr *attr, return ret; } +static int +ctnetlink_parse_zone(const struct nlattr *attr, + struct nf_conntrack_zone *zone) +{ + nf_ct_zone_init(zone, NF_CT_DEFAULT_ZONE_ID, + NF_CT_DEFAULT_ZONE_DIR, 0); +#ifdef CONFIG_NF_CONNTRACK_ZONES + if (attr) + zone->id = ntohs(nla_get_be16(attr)); +#else + if (attr) + return -EOPNOTSUPP; +#endif + return 0; +} + +static int +ctnetlink_parse_tuple_zone(struct nlattr *attr, enum ctattr_type type, + struct nf_conntrack_zone *zone) +{ + int ret; + + if (zone->id != NF_CT_DEFAULT_ZONE_ID) + return -EINVAL; + + ret = ctnetlink_parse_zone(attr, zone); + if (ret < 0) + return ret; + + if (type == CTA_TUPLE_REPLY) + zone->dir = NF_CT_ZONE_DIR_REPL; + else + zone->dir = NF_CT_ZONE_DIR_ORIG; + + return 0; +} + static const struct nla_policy tuple_nla_policy[CTA_TUPLE_MAX+1] = { [CTA_TUPLE_IP] = { .type = NLA_NESTED }, [CTA_TUPLE_PROTO] = { .type = NLA_NESTED }, + [CTA_TUPLE_ZONE] = { .type = NLA_U16 }, }; static int ctnetlink_parse_tuple(const struct nlattr * const cda[], struct nf_conntrack_tuple *tuple, - enum ctattr_type type, u_int8_t l3num) + enum ctattr_type type, u_int8_t l3num, + struct nf_conntrack_zone *zone) { struct nlattr *tb[CTA_TUPLE_MAX+1]; int err; @@ -955,6 +1026,16 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[], if (err < 0) return err; + if (tb[CTA_TUPLE_ZONE]) { + if (!zone) + return -EINVAL; + + err = ctnetlink_parse_tuple_zone(tb[CTA_TUPLE_ZONE], + type, zone); + if (err < 0) + return err; + } + /* orig and expect tuples get DIR_ORIGINAL */ if (type == CTA_TUPLE_REPLY) tuple->dst.dir = IP_CT_DIR_REPLY; @@ -964,21 +1045,6 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[], return 0; } -static int -ctnetlink_parse_zone(const struct nlattr *attr, u16 *zone) -{ - if (attr) -#ifdef CONFIG_NF_CONNTRACK_ZONES - *zone = ntohs(nla_get_be16(attr)); -#else - return -EOPNOTSUPP; -#endif - else - *zone = 0; - - return 0; -} - static const struct nla_policy help_nla_policy[CTA_HELP_MAX+1] = { [CTA_HELP_NAME] = { .type = NLA_NUL_STRING, .len = NF_CT_HELPER_NAME_LEN - 1 }, @@ -1058,7 +1124,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, struct nf_conn *ct; struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; - u16 zone; + struct nf_conntrack_zone zone; int err; err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone); @@ -1066,9 +1132,11 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, return err; if (cda[CTA_TUPLE_ORIG]) - err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3); + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, + u3, &zone); else if (cda[CTA_TUPLE_REPLY]) - err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3); + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, + u3, &zone); else { return ctnetlink_flush_conntrack(net, cda, NETLINK_CB(skb).portid, @@ -1078,7 +1146,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - h = nf_conntrack_find_get(net, zone, &tuple); + h = nf_conntrack_find_get(net, &zone, &tuple); if (!h) return -ENOENT; @@ -1112,7 +1180,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, struct sk_buff *skb2 = NULL; struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; - u16 zone; + struct nf_conntrack_zone zone; int err; if (nlh->nlmsg_flags & NLM_F_DUMP) { @@ -1138,16 +1206,18 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, return err; if (cda[CTA_TUPLE_ORIG]) - err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3); + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, + u3, &zone); else if (cda[CTA_TUPLE_REPLY]) - err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3); + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, + u3, &zone); else return -EINVAL; if (err < 0) return err; - h = nf_conntrack_find_get(net, zone, &tuple); + h = nf_conntrack_find_get(net, &zone, &tuple); if (!h) return -ENOENT; @@ -1645,7 +1715,8 @@ ctnetlink_change_conntrack(struct nf_conn *ct, } static struct nf_conn * -ctnetlink_create_conntrack(struct net *net, u16 zone, +ctnetlink_create_conntrack(struct net *net, + const struct nf_conntrack_zone *zone, const struct nlattr * const cda[], struct nf_conntrack_tuple *otuple, struct nf_conntrack_tuple *rtuple, @@ -1761,7 +1832,8 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, struct nf_conntrack_tuple_hash *master_h; struct nf_conn *master_ct; - err = ctnetlink_parse_tuple(cda, &master, CTA_TUPLE_MASTER, u3); + err = ctnetlink_parse_tuple(cda, &master, CTA_TUPLE_MASTER, + u3, NULL); if (err < 0) goto err2; @@ -1804,7 +1876,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct nf_conn *ct; u_int8_t u3 = nfmsg->nfgen_family; - u16 zone; + struct nf_conntrack_zone zone; int err; err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone); @@ -1812,21 +1884,23 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, return err; if (cda[CTA_TUPLE_ORIG]) { - err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG, u3); + err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG, + u3, &zone); if (err < 0) return err; } if (cda[CTA_TUPLE_REPLY]) { - err = ctnetlink_parse_tuple(cda, &rtuple, CTA_TUPLE_REPLY, u3); + err = ctnetlink_parse_tuple(cda, &rtuple, CTA_TUPLE_REPLY, + u3, &zone); if (err < 0) return err; } if (cda[CTA_TUPLE_ORIG]) - h = nf_conntrack_find_get(net, zone, &otuple); + h = nf_conntrack_find_get(net, &zone, &otuple); else if (cda[CTA_TUPLE_REPLY]) - h = nf_conntrack_find_get(net, zone, &rtuple); + h = nf_conntrack_find_get(net, &zone, &rtuple); if (h == NULL) { err = -ENOENT; @@ -1836,7 +1910,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, if (!cda[CTA_TUPLE_ORIG] || !cda[CTA_TUPLE_REPLY]) return -EINVAL; - ct = ctnetlink_create_conntrack(net, zone, cda, &otuple, + ct = ctnetlink_create_conntrack(net, &zone, cda, &otuple, &rtuple, u3); if (IS_ERR(ct)) return PTR_ERR(ct); @@ -2082,7 +2156,7 @@ ctnetlink_nfqueue_build_size(const struct nf_conn *ct) + nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */ #endif #ifdef CONFIG_NF_CONNTRACK_ZONES - + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE */ + + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE|CTA_TUPLE_ZONE */ #endif + ctnetlink_proto_size(ct) ; @@ -2091,14 +2165,20 @@ ctnetlink_nfqueue_build_size(const struct nf_conn *ct) static int ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct) { + const struct nf_conntrack_zone *zone; struct nlattr *nest_parms; rcu_read_lock(); + zone = nf_ct_zone(ct); + nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED); if (!nest_parms) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) goto nla_put_failure; + if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone, + NF_CT_ZONE_DIR_ORIG) < 0) + goto nla_put_failure; nla_nest_end(skb, nest_parms); nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED); @@ -2106,12 +2186,14 @@ ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0) goto nla_put_failure; + if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone, + NF_CT_ZONE_DIR_REPL) < 0) + goto nla_put_failure; nla_nest_end(skb, nest_parms); - if (nf_ct_zone(ct)) { - if (nla_put_be16(skb, CTA_ZONE, htons(nf_ct_zone(ct)))) - goto nla_put_failure; - } + if (ctnetlink_dump_zone_id(skb, CTA_ZONE, zone, + NF_CT_DEFAULT_ZONE_DIR) < 0) + goto nla_put_failure; if (ctnetlink_dump_id(skb, ct) < 0) goto nla_put_failure; @@ -2218,12 +2300,12 @@ static int ctnetlink_nfqueue_exp_parse(const struct nlattr * const *cda, int err; err = ctnetlink_parse_tuple(cda, tuple, CTA_EXPECT_TUPLE, - nf_ct_l3num(ct)); + nf_ct_l3num(ct), NULL); if (err < 0) return err; return ctnetlink_parse_tuple(cda, mask, CTA_EXPECT_MASK, - nf_ct_l3num(ct)); + nf_ct_l3num(ct), NULL); } static int @@ -2612,23 +2694,22 @@ static int ctnetlink_dump_exp_ct(struct sock *ctnl, struct sk_buff *skb, struct nf_conntrack_tuple tuple; struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; - u16 zone = 0; + struct nf_conntrack_zone zone; struct netlink_dump_control c = { .dump = ctnetlink_exp_ct_dump_table, .done = ctnetlink_exp_done, }; - err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3); + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, + u3, NULL); if (err < 0) return err; - if (cda[CTA_EXPECT_ZONE]) { - err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone); - if (err < 0) - return err; - } + err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone); + if (err < 0) + return err; - h = nf_conntrack_find_get(net, zone, &tuple); + h = nf_conntrack_find_get(net, &zone, &tuple); if (!h) return -ENOENT; @@ -2652,7 +2733,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, struct sk_buff *skb2; struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; - u16 zone; + struct nf_conntrack_zone zone; int err; if (nlh->nlmsg_flags & NLM_F_DUMP) { @@ -2672,16 +2753,18 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, return err; if (cda[CTA_EXPECT_TUPLE]) - err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3); + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, + u3, NULL); else if (cda[CTA_EXPECT_MASTER]) - err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3); + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, + u3, NULL); else return -EINVAL; if (err < 0) return err; - exp = nf_ct_expect_find_get(net, zone, &tuple); + exp = nf_ct_expect_find_get(net, &zone, &tuple); if (!exp) return -ENOENT; @@ -2732,8 +2815,8 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct hlist_node *next; u_int8_t u3 = nfmsg->nfgen_family; + struct nf_conntrack_zone zone; unsigned int i; - u16 zone; int err; if (cda[CTA_EXPECT_TUPLE]) { @@ -2742,12 +2825,13 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3); + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, + u3, NULL); if (err < 0) return err; /* bump usage count to 2 */ - exp = nf_ct_expect_find_get(net, zone, &tuple); + exp = nf_ct_expect_find_get(net, &zone, &tuple); if (!exp) return -ENOENT; @@ -2849,7 +2933,8 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr, return -EINVAL; err = ctnetlink_parse_tuple((const struct nlattr * const *)tb, - &nat_tuple, CTA_EXPECT_NAT_TUPLE, u3); + &nat_tuple, CTA_EXPECT_NAT_TUPLE, + u3, NULL); if (err < 0) return err; @@ -2937,7 +3022,8 @@ err_out: } static int -ctnetlink_create_expect(struct net *net, u16 zone, +ctnetlink_create_expect(struct net *net, + const struct nf_conntrack_zone *zone, const struct nlattr * const cda[], u_int8_t u3, u32 portid, int report) { @@ -2949,13 +3035,16 @@ ctnetlink_create_expect(struct net *net, u16 zone, int err; /* caller guarantees that those three CTA_EXPECT_* exist */ - err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3); + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, + u3, NULL); if (err < 0) return err; - err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK, u3); + err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK, + u3, NULL); if (err < 0) return err; - err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER, u3); + err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER, + u3, NULL); if (err < 0) return err; @@ -3011,7 +3100,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, struct nf_conntrack_expect *exp; struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; - u16 zone; + struct nf_conntrack_zone zone; int err; if (!cda[CTA_EXPECT_TUPLE] @@ -3023,19 +3112,18 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3); + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, + u3, NULL); if (err < 0) return err; spin_lock_bh(&nf_conntrack_expect_lock); - exp = __nf_ct_expect_find(net, zone, &tuple); - + exp = __nf_ct_expect_find(net, &zone, &tuple); if (!exp) { spin_unlock_bh(&nf_conntrack_expect_lock); err = -ENOENT; if (nlh->nlmsg_flags & NLM_F_CREATE) { - err = ctnetlink_create_expect(net, zone, cda, - u3, + err = ctnetlink_create_expect(net, &zone, cda, u3, NETLINK_CB(skb).portid, nlmsg_report(nlh)); } diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 825c3e3f8305..5588c7ae1ac2 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -143,13 +143,14 @@ static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct, const struct nf_conntrack_tuple *t) { const struct nf_conntrack_tuple_hash *h; + const struct nf_conntrack_zone *zone; struct nf_conntrack_expect *exp; struct nf_conn *sibling; - u16 zone = nf_ct_zone(ct); pr_debug("trying to timeout ct or exp for tuple "); nf_ct_dump_tuple(t); + zone = nf_ct_zone(ct); h = nf_conntrack_find_get(net, zone, t); if (h) { sibling = nf_ct_tuplehash_to_ctrack(h); diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c index ce3e840c8704..dff0f0cc59e4 100644 --- a/net/netfilter/nf_conntrack_seqadj.c +++ b/net/netfilter/nf_conntrack_seqadj.c @@ -103,9 +103,9 @@ static void nf_ct_sack_block_adjust(struct sk_buff *skb, ntohl(sack->end_seq), ntohl(new_end_seq)); inet_proto_csum_replace4(&tcph->check, skb, - sack->start_seq, new_start_seq, 0); + sack->start_seq, new_start_seq, false); inet_proto_csum_replace4(&tcph->check, skb, - sack->end_seq, new_end_seq, 0); + sack->end_seq, new_end_seq, false); sack->start_seq = new_start_seq; sack->end_seq = new_end_seq; sackoff += sizeof(*sack); @@ -193,8 +193,9 @@ int nf_ct_seq_adjust(struct sk_buff *skb, newseq = htonl(ntohl(tcph->seq) + seqoff); newack = htonl(ntohl(tcph->ack_seq) - ackoff); - inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0); - inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0); + inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, false); + inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, + false); pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n", ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index fc823fa5dcf5..1fb3cacc04e1 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -140,6 +140,35 @@ static inline void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) } #endif +#ifdef CONFIG_NF_CONNTRACK_ZONES +static void ct_show_zone(struct seq_file *s, const struct nf_conn *ct, + int dir) +{ + const struct nf_conntrack_zone *zone = nf_ct_zone(ct); + + if (zone->dir != dir) + return; + switch (zone->dir) { + case NF_CT_DEFAULT_ZONE_DIR: + seq_printf(s, "zone=%u ", zone->id); + break; + case NF_CT_ZONE_DIR_ORIG: + seq_printf(s, "zone-orig=%u ", zone->id); + break; + case NF_CT_ZONE_DIR_REPL: + seq_printf(s, "zone-reply=%u ", zone->id); + break; + default: + break; + } +} +#else +static inline void ct_show_zone(struct seq_file *s, const struct nf_conn *ct, + int dir) +{ +} +#endif + #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP static void ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct) { @@ -202,6 +231,8 @@ static int ct_seq_show(struct seq_file *s, void *v) print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, l3proto, l4proto); + ct_show_zone(s, ct, NF_CT_ZONE_DIR_ORIG); + if (seq_has_overflowed(s)) goto release; @@ -214,6 +245,8 @@ static int ct_seq_show(struct seq_file *s, void *v) print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, l3proto, l4proto); + ct_show_zone(s, ct, NF_CT_ZONE_DIR_REPL); + if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) goto release; @@ -228,11 +261,7 @@ static int ct_seq_show(struct seq_file *s, void *v) #endif ct_show_secctx(s, ct); - -#ifdef CONFIG_NF_CONNTRACK_ZONES - seq_printf(s, "zone=%u ", nf_ct_zone(ct)); -#endif - + ct_show_zone(s, ct, NF_CT_DEFAULT_ZONE_DIR); ct_show_delta_time(s, ct); seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)); diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 4e0b47831d43..5113dfd39df9 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -118,14 +118,13 @@ EXPORT_SYMBOL(nf_xfrm_me_harder); /* We keep an extra hash for each conntrack, for fast searching. */ static inline unsigned int -hash_by_src(const struct net *net, u16 zone, - const struct nf_conntrack_tuple *tuple) +hash_by_src(const struct net *net, const struct nf_conntrack_tuple *tuple) { unsigned int hash; /* Original src, to ensure we map it consistently if poss. */ hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32), - tuple->dst.protonum ^ zone ^ nf_conntrack_hash_rnd); + tuple->dst.protonum ^ nf_conntrack_hash_rnd); return reciprocal_scale(hash, net->ct.nat_htable_size); } @@ -185,20 +184,22 @@ same_src(const struct nf_conn *ct, /* Only called for SRC manip */ static int -find_appropriate_src(struct net *net, u16 zone, +find_appropriate_src(struct net *net, + const struct nf_conntrack_zone *zone, const struct nf_nat_l3proto *l3proto, const struct nf_nat_l4proto *l4proto, const struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *result, const struct nf_nat_range *range) { - unsigned int h = hash_by_src(net, zone, tuple); + unsigned int h = hash_by_src(net, tuple); const struct nf_conn_nat *nat; const struct nf_conn *ct; hlist_for_each_entry_rcu(nat, &net->ct.nat_bysource[h], bysource) { ct = nat->ct; - if (same_src(ct, tuple) && nf_ct_zone(ct) == zone) { + if (same_src(ct, tuple) && + nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) { /* Copy source part from reply tuple. */ nf_ct_invert_tuplepr(result, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); @@ -218,7 +219,8 @@ find_appropriate_src(struct net *net, u16 zone, * the ip with the lowest src-ip/dst-ip/proto usage. */ static void -find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, +find_best_ips_proto(const struct nf_conntrack_zone *zone, + struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, const struct nf_conn *ct, enum nf_nat_manip_type maniptype) @@ -258,7 +260,7 @@ find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, */ j = jhash2((u32 *)&tuple->src.u3, sizeof(tuple->src.u3) / sizeof(u32), range->flags & NF_NAT_RANGE_PERSISTENT ? - 0 : (__force u32)tuple->dst.u3.all[max] ^ zone); + 0 : (__force u32)tuple->dst.u3.all[max] ^ zone->id); full_range = false; for (i = 0; i <= max; i++) { @@ -297,10 +299,12 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, struct nf_conn *ct, enum nf_nat_manip_type maniptype) { + const struct nf_conntrack_zone *zone; const struct nf_nat_l3proto *l3proto; const struct nf_nat_l4proto *l4proto; struct net *net = nf_ct_net(ct); - u16 zone = nf_ct_zone(ct); + + zone = nf_ct_zone(ct); rcu_read_lock(); l3proto = __nf_nat_l3proto_find(orig_tuple->src.l3num); @@ -420,7 +424,7 @@ nf_nat_setup_info(struct nf_conn *ct, if (maniptype == NF_NAT_MANIP_SRC) { unsigned int srchash; - srchash = hash_by_src(net, nf_ct_zone(ct), + srchash = hash_by_src(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); spin_lock_bh(&nf_nat_lock); /* nf_conntrack_alter_reply might re-allocate extension aera */ diff --git a/net/netfilter/nf_nat_proto_dccp.c b/net/netfilter/nf_nat_proto_dccp.c index b8067b53ff3a..15c47b246d0d 100644 --- a/net/netfilter/nf_nat_proto_dccp.c +++ b/net/netfilter/nf_nat_proto_dccp.c @@ -69,7 +69,7 @@ dccp_manip_pkt(struct sk_buff *skb, l3proto->csum_update(skb, iphdroff, &hdr->dccph_checksum, tuple, maniptype); inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport, - 0); + false); return true; } diff --git a/net/netfilter/nf_nat_proto_tcp.c b/net/netfilter/nf_nat_proto_tcp.c index 37f5505f4529..4f8820fc5148 100644 --- a/net/netfilter/nf_nat_proto_tcp.c +++ b/net/netfilter/nf_nat_proto_tcp.c @@ -70,7 +70,7 @@ tcp_manip_pkt(struct sk_buff *skb, return true; l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype); - inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0); + inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, false); return true; } diff --git a/net/netfilter/nf_nat_proto_udp.c b/net/netfilter/nf_nat_proto_udp.c index b0ede2f0d8bc..b1e627227b6e 100644 --- a/net/netfilter/nf_nat_proto_udp.c +++ b/net/netfilter/nf_nat_proto_udp.c @@ -57,7 +57,7 @@ udp_manip_pkt(struct sk_buff *skb, l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype); inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, - 0); + false); if (!hdr->check) hdr->check = CSUM_MANGLED_0; } diff --git a/net/netfilter/nf_nat_proto_udplite.c b/net/netfilter/nf_nat_proto_udplite.c index 368f14e01e75..58340c97bd83 100644 --- a/net/netfilter/nf_nat_proto_udplite.c +++ b/net/netfilter/nf_nat_proto_udplite.c @@ -56,7 +56,7 @@ udplite_manip_pkt(struct sk_buff *skb, } l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype); - inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, 0); + inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, false); if (!hdr->check) hdr->check = CSUM_MANGLED_0; diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index d7f168527903..888b9558415e 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -17,10 +17,12 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_tcpudp.h> #include <linux/netfilter/xt_SYNPROXY.h> + #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_conntrack_seqadj.h> #include <net/netfilter/nf_conntrack_synproxy.h> +#include <net/netfilter/nf_conntrack_zones.h> int synproxy_net_id; EXPORT_SYMBOL_GPL(synproxy_net_id); @@ -186,7 +188,7 @@ unsigned int synproxy_tstamp_adjust(struct sk_buff *skb, const struct nf_conn_synproxy *synproxy) { unsigned int optoff, optend; - u32 *ptr, old; + __be32 *ptr, old; if (synproxy->tsoff == 0) return 1; @@ -214,18 +216,18 @@ unsigned int synproxy_tstamp_adjust(struct sk_buff *skb, if (op[0] == TCPOPT_TIMESTAMP && op[1] == TCPOLEN_TIMESTAMP) { if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) { - ptr = (u32 *)&op[2]; + ptr = (__be32 *)&op[2]; old = *ptr; *ptr = htonl(ntohl(*ptr) - synproxy->tsoff); } else { - ptr = (u32 *)&op[6]; + ptr = (__be32 *)&op[6]; old = *ptr; *ptr = htonl(ntohl(*ptr) + synproxy->tsoff); } inet_proto_csum_replace4(&th->check, skb, - old, *ptr, 0); + old, *ptr, false); return 1; } optoff += op[1]; @@ -352,7 +354,7 @@ static int __net_init synproxy_net_init(struct net *net) struct nf_conn *ct; int err = -ENOMEM; - ct = nf_ct_tmpl_alloc(net, 0, GFP_KERNEL); + ct = nf_ct_tmpl_alloc(net, &nf_ct_zone_dflt, GFP_KERNEL); if (!ct) goto err1; diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index c18af2f63eef..fefbf5f0b28d 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -27,8 +27,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); MODULE_DESCRIPTION("nfacct: Extended Netfilter accounting infrastructure"); -static LIST_HEAD(nfnl_acct_list); - struct nf_acct { atomic64_t pkts; atomic64_t bytes; @@ -53,6 +51,7 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const tb[]) { struct nf_acct *nfacct, *matching = NULL; + struct net *net = sock_net(nfnl); char *acct_name; unsigned int size = 0; u32 flags = 0; @@ -64,7 +63,7 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, if (strlen(acct_name) == 0) return -EINVAL; - list_for_each_entry(nfacct, &nfnl_acct_list, head) { + list_for_each_entry(nfacct, &net->nfnl_acct_list, head) { if (strncmp(nfacct->name, acct_name, NFACCT_NAME_MAX) != 0) continue; @@ -124,7 +123,7 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, be64_to_cpu(nla_get_be64(tb[NFACCT_PKTS]))); } atomic_set(&nfacct->refcnt, 1); - list_add_tail_rcu(&nfacct->head, &nfnl_acct_list); + list_add_tail_rcu(&nfacct->head, &net->nfnl_acct_list); return 0; } @@ -185,6 +184,7 @@ nla_put_failure: static int nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); struct nf_acct *cur, *last; const struct nfacct_filter *filter = cb->data; @@ -196,7 +196,7 @@ nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->args[1] = 0; rcu_read_lock(); - list_for_each_entry_rcu(cur, &nfnl_acct_list, head) { + list_for_each_entry_rcu(cur, &net->nfnl_acct_list, head) { if (last) { if (cur != last) continue; @@ -257,6 +257,7 @@ static int nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const tb[]) { + struct net *net = sock_net(nfnl); int ret = -ENOENT; struct nf_acct *cur; char *acct_name; @@ -283,7 +284,7 @@ nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb, return -EINVAL; acct_name = nla_data(tb[NFACCT_NAME]); - list_for_each_entry(cur, &nfnl_acct_list, head) { + list_for_each_entry(cur, &net->nfnl_acct_list, head) { struct sk_buff *skb2; if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0) @@ -336,19 +337,20 @@ static int nfnl_acct_del(struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const tb[]) { + struct net *net = sock_net(nfnl); char *acct_name; struct nf_acct *cur; int ret = -ENOENT; if (!tb[NFACCT_NAME]) { - list_for_each_entry(cur, &nfnl_acct_list, head) + list_for_each_entry(cur, &net->nfnl_acct_list, head) nfnl_acct_try_del(cur); return 0; } acct_name = nla_data(tb[NFACCT_NAME]); - list_for_each_entry(cur, &nfnl_acct_list, head) { + list_for_each_entry(cur, &net->nfnl_acct_list, head) { if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX) != 0) continue; @@ -394,12 +396,12 @@ static const struct nfnetlink_subsystem nfnl_acct_subsys = { MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ACCT); -struct nf_acct *nfnl_acct_find_get(const char *acct_name) +struct nf_acct *nfnl_acct_find_get(struct net *net, const char *acct_name) { struct nf_acct *cur, *acct = NULL; rcu_read_lock(); - list_for_each_entry_rcu(cur, &nfnl_acct_list, head) { + list_for_each_entry_rcu(cur, &net->nfnl_acct_list, head) { if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0) continue; @@ -422,7 +424,9 @@ EXPORT_SYMBOL_GPL(nfnl_acct_find_get); void nfnl_acct_put(struct nf_acct *acct) { - atomic_dec(&acct->refcnt); + if (atomic_dec_and_test(&acct->refcnt)) + kfree_rcu(acct, rcu_head); + module_put(THIS_MODULE); } EXPORT_SYMBOL_GPL(nfnl_acct_put); @@ -478,34 +482,59 @@ int nfnl_acct_overquota(const struct sk_buff *skb, struct nf_acct *nfacct) } EXPORT_SYMBOL_GPL(nfnl_acct_overquota); +static int __net_init nfnl_acct_net_init(struct net *net) +{ + INIT_LIST_HEAD(&net->nfnl_acct_list); + + return 0; +} + +static void __net_exit nfnl_acct_net_exit(struct net *net) +{ + struct nf_acct *cur, *tmp; + + list_for_each_entry_safe(cur, tmp, &net->nfnl_acct_list, head) { + list_del_rcu(&cur->head); + + if (atomic_dec_and_test(&cur->refcnt)) + kfree_rcu(cur, rcu_head); + } +} + +static struct pernet_operations nfnl_acct_ops = { + .init = nfnl_acct_net_init, + .exit = nfnl_acct_net_exit, +}; + static int __init nfnl_acct_init(void) { int ret; + ret = register_pernet_subsys(&nfnl_acct_ops); + if (ret < 0) { + pr_err("nfnl_acct_init: failed to register pernet ops\n"); + goto err_out; + } + pr_info("nfnl_acct: registering with nfnetlink.\n"); ret = nfnetlink_subsys_register(&nfnl_acct_subsys); if (ret < 0) { pr_err("nfnl_acct_init: cannot register with nfnetlink.\n"); - goto err_out; + goto cleanup_pernet; } return 0; + +cleanup_pernet: + unregister_pernet_subsys(&nfnl_acct_ops); err_out: return ret; } static void __exit nfnl_acct_exit(void) { - struct nf_acct *cur, *tmp; - pr_info("nfnl_acct: unregistering from nfnetlink.\n"); nfnetlink_subsys_unregister(&nfnl_acct_subsys); - - list_for_each_entry_safe(cur, tmp, &nfnl_acct_list, head) { - list_del_rcu(&cur->head); - /* We are sure that our objects have no clients at this point, - * it's safe to release them all without checking refcnt. */ - kfree_rcu(cur, rcu_head); - } + unregister_pernet_subsys(&nfnl_acct_ops); } module_init(nfnl_acct_init); diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index 17591239229f..1067fb4c1ffa 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -18,39 +18,59 @@ #include <net/netfilter/nf_tables.h> struct nft_counter { - seqlock_t lock; u64 bytes; u64 packets; }; +struct nft_counter_percpu { + struct nft_counter counter; + struct u64_stats_sync syncp; +}; + +struct nft_counter_percpu_priv { + struct nft_counter_percpu __percpu *counter; +}; + static void nft_counter_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { - struct nft_counter *priv = nft_expr_priv(expr); - - write_seqlock_bh(&priv->lock); - priv->bytes += pkt->skb->len; - priv->packets++; - write_sequnlock_bh(&priv->lock); + struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); + struct nft_counter_percpu *this_cpu; + + local_bh_disable(); + this_cpu = this_cpu_ptr(priv->counter); + u64_stats_update_begin(&this_cpu->syncp); + this_cpu->counter.bytes += pkt->skb->len; + this_cpu->counter.packets++; + u64_stats_update_end(&this_cpu->syncp); + local_bh_enable(); } static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr) { - struct nft_counter *priv = nft_expr_priv(expr); + struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); + struct nft_counter_percpu *cpu_stats; + struct nft_counter total; + u64 bytes, packets; unsigned int seq; - u64 bytes; - u64 packets; - - do { - seq = read_seqbegin(&priv->lock); - bytes = priv->bytes; - packets = priv->packets; - } while (read_seqretry(&priv->lock, seq)); - - if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(bytes))) - goto nla_put_failure; - if (nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(packets))) + int cpu; + + memset(&total, 0, sizeof(total)); + for_each_possible_cpu(cpu) { + cpu_stats = per_cpu_ptr(priv->counter, cpu); + do { + seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + bytes = cpu_stats->counter.bytes; + packets = cpu_stats->counter.packets; + } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq)); + + total.packets += packets; + total.bytes += bytes; + } + + if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes)) || + nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.packets))) goto nla_put_failure; return 0; @@ -67,23 +87,44 @@ static int nft_counter_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { - struct nft_counter *priv = nft_expr_priv(expr); + struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); + struct nft_counter_percpu __percpu *cpu_stats; + struct nft_counter_percpu *this_cpu; + + cpu_stats = netdev_alloc_pcpu_stats(struct nft_counter_percpu); + if (cpu_stats == NULL) + return ENOMEM; + + preempt_disable(); + this_cpu = this_cpu_ptr(cpu_stats); + if (tb[NFTA_COUNTER_PACKETS]) { + this_cpu->counter.packets = + be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS])); + } + if (tb[NFTA_COUNTER_BYTES]) { + this_cpu->counter.bytes = + be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES])); + } + preempt_enable(); + priv->counter = cpu_stats; + return 0; +} - if (tb[NFTA_COUNTER_PACKETS]) - priv->packets = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS])); - if (tb[NFTA_COUNTER_BYTES]) - priv->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES])); +static void nft_counter_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); - seqlock_init(&priv->lock); - return 0; + free_percpu(priv->counter); } static struct nft_expr_type nft_counter_type; static const struct nft_expr_ops nft_counter_ops = { .type = &nft_counter_type, - .size = NFT_EXPR_SIZE(sizeof(struct nft_counter)), + .size = NFT_EXPR_SIZE(sizeof(struct nft_counter_percpu_priv)), .eval = nft_counter_eval, .init = nft_counter_init, + .destroy = nft_counter_destroy, .dump = nft_counter_dump, }; diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index 435c1ccd6c0e..5d67938f8b2f 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -20,63 +20,79 @@ static DEFINE_SPINLOCK(limit_lock); struct nft_limit { + u64 last; u64 tokens; + u64 tokens_max; u64 rate; - u64 unit; - unsigned long stamp; + u64 nsecs; + u32 burst; }; -static void nft_limit_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost) { - struct nft_limit *priv = nft_expr_priv(expr); + u64 now, tokens; + s64 delta; spin_lock_bh(&limit_lock); - if (time_after_eq(jiffies, priv->stamp)) { - priv->tokens = priv->rate; - priv->stamp = jiffies + priv->unit * HZ; - } - - if (priv->tokens >= 1) { - priv->tokens--; + now = ktime_get_ns(); + tokens = limit->tokens + now - limit->last; + if (tokens > limit->tokens_max) + tokens = limit->tokens_max; + + limit->last = now; + delta = tokens - cost; + if (delta >= 0) { + limit->tokens = delta; spin_unlock_bh(&limit_lock); - return; + return false; } + limit->tokens = tokens; spin_unlock_bh(&limit_lock); - - regs->verdict.code = NFT_BREAK; + return true; } -static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = { - [NFTA_LIMIT_RATE] = { .type = NLA_U64 }, - [NFTA_LIMIT_UNIT] = { .type = NLA_U64 }, -}; - -static int nft_limit_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, +static int nft_limit_init(struct nft_limit *limit, const struct nlattr * const tb[]) { - struct nft_limit *priv = nft_expr_priv(expr); + u64 unit; if (tb[NFTA_LIMIT_RATE] == NULL || tb[NFTA_LIMIT_UNIT] == NULL) return -EINVAL; - priv->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE])); - priv->unit = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT])); - priv->stamp = jiffies + priv->unit * HZ; - priv->tokens = priv->rate; + limit->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE])); + unit = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT])); + limit->nsecs = unit * NSEC_PER_SEC; + if (limit->rate == 0 || limit->nsecs < unit) + return -EOVERFLOW; + limit->tokens = limit->tokens_max = limit->nsecs; + + if (tb[NFTA_LIMIT_BURST]) { + u64 rate; + + limit->burst = ntohl(nla_get_be32(tb[NFTA_LIMIT_BURST])); + + rate = limit->rate + limit->burst; + if (rate < limit->rate) + return -EOVERFLOW; + + limit->rate = rate; + } + limit->last = ktime_get_ns(); + return 0; } -static int nft_limit_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_limit_dump(struct sk_buff *skb, const struct nft_limit *limit, + enum nft_limit_type type) { - const struct nft_limit *priv = nft_expr_priv(expr); + u64 secs = div_u64(limit->nsecs, NSEC_PER_SEC); + u64 rate = limit->rate - limit->burst; - if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(priv->rate))) - goto nla_put_failure; - if (nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(priv->unit))) + if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(rate)) || + nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(secs)) || + nla_put_be32(skb, NFTA_LIMIT_BURST, htonl(limit->burst)) || + nla_put_be32(skb, NFTA_LIMIT_TYPE, htonl(type))) goto nla_put_failure; return 0; @@ -84,18 +100,114 @@ nla_put_failure: return -1; } +struct nft_limit_pkts { + struct nft_limit limit; + u64 cost; +}; + +static void nft_limit_pkts_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_limit_pkts *priv = nft_expr_priv(expr); + + if (nft_limit_eval(&priv->limit, priv->cost)) + regs->verdict.code = NFT_BREAK; +} + +static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = { + [NFTA_LIMIT_RATE] = { .type = NLA_U64 }, + [NFTA_LIMIT_UNIT] = { .type = NLA_U64 }, + [NFTA_LIMIT_BURST] = { .type = NLA_U32 }, + [NFTA_LIMIT_TYPE] = { .type = NLA_U32 }, +}; + +static int nft_limit_pkts_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_limit_pkts *priv = nft_expr_priv(expr); + int err; + + err = nft_limit_init(&priv->limit, tb); + if (err < 0) + return err; + + priv->cost = div_u64(priv->limit.nsecs, priv->limit.rate); + return 0; +} + +static int nft_limit_pkts_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_limit_pkts *priv = nft_expr_priv(expr); + + return nft_limit_dump(skb, &priv->limit, NFT_LIMIT_PKTS); +} + static struct nft_expr_type nft_limit_type; -static const struct nft_expr_ops nft_limit_ops = { +static const struct nft_expr_ops nft_limit_pkts_ops = { + .type = &nft_limit_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_limit_pkts)), + .eval = nft_limit_pkts_eval, + .init = nft_limit_pkts_init, + .dump = nft_limit_pkts_dump, +}; + +static void nft_limit_pkt_bytes_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_limit *priv = nft_expr_priv(expr); + u64 cost = div_u64(priv->nsecs * pkt->skb->len, priv->rate); + + if (nft_limit_eval(priv, cost)) + regs->verdict.code = NFT_BREAK; +} + +static int nft_limit_pkt_bytes_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_limit *priv = nft_expr_priv(expr); + + return nft_limit_init(priv, tb); +} + +static int nft_limit_pkt_bytes_dump(struct sk_buff *skb, + const struct nft_expr *expr) +{ + const struct nft_limit *priv = nft_expr_priv(expr); + + return nft_limit_dump(skb, priv, NFT_LIMIT_PKT_BYTES); +} + +static const struct nft_expr_ops nft_limit_pkt_bytes_ops = { .type = &nft_limit_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_limit)), - .eval = nft_limit_eval, - .init = nft_limit_init, - .dump = nft_limit_dump, + .eval = nft_limit_pkt_bytes_eval, + .init = nft_limit_pkt_bytes_init, + .dump = nft_limit_pkt_bytes_dump, }; +static const struct nft_expr_ops * +nft_limit_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + if (tb[NFTA_LIMIT_TYPE] == NULL) + return &nft_limit_pkts_ops; + + switch (ntohl(nla_get_be32(tb[NFTA_LIMIT_TYPE]))) { + case NFT_LIMIT_PKTS: + return &nft_limit_pkts_ops; + case NFT_LIMIT_PKT_BYTES: + return &nft_limit_pkt_bytes_ops; + } + return ERR_PTR(-EOPNOTSUPP); +} + static struct nft_expr_type nft_limit_type __read_mostly = { .name = "limit", - .ops = &nft_limit_ops, + .select_ops = nft_limit_select_ops, .policy = nft_limit_policy, .maxattr = NFTA_LIMIT_MAX, .flags = NFT_EXPR_STATEFUL, diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 94fb3b27a2c5..09b4b07eb676 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -9,6 +9,7 @@ */ #include <linux/kernel.h> +#include <linux/if_vlan.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netlink.h> @@ -17,6 +18,53 @@ #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables.h> +/* add vlan header into the user buffer for if tag was removed by offloads */ +static bool +nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len) +{ + int mac_off = skb_mac_header(skb) - skb->data; + u8 vlan_len, *vlanh, *dst_u8 = (u8 *) d; + struct vlan_ethhdr veth; + + vlanh = (u8 *) &veth; + if (offset < ETH_HLEN) { + u8 ethlen = min_t(u8, len, ETH_HLEN - offset); + + if (skb_copy_bits(skb, mac_off, &veth, ETH_HLEN)) + return false; + + veth.h_vlan_proto = skb->vlan_proto; + + memcpy(dst_u8, vlanh + offset, ethlen); + + len -= ethlen; + if (len == 0) + return true; + + dst_u8 += ethlen; + offset = ETH_HLEN; + } else if (offset >= VLAN_ETH_HLEN) { + offset -= VLAN_HLEN; + goto skip; + } + + veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb)); + veth.h_vlan_encapsulated_proto = skb->protocol; + + vlanh += offset; + + vlan_len = min_t(u8, len, VLAN_ETH_HLEN - offset); + memcpy(dst_u8, vlanh, vlan_len); + + len -= vlan_len; + if (!len) + return true; + + dst_u8 += vlan_len; + skip: + return skb_copy_bits(skb, offset + mac_off, dst_u8, len) == 0; +} + static void nft_payload_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -26,10 +74,18 @@ static void nft_payload_eval(const struct nft_expr *expr, u32 *dest = ®s->data[priv->dreg]; int offset; + dest[priv->len / NFT_REG32_SIZE] = 0; switch (priv->base) { case NFT_PAYLOAD_LL_HEADER: if (!skb_mac_header_was_set(skb)) goto err; + + if (skb_vlan_tag_present(skb)) { + if (!nft_payload_copy_vlan(dest, skb, + priv->offset, priv->len)) + goto err; + return; + } offset = skb_mac_header(skb) - skb->data; break; case NFT_PAYLOAD_NETWORK_HEADER: @@ -43,7 +99,6 @@ static void nft_payload_eval(const struct nft_expr *expr, } offset += priv->offset; - dest[priv->len / NFT_REG32_SIZE] = 0; if (skb_copy_bits(skb, offset, dest, priv->len) < 0) goto err; return; diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 43ddeee404e9..8e524898ccea 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -181,9 +181,23 @@ out: #endif } +static u16 xt_ct_flags_to_dir(const struct xt_ct_target_info_v1 *info) +{ + switch (info->flags & (XT_CT_ZONE_DIR_ORIG | + XT_CT_ZONE_DIR_REPL)) { + case XT_CT_ZONE_DIR_ORIG: + return NF_CT_ZONE_DIR_ORIG; + case XT_CT_ZONE_DIR_REPL: + return NF_CT_ZONE_DIR_REPL; + default: + return NF_CT_DEFAULT_ZONE_DIR; + } +} + static int xt_ct_tg_check(const struct xt_tgchk_param *par, struct xt_ct_target_info_v1 *info) { + struct nf_conntrack_zone zone; struct nf_conn *ct; int ret = -EOPNOTSUPP; @@ -193,7 +207,9 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, } #ifndef CONFIG_NF_CONNTRACK_ZONES - if (info->zone) + if (info->zone || info->flags & (XT_CT_ZONE_DIR_ORIG | + XT_CT_ZONE_DIR_REPL | + XT_CT_ZONE_MARK)) goto err1; #endif @@ -201,7 +217,13 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, if (ret < 0) goto err1; - ct = nf_ct_tmpl_alloc(par->net, info->zone, GFP_KERNEL); + memset(&zone, 0, sizeof(zone)); + zone.id = info->zone; + zone.dir = xt_ct_flags_to_dir(info); + if (info->flags & XT_CT_ZONE_MARK) + zone.flags |= NF_CT_FLAG_MARK; + + ct = nf_ct_tmpl_alloc(par->net, &zone, GFP_KERNEL); if (!ct) { ret = -ENOMEM; goto err2; diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 8c3190e2fc6a..8c02501a530f 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -144,7 +144,7 @@ tcpmss_mangle_packet(struct sk_buff *skb, inet_proto_csum_replace2(&tcph->check, skb, htons(oldmss), htons(newmss), - 0); + false); return 0; } } @@ -185,18 +185,18 @@ tcpmss_mangle_packet(struct sk_buff *skb, memmove(opt + TCPOLEN_MSS, opt, len - sizeof(struct tcphdr)); inet_proto_csum_replace2(&tcph->check, skb, - htons(len), htons(len + TCPOLEN_MSS), 1); + htons(len), htons(len + TCPOLEN_MSS), true); opt[0] = TCPOPT_MSS; opt[1] = TCPOLEN_MSS; opt[2] = (newmss & 0xff00) >> 8; opt[3] = newmss & 0x00ff; - inet_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), 0); + inet_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), false); oldval = ((__be16 *)tcph)[6]; tcph->doff += TCPOLEN_MSS/4; inet_proto_csum_replace2(&tcph->check, skb, - oldval, ((__be16 *)tcph)[6], 0); + oldval, ((__be16 *)tcph)[6], false); return TCPOLEN_MSS; } diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c index 625fa1d636a0..eb92bffff11c 100644 --- a/net/netfilter/xt_TCPOPTSTRIP.c +++ b/net/netfilter/xt_TCPOPTSTRIP.c @@ -80,7 +80,7 @@ tcpoptstrip_mangle_packet(struct sk_buff *skb, n <<= 8; } inet_proto_csum_replace2(&tcph->check, skb, htons(o), - htons(n), 0); + htons(n), false); } memset(opt + i, TCPOPT_NOP, optl); } diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index c5d6556dbc5e..fd980aa7715d 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -10,26 +10,15 @@ * modify it under the terms of the GNU General Public License * version 2 or later, as published by the Free Software Foundation. */ -#include <linux/ip.h> #include <linux/module.h> -#include <linux/percpu.h> -#include <linux/route.h> #include <linux/skbuff.h> -#include <linux/notifier.h> -#include <net/checksum.h> -#include <net/icmp.h> -#include <net/ip.h> -#include <net/ipv6.h> -#include <net/ip6_route.h> -#include <net/route.h> +#include <linux/route.h> #include <linux/netfilter/x_tables.h> +#include <net/route.h> +#include <net/netfilter/ipv4/nf_dup_ipv4.h> +#include <net/netfilter/ipv6/nf_dup_ipv6.h> #include <linux/netfilter/xt_TEE.h> -#if IS_ENABLED(CONFIG_NF_CONNTRACK) -# define WITH_CONNTRACK 1 -# include <net/netfilter/nf_conntrack.h> -#endif - struct xt_tee_priv { struct notifier_block notifier; struct xt_tee_tginfo *tginfo; @@ -38,161 +27,24 @@ struct xt_tee_priv { static const union nf_inet_addr tee_zero_address; -static struct net *pick_net(struct sk_buff *skb) -{ -#ifdef CONFIG_NET_NS - const struct dst_entry *dst; - - if (skb->dev != NULL) - return dev_net(skb->dev); - dst = skb_dst(skb); - if (dst != NULL && dst->dev != NULL) - return dev_net(dst->dev); -#endif - return &init_net; -} - -static bool -tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info) -{ - const struct iphdr *iph = ip_hdr(skb); - struct net *net = pick_net(skb); - struct rtable *rt; - struct flowi4 fl4; - - memset(&fl4, 0, sizeof(fl4)); - if (info->priv) { - if (info->priv->oif == -1) - return false; - fl4.flowi4_oif = info->priv->oif; - } - fl4.daddr = info->gw.ip; - fl4.flowi4_tos = RT_TOS(iph->tos); - fl4.flowi4_scope = RT_SCOPE_UNIVERSE; - fl4.flowi4_flags = FLOWI_FLAG_KNOWN_NH; - rt = ip_route_output_key(net, &fl4); - if (IS_ERR(rt)) - return false; - - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - skb->dev = rt->dst.dev; - skb->protocol = htons(ETH_P_IP); - return true; -} - static unsigned int tee_tg4(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tee_tginfo *info = par->targinfo; - struct iphdr *iph; - if (__this_cpu_read(nf_skb_duplicated)) - return XT_CONTINUE; - /* - * Copy the skb, and route the copy. Will later return %XT_CONTINUE for - * the original skb, which should continue on its way as if nothing has - * happened. The copy should be independently delivered to the TEE - * --gateway. - */ - skb = pskb_copy(skb, GFP_ATOMIC); - if (skb == NULL) - return XT_CONTINUE; - -#ifdef WITH_CONNTRACK - /* Avoid counting cloned packets towards the original connection. */ - nf_conntrack_put(skb->nfct); - skb->nfct = &nf_ct_untracked_get()->ct_general; - skb->nfctinfo = IP_CT_NEW; - nf_conntrack_get(skb->nfct); -#endif - /* - * If we are in PREROUTING/INPUT, the checksum must be recalculated - * since the length could have changed as a result of defragmentation. - * - * We also decrease the TTL to mitigate potential TEE loops - * between two hosts. - * - * Set %IP_DF so that the original source is notified of a potentially - * decreased MTU on the clone route. IPv6 does this too. - */ - iph = ip_hdr(skb); - iph->frag_off |= htons(IP_DF); - if (par->hooknum == NF_INET_PRE_ROUTING || - par->hooknum == NF_INET_LOCAL_IN) - --iph->ttl; - ip_send_check(iph); + nf_dup_ipv4(skb, par->hooknum, &info->gw.in, info->priv->oif); - if (tee_tg_route4(skb, info)) { - __this_cpu_write(nf_skb_duplicated, true); - ip_local_out(skb); - __this_cpu_write(nf_skb_duplicated, false); - } else { - kfree_skb(skb); - } return XT_CONTINUE; } -#if IS_ENABLED(CONFIG_IPV6) -static bool -tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info) -{ - const struct ipv6hdr *iph = ipv6_hdr(skb); - struct net *net = pick_net(skb); - struct dst_entry *dst; - struct flowi6 fl6; - - memset(&fl6, 0, sizeof(fl6)); - if (info->priv) { - if (info->priv->oif == -1) - return false; - fl6.flowi6_oif = info->priv->oif; - } - fl6.daddr = info->gw.in6; - fl6.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) | - (iph->flow_lbl[1] << 8) | iph->flow_lbl[2]; - fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH; - dst = ip6_route_output(net, NULL, &fl6); - if (dst->error) { - dst_release(dst); - return false; - } - skb_dst_drop(skb); - skb_dst_set(skb, dst); - skb->dev = dst->dev; - skb->protocol = htons(ETH_P_IPV6); - return true; -} - +#if IS_ENABLED(CONFIG_NF_DUP_IPV6) static unsigned int tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tee_tginfo *info = par->targinfo; - if (__this_cpu_read(nf_skb_duplicated)) - return XT_CONTINUE; - skb = pskb_copy(skb, GFP_ATOMIC); - if (skb == NULL) - return XT_CONTINUE; + nf_dup_ipv6(skb, par->hooknum, &info->gw.in6, info->priv->oif); -#ifdef WITH_CONNTRACK - nf_conntrack_put(skb->nfct); - skb->nfct = &nf_ct_untracked_get()->ct_general; - skb->nfctinfo = IP_CT_NEW; - nf_conntrack_get(skb->nfct); -#endif - if (par->hooknum == NF_INET_PRE_ROUTING || - par->hooknum == NF_INET_LOCAL_IN) { - struct ipv6hdr *iph = ipv6_hdr(skb); - --iph->hop_limit; - } - if (tee_tg_route6(skb, info)) { - __this_cpu_write(nf_skb_duplicated, true); - ip6_local_out(skb); - __this_cpu_write(nf_skb_duplicated, false); - } else { - kfree_skb(skb); - } return XT_CONTINUE; } #endif @@ -277,7 +129,7 @@ static struct xt_target tee_tg_reg[] __read_mostly = { .destroy = tee_tg_destroy, .me = THIS_MODULE, }, -#if IS_ENABLED(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_NF_DUP_IPV6) { .name = "TEE", .revision = 1, diff --git a/net/netfilter/xt_connlabel.c b/net/netfilter/xt_connlabel.c index 9f8719df2001..bb9cbeb18868 100644 --- a/net/netfilter/xt_connlabel.c +++ b/net/netfilter/xt_connlabel.c @@ -42,10 +42,6 @@ static int connlabel_mt_check(const struct xt_mtchk_param *par) XT_CONNLABEL_OP_SET; struct xt_connlabel_mtinfo *info = par->matchinfo; int ret; - size_t words; - - if (info->bit > XT_CONNLABEL_MAXBIT) - return -ERANGE; if (info->options & ~options) { pr_err("Unknown options in mask %x\n", info->options); @@ -59,19 +55,15 @@ static int connlabel_mt_check(const struct xt_mtchk_param *par) return ret; } - par->net->ct.labels_used++; - words = BITS_TO_LONGS(info->bit+1); - if (words > par->net->ct.label_words) - par->net->ct.label_words = words; - + ret = nf_connlabels_get(par->net, info->bit + 1); + if (ret < 0) + nf_ct_l3proto_module_put(par->family); return ret; } static void connlabel_mt_destroy(const struct xt_mtdtor_param *par) { - par->net->ct.labels_used--; - if (par->net->ct.labels_used == 0) - par->net->ct.label_words = 0; + nf_connlabels_put(par->net); nf_ct_l3proto_module_put(par->family); } diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 29ba6218a820..075d89d94d28 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -134,7 +134,7 @@ static bool add_hlist(struct hlist_head *head, static unsigned int check_hlist(struct net *net, struct hlist_head *head, const struct nf_conntrack_tuple *tuple, - u16 zone, + const struct nf_conntrack_zone *zone, bool *addit) { const struct nf_conntrack_tuple_hash *found; @@ -201,7 +201,7 @@ static unsigned int count_tree(struct net *net, struct rb_root *root, const struct nf_conntrack_tuple *tuple, const union nf_inet_addr *addr, const union nf_inet_addr *mask, - u8 family, u16 zone) + u8 family, const struct nf_conntrack_zone *zone) { struct xt_connlimit_rb *gc_nodes[CONNLIMIT_GC_MAX_NODES]; struct rb_node **rbnode, *parent; @@ -290,7 +290,8 @@ static int count_them(struct net *net, const struct nf_conntrack_tuple *tuple, const union nf_inet_addr *addr, const union nf_inet_addr *mask, - u_int8_t family, u16 zone) + u_int8_t family, + const struct nf_conntrack_zone *zone) { struct rb_root *root; int count; @@ -321,10 +322,10 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) union nf_inet_addr addr; struct nf_conntrack_tuple tuple; const struct nf_conntrack_tuple *tuple_ptr = &tuple; + const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt; enum ip_conntrack_info ctinfo; const struct nf_conn *ct; unsigned int connections; - u16 zone = NF_CT_DEFAULT_ZONE; ct = nf_ct_get(skb, &ctinfo); if (ct != NULL) { diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c index 8c646ed9c921..3048a7e3a90a 100644 --- a/net/netfilter/xt_nfacct.c +++ b/net/netfilter/xt_nfacct.c @@ -37,7 +37,7 @@ nfacct_mt_checkentry(const struct xt_mtchk_param *par) struct xt_nfacct_match_info *info = par->matchinfo; struct nf_acct *nfacct; - nfacct = nfnl_acct_find_get(info->name); + nfacct = nfnl_acct_find_get(par->net, info->name); if (nfacct == NULL) { pr_info("xt_nfacct: accounting object with name `%s' " "does not exists\n", info->name); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 67d210477863..a774985489e2 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2401,7 +2401,7 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) * sendmsg(), but that's what we've got... */ if (netlink_tx_is_mmaped(sk) && - msg->msg_iter.type == ITER_IOVEC && + iter_is_iovec(&msg->msg_iter) && msg->msg_iter.nr_segs == 1 && msg->msg_iter.iov->iov_base == NULL) { err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 95af2d24d5be..943889b87a34 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -351,6 +351,20 @@ int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, __u8 *payload) } EXPORT_SYMBOL(nci_prop_cmd); +int nci_core_reset(struct nci_dev *ndev) +{ + return __nci_request(ndev, nci_reset_req, 0, + msecs_to_jiffies(NCI_RESET_TIMEOUT)); +} +EXPORT_SYMBOL(nci_core_reset); + +int nci_core_init(struct nci_dev *ndev) +{ + return __nci_request(ndev, nci_init_req, 0, + msecs_to_jiffies(NCI_INIT_TIMEOUT)); +} +EXPORT_SYMBOL(nci_core_init); + static int nci_open_device(struct nci_dev *ndev) { int rc = 0; @@ -388,6 +402,10 @@ static int nci_open_device(struct nci_dev *ndev) msecs_to_jiffies(NCI_INIT_TIMEOUT)); } + if (ndev->ops->post_setup) { + rc = ndev->ops->post_setup(ndev); + } + if (!rc) { rc = __nci_request(ndev, nci_init_complete_req, 0, msecs_to_jiffies(NCI_INIT_TIMEOUT)); diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index af002df640c7..609f92283d1b 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -233,7 +233,7 @@ int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd, r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); - if (r == NCI_STATUS_OK) + if (r == NCI_STATUS_OK && skb) *skb = conn_info->rx_skb; return r; diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index f85f37ed19b2..853172c27f68 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -63,6 +63,8 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = { [NFC_ATTR_FIRMWARE_NAME] = { .type = NLA_STRING, .len = NFC_FIRMWARE_NAME_MAXSIZE }, [NFC_ATTR_SE_APDU] = { .type = NLA_BINARY }, + [NFC_ATTR_VENDOR_DATA] = { .type = NLA_BINARY }, + }; static const struct nla_policy nfc_sdp_genl_policy[NFC_SDP_ATTR_MAX + 1] = { @@ -1503,7 +1505,7 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb, u32 dev_idx, vid, subcmd; u8 *data; size_t data_len; - int i; + int i, err; if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_VENDOR_ID] || @@ -1518,12 +1520,13 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb, if (!dev || !dev->vendor_cmds || !dev->n_vendor_cmds) return -ENODEV; - data = nla_data(info->attrs[NFC_ATTR_VENDOR_DATA]); - if (data) { + if (info->attrs[NFC_ATTR_VENDOR_DATA]) { + data = nla_data(info->attrs[NFC_ATTR_VENDOR_DATA]); data_len = nla_len(info->attrs[NFC_ATTR_VENDOR_DATA]); if (data_len == 0) return -EINVAL; } else { + data = NULL; data_len = 0; } @@ -1533,12 +1536,92 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb, if (cmd->vendor_id != vid || cmd->subcmd != subcmd) continue; - return cmd->doit(dev, data, data_len); + dev->cur_cmd_info = info; + err = cmd->doit(dev, data, data_len); + dev->cur_cmd_info = NULL; + return err; } return -EOPNOTSUPP; } +/* message building helper */ +static inline void *nfc_hdr_put(struct sk_buff *skb, u32 portid, u32 seq, + int flags, u8 cmd) +{ + /* since there is no private header just add the generic one */ + return genlmsg_put(skb, portid, seq, &nfc_genl_family, flags, cmd); +} + +static struct sk_buff * +__nfc_alloc_vendor_cmd_skb(struct nfc_dev *dev, int approxlen, + u32 portid, u32 seq, + enum nfc_attrs attr, + u32 oui, u32 subcmd, gfp_t gfp) +{ + struct sk_buff *skb; + void *hdr; + + skb = nlmsg_new(approxlen + 100, gfp); + if (!skb) + return NULL; + + hdr = nfc_hdr_put(skb, portid, seq, 0, NFC_CMD_VENDOR); + if (!hdr) { + kfree_skb(skb); + return NULL; + } + + if (nla_put_u32(skb, NFC_ATTR_DEVICE_INDEX, dev->idx)) + goto nla_put_failure; + if (nla_put_u32(skb, NFC_ATTR_VENDOR_ID, oui)) + goto nla_put_failure; + if (nla_put_u32(skb, NFC_ATTR_VENDOR_SUBCMD, subcmd)) + goto nla_put_failure; + + ((void **)skb->cb)[0] = dev; + ((void **)skb->cb)[1] = hdr; + + return skb; + +nla_put_failure: + kfree_skb(skb); + return NULL; +} + +struct sk_buff *__nfc_alloc_vendor_cmd_reply_skb(struct nfc_dev *dev, + enum nfc_attrs attr, + u32 oui, u32 subcmd, + int approxlen) +{ + if (WARN_ON(!dev->cur_cmd_info)) + return NULL; + + return __nfc_alloc_vendor_cmd_skb(dev, approxlen, + dev->cur_cmd_info->snd_portid, + dev->cur_cmd_info->snd_seq, attr, + oui, subcmd, GFP_KERNEL); +} +EXPORT_SYMBOL(__nfc_alloc_vendor_cmd_reply_skb); + +int nfc_vendor_cmd_reply(struct sk_buff *skb) +{ + struct nfc_dev *dev = ((void **)skb->cb)[0]; + void *hdr = ((void **)skb->cb)[1]; + + /* clear CB data for netlink core to own from now on */ + memset(skb->cb, 0, sizeof(skb->cb)); + + if (WARN_ON(!dev->cur_cmd_info)) { + kfree_skb(skb); + return -EINVAL; + } + + genlmsg_end(skb, hdr); + return genlmsg_reply(skb, dev->cur_cmd_info); +} +EXPORT_SYMBOL(nfc_vendor_cmd_reply); + static const struct genl_ops nfc_genl_ops[] = { { .cmd = NFC_CMD_GET_DEVICE, diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index 422dc0567de9..af7cdef42066 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -31,6 +31,17 @@ config OPENVSWITCH If unsure, say N. +config OPENVSWITCH_CONNTRACK + bool "Open vSwitch conntrack action support" + depends on OPENVSWITCH + depends on NF_CONNTRACK + default OPENVSWITCH + ---help--- + If you say Y here, then Open vSwitch module will be able to pass + packets through conntrack. + + Say N to exclude this support and reduce the binary size. + config OPENVSWITCH_GRE tristate "Open vSwitch GRE tunneling support" depends on OPENVSWITCH @@ -59,7 +70,7 @@ config OPENVSWITCH_VXLAN config OPENVSWITCH_GENEVE tristate "Open vSwitch Geneve tunneling support" depends on OPENVSWITCH - depends on GENEVE_CORE + depends on GENEVE default OPENVSWITCH ---help--- If you say Y here, then the Open vSwitch will be able create geneve vport. diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile index 6e1701de04d8..5b5913b06f54 100644 --- a/net/openvswitch/Makefile +++ b/net/openvswitch/Makefile @@ -15,6 +15,8 @@ openvswitch-y := \ vport-internal_dev.o \ vport-netdev.o +openvswitch-$(CONFIG_OPENVSWITCH_CONNTRACK) += conntrack.o + obj-$(CONFIG_OPENVSWITCH_VXLAN)+= vport-vxlan.o obj-$(CONFIG_OPENVSWITCH_GENEVE)+= vport-geneve.o obj-$(CONFIG_OPENVSWITCH_GRE) += vport-gre.o diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 14da52ddd327..4487543806bb 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -22,6 +22,7 @@ #include <linux/in.h> #include <linux/ip.h> #include <linux/openvswitch.h> +#include <linux/netfilter_ipv6.h> #include <linux/sctp.h> #include <linux/tcp.h> #include <linux/udp.h> @@ -29,8 +30,10 @@ #include <linux/if_arp.h> #include <linux/if_vlan.h> +#include <net/dst.h> #include <net/ip.h> #include <net/ipv6.h> +#include <net/ip6_fib.h> #include <net/checksum.h> #include <net/dsfield.h> #include <net/mpls.h> @@ -38,6 +41,7 @@ #include "datapath.h" #include "flow.h" +#include "conntrack.h" #include "vport.h" static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, @@ -52,6 +56,20 @@ struct deferred_action { struct sw_flow_key pkt_key; }; +#define MAX_L2_LEN (VLAN_ETH_HLEN + 3 * MPLS_HLEN) +struct ovs_frag_data { + unsigned long dst; + struct vport *vport; + struct ovs_skb_cb cb; + __be16 inner_protocol; + __u16 vlan_tci; + __be16 vlan_proto; + unsigned int l2_len; + u8 l2_data[MAX_L2_LEN]; +}; + +static DEFINE_PER_CPU(struct ovs_frag_data, ovs_frag_data_storage); + #define DEFERRED_ACTION_FIFO_SIZE 10 struct action_fifo { int head; @@ -185,10 +203,6 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key, return 0; } -/* 'KEY' must not have any bits set outside of the 'MASK' */ -#define MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK))) -#define SET_MASKED(OLD, KEY, MASK) ((OLD) = MASKED(OLD, KEY, MASK)) - static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key, const __be32 *mpls_lse, const __be32 *mask) { @@ -201,7 +215,7 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key, return err; stack = (__be32 *)skb_mpls_header(skb); - lse = MASKED(*stack, *mpls_lse, *mask); + lse = OVS_MASKED(*stack, *mpls_lse, *mask); if (skb->ip_summed == CHECKSUM_COMPLETE) { __be32 diff[] = { ~(*stack), lse }; @@ -244,9 +258,9 @@ static void ether_addr_copy_masked(u8 *dst_, const u8 *src_, const u8 *mask_) const u16 *src = (const u16 *)src_; const u16 *mask = (const u16 *)mask_; - SET_MASKED(dst[0], src[0], mask[0]); - SET_MASKED(dst[1], src[1], mask[1]); - SET_MASKED(dst[2], src[2], mask[2]); + OVS_SET_MASKED(dst[0], src[0], mask[0]); + OVS_SET_MASKED(dst[1], src[1], mask[1]); + OVS_SET_MASKED(dst[2], src[2], mask[2]); } static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key, @@ -284,14 +298,14 @@ static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh, if (nh->protocol == IPPROTO_TCP) { if (likely(transport_len >= sizeof(struct tcphdr))) inet_proto_csum_replace4(&tcp_hdr(skb)->check, skb, - addr, new_addr, 1); + addr, new_addr, true); } else if (nh->protocol == IPPROTO_UDP) { if (likely(transport_len >= sizeof(struct udphdr))) { struct udphdr *uh = udp_hdr(skb); if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) { inet_proto_csum_replace4(&uh->check, skb, - addr, new_addr, 1); + addr, new_addr, true); if (!uh->check) uh->check = CSUM_MANGLED_0; } @@ -316,14 +330,14 @@ static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto, if (l4_proto == NEXTHDR_TCP) { if (likely(transport_len >= sizeof(struct tcphdr))) inet_proto_csum_replace16(&tcp_hdr(skb)->check, skb, - addr, new_addr, 1); + addr, new_addr, true); } else if (l4_proto == NEXTHDR_UDP) { if (likely(transport_len >= sizeof(struct udphdr))) { struct udphdr *uh = udp_hdr(skb); if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) { inet_proto_csum_replace16(&uh->check, skb, - addr, new_addr, 1); + addr, new_addr, true); if (!uh->check) uh->check = CSUM_MANGLED_0; } @@ -331,17 +345,17 @@ static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto, } else if (l4_proto == NEXTHDR_ICMP) { if (likely(transport_len >= sizeof(struct icmp6hdr))) inet_proto_csum_replace16(&icmp6_hdr(skb)->icmp6_cksum, - skb, addr, new_addr, 1); + skb, addr, new_addr, true); } } static void mask_ipv6_addr(const __be32 old[4], const __be32 addr[4], const __be32 mask[4], __be32 masked[4]) { - masked[0] = MASKED(old[0], addr[0], mask[0]); - masked[1] = MASKED(old[1], addr[1], mask[1]); - masked[2] = MASKED(old[2], addr[2], mask[2]); - masked[3] = MASKED(old[3], addr[3], mask[3]); + masked[0] = OVS_MASKED(old[0], addr[0], mask[0]); + masked[1] = OVS_MASKED(old[1], addr[1], mask[1]); + masked[2] = OVS_MASKED(old[2], addr[2], mask[2]); + masked[3] = OVS_MASKED(old[3], addr[3], mask[3]); } static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto, @@ -358,15 +372,15 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto, static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask) { /* Bits 21-24 are always unmasked, so this retains their values. */ - SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16)); - SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8)); - SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask); + OVS_SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16)); + OVS_SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8)); + OVS_SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask); } static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl, u8 mask) { - new_ttl = MASKED(nh->ttl, new_ttl, mask); + new_ttl = OVS_MASKED(nh->ttl, new_ttl, mask); csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8)); nh->ttl = new_ttl; @@ -392,7 +406,7 @@ static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *flow_key, * makes sense to check if the value actually changed. */ if (mask->ipv4_src) { - new_addr = MASKED(nh->saddr, key->ipv4_src, mask->ipv4_src); + new_addr = OVS_MASKED(nh->saddr, key->ipv4_src, mask->ipv4_src); if (unlikely(new_addr != nh->saddr)) { set_ip_addr(skb, nh, &nh->saddr, new_addr); @@ -400,7 +414,7 @@ static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *flow_key, } } if (mask->ipv4_dst) { - new_addr = MASKED(nh->daddr, key->ipv4_dst, mask->ipv4_dst); + new_addr = OVS_MASKED(nh->daddr, key->ipv4_dst, mask->ipv4_dst); if (unlikely(new_addr != nh->daddr)) { set_ip_addr(skb, nh, &nh->daddr, new_addr); @@ -488,7 +502,8 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key, *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL); } if (mask->ipv6_hlimit) { - SET_MASKED(nh->hop_limit, key->ipv6_hlimit, mask->ipv6_hlimit); + OVS_SET_MASKED(nh->hop_limit, key->ipv6_hlimit, + mask->ipv6_hlimit); flow_key->ip.ttl = nh->hop_limit; } return 0; @@ -498,7 +513,7 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key, static void set_tp_port(struct sk_buff *skb, __be16 *port, __be16 new_port, __sum16 *check) { - inet_proto_csum_replace2(check, skb, *port, new_port, 0); + inet_proto_csum_replace2(check, skb, *port, new_port, false); *port = new_port; } @@ -517,8 +532,8 @@ static int set_udp(struct sk_buff *skb, struct sw_flow_key *flow_key, uh = udp_hdr(skb); /* Either of the masks is non-zero, so do not bother checking them. */ - src = MASKED(uh->source, key->udp_src, mask->udp_src); - dst = MASKED(uh->dest, key->udp_dst, mask->udp_dst); + src = OVS_MASKED(uh->source, key->udp_src, mask->udp_src); + dst = OVS_MASKED(uh->dest, key->udp_dst, mask->udp_dst); if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) { if (likely(src != uh->source)) { @@ -558,12 +573,12 @@ static int set_tcp(struct sk_buff *skb, struct sw_flow_key *flow_key, return err; th = tcp_hdr(skb); - src = MASKED(th->source, key->tcp_src, mask->tcp_src); + src = OVS_MASKED(th->source, key->tcp_src, mask->tcp_src); if (likely(src != th->source)) { set_tp_port(skb, &th->source, src, &th->check); flow_key->tp.src = src; } - dst = MASKED(th->dest, key->tcp_dst, mask->tcp_dst); + dst = OVS_MASKED(th->dest, key->tcp_dst, mask->tcp_dst); if (likely(dst != th->dest)) { set_tp_port(skb, &th->dest, dst, &th->check); flow_key->tp.dst = dst; @@ -590,8 +605,8 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key, old_csum = sh->checksum; old_correct_csum = sctp_compute_cksum(skb, sctphoff); - sh->source = MASKED(sh->source, key->sctp_src, mask->sctp_src); - sh->dest = MASKED(sh->dest, key->sctp_dst, mask->sctp_dst); + sh->source = OVS_MASKED(sh->source, key->sctp_src, mask->sctp_src); + sh->dest = OVS_MASKED(sh->dest, key->sctp_dst, mask->sctp_dst); new_csum = sctp_compute_cksum(skb, sctphoff); @@ -605,14 +620,145 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key, return 0; } -static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port) +static int ovs_vport_output(struct sock *sock, struct sk_buff *skb) +{ + struct ovs_frag_data *data = this_cpu_ptr(&ovs_frag_data_storage); + struct vport *vport = data->vport; + + if (skb_cow_head(skb, data->l2_len) < 0) { + kfree_skb(skb); + return -ENOMEM; + } + + __skb_dst_copy(skb, data->dst); + *OVS_CB(skb) = data->cb; + skb->inner_protocol = data->inner_protocol; + skb->vlan_tci = data->vlan_tci; + skb->vlan_proto = data->vlan_proto; + + /* Reconstruct the MAC header. */ + skb_push(skb, data->l2_len); + memcpy(skb->data, &data->l2_data, data->l2_len); + ovs_skb_postpush_rcsum(skb, skb->data, data->l2_len); + skb_reset_mac_header(skb); + + ovs_vport_send(vport, skb); + return 0; +} + +static unsigned int +ovs_dst_get_mtu(const struct dst_entry *dst) +{ + return dst->dev->mtu; +} + +static struct dst_ops ovs_dst_ops = { + .family = AF_UNSPEC, + .mtu = ovs_dst_get_mtu, +}; + +/* prepare_frag() is called once per (larger-than-MTU) frame; its inverse is + * ovs_vport_output(), which is called once per fragmented packet. + */ +static void prepare_frag(struct vport *vport, struct sk_buff *skb) +{ + unsigned int hlen = skb_network_offset(skb); + struct ovs_frag_data *data; + + data = this_cpu_ptr(&ovs_frag_data_storage); + data->dst = skb->_skb_refdst; + data->vport = vport; + data->cb = *OVS_CB(skb); + data->inner_protocol = skb->inner_protocol; + data->vlan_tci = skb->vlan_tci; + data->vlan_proto = skb->vlan_proto; + data->l2_len = hlen; + memcpy(&data->l2_data, skb->data, hlen); + + memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); + skb_pull(skb, hlen); +} + +static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru, + __be16 ethertype) +{ + if (skb_network_offset(skb) > MAX_L2_LEN) { + OVS_NLERR(1, "L2 header too long to fragment"); + return; + } + + if (ethertype == htons(ETH_P_IP)) { + struct dst_entry ovs_dst; + unsigned long orig_dst; + + prepare_frag(vport, skb); + dst_init(&ovs_dst, &ovs_dst_ops, NULL, 1, + DST_OBSOLETE_NONE, DST_NOCOUNT); + ovs_dst.dev = vport->dev; + + orig_dst = skb->_skb_refdst; + skb_dst_set_noref(skb, &ovs_dst); + IPCB(skb)->frag_max_size = mru; + + ip_do_fragment(skb->sk, skb, ovs_vport_output); + refdst_drop(orig_dst); + } else if (ethertype == htons(ETH_P_IPV6)) { + const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); + unsigned long orig_dst; + struct rt6_info ovs_rt; + + if (!v6ops) { + kfree_skb(skb); + return; + } + + prepare_frag(vport, skb); + memset(&ovs_rt, 0, sizeof(ovs_rt)); + dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1, + DST_OBSOLETE_NONE, DST_NOCOUNT); + ovs_rt.dst.dev = vport->dev; + + orig_dst = skb->_skb_refdst; + skb_dst_set_noref(skb, &ovs_rt.dst); + IP6CB(skb)->frag_max_size = mru; + + v6ops->fragment(skb->sk, skb, ovs_vport_output); + refdst_drop(orig_dst); + } else { + WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.", + ovs_vport_name(vport), ntohs(ethertype), mru, + vport->dev->mtu); + kfree_skb(skb); + } +} + +static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port, + struct sw_flow_key *key) { struct vport *vport = ovs_vport_rcu(dp, out_port); - if (likely(vport)) - ovs_vport_send(vport, skb); - else + if (likely(vport)) { + u16 mru = OVS_CB(skb)->mru; + + if (likely(!mru || (skb->len <= mru + ETH_HLEN))) { + ovs_vport_send(vport, skb); + } else if (mru <= vport->dev->mtu) { + __be16 ethertype = key->eth.type; + + if (!is_flow_key_valid(key)) { + if (eth_p_mpls(skb->protocol)) + ethertype = skb->inner_protocol; + else + ethertype = vlan_get_protocol(skb); + } + + ovs_fragment(vport, skb, mru, ethertype); + } else { + kfree_skb(skb); + } + } else { kfree_skb(skb); + } } static int output_userspace(struct datapath *dp, struct sk_buff *skb, @@ -626,6 +772,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, memset(&upcall, 0, sizeof(upcall)); upcall.cmd = OVS_PACKET_CMD_ACTION; + upcall.mru = OVS_CB(skb)->mru; for (a = nla_data(attr), rem = nla_len(attr); rem > 0; a = nla_next(a, &rem)) { @@ -770,12 +917,13 @@ static int execute_masked_set_action(struct sk_buff *skb, switch (nla_type(a)) { case OVS_KEY_ATTR_PRIORITY: - SET_MASKED(skb->priority, nla_get_u32(a), *get_mask(a, u32 *)); + OVS_SET_MASKED(skb->priority, nla_get_u32(a), + *get_mask(a, u32 *)); flow_key->phy.priority = skb->priority; break; case OVS_KEY_ATTR_SKB_MARK: - SET_MASKED(skb->mark, nla_get_u32(a), *get_mask(a, u32 *)); + OVS_SET_MASKED(skb->mark, nla_get_u32(a), *get_mask(a, u32 *)); flow_key->phy.skb_mark = skb->mark; break; @@ -818,6 +966,13 @@ static int execute_masked_set_action(struct sk_buff *skb, err = set_mpls(skb, flow_key, nla_data(a), get_mask(a, __be32 *)); break; + + case OVS_KEY_ATTR_CT_STATE: + case OVS_KEY_ATTR_CT_ZONE: + case OVS_KEY_ATTR_CT_MARK: + case OVS_KEY_ATTR_CT_LABEL: + err = -EINVAL; + break; } return err; @@ -887,7 +1042,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, struct sk_buff *out_skb = skb_clone(skb, GFP_ATOMIC); if (out_skb) - do_output(dp, out_skb, prev_port); + do_output(dp, out_skb, prev_port, key); prev_port = -1; } @@ -944,6 +1099,15 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, case OVS_ACTION_ATTR_SAMPLE: err = sample(dp, skb, key, a, attr, len); break; + + case OVS_ACTION_ATTR_CT: + err = ovs_ct_execute(ovs_dp_get_net(dp), skb, key, + nla_data(a)); + + /* Hide stolen IP fragments from user space. */ + if (err == -EINPROGRESS) + return 0; + break; } if (unlikely(err)) { @@ -953,7 +1117,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, } if (prev_port != -1) - do_output(dp, skb, prev_port); + do_output(dp, skb, prev_port, key); else consume_skb(skb); diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c new file mode 100644 index 000000000000..e8e524ad8a01 --- /dev/null +++ b/net/openvswitch/conntrack.c @@ -0,0 +1,755 @@ +/* + * Copyright (c) 2015 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#include <linux/module.h> +#include <linux/openvswitch.h> +#include <net/ip.h> +#include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_labels.h> +#include <net/netfilter/nf_conntrack_zones.h> +#include <net/netfilter/ipv6/nf_defrag_ipv6.h> + +#include "datapath.h" +#include "conntrack.h" +#include "flow.h" +#include "flow_netlink.h" + +struct ovs_ct_len_tbl { + size_t maxlen; + size_t minlen; +}; + +/* Metadata mark for masked write to conntrack mark */ +struct md_mark { + u32 value; + u32 mask; +}; + +/* Metadata label for masked write to conntrack label. */ +struct md_label { + struct ovs_key_ct_label value; + struct ovs_key_ct_label mask; +}; + +/* Conntrack action context for execution. */ +struct ovs_conntrack_info { + struct nf_conntrack_helper *helper; + struct nf_conntrack_zone zone; + struct nf_conn *ct; + u32 flags; + u16 family; + struct md_mark mark; + struct md_label label; +}; + +static u16 key_to_nfproto(const struct sw_flow_key *key) +{ + switch (ntohs(key->eth.type)) { + case ETH_P_IP: + return NFPROTO_IPV4; + case ETH_P_IPV6: + return NFPROTO_IPV6; + default: + return NFPROTO_UNSPEC; + } +} + +/* Map SKB connection state into the values used by flow definition. */ +static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo) +{ + u8 ct_state = OVS_CS_F_TRACKED; + + switch (ctinfo) { + case IP_CT_ESTABLISHED_REPLY: + case IP_CT_RELATED_REPLY: + case IP_CT_NEW_REPLY: + ct_state |= OVS_CS_F_REPLY_DIR; + break; + default: + break; + } + + switch (ctinfo) { + case IP_CT_ESTABLISHED: + case IP_CT_ESTABLISHED_REPLY: + ct_state |= OVS_CS_F_ESTABLISHED; + break; + case IP_CT_RELATED: + case IP_CT_RELATED_REPLY: + ct_state |= OVS_CS_F_RELATED; + break; + case IP_CT_NEW: + case IP_CT_NEW_REPLY: + ct_state |= OVS_CS_F_NEW; + break; + default: + break; + } + + return ct_state; +} + +static u32 ovs_ct_get_mark(const struct nf_conn *ct) +{ +#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) + return ct ? ct->mark : 0; +#else + return 0; +#endif +} + +static void ovs_ct_get_label(const struct nf_conn *ct, + struct ovs_key_ct_label *label) +{ + struct nf_conn_labels *cl = ct ? nf_ct_labels_find(ct) : NULL; + + if (cl) { + size_t len = cl->words * sizeof(long); + + if (len > OVS_CT_LABEL_LEN) + len = OVS_CT_LABEL_LEN; + else if (len < OVS_CT_LABEL_LEN) + memset(label, 0, OVS_CT_LABEL_LEN); + memcpy(label, cl->bits, len); + } else { + memset(label, 0, OVS_CT_LABEL_LEN); + } +} + +static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state, + const struct nf_conntrack_zone *zone, + const struct nf_conn *ct) +{ + key->ct.state = state; + key->ct.zone = zone->id; + key->ct.mark = ovs_ct_get_mark(ct); + ovs_ct_get_label(ct, &key->ct.label); +} + +/* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has + * previously sent the packet to conntrack via the ct action. + */ +static void ovs_ct_update_key(const struct sk_buff *skb, + struct sw_flow_key *key, bool post_ct) +{ + const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + u8 state = 0; + + ct = nf_ct_get(skb, &ctinfo); + if (ct) { + state = ovs_ct_get_state(ctinfo); + if (ct->master) + state |= OVS_CS_F_RELATED; + zone = nf_ct_zone(ct); + } else if (post_ct) { + state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID; + } + __ovs_ct_update_key(key, state, zone, ct); +} + +void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key) +{ + ovs_ct_update_key(skb, key, false); +} + +int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb) +{ + if (nla_put_u8(skb, OVS_KEY_ATTR_CT_STATE, key->ct.state)) + return -EMSGSIZE; + + if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && + nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, key->ct.zone)) + return -EMSGSIZE; + + if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && + nla_put_u32(skb, OVS_KEY_ATTR_CT_MARK, key->ct.mark)) + return -EMSGSIZE; + + if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && + nla_put(skb, OVS_KEY_ATTR_CT_LABEL, sizeof(key->ct.label), + &key->ct.label)) + return -EMSGSIZE; + + return 0; +} + +static int ovs_ct_set_mark(struct sk_buff *skb, struct sw_flow_key *key, + u32 ct_mark, u32 mask) +{ +#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + u32 new_mark; + + + /* The connection could be invalid, in which case set_mark is no-op. */ + ct = nf_ct_get(skb, &ctinfo); + if (!ct) + return 0; + + new_mark = ct_mark | (ct->mark & ~(mask)); + if (ct->mark != new_mark) { + ct->mark = new_mark; + nf_conntrack_event_cache(IPCT_MARK, ct); + key->ct.mark = new_mark; + } + + return 0; +#else + return -ENOTSUPP; +#endif +} + +static int ovs_ct_set_label(struct sk_buff *skb, struct sw_flow_key *key, + const struct ovs_key_ct_label *label, + const struct ovs_key_ct_label *mask) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn_labels *cl; + struct nf_conn *ct; + int err; + + if (!IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)) + return -ENOTSUPP; + + /* The connection could be invalid, in which case set_label is no-op.*/ + ct = nf_ct_get(skb, &ctinfo); + if (!ct) + return 0; + + cl = nf_ct_labels_find(ct); + if (!cl) { + nf_ct_labels_ext_add(ct); + cl = nf_ct_labels_find(ct); + } + if (!cl || cl->words * sizeof(long) < OVS_CT_LABEL_LEN) + return -ENOSPC; + + err = nf_connlabels_replace(ct, (u32 *)label, (u32 *)mask, + OVS_CT_LABEL_LEN / sizeof(u32)); + if (err) + return err; + + ovs_ct_get_label(ct, &key->ct.label); + return 0; +} + +/* 'skb' should already be pulled to nh_ofs. */ +static int ovs_ct_helper(struct sk_buff *skb, u16 proto) +{ + const struct nf_conntrack_helper *helper; + const struct nf_conn_help *help; + enum ip_conntrack_info ctinfo; + unsigned int protoff; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + if (!ct || ctinfo == IP_CT_RELATED_REPLY) + return NF_ACCEPT; + + help = nfct_help(ct); + if (!help) + return NF_ACCEPT; + + helper = rcu_dereference(help->helper); + if (!helper) + return NF_ACCEPT; + + switch (proto) { + case NFPROTO_IPV4: + protoff = ip_hdrlen(skb); + break; + case NFPROTO_IPV6: { + u8 nexthdr = ipv6_hdr(skb)->nexthdr; + __be16 frag_off; + + protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), + &nexthdr, &frag_off); + if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { + pr_debug("proto header not found\n"); + return NF_ACCEPT; + } + break; + } + default: + WARN_ONCE(1, "helper invoked on non-IP family!"); + return NF_DROP; + } + + return helper->help(skb, protoff, ct, ctinfo); +} + +static int handle_fragments(struct net *net, struct sw_flow_key *key, + u16 zone, struct sk_buff *skb) +{ + struct ovs_skb_cb ovs_cb = *OVS_CB(skb); + + if (key->eth.type == htons(ETH_P_IP)) { + enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone; + int err; + + memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); + err = ip_defrag(skb, user); + if (err) + return err; + + ovs_cb.mru = IPCB(skb)->frag_max_size; + } else if (key->eth.type == htons(ETH_P_IPV6)) { +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) + enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; + struct sk_buff *reasm; + + memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); + reasm = nf_ct_frag6_gather(skb, user); + if (!reasm) + return -EINPROGRESS; + + if (skb == reasm) + return -EINVAL; + + key->ip.proto = ipv6_hdr(reasm)->nexthdr; + skb_morph(skb, reasm); + consume_skb(reasm); + ovs_cb.mru = IP6CB(skb)->frag_max_size; +#else + return -EPFNOSUPPORT; +#endif + } else { + return -EPFNOSUPPORT; + } + + key->ip.frag = OVS_FRAG_TYPE_NONE; + skb_clear_hash(skb); + skb->ignore_df = 1; + *OVS_CB(skb) = ovs_cb; + + return 0; +} + +static struct nf_conntrack_expect * +ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone, + u16 proto, const struct sk_buff *skb) +{ + struct nf_conntrack_tuple tuple; + + if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, &tuple)) + return NULL; + return __nf_ct_expect_find(net, zone, &tuple); +} + +/* Determine whether skb->nfct is equal to the result of conntrack lookup. */ +static bool skb_nfct_cached(const struct net *net, const struct sk_buff *skb, + const struct ovs_conntrack_info *info) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + if (!ct) + return false; + if (!net_eq(net, read_pnet(&ct->ct_net))) + return false; + if (!nf_ct_zone_equal_any(info->ct, nf_ct_zone(ct))) + return false; + if (info->helper) { + struct nf_conn_help *help; + + help = nf_ct_ext_find(ct, NF_CT_EXT_HELPER); + if (help && rcu_access_pointer(help->helper) != info->helper) + return false; + } + + return true; +} + +static int __ovs_ct_lookup(struct net *net, const struct sw_flow_key *key, + const struct ovs_conntrack_info *info, + struct sk_buff *skb) +{ + /* If we are recirculating packets to match on conntrack fields and + * committing with a separate conntrack action, then we don't need to + * actually run the packet through conntrack twice unless it's for a + * different zone. + */ + if (!skb_nfct_cached(net, skb, info)) { + struct nf_conn *tmpl = info->ct; + + /* Associate skb with specified zone. */ + if (tmpl) { + if (skb->nfct) + nf_conntrack_put(skb->nfct); + nf_conntrack_get(&tmpl->ct_general); + skb->nfct = &tmpl->ct_general; + skb->nfctinfo = IP_CT_NEW; + } + + if (nf_conntrack_in(net, info->family, NF_INET_PRE_ROUTING, + skb) != NF_ACCEPT) + return -ENOENT; + + if (ovs_ct_helper(skb, info->family) != NF_ACCEPT) { + WARN_ONCE(1, "helper rejected packet"); + return -EINVAL; + } + } + + return 0; +} + +/* Lookup connection and read fields into key. */ +static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key, + const struct ovs_conntrack_info *info, + struct sk_buff *skb) +{ + struct nf_conntrack_expect *exp; + + exp = ovs_ct_expect_find(net, &info->zone, info->family, skb); + if (exp) { + u8 state; + + state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED; + __ovs_ct_update_key(key, state, &info->zone, exp->master); + } else { + int err; + + err = __ovs_ct_lookup(net, key, info, skb); + if (err) + return err; + + ovs_ct_update_key(skb, key, true); + } + + return 0; +} + +/* Lookup connection and confirm if unconfirmed. */ +static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, + const struct ovs_conntrack_info *info, + struct sk_buff *skb) +{ + u8 state; + int err; + + state = key->ct.state; + if (key->ct.zone == info->zone.id && + ((state & OVS_CS_F_TRACKED) && !(state & OVS_CS_F_NEW))) { + /* Previous lookup has shown that this connection is already + * tracked and committed. Skip committing. + */ + return 0; + } + + err = __ovs_ct_lookup(net, key, info, skb); + if (err) + return err; + if (nf_conntrack_confirm(skb) != NF_ACCEPT) + return -EINVAL; + + ovs_ct_update_key(skb, key, true); + + return 0; +} + +static bool label_nonzero(const struct ovs_key_ct_label *label) +{ + size_t i; + + for (i = 0; i < sizeof(*label); i++) + if (label->ct_label[i]) + return true; + + return false; +} + +int ovs_ct_execute(struct net *net, struct sk_buff *skb, + struct sw_flow_key *key, + const struct ovs_conntrack_info *info) +{ + int nh_ofs; + int err; + + /* The conntrack module expects to be working at L3. */ + nh_ofs = skb_network_offset(skb); + skb_pull(skb, nh_ofs); + + if (key->ip.frag != OVS_FRAG_TYPE_NONE) { + err = handle_fragments(net, key, info->zone.id, skb); + if (err) + return err; + } + + if (info->flags & OVS_CT_F_COMMIT) + err = ovs_ct_commit(net, key, info, skb); + else + err = ovs_ct_lookup(net, key, info, skb); + if (err) + goto err; + + if (info->mark.mask) { + err = ovs_ct_set_mark(skb, key, info->mark.value, + info->mark.mask); + if (err) + goto err; + } + if (label_nonzero(&info->label.mask)) + err = ovs_ct_set_label(skb, key, &info->label.value, + &info->label.mask); +err: + skb_push(skb, nh_ofs); + return err; +} + +static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name, + const struct sw_flow_key *key, bool log) +{ + struct nf_conntrack_helper *helper; + struct nf_conn_help *help; + + helper = nf_conntrack_helper_try_module_get(name, info->family, + key->ip.proto); + if (!helper) { + OVS_NLERR(log, "Unknown helper \"%s\"", name); + return -EINVAL; + } + + help = nf_ct_helper_ext_add(info->ct, helper, GFP_KERNEL); + if (!help) { + module_put(helper->me); + return -ENOMEM; + } + + rcu_assign_pointer(help->helper, helper); + info->helper = helper; + return 0; +} + +static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = { + [OVS_CT_ATTR_FLAGS] = { .minlen = sizeof(u32), + .maxlen = sizeof(u32) }, + [OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16), + .maxlen = sizeof(u16) }, + [OVS_CT_ATTR_MARK] = { .minlen = sizeof(struct md_mark), + .maxlen = sizeof(struct md_mark) }, + [OVS_CT_ATTR_LABEL] = { .minlen = sizeof(struct md_label), + .maxlen = sizeof(struct md_label) }, + [OVS_CT_ATTR_HELPER] = { .minlen = 1, + .maxlen = NF_CT_HELPER_NAME_LEN } +}; + +static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, + const char **helper, bool log) +{ + struct nlattr *a; + int rem; + + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); + int maxlen = ovs_ct_attr_lens[type].maxlen; + int minlen = ovs_ct_attr_lens[type].minlen; + + if (type > OVS_CT_ATTR_MAX) { + OVS_NLERR(log, + "Unknown conntrack attr (type=%d, max=%d)", + type, OVS_CT_ATTR_MAX); + return -EINVAL; + } + if (nla_len(a) < minlen || nla_len(a) > maxlen) { + OVS_NLERR(log, + "Conntrack attr type has unexpected length (type=%d, length=%d, expected=%d)", + type, nla_len(a), maxlen); + return -EINVAL; + } + + switch (type) { + case OVS_CT_ATTR_FLAGS: + info->flags = nla_get_u32(a); + break; +#ifdef CONFIG_NF_CONNTRACK_ZONES + case OVS_CT_ATTR_ZONE: + info->zone.id = nla_get_u16(a); + break; +#endif +#ifdef CONFIG_NF_CONNTRACK_MARK + case OVS_CT_ATTR_MARK: { + struct md_mark *mark = nla_data(a); + + info->mark = *mark; + break; + } +#endif +#ifdef CONFIG_NF_CONNTRACK_LABELS + case OVS_CT_ATTR_LABEL: { + struct md_label *label = nla_data(a); + + info->label = *label; + break; + } +#endif + case OVS_CT_ATTR_HELPER: + *helper = nla_data(a); + if (!memchr(*helper, '\0', nla_len(a))) { + OVS_NLERR(log, "Invalid conntrack helper"); + return -EINVAL; + } + break; + default: + OVS_NLERR(log, "Unknown conntrack attr (%d)", + type); + return -EINVAL; + } + } + + if (rem > 0) { + OVS_NLERR(log, "Conntrack attr has %d unknown bytes", rem); + return -EINVAL; + } + + return 0; +} + +bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr) +{ + if (attr == OVS_KEY_ATTR_CT_STATE) + return true; + if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && + attr == OVS_KEY_ATTR_CT_ZONE) + return true; + if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && + attr == OVS_KEY_ATTR_CT_MARK) + return true; + if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && + attr == OVS_KEY_ATTR_CT_LABEL) { + struct ovs_net *ovs_net = net_generic(net, ovs_net_id); + + return ovs_net->xt_label; + } + + return false; +} + +int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, + const struct sw_flow_key *key, + struct sw_flow_actions **sfa, bool log) +{ + struct ovs_conntrack_info ct_info; + const char *helper = NULL; + u16 family; + int err; + + family = key_to_nfproto(key); + if (family == NFPROTO_UNSPEC) { + OVS_NLERR(log, "ct family unspecified"); + return -EINVAL; + } + + memset(&ct_info, 0, sizeof(ct_info)); + ct_info.family = family; + + nf_ct_zone_init(&ct_info.zone, NF_CT_DEFAULT_ZONE_ID, + NF_CT_DEFAULT_ZONE_DIR, 0); + + err = parse_ct(attr, &ct_info, &helper, log); + if (err) + return err; + + /* Set up template for tracking connections in specific zones. */ + ct_info.ct = nf_ct_tmpl_alloc(net, &ct_info.zone, GFP_KERNEL); + if (!ct_info.ct) { + OVS_NLERR(log, "Failed to allocate conntrack template"); + return -ENOMEM; + } + if (helper) { + err = ovs_ct_add_helper(&ct_info, helper, key, log); + if (err) + goto err_free_ct; + } + + err = ovs_nla_add_action(sfa, OVS_ACTION_ATTR_CT, &ct_info, + sizeof(ct_info), log); + if (err) + goto err_free_ct; + + __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status); + nf_conntrack_get(&ct_info.ct->ct_general); + return 0; +err_free_ct: + nf_conntrack_free(ct_info.ct); + return err; +} + +int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, + struct sk_buff *skb) +{ + struct nlattr *start; + + start = nla_nest_start(skb, OVS_ACTION_ATTR_CT); + if (!start) + return -EMSGSIZE; + + if (nla_put_u32(skb, OVS_CT_ATTR_FLAGS, ct_info->flags)) + return -EMSGSIZE; + if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && + nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id)) + return -EMSGSIZE; + if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && + nla_put(skb, OVS_CT_ATTR_MARK, sizeof(ct_info->mark), + &ct_info->mark)) + return -EMSGSIZE; + if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && + nla_put(skb, OVS_CT_ATTR_LABEL, sizeof(ct_info->label), + &ct_info->label)) + return -EMSGSIZE; + if (ct_info->helper) { + if (nla_put_string(skb, OVS_CT_ATTR_HELPER, + ct_info->helper->name)) + return -EMSGSIZE; + } + + nla_nest_end(skb, start); + + return 0; +} + +void ovs_ct_free_action(const struct nlattr *a) +{ + struct ovs_conntrack_info *ct_info = nla_data(a); + + if (ct_info->helper) + module_put(ct_info->helper->me); + if (ct_info->ct) + nf_ct_put(ct_info->ct); +} + +void ovs_ct_init(struct net *net) +{ + unsigned int n_bits = sizeof(struct ovs_key_ct_label) * BITS_PER_BYTE; + struct ovs_net *ovs_net = net_generic(net, ovs_net_id); + + if (nf_connlabels_get(net, n_bits)) { + ovs_net->xt_label = false; + OVS_NLERR(true, "Failed to set connlabel length"); + } else { + ovs_net->xt_label = true; + } +} + +void ovs_ct_exit(struct net *net) +{ + struct ovs_net *ovs_net = net_generic(net, ovs_net_id); + + if (ovs_net->xt_label) + nf_connlabels_put(net); +} diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h new file mode 100644 index 000000000000..3cb30667a7dc --- /dev/null +++ b/net/openvswitch/conntrack.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2015 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#ifndef OVS_CONNTRACK_H +#define OVS_CONNTRACK_H 1 + +#include "flow.h" + +struct ovs_conntrack_info; +enum ovs_key_attr; + +#if defined(CONFIG_OPENVSWITCH_CONNTRACK) +void ovs_ct_init(struct net *); +void ovs_ct_exit(struct net *); +bool ovs_ct_verify(struct net *, enum ovs_key_attr attr); +int ovs_ct_copy_action(struct net *, const struct nlattr *, + const struct sw_flow_key *, struct sw_flow_actions **, + bool log); +int ovs_ct_action_to_attr(const struct ovs_conntrack_info *, struct sk_buff *); + +int ovs_ct_execute(struct net *, struct sk_buff *, struct sw_flow_key *, + const struct ovs_conntrack_info *); + +void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key); +int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb); +void ovs_ct_free_action(const struct nlattr *a); +#else +#include <linux/errno.h> + +static inline void ovs_ct_init(struct net *net) { } + +static inline void ovs_ct_exit(struct net *net) { } + +static inline bool ovs_ct_verify(struct net *net, int attr) +{ + return false; +} + +static inline int ovs_ct_copy_action(struct net *net, const struct nlattr *nla, + const struct sw_flow_key *key, + struct sw_flow_actions **acts, bool log) +{ + return -ENOTSUPP; +} + +static inline int ovs_ct_action_to_attr(const struct ovs_conntrack_info *info, + struct sk_buff *skb) +{ + return -ENOTSUPP; +} + +static inline int ovs_ct_execute(struct net *net, struct sk_buff *skb, + struct sw_flow_key *key, + const struct ovs_conntrack_info *info) +{ + return -ENOTSUPP; +} + +static inline void ovs_ct_fill_key(const struct sk_buff *skb, + struct sw_flow_key *key) +{ + key->ct.state = 0; + key->ct.zone = 0; + key->ct.mark = 0; + memset(&key->ct.label, 0, sizeof(key->ct.label)); +} + +static inline int ovs_ct_put_key(const struct sw_flow_key *key, + struct sk_buff *skb) +{ + return 0; +} + +static inline void ovs_ct_free_action(const struct nlattr *a) { } +#endif +#endif /* ovs_conntrack.h */ diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index ffe984f5b95c..ec0f8d9cee73 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -275,6 +275,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) memset(&upcall, 0, sizeof(upcall)); upcall.cmd = OVS_PACKET_CMD_MISS; upcall.portid = ovs_vport_find_upcall_portid(p, skb); + upcall.mru = OVS_CB(skb)->mru; error = ovs_dp_upcall(dp, skb, key, &upcall); if (unlikely(error)) kfree_skb(skb); @@ -400,9 +401,23 @@ static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info, if (upcall_info->actions_len) size += nla_total_size(upcall_info->actions_len); + /* OVS_PACKET_ATTR_MRU */ + if (upcall_info->mru) + size += nla_total_size(sizeof(upcall_info->mru)); + return size; } +static void pad_packet(struct datapath *dp, struct sk_buff *skb) +{ + if (!(dp->user_features & OVS_DP_F_UNALIGNED)) { + size_t plen = NLA_ALIGN(skb->len) - skb->len; + + if (plen > 0) + memset(skb_put(skb, plen), 0, plen); + } +} + static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, const struct sw_flow_key *key, const struct dp_upcall_info *upcall_info) @@ -492,6 +507,16 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, nla_nest_cancel(user_skb, nla); } + /* Add OVS_PACKET_ATTR_MRU */ + if (upcall_info->mru) { + if (nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU, + upcall_info->mru)) { + err = -ENOBUFS; + goto out; + } + pad_packet(dp, user_skb); + } + /* Only reserve room for attribute header, packet data is added * in skb_zerocopy() */ if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) { @@ -505,12 +530,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, goto out; /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */ - if (!(dp->user_features & OVS_DP_F_UNALIGNED)) { - size_t plen = NLA_ALIGN(user_skb->len) - user_skb->len; - - if (plen > 0) - memset(skb_put(user_skb, plen), 0, plen); - } + pad_packet(dp, user_skb); ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len; @@ -527,6 +547,7 @@ out: static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) { struct ovs_header *ovs_header = info->userhdr; + struct net *net = sock_net(skb->sk); struct nlattr **a = info->attrs; struct sw_flow_actions *acts; struct sk_buff *packet; @@ -535,6 +556,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; struct ethhdr *eth; struct vport *input_vport; + u16 mru = 0; int len; int err; bool log = !a[OVS_PACKET_ATTR_PROBE]; @@ -564,18 +586,25 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) else packet->protocol = htons(ETH_P_802_2); + /* Set packet's mru */ + if (a[OVS_PACKET_ATTR_MRU]) { + mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]); + packet->ignore_df = 1; + } + OVS_CB(packet)->mru = mru; + /* Build an sw_flow for sending this packet. */ flow = ovs_flow_alloc(); err = PTR_ERR(flow); if (IS_ERR(flow)) goto err_kfree_skb; - err = ovs_flow_key_extract_userspace(a[OVS_PACKET_ATTR_KEY], packet, - &flow->key, log); + err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY], + packet, &flow->key, log); if (err) goto err_flow_free; - err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], + err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS], &flow->key, &acts, log); if (err) goto err_flow_free; @@ -586,7 +615,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) packet->mark = flow->key.phy.skb_mark; rcu_read_lock(); - dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex); + dp = get_dp_rcu(net, ovs_header->dp_ifindex); err = -ENODEV; if (!dp) goto err_unlock; @@ -598,6 +627,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) if (!input_vport) goto err_unlock; + packet->dev = input_vport->dev; OVS_CB(packet)->input_vport = input_vport; sf_acts = rcu_dereference(flow->sf_acts); @@ -624,6 +654,7 @@ static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = { [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED }, [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED }, [OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG }, + [OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 }, }; static const struct genl_ops dp_packet_genl_ops[] = { @@ -713,7 +744,7 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts, /* OVS_FLOW_ATTR_ACTIONS */ if (should_fill_actions(ufid_flags)) - len += nla_total_size(acts->actions_len); + len += nla_total_size(acts->orig_len); return len + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */ @@ -880,6 +911,7 @@ static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow, static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) { + struct net *net = sock_net(skb->sk); struct nlattr **a = info->attrs; struct ovs_header *ovs_header = info->userhdr; struct sw_flow *flow = NULL, *new_flow; @@ -915,7 +947,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) /* Extract key. */ ovs_match_init(&match, &key, &mask); - error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], + error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK], log); if (error) goto err_kfree_flow; @@ -929,8 +961,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) goto err_kfree_flow; /* Validate actions. */ - error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, - &acts, log); + error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS], + &new_flow->key, &acts, log); if (error) { OVS_NLERR(log, "Flow actions may not be safe on all matching packets."); goto err_kfree_flow; @@ -944,7 +976,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) } ovs_lock(); - dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); + dp = get_dp(net, ovs_header->dp_ifindex); if (unlikely(!dp)) { error = -ENODEV; goto err_unlock_ovs; @@ -1038,7 +1070,8 @@ error: } /* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */ -static struct sw_flow_actions *get_flow_actions(const struct nlattr *a, +static struct sw_flow_actions *get_flow_actions(struct net *net, + const struct nlattr *a, const struct sw_flow_key *key, const struct sw_flow_mask *mask, bool log) @@ -1048,7 +1081,7 @@ static struct sw_flow_actions *get_flow_actions(const struct nlattr *a, int error; ovs_flow_mask_key(&masked_key, key, mask); - error = ovs_nla_copy_actions(a, &masked_key, &acts, log); + error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log); if (error) { OVS_NLERR(log, "Actions may not be safe on all matching packets"); @@ -1060,6 +1093,7 @@ static struct sw_flow_actions *get_flow_actions(const struct nlattr *a, static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) { + struct net *net = sock_net(skb->sk); struct nlattr **a = info->attrs; struct ovs_header *ovs_header = info->userhdr; struct sw_flow_key key; @@ -1084,15 +1118,15 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log); ovs_match_init(&match, &key, &mask); - error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], + error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK], log); if (error) goto error; /* Validate actions. */ if (a[OVS_FLOW_ATTR_ACTIONS]) { - acts = get_flow_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, &mask, - log); + acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], &key, + &mask, log); if (IS_ERR(acts)) { error = PTR_ERR(acts); goto error; @@ -1108,7 +1142,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) } ovs_lock(); - dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); + dp = get_dp(net, ovs_header->dp_ifindex); if (unlikely(!dp)) { error = -ENODEV; goto err_unlock_ovs; @@ -1174,6 +1208,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; struct ovs_header *ovs_header = info->userhdr; + struct net *net = sock_net(skb->sk); struct sw_flow_key key; struct sk_buff *reply; struct sw_flow *flow; @@ -1188,7 +1223,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log); if (a[OVS_FLOW_ATTR_KEY]) { ovs_match_init(&match, &key, NULL); - err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, + err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL, log); } else if (!ufid_present) { OVS_NLERR(log, @@ -1232,6 +1267,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; struct ovs_header *ovs_header = info->userhdr; + struct net *net = sock_net(skb->sk); struct sw_flow_key key; struct sk_buff *reply; struct sw_flow *flow = NULL; @@ -1246,8 +1282,8 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log); if (a[OVS_FLOW_ATTR_KEY]) { ovs_match_init(&match, &key, NULL); - err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, - log); + err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], + NULL, log); if (unlikely(err)) return err; } @@ -2203,6 +2239,7 @@ static int __net_init ovs_init_net(struct net *net) INIT_LIST_HEAD(&ovs_net->dps); INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq); + ovs_ct_init(net); return 0; } @@ -2237,6 +2274,7 @@ static void __net_exit ovs_exit_net(struct net *dnet) struct net *net; LIST_HEAD(head); + ovs_ct_exit(dnet); ovs_lock(); list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node) __dp_destroy(dp); diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 6b28c5cedb23..4e785ab88973 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -27,6 +27,7 @@ #include <linux/u64_stats_sync.h> #include <net/ip_tunnels.h> +#include "conntrack.h" #include "flow.h" #include "flow_table.h" #include "vport.h" @@ -97,10 +98,13 @@ struct datapath { * NULL if the packet is not being tunneled. * @input_vport: The original vport packet came in on. This value is cached * when a packet is received by OVS. + * @mru: The maximum received fragement size; 0 if the packet is not + * fragmented. */ struct ovs_skb_cb { struct ip_tunnel_info *egress_tun_info; struct vport *input_vport; + u16 mru; }; #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb) @@ -113,6 +117,7 @@ struct ovs_skb_cb { * then no packet is sent and the packet is accounted in the datapath's @n_lost * counter. * @egress_tun_info: If nonnull, becomes %OVS_PACKET_ATTR_EGRESS_TUN_KEY. + * @mru: If not zero, Maximum received IP fragment size. */ struct dp_upcall_info { const struct ip_tunnel_info *egress_tun_info; @@ -121,6 +126,7 @@ struct dp_upcall_info { int actions_len; u32 portid; u8 cmd; + u16 mru; }; /** @@ -132,6 +138,9 @@ struct ovs_net { struct list_head dps; struct work_struct dp_notify_work; struct vport_net vport_net; + + /* Module reference for configuring conntrack. */ + bool xt_label; }; extern int ovs_net_id; @@ -200,6 +209,10 @@ void ovs_dp_notify_wq(struct work_struct *work); int action_fifos_init(void); void action_fifos_exit(void); +/* 'KEY' must not have any bits set outside of the 'MASK' */ +#define OVS_MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK))) +#define OVS_SET_MASKED(OLD, KEY, MASK) ((OLD) = OVS_MASKED(OLD, KEY, MASK)) + #define OVS_NLERR(logging_allowed, fmt, ...) \ do { \ if (logging_allowed && net_ratelimit()) \ diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 8db22ef73626..9760dc43bdb9 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -49,6 +49,7 @@ #include "datapath.h" #include "flow.h" #include "flow_netlink.h" +#include "conntrack.h" u64 ovs_flow_used_time(unsigned long flow_jiffies) { @@ -687,6 +688,8 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, { /* Extract metadata from packet. */ if (tun_info) { + if (ip_tunnel_info_af(tun_info) != AF_INET) + return -EINVAL; memcpy(&key->tun_key, &tun_info->key, sizeof(key->tun_key)); if (tun_info->options) { @@ -707,13 +710,14 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, key->phy.priority = skb->priority; key->phy.in_port = OVS_CB(skb)->input_vport->port_no; key->phy.skb_mark = skb->mark; + ovs_ct_fill_key(skb, key); key->ovs_flow_hash = 0; key->recirc_id = 0; return key_extract(skb, key); } -int ovs_flow_key_extract_userspace(const struct nlattr *attr, +int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr, struct sk_buff *skb, struct sw_flow_key *key, bool log) { @@ -722,7 +726,7 @@ int ovs_flow_key_extract_userspace(const struct nlattr *attr, memset(key, 0, OVS_SW_FLOW_KEY_METADATA_SIZE); /* Extract metadata from netlink attributes. */ - err = ovs_nla_get_flow_metadata(attr, key, log); + err = ovs_nla_get_flow_metadata(net, attr, key, log); if (err) return err; diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index b62cdb3e3589..fe527d2dd4b7 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -111,6 +111,14 @@ struct sw_flow_key { } nd; } ipv6; }; + struct { + /* Connection tracking fields. */ + u16 zone; + u32 mark; + u8 state; + struct ovs_key_ct_label label; + } ct; + } __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */ struct sw_flow_key_range { @@ -144,6 +152,7 @@ struct sw_flow_id { struct sw_flow_actions { struct rcu_head rcu; + size_t orig_len; /* From flow_cmd_new netlink actions size */ u32 actions_len; struct nlattr actions[]; }; @@ -212,7 +221,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, struct sk_buff *skb, struct sw_flow_key *key); /* Extract key from packet coming from userspace. */ -int ovs_flow_key_extract_userspace(const struct nlattr *attr, +int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr, struct sk_buff *skb, struct sw_flow_key *key, bool log); diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index a6eb77ab1a64..e22c5bfe8575 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -281,7 +281,7 @@ size_t ovs_key_attr_size(void) /* Whenever adding new OVS_KEY_ FIELDS, we should consider * updating this function. */ - BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 22); + BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 26); return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ @@ -290,6 +290,10 @@ size_t ovs_key_attr_size(void) + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ + nla_total_size(4) /* OVS_KEY_ATTR_DP_HASH */ + nla_total_size(4) /* OVS_KEY_ATTR_RECIRC_ID */ + + nla_total_size(1) /* OVS_KEY_ATTR_CT_STATE */ + + nla_total_size(2) /* OVS_KEY_ATTR_CT_ZONE */ + + nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */ + + nla_total_size(16) /* OVS_KEY_ATTR_CT_LABEL */ + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */ @@ -339,6 +343,10 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED, .next = ovs_tunnel_key_lens, }, [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) }, + [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u8) }, + [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) }, + [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) }, + [OVS_KEY_ATTR_CT_LABEL] = { .len = sizeof(struct ovs_key_ct_label) }, }; static bool is_all_zero(const u8 *fp, size_t size) @@ -534,19 +542,19 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, tun_flags |= TUNNEL_KEY; break; case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: - SW_FLOW_KEY_PUT(match, tun_key.ipv4_src, + SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src, nla_get_in_addr(a), is_mask); break; case OVS_TUNNEL_KEY_ATTR_IPV4_DST: - SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst, + SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.dst, nla_get_in_addr(a), is_mask); break; case OVS_TUNNEL_KEY_ATTR_TOS: - SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos, + SW_FLOW_KEY_PUT(match, tun_key.tos, nla_get_u8(a), is_mask); break; case OVS_TUNNEL_KEY_ATTR_TTL: - SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl, + SW_FLOW_KEY_PUT(match, tun_key.ttl, nla_get_u8(a), is_mask); ttl = true; break; @@ -609,7 +617,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, } if (!is_mask) { - if (!match->key->tun_key.ipv4_dst) { + if (!match->key->tun_key.u.ipv4.dst) { OVS_NLERR(log, "IPv4 tunnel dst address is zero"); return -EINVAL; } @@ -647,18 +655,18 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb, if (output->tun_flags & TUNNEL_KEY && nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id)) return -EMSGSIZE; - if (output->ipv4_src && + if (output->u.ipv4.src && nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, - output->ipv4_src)) + output->u.ipv4.src)) return -EMSGSIZE; - if (output->ipv4_dst && + if (output->u.ipv4.dst && nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, - output->ipv4_dst)) + output->u.ipv4.dst)) return -EMSGSIZE; - if (output->ipv4_tos && - nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos)) + if (output->tos && + nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->tos)) return -EMSGSIZE; - if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl)) + if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ttl)) return -EMSGSIZE; if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) && nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) @@ -715,9 +723,9 @@ int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb, egress_tun_info->options_len); } -static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, - const struct nlattr **a, bool is_mask, - bool log) +static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, + u64 *attrs, const struct nlattr **a, + bool is_mask, bool log) { if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) { u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]); @@ -768,16 +776,47 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, return -EINVAL; *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); } + + if (*attrs & (1 << OVS_KEY_ATTR_CT_STATE) && + ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) { + u8 ct_state = nla_get_u8(a[OVS_KEY_ATTR_CT_STATE]); + + SW_FLOW_KEY_PUT(match, ct.state, ct_state, is_mask); + *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_STATE); + } + if (*attrs & (1 << OVS_KEY_ATTR_CT_ZONE) && + ovs_ct_verify(net, OVS_KEY_ATTR_CT_ZONE)) { + u16 ct_zone = nla_get_u16(a[OVS_KEY_ATTR_CT_ZONE]); + + SW_FLOW_KEY_PUT(match, ct.zone, ct_zone, is_mask); + *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ZONE); + } + if (*attrs & (1 << OVS_KEY_ATTR_CT_MARK) && + ovs_ct_verify(net, OVS_KEY_ATTR_CT_MARK)) { + u32 mark = nla_get_u32(a[OVS_KEY_ATTR_CT_MARK]); + + SW_FLOW_KEY_PUT(match, ct.mark, mark, is_mask); + *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_MARK); + } + if (*attrs & (1 << OVS_KEY_ATTR_CT_LABEL) && + ovs_ct_verify(net, OVS_KEY_ATTR_CT_LABEL)) { + const struct ovs_key_ct_label *cl; + + cl = nla_data(a[OVS_KEY_ATTR_CT_LABEL]); + SW_FLOW_KEY_MEMCPY(match, ct.label, cl->ct_label, + sizeof(*cl), is_mask); + *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABEL); + } return 0; } -static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, - const struct nlattr **a, bool is_mask, - bool log) +static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, + u64 attrs, const struct nlattr **a, + bool is_mask, bool log) { int err; - err = metadata_from_nlattrs(match, &attrs, a, is_mask, log); + err = metadata_from_nlattrs(net, match, &attrs, a, is_mask, log); if (err) return err; @@ -1029,6 +1068,7 @@ static void mask_set_nlattr(struct nlattr *attr, u8 val) * mask. In case the 'mask' is NULL, the flow is treated as exact match * flow. Otherwise, it is treated as a wildcarded flow, except the mask * does not include any don't care bit. + * @net: Used to determine per-namespace field support. * @match: receives the extracted flow match information. * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute * sequence. The fields should of the packet that triggered the creation @@ -1039,7 +1079,7 @@ static void mask_set_nlattr(struct nlattr *attr, u8 val) * probing for feature compatibility this should be passed in as false to * suppress unnecessary error logging. */ -int ovs_nla_get_match(struct sw_flow_match *match, +int ovs_nla_get_match(struct net *net, struct sw_flow_match *match, const struct nlattr *nla_key, const struct nlattr *nla_mask, bool log) @@ -1089,7 +1129,7 @@ int ovs_nla_get_match(struct sw_flow_match *match, } } - err = ovs_key_from_nlattrs(match, key_attrs, a, false, log); + err = ovs_key_from_nlattrs(net, match, key_attrs, a, false, log); if (err) return err; @@ -1116,7 +1156,7 @@ int ovs_nla_get_match(struct sw_flow_match *match, /* The userspace does not send tunnel attributes that * are 0, but we should not wildcard them nonetheless. */ - if (match->key->tun_key.ipv4_dst) + if (match->key->tun_key.u.ipv4.dst) SW_FLOW_KEY_MEMSET_FIELD(match, tun_key, 0xff, true); @@ -1169,7 +1209,8 @@ int ovs_nla_get_match(struct sw_flow_match *match, } } - err = ovs_key_from_nlattrs(match, mask_attrs, a, true, log); + err = ovs_key_from_nlattrs(net, match, mask_attrs, a, true, + log); if (err) goto free_newmask; } @@ -1250,7 +1291,7 @@ u32 ovs_nla_get_ufid_flags(const struct nlattr *attr) * extracted from the packet itself. */ -int ovs_nla_get_flow_metadata(const struct nlattr *attr, +int ovs_nla_get_flow_metadata(struct net *net, const struct nlattr *attr, struct sw_flow_key *key, bool log) { @@ -1266,9 +1307,10 @@ int ovs_nla_get_flow_metadata(const struct nlattr *attr, memset(&match, 0, sizeof(match)); match.key = key; + memset(&key->ct, 0, sizeof(key->ct)); key->phy.in_port = DP_MAX_PORTS; - return metadata_from_nlattrs(&match, &attrs, a, false, log); + return metadata_from_nlattrs(net, &match, &attrs, a, false, log); } static int __ovs_nla_put_key(const struct sw_flow_key *swkey, @@ -1287,7 +1329,7 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority)) goto nla_put_failure; - if ((swkey->tun_key.ipv4_dst || is_mask)) { + if ((swkey->tun_key.u.ipv4.dst || is_mask)) { const void *opts = NULL; if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT) @@ -1314,6 +1356,9 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark)) goto nla_put_failure; + if (ovs_ct_put_key(output, skb)) + goto nla_put_failure; + nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); if (!nla) goto nla_put_failure; @@ -1574,6 +1619,9 @@ void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts) case OVS_ACTION_ATTR_SET: ovs_nla_free_set_action(a); break; + case OVS_ACTION_ATTR_CT: + ovs_ct_free_action(a); + break; } } @@ -1619,6 +1667,7 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len); acts->actions_len = (*sfa)->actions_len; + acts->orig_len = (*sfa)->orig_len; kfree(*sfa); *sfa = acts; @@ -1646,8 +1695,8 @@ static struct nlattr *__add_action(struct sw_flow_actions **sfa, return a; } -static int add_action(struct sw_flow_actions **sfa, int attrtype, - void *data, int len, bool log) +int ovs_nla_add_action(struct sw_flow_actions **sfa, int attrtype, void *data, + int len, bool log) { struct nlattr *a; @@ -1662,7 +1711,7 @@ static inline int add_nested_action_start(struct sw_flow_actions **sfa, int used = (*sfa)->actions_len; int err; - err = add_action(sfa, attrtype, NULL, 0, log); + err = ovs_nla_add_action(sfa, attrtype, NULL, 0, log); if (err) return err; @@ -1678,12 +1727,12 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa, a->nla_len = sfa->actions_len - st_offset; } -static int __ovs_nla_copy_actions(const struct nlattr *attr, +static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, int depth, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, bool log); -static int validate_and_copy_sample(const struct nlattr *attr, +static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, int depth, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, bool log) @@ -1715,15 +1764,15 @@ static int validate_and_copy_sample(const struct nlattr *attr, start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log); if (start < 0) return start; - err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, - nla_data(probability), sizeof(u32), log); + err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, + nla_data(probability), sizeof(u32), log); if (err) return err; st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS, log); if (st_acts < 0) return st_acts; - err = __ovs_nla_copy_actions(actions, key, depth + 1, sfa, + err = __ovs_nla_copy_actions(net, actions, key, depth + 1, sfa, eth_type, vlan_tci, log); if (err) return err; @@ -1892,6 +1941,8 @@ static int validate_set(const struct nlattr *a, case OVS_KEY_ATTR_PRIORITY: case OVS_KEY_ATTR_SKB_MARK: + case OVS_KEY_ATTR_CT_MARK: + case OVS_KEY_ATTR_CT_LABEL: case OVS_KEY_ATTR_ETHERNET: break; @@ -2057,7 +2108,7 @@ static int copy_action(const struct nlattr *from, return 0; } -static int __ovs_nla_copy_actions(const struct nlattr *attr, +static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, int depth, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, bool log) @@ -2081,7 +2132,8 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, [OVS_ACTION_ATTR_SET] = (u32)-1, [OVS_ACTION_ATTR_SET_MASKED] = (u32)-1, [OVS_ACTION_ATTR_SAMPLE] = (u32)-1, - [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash) + [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash), + [OVS_ACTION_ATTR_CT] = (u32)-1, }; const struct ovs_action_push_vlan *vlan; int type = nla_type(a); @@ -2188,13 +2240,20 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, break; case OVS_ACTION_ATTR_SAMPLE: - err = validate_and_copy_sample(a, key, depth, sfa, + err = validate_and_copy_sample(net, a, key, depth, sfa, eth_type, vlan_tci, log); if (err) return err; skip_copy = true; break; + case OVS_ACTION_ATTR_CT: + err = ovs_ct_copy_action(net, a, key, sfa, log); + if (err) + return err; + skip_copy = true; + break; + default: OVS_NLERR(log, "Unknown Action type %d", type); return -EINVAL; @@ -2213,7 +2272,7 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, } /* 'key' must be the masked key. */ -int ovs_nla_copy_actions(const struct nlattr *attr, +int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, bool log) { @@ -2223,7 +2282,8 @@ int ovs_nla_copy_actions(const struct nlattr *attr, if (IS_ERR(*sfa)) return PTR_ERR(*sfa); - err = __ovs_nla_copy_actions(attr, key, 0, sfa, key->eth.type, + (*sfa)->orig_len = nla_len(attr); + err = __ovs_nla_copy_actions(net, attr, key, 0, sfa, key->eth.type, key->eth.tci, log); if (err) ovs_nla_free_flow_actions(*sfa); @@ -2348,6 +2408,13 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb) if (err) return err; break; + + case OVS_ACTION_ATTR_CT: + err = ovs_ct_action_to_attr(nla_data(a), skb); + if (err) + return err; + break; + default: if (nla_put(skb, type, nla_len(a), nla_data(a))) return -EMSGSIZE; diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index acd074408f0a..07878e22e783 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -45,15 +45,16 @@ void ovs_match_init(struct sw_flow_match *match, int ovs_nla_put_key(const struct sw_flow_key *, const struct sw_flow_key *, int attr, bool is_mask, struct sk_buff *); -int ovs_nla_get_flow_metadata(const struct nlattr *, struct sw_flow_key *, - bool log); +int ovs_nla_get_flow_metadata(struct net *, const struct nlattr *, + struct sw_flow_key *, bool log); int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb); int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb); int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb); -int ovs_nla_get_match(struct sw_flow_match *, const struct nlattr *key, - const struct nlattr *mask, bool log); +int ovs_nla_get_match(struct net *, struct sw_flow_match *, + const struct nlattr *key, const struct nlattr *mask, + bool log); int ovs_nla_put_egress_tunnel_key(struct sk_buff *, const struct ip_tunnel_info *); @@ -62,9 +63,11 @@ int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid, const struct sw_flow_key *key, bool log); u32 ovs_nla_get_ufid_flags(const struct nlattr *attr); -int ovs_nla_copy_actions(const struct nlattr *attr, +int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, bool log); +int ovs_nla_add_action(struct sw_flow_actions **sfa, int attrtype, + void *data, int len, bool log); int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb); diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 3a9d1dde76ed..d22d8e948d0f 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -426,7 +426,7 @@ static u32 flow_hash(const struct sw_flow_key *key, static int flow_key_start(const struct sw_flow_key *key) { - if (key->tun_key.ipv4_dst) + if (key->tun_key.u.ipv4.dst) return 0; else return rounddown(offsetof(struct sw_flow_key, phy), diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index 1da3a14d1010..fa37c95f7339 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -26,95 +26,44 @@ #include "datapath.h" #include "vport.h" +#include "vport-netdev.h" static struct vport_ops ovs_geneve_vport_ops; - /** * struct geneve_port - Keeps track of open UDP ports - * @gs: The socket created for this port number. - * @name: vport name. + * @dst_port: destination port. */ struct geneve_port { - struct geneve_sock *gs; - char name[IFNAMSIZ]; + u16 port_no; }; -static LIST_HEAD(geneve_ports); - static inline struct geneve_port *geneve_vport(const struct vport *vport) { return vport_priv(vport); } -/* Convert 64 bit tunnel ID to 24 bit VNI. */ -static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni) -{ -#ifdef __BIG_ENDIAN - vni[0] = (__force __u8)(tun_id >> 16); - vni[1] = (__force __u8)(tun_id >> 8); - vni[2] = (__force __u8)tun_id; -#else - vni[0] = (__force __u8)((__force u64)tun_id >> 40); - vni[1] = (__force __u8)((__force u64)tun_id >> 48); - vni[2] = (__force __u8)((__force u64)tun_id >> 56); -#endif -} - -/* Convert 24 bit VNI to 64 bit tunnel ID. */ -static __be64 vni_to_tunnel_id(const __u8 *vni) -{ -#ifdef __BIG_ENDIAN - return (vni[0] << 16) | (vni[1] << 8) | vni[2]; -#else - return (__force __be64)(((__force u64)vni[0] << 40) | - ((__force u64)vni[1] << 48) | - ((__force u64)vni[2] << 56)); -#endif -} - -static void geneve_rcv(struct geneve_sock *gs, struct sk_buff *skb) -{ - struct vport *vport = gs->rcv_data; - struct genevehdr *geneveh = geneve_hdr(skb); - int opts_len; - struct ip_tunnel_info tun_info; - __be64 key; - __be16 flags; - - opts_len = geneveh->opt_len * 4; - - flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT | - (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0) | - (geneveh->oam ? TUNNEL_OAM : 0) | - (geneveh->critical ? TUNNEL_CRIT_OPT : 0); - - key = vni_to_tunnel_id(geneveh->vni); - - ip_tunnel_info_init(&tun_info, ip_hdr(skb), - udp_hdr(skb)->source, udp_hdr(skb)->dest, - key, flags, geneveh->options, opts_len); - - ovs_vport_receive(vport, skb, &tun_info); -} - static int geneve_get_options(const struct vport *vport, struct sk_buff *skb) { struct geneve_port *geneve_port = geneve_vport(vport); - struct inet_sock *sk = inet_sk(geneve_port->gs->sock->sk); - if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(sk->inet_sport))) + if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, geneve_port->port_no)) return -EMSGSIZE; return 0; } -static void geneve_tnl_destroy(struct vport *vport) +static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, + struct ip_tunnel_info *egress_tun_info) { struct geneve_port *geneve_port = geneve_vport(vport); + struct net *net = ovs_dp_get_net(vport->dp); + __be16 dport = htons(geneve_port->port_no); + __be16 sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true); - geneve_sock_release(geneve_port->gs); - - ovs_vport_deferred_free(vport); + return ovs_tunnel_get_egress_info(egress_tun_info, + ovs_dp_get_net(vport->dp), + OVS_CB(skb)->egress_tun_info, + IPPROTO_UDP, skb->mark, sport, dport); } static struct vport *geneve_tnl_create(const struct vport_parms *parms) @@ -122,11 +71,11 @@ static struct vport *geneve_tnl_create(const struct vport_parms *parms) struct net *net = ovs_dp_get_net(parms->dp); struct nlattr *options = parms->options; struct geneve_port *geneve_port; - struct geneve_sock *gs; + struct net_device *dev; struct vport *vport; struct nlattr *a; - int err; u16 dst_port; + int err; if (!options) { err = -EINVAL; @@ -148,104 +97,40 @@ static struct vport *geneve_tnl_create(const struct vport_parms *parms) return vport; geneve_port = geneve_vport(vport); - strncpy(geneve_port->name, parms->name, IFNAMSIZ); + geneve_port->port_no = dst_port; - gs = geneve_sock_add(net, htons(dst_port), geneve_rcv, vport, true, 0); - if (IS_ERR(gs)) { + rtnl_lock(); + dev = geneve_dev_create_fb(net, parms->name, NET_NAME_USER, dst_port); + if (IS_ERR(dev)) { + rtnl_unlock(); ovs_vport_free(vport); - return (void *)gs; + return ERR_CAST(dev); } - geneve_port->gs = gs; + dev_change_flags(dev, dev->flags | IFF_UP); + rtnl_unlock(); return vport; error: return ERR_PTR(err); } -static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb) +static struct vport *geneve_create(const struct vport_parms *parms) { - const struct ip_tunnel_key *tun_key; - struct ip_tunnel_info *tun_info; - struct net *net = ovs_dp_get_net(vport->dp); - struct geneve_port *geneve_port = geneve_vport(vport); - __be16 dport = inet_sk(geneve_port->gs->sock->sk)->inet_sport; - __be16 sport; - struct rtable *rt; - struct flowi4 fl; - u8 vni[3], opts_len, *opts; - __be16 df; - int err; - - tun_info = OVS_CB(skb)->egress_tun_info; - if (unlikely(!tun_info)) { - err = -EINVAL; - goto error; - } - - tun_key = &tun_info->key; - rt = ovs_tunnel_route_lookup(net, tun_key, skb->mark, &fl, IPPROTO_UDP); - if (IS_ERR(rt)) { - err = PTR_ERR(rt); - goto error; - } - - df = tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; - sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true); - tunnel_id_to_vni(tun_key->tun_id, vni); - skb->ignore_df = 1; - - if (tun_key->tun_flags & TUNNEL_GENEVE_OPT) { - opts = (u8 *)tun_info->options; - opts_len = tun_info->options_len; - } else { - opts = NULL; - opts_len = 0; - } - - err = geneve_xmit_skb(geneve_port->gs, rt, skb, fl.saddr, - tun_key->ipv4_dst, tun_key->ipv4_tos, - tun_key->ipv4_ttl, df, sport, dport, - tun_key->tun_flags, vni, opts_len, opts, - !!(tun_key->tun_flags & TUNNEL_CSUM), false); - if (err < 0) - ip_rt_put(rt); - return err; - -error: - kfree_skb(skb); - return err; -} - -static const char *geneve_get_name(const struct vport *vport) -{ - struct geneve_port *geneve_port = geneve_vport(vport); - - return geneve_port->name; -} + struct vport *vport; -static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, - struct ip_tunnel_info *egress_tun_info) -{ - struct geneve_port *geneve_port = geneve_vport(vport); - struct net *net = ovs_dp_get_net(vport->dp); - __be16 dport = inet_sk(geneve_port->gs->sock->sk)->inet_sport; - __be16 sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true); + vport = geneve_tnl_create(parms); + if (IS_ERR(vport)) + return vport; - /* Get tp_src and tp_dst, refert to geneve_build_header(). - */ - return ovs_tunnel_get_egress_info(egress_tun_info, - ovs_dp_get_net(vport->dp), - OVS_CB(skb)->egress_tun_info, - IPPROTO_UDP, skb->mark, sport, dport); + return ovs_netdev_link(vport, parms->name); } static struct vport_ops ovs_geneve_vport_ops = { .type = OVS_VPORT_TYPE_GENEVE, - .create = geneve_tnl_create, - .destroy = geneve_tnl_destroy, - .get_name = geneve_get_name, + .create = geneve_create, + .destroy = ovs_netdev_tunnel_destroy, .get_options = geneve_get_options, - .send = geneve_tnl_send, + .send = ovs_netdev_send, .owner = THIS_MODULE, .get_egress_tun_info = geneve_get_egress_tun_info, }; diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index c058bbf876c3..80b3e12ec882 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -135,7 +135,7 @@ static void do_setup(struct net_device *netdev) netdev->netdev_ops = &internal_dev_netdev_ops; netdev->priv_flags &= ~IFF_TX_SKB_SHARING; - netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH; netdev->destructor = internal_dev_destructor; netdev->ethtool_ops = &internal_dev_ethtool_ops; netdev->rtnl_link_ops = &internal_dev_link_ops; diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 4b70aaa4a746..a75011505039 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -57,7 +57,7 @@ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb) skb_push(skb, ETH_HLEN); ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN); - ovs_vport_receive(vport, skb, skb_tunnel_info(skb, AF_INET)); + ovs_vport_receive(vport, skb, skb_tunnel_info(skb)); return; error: diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index d14f59403c5e..40164037928e 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -484,6 +484,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb, OVS_CB(skb)->input_vport = vport; OVS_CB(skb)->egress_tun_info = NULL; + OVS_CB(skb)->mru = 0; /* Extract flow from 'skb' into 'key'. */ error = ovs_flow_key_extract(tun_info, skb, &key); if (unlikely(error)) { @@ -586,6 +587,8 @@ int ovs_tunnel_get_egress_info(struct ip_tunnel_info *egress_tun_info, if (unlikely(!tun_info)) return -EINVAL; + if (ip_tunnel_info_af(tun_info) != AF_INET) + return -EINVAL; tun_key = &tun_info->key; @@ -603,9 +606,9 @@ int ovs_tunnel_get_egress_info(struct ip_tunnel_info *egress_tun_info, * saddr, tp_src and tp_dst */ __ip_tunnel_info_init(egress_tun_info, - fl.saddr, tun_key->ipv4_dst, - tun_key->ipv4_tos, - tun_key->ipv4_ttl, + fl.saddr, tun_key->u.ipv4.dst, + tun_key->tos, + tun_key->ttl, tp_src, tp_dst, tun_key->tun_id, tun_key->tun_flags, diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 1a689c28b5a6..b88b3ee86f07 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -254,9 +254,9 @@ static inline struct rtable *ovs_tunnel_route_lookup(struct net *net, struct rtable *rt; memset(fl, 0, sizeof(*fl)); - fl->daddr = key->ipv4_dst; - fl->saddr = key->ipv4_src; - fl->flowi4_tos = RT_TOS(key->ipv4_tos); + fl->daddr = key->u.ipv4.dst; + fl->saddr = key->u.ipv4.src; + fl->flowi4_tos = RT_TOS(key->tos); fl->flowi4_mark = mark; fl->flowi4_proto = protocol; diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 896834cd3b9a..a2f28a6d4dc5 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -438,6 +438,14 @@ static const struct proto_ops rds_proto_ops = { .sendpage = sock_no_sendpage, }; +static void rds_sock_destruct(struct sock *sk) +{ + struct rds_sock *rs = rds_sk_to_rs(sk); + + WARN_ON((&rs->rs_item != rs->rs_item.next || + &rs->rs_item != rs->rs_item.prev)); +} + static int __rds_create(struct socket *sock, struct sock *sk, int protocol) { struct rds_sock *rs; @@ -445,6 +453,7 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol) sock_init_data(sock, sk); sock->ops = &rds_proto_ops; sk->sk_protocol = protocol; + sk->sk_destruct = rds_sock_destruct; rs = rds_sk_to_rs(sk); spin_lock_init(&rs->rs_lock); diff --git a/net/rds/connection.c b/net/rds/connection.c index d4fecb21ca25..a50e652eb269 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -301,6 +301,8 @@ void rds_conn_shutdown(struct rds_connection *conn) wait_event(conn->c_waitq, !test_bit(RDS_IN_XMIT, &conn->c_flags)); + wait_event(conn->c_waitq, + !test_bit(RDS_RECV_REFILL, &conn->c_flags)); conn->c_trans->conn_shutdown(conn); rds_conn_reset(conn); diff --git a/net/rds/ib.c b/net/rds/ib.c index 13814227b3b2..d020fade312c 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -366,6 +366,7 @@ void rds_ib_exit(void) rds_ib_sysctl_exit(); rds_ib_recv_exit(); rds_trans_unregister(&rds_ib_transport); + rds_ib_fmr_exit(); } struct rds_transport rds_ib_transport = { @@ -401,10 +402,14 @@ int rds_ib_init(void) INIT_LIST_HEAD(&rds_ib_devices); - ret = ib_register_client(&rds_ib_client); + ret = rds_ib_fmr_init(); if (ret) goto out; + ret = ib_register_client(&rds_ib_client); + if (ret) + goto out_fmr_exit; + ret = rds_ib_sysctl_init(); if (ret) goto out_ibreg; @@ -427,6 +432,8 @@ out_sysctl: rds_ib_sysctl_exit(); out_ibreg: rds_ib_unregister_client(); +out_fmr_exit: + rds_ib_fmr_exit(); out: return ret; } diff --git a/net/rds/ib.h b/net/rds/ib.h index 86d88ec5d556..9fc95e38659a 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -313,6 +313,8 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, void rds_ib_sync_mr(void *trans_private, int dir); void rds_ib_free_mr(void *trans_private, int invalidate); void rds_ib_flush_mrs(void); +int rds_ib_fmr_init(void); +void rds_ib_fmr_exit(void); /* ib_recv.c */ int rds_ib_recv_init(void); @@ -320,7 +322,7 @@ void rds_ib_recv_exit(void); int rds_ib_recv(struct rds_connection *conn); int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic); void rds_ib_recv_free_caches(struct rds_ib_connection *ic); -void rds_ib_recv_refill(struct rds_connection *conn, int prefill); +void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp); void rds_ib_inc_free(struct rds_incoming *inc); int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to); void rds_ib_recv_cq_comp_handler(struct ib_cq *cq, void *context); diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index f40d8f52b753..d150bb4aa3cb 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -135,7 +135,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even rds_ib_recv_init_ring(ic); /* Post receive buffers - as a side effect, this will update * the posted credit count. */ - rds_ib_recv_refill(conn, 1); + rds_ib_recv_refill(conn, 1, GFP_KERNEL); /* Tune RNR behavior */ rds_ib_tune_rnr(ic, &qp_attr); @@ -640,6 +640,15 @@ void rds_ib_conn_shutdown(struct rds_connection *conn) (atomic_read(&ic->i_signaled_sends) == 0)); tasklet_kill(&ic->i_recv_tasklet); + /* first destroy the ib state that generates callbacks */ + if (ic->i_cm_id->qp) + rdma_destroy_qp(ic->i_cm_id); + if (ic->i_send_cq) + ib_destroy_cq(ic->i_send_cq); + if (ic->i_recv_cq) + ib_destroy_cq(ic->i_recv_cq); + + /* then free the resources that ib callbacks use */ if (ic->i_send_hdrs) ib_dma_free_coherent(dev, ic->i_send_ring.w_nr * @@ -663,12 +672,6 @@ void rds_ib_conn_shutdown(struct rds_connection *conn) if (ic->i_recvs) rds_ib_recv_clear_ring(ic); - if (ic->i_cm_id->qp) - rdma_destroy_qp(ic->i_cm_id); - if (ic->i_send_cq) - ib_destroy_cq(ic->i_send_cq); - if (ic->i_recv_cq) - ib_destroy_cq(ic->i_recv_cq); rdma_destroy_id(ic->i_cm_id); /* diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 657ba9f5d308..251d1ce0b7c7 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -83,6 +83,25 @@ struct rds_ib_mr_pool { struct ib_fmr_attr fmr_attr; }; +struct workqueue_struct *rds_ib_fmr_wq; + +int rds_ib_fmr_init(void) +{ + rds_ib_fmr_wq = create_workqueue("rds_fmr_flushd"); + if (!rds_ib_fmr_wq) + return -ENOMEM; + return 0; +} + +/* By the time this is called all the IB devices should have been torn down and + * had their pools freed. As each pool is freed its work struct is waited on, + * so the pool flushing work queue should be idle by the time we get here. + */ +void rds_ib_fmr_exit(void) +{ + destroy_workqueue(rds_ib_fmr_wq); +} + static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all, struct rds_ib_mr **); static void rds_ib_teardown_mr(struct rds_ib_mr *ibmr); static void rds_ib_mr_pool_flush_worker(struct work_struct *work); @@ -151,12 +170,17 @@ int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr) struct rds_ib_device *rds_ibdev_old; rds_ibdev_old = rds_ib_get_device(ipaddr); - if (rds_ibdev_old) { + if (!rds_ibdev_old) + return rds_ib_add_ipaddr(rds_ibdev, ipaddr); + + if (rds_ibdev_old != rds_ibdev) { rds_ib_remove_ipaddr(rds_ibdev_old, ipaddr); rds_ib_dev_put(rds_ibdev_old); + return rds_ib_add_ipaddr(rds_ibdev, ipaddr); } + rds_ib_dev_put(rds_ibdev_old); - return rds_ib_add_ipaddr(rds_ibdev, ipaddr); + return 0; } void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn) @@ -336,8 +360,6 @@ static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev) goto out_no_cigar; } - memset(ibmr, 0, sizeof(*ibmr)); - ibmr->fmr = ib_alloc_fmr(rds_ibdev->pd, (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ | @@ -485,7 +507,7 @@ static void __rds_ib_teardown_mr(struct rds_ib_mr *ibmr) /* FIXME we need a way to tell a r/w MR * from a r/o MR */ - BUG_ON(irqs_disabled()); + WARN_ON(!page->mapping && irqs_disabled()); set_page_dirty(page); put_page(page); } @@ -523,11 +545,13 @@ static inline unsigned int rds_ib_flush_goal(struct rds_ib_mr_pool *pool, int fr /* * given an llist of mrs, put them all into the list_head for more processing */ -static void llist_append_to_list(struct llist_head *llist, struct list_head *list) +static unsigned int llist_append_to_list(struct llist_head *llist, + struct list_head *list) { struct rds_ib_mr *ibmr; struct llist_node *node; struct llist_node *next; + unsigned int count = 0; node = llist_del_all(llist); while (node) { @@ -535,7 +559,9 @@ static void llist_append_to_list(struct llist_head *llist, struct list_head *lis ibmr = llist_entry(node, struct rds_ib_mr, llnode); list_add_tail(&ibmr->unmap_list, list); node = next; + count++; } + return count; } /* @@ -576,7 +602,7 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, LIST_HEAD(unmap_list); LIST_HEAD(fmr_list); unsigned long unpinned = 0; - unsigned int nfreed = 0, ncleaned = 0, free_goal; + unsigned int nfreed = 0, dirty_to_clean = 0, free_goal; int ret = 0; rds_ib_stats_inc(s_ib_rdma_mr_pool_flush); @@ -618,8 +644,8 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, /* Get the list of all MRs to be dropped. Ordering matters - * we want to put drop_list ahead of free_list. */ - llist_append_to_list(&pool->drop_list, &unmap_list); - llist_append_to_list(&pool->free_list, &unmap_list); + dirty_to_clean = llist_append_to_list(&pool->drop_list, &unmap_list); + dirty_to_clean += llist_append_to_list(&pool->free_list, &unmap_list); if (free_all) llist_append_to_list(&pool->clean_list, &unmap_list); @@ -647,7 +673,6 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, kfree(ibmr); nfreed++; } - ncleaned++; } if (!list_empty(&unmap_list)) { @@ -673,7 +698,7 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, } atomic_sub(unpinned, &pool->free_pinned); - atomic_sub(ncleaned, &pool->dirty_count); + atomic_sub(dirty_to_clean, &pool->dirty_count); atomic_sub(nfreed, &pool->item_count); out: @@ -710,16 +735,18 @@ void rds_ib_free_mr(void *trans_private, int invalidate) /* If we've pinned too many pages, request a flush */ if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned || - atomic_read(&pool->dirty_count) >= pool->max_items / 10) - schedule_delayed_work(&pool->flush_worker, 10); + atomic_read(&pool->dirty_count) >= pool->max_items / 5) + queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10); if (invalidate) { if (likely(!in_interrupt())) { rds_ib_flush_mr_pool(pool, 0, NULL); } else { /* We get here if the user created a MR marked - * as use_once and invalidate at the same time. */ - schedule_delayed_work(&pool->flush_worker, 10); + * as use_once and invalidate at the same time. + */ + queue_delayed_work(rds_ib_fmr_wq, + &pool->flush_worker, 10); } } diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index cac5b4506ee3..6bbe62060060 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -297,7 +297,7 @@ static struct rds_page_frag *rds_ib_refill_one_frag(struct rds_ib_connection *ic } static int rds_ib_recv_refill_one(struct rds_connection *conn, - struct rds_ib_recv_work *recv, int prefill) + struct rds_ib_recv_work *recv, gfp_t gfp) { struct rds_ib_connection *ic = conn->c_transport_data; struct ib_sge *sge; @@ -305,7 +305,7 @@ static int rds_ib_recv_refill_one(struct rds_connection *conn, gfp_t slab_mask = GFP_NOWAIT; gfp_t page_mask = GFP_NOWAIT; - if (prefill) { + if (gfp & __GFP_WAIT) { slab_mask = GFP_KERNEL; page_mask = GFP_HIGHUSER; } @@ -347,6 +347,24 @@ out: return ret; } +static int acquire_refill(struct rds_connection *conn) +{ + return test_and_set_bit(RDS_RECV_REFILL, &conn->c_flags) == 0; +} + +static void release_refill(struct rds_connection *conn) +{ + clear_bit(RDS_RECV_REFILL, &conn->c_flags); + + /* We don't use wait_on_bit()/wake_up_bit() because our waking is in a + * hot path and finding waiters is very rare. We don't want to walk + * the system-wide hashed waitqueue buckets in the fast path only to + * almost never find waiters. + */ + if (waitqueue_active(&conn->c_waitq)) + wake_up_all(&conn->c_waitq); +} + /* * This tries to allocate and post unused work requests after making sure that * they have all the allocations they need to queue received fragments into @@ -354,15 +372,23 @@ out: * * -1 is returned if posting fails due to temporary resource exhaustion. */ -void rds_ib_recv_refill(struct rds_connection *conn, int prefill) +void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp) { struct rds_ib_connection *ic = conn->c_transport_data; struct rds_ib_recv_work *recv; struct ib_recv_wr *failed_wr; unsigned int posted = 0; int ret = 0; + bool can_wait = !!(gfp & __GFP_WAIT); u32 pos; + /* the goal here is to just make sure that someone, somewhere + * is posting buffers. If we can't get the refill lock, + * let them do their thing + */ + if (!acquire_refill(conn)) + return; + while ((prefill || rds_conn_up(conn)) && rds_ib_ring_alloc(&ic->i_recv_ring, 1, &pos)) { if (pos >= ic->i_recv_ring.w_nr) { @@ -372,7 +398,7 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill) } recv = &ic->i_recvs[pos]; - ret = rds_ib_recv_refill_one(conn, recv, prefill); + ret = rds_ib_recv_refill_one(conn, recv, gfp); if (ret) { break; } @@ -402,6 +428,24 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill) if (ret) rds_ib_ring_unalloc(&ic->i_recv_ring, 1); + + release_refill(conn); + + /* if we're called from the softirq handler, we'll be GFP_NOWAIT. + * in this case the ring being low is going to lead to more interrupts + * and we can safely let the softirq code take care of it unless the + * ring is completely empty. + * + * if we're called from krdsd, we'll be GFP_KERNEL. In this case + * we might have raced with the softirq code while we had the refill + * lock held. Use rds_ib_ring_low() instead of ring_empty to decide + * if we should requeue. + */ + if (rds_conn_up(conn) && + ((can_wait && rds_ib_ring_low(&ic->i_recv_ring)) || + rds_ib_ring_empty(&ic->i_recv_ring))) { + queue_delayed_work(rds_wq, &conn->c_recv_w, 1); + } } /* @@ -982,10 +1026,17 @@ static inline void rds_poll_cq(struct rds_ib_connection *ic, } /* - * It's very important that we only free this ring entry if we've truly - * freed the resources allocated to the entry. The refilling path can - * leak if we don't. + * rds_ib_process_recv() doesn't always consume the frag, and + * we might not have called it at all if the wc didn't indicate + * success. We already unmapped the frag's pages, though, and + * the following rds_ib_ring_free() call tells the refill path + * that it will not find an allocated frag here. Make sure we + * keep that promise by freeing a frag that's still on the ring. */ + if (recv->r_frag) { + rds_ib_frag_free(ic, recv->r_frag); + recv->r_frag = NULL; + } rds_ib_ring_free(&ic->i_recv_ring, 1); } } @@ -1016,7 +1067,7 @@ void rds_ib_recv_tasklet_fn(unsigned long data) rds_ib_stats_inc(s_ib_rx_ring_empty); if (rds_ib_ring_low(&ic->i_recv_ring)) - rds_ib_recv_refill(conn, 0); + rds_ib_recv_refill(conn, 0, GFP_NOWAIT); } int rds_ib_recv(struct rds_connection *conn) @@ -1025,8 +1076,10 @@ int rds_ib_recv(struct rds_connection *conn) int ret = 0; rdsdebug("conn %p\n", conn); - if (rds_conn_up(conn)) + if (rds_conn_up(conn)) { rds_ib_attempt_ack(ic); + rds_ib_recv_refill(conn, 0, GFP_KERNEL); + } return ret; } @@ -1049,9 +1102,10 @@ int rds_ib_recv_init(void) rds_ib_frag_slab = kmem_cache_create("rds_ib_frag", sizeof(struct rds_page_frag), 0, SLAB_HWCACHE_ALIGN, NULL); - if (!rds_ib_frag_slab) + if (!rds_ib_frag_slab) { kmem_cache_destroy(rds_ib_incoming_slab); - else + rds_ib_incoming_slab = NULL; + } else ret = 0; out: return ret; diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 5d0a704fa039..c576ebeb4115 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -709,6 +709,11 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, if (scat == &rm->data.op_sg[rm->data.op_count]) { prev->s_op = ic->i_data_op; prev->s_wr.send_flags |= IB_SEND_SOLICITED; + if (!(prev->s_wr.send_flags & IB_SEND_SIGNALED)) { + ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs; + prev->s_wr.send_flags |= IB_SEND_SIGNALED; + nr_sig++; + } ic->i_data_op = NULL; } diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 40084d843e9f..4c93badeabf2 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -435,9 +435,10 @@ void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force) /* If the MR was marked as invalidate, this will * trigger an async flush. */ - if (zot_me) + if (zot_me) { rds_destroy_mr(mr); - rds_mr_put(mr); + rds_mr_put(mr); + } } void rds_rdma_free_op(struct rm_rdma_op *ro) @@ -451,7 +452,7 @@ void rds_rdma_free_op(struct rm_rdma_op *ro) * is the case for a RDMA_READ which copies from remote * to local memory */ if (!ro->op_write) { - BUG_ON(irqs_disabled()); + WARN_ON(!page->mapping && irqs_disabled()); set_page_dirty(page); } put_page(page); @@ -658,6 +659,8 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, ret = rds_pin_pages(iov->addr, nr, pages, !op->op_write); if (ret < 0) goto out; + else + ret = 0; rdsdebug("RDS: nr_bytes %u nr %u iov->bytes %llu iov->addr %llx\n", nr_bytes, nr, iov->bytes, iov->addr); diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index 208240836043..b9b40af5345b 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -34,6 +34,7 @@ #include <rdma/rdma_cm.h> #include "rdma_transport.h" +#include "ib.h" static struct rdma_cm_id *rds_rdma_listen_id; @@ -82,8 +83,18 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, break; case RDMA_CM_EVENT_ROUTE_RESOLVED: - /* XXX worry about racing with listen acceptance */ - ret = trans->cm_initiate_connect(cm_id); + /* Connection could have been dropped so make sure the + * cm_id is valid before proceeding + */ + if (conn) { + struct rds_ib_connection *ibic; + + ibic = conn->c_transport_data; + if (ibic && ibic->i_cm_id == cm_id) + ret = trans->cm_initiate_connect(cm_id); + else + rds_conn_drop(conn); + } break; case RDMA_CM_EVENT_ESTABLISHED: diff --git a/net/rds/rds.h b/net/rds/rds.h index 9005fb0586f6..afb4048d0cfd 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -80,6 +80,7 @@ enum { #define RDS_LL_SEND_FULL 0 #define RDS_RECONNECT_PENDING 1 #define RDS_IN_XMIT 2 +#define RDS_RECV_REFILL 3 struct rds_connection { struct hlist_node c_hash_node; diff --git a/net/rds/send.c b/net/rds/send.c index 2581b8e3dbe7..4df61a515b83 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -282,26 +282,34 @@ restart: /* The transport either sends the whole rdma or none of it */ if (rm->rdma.op_active && !conn->c_xmit_rdma_sent) { rm->m_final_op = &rm->rdma; + /* The transport owns the mapped memory for now. + * You can't unmap it while it's on the send queue + */ + set_bit(RDS_MSG_MAPPED, &rm->m_flags); ret = conn->c_trans->xmit_rdma(conn, &rm->rdma); - if (ret) + if (ret) { + clear_bit(RDS_MSG_MAPPED, &rm->m_flags); + wake_up_interruptible(&rm->m_flush_wait); break; + } conn->c_xmit_rdma_sent = 1; - /* The transport owns the mapped memory for now. - * You can't unmap it while it's on the send queue */ - set_bit(RDS_MSG_MAPPED, &rm->m_flags); } if (rm->atomic.op_active && !conn->c_xmit_atomic_sent) { rm->m_final_op = &rm->atomic; + /* The transport owns the mapped memory for now. + * You can't unmap it while it's on the send queue + */ + set_bit(RDS_MSG_MAPPED, &rm->m_flags); ret = conn->c_trans->xmit_atomic(conn, &rm->atomic); - if (ret) + if (ret) { + clear_bit(RDS_MSG_MAPPED, &rm->m_flags); + wake_up_interruptible(&rm->m_flush_wait); break; + } conn->c_xmit_atomic_sent = 1; - /* The transport owns the mapped memory for now. - * You can't unmap it while it's on the send queue */ - set_bit(RDS_MSG_MAPPED, &rm->m_flags); } /* @@ -411,7 +419,8 @@ over_batch: */ if (ret == 0) { smp_mb(); - if (!list_empty(&conn->c_send_queue) && + if ((test_bit(0, &conn->c_map_queued) || + !list_empty(&conn->c_send_queue)) && send_gen == conn->c_send_gen) { rds_stats_inc(s_send_lock_queue_raced); goto restart; @@ -769,8 +778,22 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) while (!list_empty(&list)) { rm = list_entry(list.next, struct rds_message, m_sock_item); list_del_init(&rm->m_sock_item); - rds_message_wait(rm); + + /* just in case the code above skipped this message + * because RDS_MSG_ON_CONN wasn't set, run it again here + * taking m_rs_lock is the only thing that keeps us + * from racing with ack processing. + */ + spin_lock_irqsave(&rm->m_rs_lock, flags); + + spin_lock(&rs->rs_lock); + __rds_send_complete(rs, rm, RDS_RDMA_CANCELED); + spin_unlock(&rs->rs_lock); + + rm->m_rs = NULL; + spin_unlock_irqrestore(&rm->m_rs_lock, flags); + rds_message_put(rm); } } @@ -992,6 +1015,11 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) goto out; } + if (payload_len > rds_sk_sndbuf(rs)) { + ret = -EMSGSIZE; + goto out; + } + /* size of rm including all sgs */ ret = rds_rm_size(msg, payload_len); if (ret < 0) @@ -1064,11 +1092,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) while (!rds_send_queue_rm(rs, conn, rm, rs->rs_bound_port, dport, &queued)) { rds_stats_inc(s_send_queue_full); - /* XXX make sure this is reasonable */ - if (payload_len > rds_sk_sndbuf(rs)) { - ret = -EMSGSIZE; - goto out; - } + if (nonblock) { ret = -EAGAIN; goto out; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index b087087ccfa9..06e7c4a37245 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -36,7 +36,7 @@ static void free_tcf(struct rcu_head *head) kfree(p); } -void tcf_hash_destroy(struct tc_action *a) +static void tcf_hash_destroy(struct tc_action *a) { struct tcf_common *p = a->priv; struct tcf_hashinfo *hinfo = a->ops->hinfo; @@ -52,7 +52,6 @@ void tcf_hash_destroy(struct tc_action *a) */ call_rcu(&p->tcfc_rcu, free_tcf); } -EXPORT_SYMBOL(tcf_hash_destroy); int __tcf_hash_release(struct tc_action *a, bool bind, bool strict) { diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 1b97dabc621a..559bfa011bda 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -37,25 +37,24 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, struct tcf_result *res) { struct tcf_bpf *prog = act->priv; + struct bpf_prog *filter; int action, filter_res; bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS; if (unlikely(!skb_mac_header_was_set(skb))) return TC_ACT_UNSPEC; - spin_lock(&prog->tcf_lock); - - prog->tcf_tm.lastuse = jiffies; - bstats_update(&prog->tcf_bstats, skb); + tcf_lastuse_update(&prog->tcf_tm); + bstats_cpu_update(this_cpu_ptr(prog->common.cpu_bstats), skb); - /* Needed here for accessing maps. */ rcu_read_lock(); + filter = rcu_dereference(prog->filter); if (at_ingress) { __skb_push(skb, skb->mac_len); - filter_res = BPF_PROG_RUN(prog->filter, skb); + filter_res = BPF_PROG_RUN(filter, skb); __skb_pull(skb, skb->mac_len); } else { - filter_res = BPF_PROG_RUN(prog->filter, skb); + filter_res = BPF_PROG_RUN(filter, skb); } rcu_read_unlock(); @@ -77,7 +76,7 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, break; case TC_ACT_SHOT: action = filter_res; - prog->tcf_qstats.drops++; + qstats_drop_inc(this_cpu_ptr(prog->common.cpu_qstats)); break; case TC_ACT_UNSPEC: action = prog->tcf_action; @@ -87,7 +86,6 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, break; } - spin_unlock(&prog->tcf_lock); return action; } @@ -263,7 +261,10 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog, struct tcf_bpf_cfg *cfg) { cfg->is_ebpf = tcf_bpf_is_ebpf(prog); - cfg->filter = prog->filter; + /* updates to prog->filter are prevented, since it's called either + * with rtnl lock or during final cleanup in rcu callback + */ + cfg->filter = rcu_dereference_protected(prog->filter, 1); cfg->bpf_ops = prog->bpf_ops; cfg->bpf_name = prog->bpf_name; @@ -294,7 +295,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, if (!tcf_hash_check(parm->index, act, bind)) { ret = tcf_hash_create(parm->index, est, act, - sizeof(*prog), bind, false); + sizeof(*prog), bind, true); if (ret < 0) return ret; @@ -325,9 +326,9 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, goto out; prog = to_bpf(act); - spin_lock_bh(&prog->tcf_lock); + ASSERT_RTNL(); - if (ret != ACT_P_CREATED) + if (res != ACT_P_CREATED) tcf_bpf_prog_fill_cfg(prog, &old); prog->bpf_ops = cfg.bpf_ops; @@ -339,14 +340,15 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, prog->bpf_fd = cfg.bpf_fd; prog->tcf_action = parm->action; - prog->filter = cfg.filter; - - spin_unlock_bh(&prog->tcf_lock); + rcu_assign_pointer(prog->filter, cfg.filter); - if (res == ACT_P_CREATED) + if (res == ACT_P_CREATED) { tcf_hash_insert(act); - else + } else { + /* make sure the program being replaced is no longer executing */ + synchronize_rcu(); tcf_bpf_cfg_cleanup(&old); + } return res; out: diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index f2b540220ad0..5019a47b9270 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -37,6 +37,7 @@ static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a, struct nf_conntrack_tuple tuple; enum ip_conntrack_info ctinfo; struct tcf_connmark_info *ca = a->priv; + struct nf_conntrack_zone zone; struct nf_conn *c; int proto; @@ -70,7 +71,10 @@ static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a, proto, &tuple)) goto out; - thash = nf_conntrack_find_get(dev_net(skb->dev), ca->zone, &tuple); + zone.id = ca->zone; + zone.dir = NF_CT_DEFAULT_ZONE_DIR; + + thash = nf_conntrack_find_get(dev_net(skb->dev), &zone, &tuple); if (!thash) goto out; diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 5be0b3c1c5b0..b7c4ead8b5a8 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -162,7 +162,8 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a, goto drop; tcph = (void *)(skb_network_header(skb) + ihl); - inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, 1); + inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, + true); break; } case IPPROTO_UDP: @@ -178,7 +179,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a, udph = (void *)(skb_network_header(skb) + ihl); if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { inet_proto_csum_replace4(&udph->check, skb, addr, - new_addr, 1); + new_addr, true); if (!udph->check) udph->check = CSUM_MANGLED_0; } @@ -231,7 +232,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a, iph->saddr = new_addr; inet_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr, - 0); + false); break; } default: diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 02fa82792dab..f9c9fc075fe6 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -283,12 +283,22 @@ static int rsvp_init(struct tcf_proto *tp) return -ENOBUFS; } -static void -rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) +static void rsvp_delete_filter_rcu(struct rcu_head *head) { - tcf_unbind_filter(tp, &f->res); + struct rsvp_filter *f = container_of(head, struct rsvp_filter, rcu); + tcf_exts_destroy(&f->exts); - kfree_rcu(f, rcu); + kfree(f); +} + +static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) +{ + tcf_unbind_filter(tp, &f->res); + /* all classifiers are required to call tcf_exts_destroy() after rcu + * grace period, since converted-to-rcu actions are relying on that + * in cleanup() callback + */ + call_rcu(&f->rcu, rsvp_delete_filter_rcu); } static bool rsvp_destroy(struct tcf_proto *tp, bool force) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index a557dbaf5afe..944c8ff45055 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -27,6 +27,7 @@ struct tcindex_filter_result { struct tcf_exts exts; struct tcf_result res; + struct rcu_head rcu; }; struct tcindex_filter { @@ -133,8 +134,23 @@ static int tcindex_init(struct tcf_proto *tp) return 0; } -static int -tcindex_delete(struct tcf_proto *tp, unsigned long arg) +static void tcindex_destroy_rexts(struct rcu_head *head) +{ + struct tcindex_filter_result *r; + + r = container_of(head, struct tcindex_filter_result, rcu); + tcf_exts_destroy(&r->exts); +} + +static void tcindex_destroy_fexts(struct rcu_head *head) +{ + struct tcindex_filter *f = container_of(head, struct tcindex_filter, rcu); + + tcf_exts_destroy(&f->result.exts); + kfree(f); +} + +static int tcindex_delete(struct tcf_proto *tp, unsigned long arg) { struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg; @@ -162,9 +178,14 @@ found: rcu_assign_pointer(*walk, rtnl_dereference(f->next)); } tcf_unbind_filter(tp, &r->res); - tcf_exts_destroy(&r->exts); + /* all classifiers are required to call tcf_exts_destroy() after rcu + * grace period, since converted-to-rcu actions are relying on that + * in cleanup() callback + */ if (f) - kfree_rcu(f, rcu); + call_rcu(&f->rcu, tcindex_destroy_fexts); + else + call_rcu(&r->rcu, tcindex_destroy_rexts); return 0; } diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index cab9e9b43967..4fbb67430ce4 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -490,6 +490,19 @@ static bool u32_destroy(struct tcf_proto *tp, bool force) return false; } } + + if (tp_c->refcnt > 1) + return false; + + if (tp_c->refcnt == 1) { + struct tc_u_hnode *ht; + + for (ht = rtnl_dereference(tp_c->hlist); + ht; + ht = rtnl_dereference(ht->next)) + if (!ht_empty(ht)) + return false; + } } if (root_ht && --root_ht->refcnt == 0) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index f06aa01d60fd..f43c8f33f09e 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1806,51 +1806,45 @@ done: * to this qdisc, (optionally) tests for protocol and asks * specific classifiers. */ -int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res) +int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res, bool compat_mode) { __be16 protocol = tc_skb_protocol(skb); - int err; +#ifdef CONFIG_NET_CLS_ACT + const struct tcf_proto *old_tp = tp; + int limit = 0; +reclassify: +#endif for (; tp; tp = rcu_dereference_bh(tp->next)) { + int err; + if (tp->protocol != protocol && tp->protocol != htons(ETH_P_ALL)) continue; - err = tp->classify(skb, tp, res); + err = tp->classify(skb, tp, res); +#ifdef CONFIG_NET_CLS_ACT + if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) + goto reset; +#endif if (err >= 0) return err; } - return -1; -} -EXPORT_SYMBOL(tc_classify_compat); -int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res) -{ - int err = 0; -#ifdef CONFIG_NET_CLS_ACT - const struct tcf_proto *otp = tp; - int limit = 0; -reclassify: -#endif - - err = tc_classify_compat(skb, tp, res); + return -1; #ifdef CONFIG_NET_CLS_ACT - if (err == TC_ACT_RECLASSIFY) { - tp = otp; - - if (unlikely(limit++ >= MAX_REC_LOOP)) { - net_notice_ratelimited("%s: packet reclassify loop rule prio %u protocol %02x\n", - tp->q->ops->id, - tp->prio & 0xffff, - ntohs(tp->protocol)); - return TC_ACT_SHOT; - } - goto reclassify; +reset: + if (unlikely(limit++ >= MAX_REC_LOOP)) { + net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n", + tp->q->ops->id, tp->prio & 0xffff, + ntohs(tp->protocol)); + return TC_ACT_SHOT; } + + tp = old_tp; + goto reclassify; #endif - return err; } EXPORT_SYMBOL(tc_classify); @@ -1947,6 +1941,7 @@ static int __init pktsched_init(void) register_qdisc(&bfifo_qdisc_ops); register_qdisc(&pfifo_head_drop_qdisc_ops); register_qdisc(&mq_qdisc_ops); + register_qdisc(&noqueue_qdisc_ops); rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL); rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL); diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index e3e2cc5fd068..1911af3ca7c0 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -375,7 +375,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) list_for_each_entry(flow, &p->flows, list) { fl = rcu_dereference_bh(flow->filter_list); if (fl) { - result = tc_classify_compat(skb, fl, &res); + result = tc_classify(skb, fl, &res, true); if (result < 0) continue; flow = (struct atm_flow_data *)res.class; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index beeb75f80fdb..c538d9e4a8f6 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -240,7 +240,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) /* * Step 2+n. Apply classifier. */ - result = tc_classify_compat(skb, fl, &res); + result = tc_classify(skb, fl, &res, true); if (!fl || result < 0) goto fallback; diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 6a783afe4960..665bde07916b 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -201,7 +201,7 @@ static bool choke_classify(struct sk_buff *skb, int result; fl = rcu_dereference_bh(q->filter_list); - result = tc_classify(skb, fl, &res); + result = tc_classify(skb, fl, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 338706092c27..f26bdea875c1 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -331,7 +331,7 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch, *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; fl = rcu_dereference_bh(q->filter_list); - result = tc_classify(skb, fl, &res); + result = tc_classify(skb, fl, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 66700a6116aa..c4d45fd8c551 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -230,7 +230,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) else { struct tcf_result res; struct tcf_proto *fl = rcu_dereference_bh(p->filter_list); - int result = tc_classify(skb, fl, &res); + int result = tc_classify(skb, fl, &res, false); pr_debug("result %d class 0x%04x\n", result, res.classid); diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 2e2398cfc694..2177eac0a61e 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -54,7 +54,7 @@ static int fifo_init(struct Qdisc *sch, struct nlattr *opt) bool is_bfifo = sch->ops == &bfifo_qdisc_ops; if (opt == NULL) { - u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1; + u32 limit = qdisc_dev(sch)->tx_queue_len; if (is_bfifo) limit *= psched_mtu(qdisc_dev(sch)); diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index a9ba030435a2..4c834e93dafb 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -92,7 +92,7 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch, return fq_codel_hash(q, skb) + 1; *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - result = tc_classify(skb, filter, &res); + result = tc_classify(skb, filter, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 942fea8405a4..cb5d4ad32946 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -416,33 +416,25 @@ struct Qdisc noop_qdisc = { }; EXPORT_SYMBOL(noop_qdisc); -static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = { +static int noqueue_init(struct Qdisc *qdisc, struct nlattr *opt) +{ + /* register_qdisc() assigns a default of noop_enqueue if unset, + * but __dev_queue_xmit() treats noqueue only as such + * if this is NULL - so clear it here. */ + qdisc->enqueue = NULL; + return 0; +} + +struct Qdisc_ops noqueue_qdisc_ops __read_mostly = { .id = "noqueue", .priv_size = 0, + .init = noqueue_init, .enqueue = noop_enqueue, .dequeue = noop_dequeue, .peek = noop_dequeue, .owner = THIS_MODULE, }; -static struct Qdisc noqueue_qdisc; -static struct netdev_queue noqueue_netdev_queue = { - .qdisc = &noqueue_qdisc, - .qdisc_sleeping = &noqueue_qdisc, -}; - -static struct Qdisc noqueue_qdisc = { - .enqueue = NULL, - .dequeue = noop_dequeue, - .flags = TCQ_F_BUILTIN, - .ops = &noqueue_qdisc_ops, - .list = LIST_HEAD_INIT(noqueue_qdisc.list), - .q.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock), - .dev_queue = &noqueue_netdev_queue, - .busylock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.busylock), -}; - - static const u8 prio2band[TC_PRIO_MAX + 1] = { 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 }; @@ -733,18 +725,19 @@ static void attach_one_default_qdisc(struct net_device *dev, struct netdev_queue *dev_queue, void *_unused) { - struct Qdisc *qdisc = &noqueue_qdisc; + struct Qdisc *qdisc; + const struct Qdisc_ops *ops = default_qdisc_ops; - if (dev->tx_queue_len && !(dev->priv_flags & IFF_NO_QUEUE)) { - qdisc = qdisc_create_dflt(dev_queue, - default_qdisc_ops, TC_H_ROOT); - if (!qdisc) { - netdev_info(dev, "activation failed\n"); - return; - } - if (!netif_is_multiqueue(dev)) - qdisc->flags |= TCQ_F_ONETXQUEUE; + if (dev->priv_flags & IFF_NO_QUEUE) + ops = &noqueue_qdisc_ops; + + qdisc = qdisc_create_dflt(dev_queue, ops, TC_H_ROOT); + if (!qdisc) { + netdev_info(dev, "activation failed\n"); + return; } + if (!netif_is_multiqueue(dev)) + qdisc->flags |= TCQ_F_ONETXQUEUE; dev_queue->qdisc_sleeping = qdisc; } @@ -756,7 +749,6 @@ static void attach_default_qdiscs(struct net_device *dev) txq = netdev_get_tx_queue(dev, 0); if (!netif_is_multiqueue(dev) || - dev->tx_queue_len == 0 || dev->priv_flags & IFF_NO_QUEUE) { netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL); dev->qdisc = txq->qdisc_sleeping; @@ -781,7 +773,7 @@ static void transition_one_qdisc(struct net_device *dev, clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state); rcu_assign_pointer(dev_queue->qdisc, new_qdisc); - if (need_watchdog_p && new_qdisc != &noqueue_qdisc) { + if (need_watchdog_p) { dev_queue->trans_start = 0; *need_watchdog_p = 1; } diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index abb9f2fec28f..80105109f756 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -512,11 +512,9 @@ static int gred_init(struct Qdisc *sch, struct nlattr *opt) if (tb[TCA_GRED_LIMIT]) sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]); - else { - u32 qlen = qdisc_dev(sch)->tx_queue_len ? : 1; - - sch->limit = qlen * psched_mtu(qdisc_dev(sch)); - } + else + sch->limit = qdisc_dev(sch)->tx_queue_len + * psched_mtu(qdisc_dev(sch)); return gred_change_table_def(sch, tb[TCA_GRED_DPS]); } diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index e6c7416d0332..b7ebe2c87586 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1165,7 +1165,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; head = &q->root; tcf = rcu_dereference_bh(q->root.filter_list); - while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) { + while (tcf && (result = tc_classify(skb, tcf, &res, false)) >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_QUEUED: diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index f1acb0f60dc3..15ccd7f8fb2a 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -229,7 +229,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, } *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) { + while (tcf && (result = tc_classify(skb, tcf, &res, false)) >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_QUEUED: @@ -1048,11 +1048,9 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) if (tb[TCA_HTB_DIRECT_QLEN]) q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]); - else { + else q->direct_qlen = qdisc_dev(sch)->tx_queue_len; - if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */ - q->direct_qlen = 2; - } + if ((q->rate2quantum = gopt->rate2quantum) < 1) q->rate2quantum = 1; q->defcls = gopt->defcls; diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 42dd218871e0..4e904ca0af9d 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -46,7 +46,7 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) int err; *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - err = tc_classify(skb, fl, &res); + err = tc_classify(skb, fl, &res, false); #ifdef CONFIG_NET_CLS_ACT switch (err) { case TC_ACT_STOLEN: diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c index ade9445a55ab..5abfe44678d4 100644 --- a/net/sched/sch_plug.c +++ b/net/sched/sch_plug.c @@ -130,12 +130,8 @@ static int plug_init(struct Qdisc *sch, struct nlattr *opt) q->unplug_indefinite = false; if (opt == NULL) { - /* We will set a default limit of 100 pkts (~150kB) - * in case tx_queue_len is not available. The - * default value is completely arbitrary. - */ - u32 pkt_limit = qdisc_dev(sch)->tx_queue_len ? : 100; - q->limit = pkt_limit * psched_mtu(qdisc_dev(sch)); + q->limit = qdisc_dev(sch)->tx_queue_len + * psched_mtu(qdisc_dev(sch)); } else { struct tc_plug_qopt *ctl = nla_data(opt); diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 8e5cd34aaa74..ba6487f2741f 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -42,7 +42,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; if (TC_H_MAJ(skb->priority) != sch->handle) { fl = rcu_dereference_bh(q->filter_list); - err = tc_classify(skb, fl, &res); + err = tc_classify(skb, fl, &res, false); #ifdef CONFIG_NET_CLS_ACT switch (err) { case TC_ACT_STOLEN: diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index ffaeea63d473..3dc3a6e56052 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -717,7 +717,7 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch, *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; fl = rcu_dereference_bh(q->filter_list); - result = tc_classify(skb, fl, &res); + result = tc_classify(skb, fl, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 4b815193326c..5bbb6332ec57 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -258,7 +258,7 @@ static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl, struct tcf_result res; int result; - result = tc_classify(skb, fl, &res); + result = tc_classify(skb, fl, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { @@ -502,7 +502,7 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt) limit = ctl->limit; if (limit == 0) - limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1); + limit = qdisc_dev(sch)->tx_queue_len; child = fifo_create_dflt(sch, &pfifo_qdisc_ops, limit); if (IS_ERR(child)) diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 52f75a5473e1..3abab534eb5c 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -179,7 +179,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, return sfq_hash(q, skb) + 1; *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - result = tc_classify(skb, fl, &res); + result = tc_classify(skb, fl, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 06320c8c1c86..a655ddc3f353 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -3132,11 +3132,18 @@ bool sctp_verify_asconf(const struct sctp_association *asoc, case SCTP_PARAM_IPV4_ADDRESS: if (length != sizeof(sctp_ipv4addr_param_t)) return false; + /* ensure there is only one addr param and it's in the + * beginning of addip_hdr params, or we reject it. + */ + if (param.v != addip->addip_hdr.params) + return false; addr_param_seen = true; break; case SCTP_PARAM_IPV6_ADDRESS: if (length != sizeof(sctp_ipv6addr_param_t)) return false; + if (param.v != addip->addip_hdr.params) + return false; addr_param_seen = true; break; case SCTP_PARAM_ADD_IP: diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index fef2acdf4a2e..85e6f03aeb70 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -702,7 +702,7 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds, * outstanding data and rely on the retransmission limit be reached * to shutdown the association. */ - if (t->asoc->state != SCTP_STATE_SHUTDOWN_PENDING) + if (t->asoc->state < SCTP_STATE_SHUTDOWN_PENDING) t->asoc->overall_error_count = 0; /* Clear the hb_sent flag to signal that we had a good diff --git a/net/tipc/link.c b/net/tipc/link.c index f067e5425560..75db07c78a69 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -351,11 +351,11 @@ int tipc_link_fsm_evt(struct tipc_link *l, int evt) l->state = LINK_RESET; break; case LINK_ESTABLISH_EVT: + case LINK_SYNCH_END_EVT: break; case LINK_SYNCH_BEGIN_EVT: l->state = LINK_SYNCHING; break; - case LINK_SYNCH_END_EVT: case LINK_FAILOVER_BEGIN_EVT: case LINK_FAILOVER_END_EVT: default: @@ -1330,6 +1330,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, u16 peers_snd_nxt = msg_next_sent(hdr); u16 peers_tol = msg_link_tolerance(hdr); u16 peers_prio = msg_linkprio(hdr); + u16 rcv_nxt = l->rcv_nxt; char *if_name; int rc = 0; @@ -1393,7 +1394,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, break; /* Send NACK if peer has sent pkts we haven't received yet */ - if (more(peers_snd_nxt, l->rcv_nxt)) + if (more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l)) rcvgap = peers_snd_nxt - l->rcv_nxt; if (rcvgap || (msg_probe(hdr))) tipc_link_build_proto_msg(l, STATE_MSG, 0, rcvgap, diff --git a/net/tipc/node.c b/net/tipc/node.c index 7c191641b44f..703875fd6cde 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -423,6 +423,8 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, /* There is still a working link => initiate failover */ tnl = node_active_link(n, 0); + tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT); + tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT); n->sync_point = tnl->rcv_nxt + (U16_MAX / 2 - 1); tipc_link_tnl_prepare(l, tnl, FAILOVER_MSG, xmitq); tipc_link_reset(l); @@ -565,6 +567,8 @@ void tipc_node_check_dest(struct net *net, u32 onode, goto exit; } tipc_link_reset(l); + if (n->state == NODE_FAILINGOVER) + tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT); le->link = l; n->link_cnt++; tipc_node_calculate_timer(n, l); @@ -1075,7 +1079,7 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, u16 exp_pkts = msg_msgcnt(hdr); u16 rcv_nxt, syncpt, dlv_nxt; int state = n->state; - struct tipc_link *l, *pl = NULL; + struct tipc_link *l, *tnl, *pl = NULL; struct tipc_media_addr *maddr; int i, pb_id; @@ -1129,7 +1133,7 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, } /* Open parallel link when tunnel link reaches synch point */ - if ((n->state == NODE_FAILINGOVER) && !tipc_link_is_failingover(l)) { + if ((n->state == NODE_FAILINGOVER) && tipc_link_is_up(l)) { if (!more(rcv_nxt, n->sync_point)) return true; tipc_node_fsm_evt(n, NODE_FAILOVER_END_EVT); @@ -1138,6 +1142,10 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, return true; } + /* No synching needed if only one link */ + if (!pl || !tipc_link_is_up(pl)) + return true; + /* Initiate or update synch mode if applicable */ if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG)) { syncpt = iseqno + exp_pkts - 1; @@ -1156,13 +1164,20 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, /* Open tunnel link when parallel link reaches synch point */ if ((n->state == NODE_SYNCHING) && tipc_link_is_synching(l)) { - if (pl) - dlv_nxt = mod(pl->rcv_nxt - skb_queue_len(pl->inputq)); - if (!pl || more(dlv_nxt, n->sync_point)) { - tipc_link_fsm_evt(l, LINK_SYNCH_END_EVT); + if (tipc_link_is_synching(l)) { + tnl = l; + } else { + tnl = pl; + pl = l; + } + dlv_nxt = pl->rcv_nxt - mod(skb_queue_len(pl->inputq)); + if (more(dlv_nxt, n->sync_point)) { + tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT); tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT); return true; } + if (l == pl) + return true; if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG)) return true; if (usr == LINK_PROTOCOL) |