From c9ddbac9c89110f77cb0fa07e634aaf1194899aa Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 14 Jul 2015 18:27:46 -0500 Subject: PCI: Restore PCI_MSIX_FLAGS_BIRMASK definition 09a2c73ddfc7 ("PCI: Remove unused PCI_MSIX_FLAGS_BIRMASK definition") removed PCI_MSIX_FLAGS_BIRMASK from an exported header because it was unused in the kernel. But that breaks user programs that were using it (QEMU in particular). Restore the PCI_MSIX_FLAGS_BIRMASK definition. [bhelgaas: changelog] Signed-off-by: Michael S. Tsirkin Signed-off-by: Bjorn Helgaas CC: stable@vger.kernel.org # v3.13+ --- include/uapi/linux/pci_regs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index efe3443572ba..413417f3707b 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -319,6 +319,7 @@ #define PCI_MSIX_PBA 8 /* Pending Bit Array offset */ #define PCI_MSIX_PBA_BIR 0x00000007 /* BAR index */ #define PCI_MSIX_PBA_OFFSET 0xfffffff8 /* Offset into specified BAR */ +#define PCI_MSIX_FLAGS_BIRMASK PCI_MSIX_PBA_BIR /* deprecated */ #define PCI_CAP_MSIX_SIZEOF 12 /* size of MSIX registers */ /* MSI-X Table entry format */ -- cgit v1.2.3 From 5c48f1201744233d4f235c7dd916d5196ed20716 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 17 Jun 2015 09:58:06 +0200 Subject: mac80211: remove exposing 'mfp' to drivers There's no driver using this, so remove it. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 -- net/mac80211/cfg.c | 1 - net/mac80211/mlme.c | 6 +----- 3 files changed, 1 insertion(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 6b1077c2a63f..43dbddfa06c0 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1675,7 +1675,6 @@ struct ieee80211_sta_rates { * @tdls: indicates whether the STA is a TDLS peer * @tdls_initiator: indicates the STA is an initiator of the TDLS link. Only * valid if the STA is a TDLS peer in the first place. - * @mfp: indicates whether the STA uses management frame protection or not. * @txq: per-TID data TX queues (if driver uses the TXQ abstraction) */ struct ieee80211_sta { @@ -1693,7 +1692,6 @@ struct ieee80211_sta { struct ieee80211_sta_rates __rcu *rates; bool tdls; bool tdls_initiator; - bool mfp; struct ieee80211_txq *txq[IEEE80211_NUM_TIDS]; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index bf7023f6c327..5fc7788e2ff2 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1076,7 +1076,6 @@ static int sta_apply_parameters(struct ieee80211_local *local, } if (mask & BIT(NL80211_STA_FLAG_MFP)) { - sta->sta.mfp = !!(set & BIT(NL80211_STA_FLAG_MFP)); if (set & BIT(NL80211_STA_FLAG_MFP)) set_sta_flag(sta, WLAN_STA_MFP); else diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 9b2cc278ac2a..ae5d6c48272d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3034,12 +3034,8 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, rate_control_rate_init(sta); - if (ifmgd->flags & IEEE80211_STA_MFP_ENABLED) { + if (ifmgd->flags & IEEE80211_STA_MFP_ENABLED) set_sta_flag(sta, WLAN_STA_MFP); - sta->sta.mfp = true; - } else { - sta->sta.mfp = false; - } sta->sta.wme = elems.wmm_param && local->hw.queues >= IEEE80211_NUM_ACS; -- cgit v1.2.3 From af9f9b22beee70aae58651cdbb9d6375e6e51797 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 11 Jun 2015 16:02:32 +0200 Subject: mac80211: don't store napi struct When introducing multiple RX queues, a single NAPI struct will not be sufficient. Instead of trying to store multiple, simply change the API to have the NAPI struct passed to the RX function. This of course means that drivers using rx_irqsafe() cannot use NAPI, but that seems a reasonable trade-off, particularly since only two of all drivers are currently using it at all. While at it, we can now remove the IEEE80211_RX_REORDER_TIMER flag again since this code path cannot have a napi struct anyway. Signed-off-by: Johannes Berg --- drivers/net/wireless/iwlwifi/dvm/dev.h | 2 ++ drivers/net/wireless/iwlwifi/dvm/main.c | 3 ++- drivers/net/wireless/iwlwifi/dvm/rx.c | 2 +- drivers/net/wireless/iwlwifi/mvm/mvm.h | 1 + drivers/net/wireless/iwlwifi/mvm/ops.c | 3 ++- drivers/net/wireless/iwlwifi/mvm/rx.c | 2 +- include/net/mac80211.h | 37 +++++++++++++++++++++------------ net/mac80211/ieee80211_i.h | 6 +----- net/mac80211/main.c | 12 ----------- net/mac80211/rx.c | 18 +++++++++------- 10 files changed, 44 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/iwlwifi/dvm/dev.h b/drivers/net/wireless/iwlwifi/dvm/dev.h index 3811878ab9cd..074977ede343 100644 --- a/drivers/net/wireless/iwlwifi/dvm/dev.h +++ b/drivers/net/wireless/iwlwifi/dvm/dev.h @@ -669,6 +669,8 @@ struct iwl_priv { /* ieee device used by generic ieee processing code */ struct ieee80211_hw *hw; + struct napi_struct *napi; + struct list_head calib_results; struct workqueue_struct *workqueue; diff --git a/drivers/net/wireless/iwlwifi/dvm/main.c b/drivers/net/wireless/iwlwifi/dvm/main.c index 234e30f498b2..644819563cf0 100644 --- a/drivers/net/wireless/iwlwifi/dvm/main.c +++ b/drivers/net/wireless/iwlwifi/dvm/main.c @@ -2037,7 +2037,8 @@ static void iwl_napi_add(struct iwl_op_mode *op_mode, { struct iwl_priv *priv = IWL_OP_MODE_GET_DVM(op_mode); - ieee80211_napi_add(priv->hw, napi, napi_dev, poll, weight); + netif_napi_add(napi_dev, napi, poll, weight); + priv->napi = napi; } static const struct iwl_op_mode_ops iwl_dvm_ops = { diff --git a/drivers/net/wireless/iwlwifi/dvm/rx.c b/drivers/net/wireless/iwlwifi/dvm/rx.c index debec963c610..5a91f5d6b1dc 100644 --- a/drivers/net/wireless/iwlwifi/dvm/rx.c +++ b/drivers/net/wireless/iwlwifi/dvm/rx.c @@ -786,7 +786,7 @@ static void iwlagn_pass_packet_to_mac80211(struct iwl_priv *priv, memcpy(IEEE80211_SKB_RXCB(skb), stats, sizeof(*stats)); - ieee80211_rx(priv->hw, skb); + ieee80211_rx_napi(priv->hw, skb, priv->napi); } static u32 iwlagn_translate_rx_status(struct iwl_priv *priv, u32 decrypt_in) diff --git a/drivers/net/wireless/iwlwifi/mvm/mvm.h b/drivers/net/wireless/iwlwifi/mvm/mvm.h index 2d4bad5fe825..605f57a2c6be 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/iwlwifi/mvm/mvm.h @@ -559,6 +559,7 @@ struct iwl_mvm { const struct iwl_cfg *cfg; struct iwl_phy_db *phy_db; struct ieee80211_hw *hw; + struct napi_struct *napi; /* for protecting access to iwl_mvm */ struct mutex mutex; diff --git a/drivers/net/wireless/iwlwifi/mvm/ops.c b/drivers/net/wireless/iwlwifi/mvm/ops.c index e4fa50075ffd..3967df63e0f3 100644 --- a/drivers/net/wireless/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/iwlwifi/mvm/ops.c @@ -1316,7 +1316,8 @@ static void iwl_mvm_napi_add(struct iwl_op_mode *op_mode, { struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode); - ieee80211_napi_add(mvm->hw, napi, napi_dev, poll, weight); + netif_napi_add(napi_dev, napi, poll, weight); + mvm->napi = napi; } static const struct iwl_op_mode_ops iwl_mvm_ops = { diff --git a/drivers/net/wireless/iwlwifi/mvm/rx.c b/drivers/net/wireless/iwlwifi/mvm/rx.c index 8f1d93b7a13a..9ff0b4321df3 100644 --- a/drivers/net/wireless/iwlwifi/mvm/rx.c +++ b/drivers/net/wireless/iwlwifi/mvm/rx.c @@ -129,7 +129,7 @@ static void iwl_mvm_pass_packet_to_mac80211(struct iwl_mvm *mvm, fraglen, rxb->truesize); } - ieee80211_rx(mvm->hw, skb); + ieee80211_rx_napi(mvm->hw, skb, mvm->napi); } /* diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 43dbddfa06c0..ff68b8c4ab35 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3694,20 +3694,28 @@ void ieee80211_free_hw(struct ieee80211_hw *hw); void ieee80211_restart_hw(struct ieee80211_hw *hw); /** - * ieee80211_napi_add - initialize mac80211 NAPI context - * @hw: the hardware to initialize the NAPI context on - * @napi: the NAPI context to initialize - * @napi_dev: dummy NAPI netdevice, here to not waste the space if the - * driver doesn't use NAPI - * @poll: poll function - * @weight: default weight + * ieee80211_rx_napi - receive frame from NAPI context * - * See also netif_napi_add(). + * Use this function to hand received frames to mac80211. The receive + * buffer in @skb must start with an IEEE 802.11 header. In case of a + * paged @skb is used, the driver is recommended to put the ieee80211 + * header of the frame on the linear part of the @skb to avoid memory + * allocation and/or memcpy by the stack. + * + * This function may not be called in IRQ context. Calls to this function + * for a single hardware must be synchronized against each other. Calls to + * this function, ieee80211_rx_ni() and ieee80211_rx_irqsafe() may not be + * mixed for a single hardware. Must not run concurrently with + * ieee80211_tx_status() or ieee80211_tx_status_ni(). + * + * This function must be called with BHs disabled. + * + * @hw: the hardware this frame came in on + * @skb: the buffer to receive, owned by mac80211 after this call + * @napi: the NAPI context */ -void ieee80211_napi_add(struct ieee80211_hw *hw, struct napi_struct *napi, - struct net_device *napi_dev, - int (*poll)(struct napi_struct *, int), - int weight); +void ieee80211_rx_napi(struct ieee80211_hw *hw, struct sk_buff *skb, + struct napi_struct *napi); /** * ieee80211_rx - receive frame @@ -3729,7 +3737,10 @@ void ieee80211_napi_add(struct ieee80211_hw *hw, struct napi_struct *napi, * @hw: the hardware this frame came in on * @skb: the buffer to receive, owned by mac80211 after this call */ -void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb); +static inline void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb) +{ + ieee80211_rx_napi(hw, skb, NULL); +} /** * ieee80211_rx_irqsafe - receive frame diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 361bb3ca335c..7d75f93bac7d 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -202,8 +202,6 @@ enum ieee80211_packet_rx_flags { * @IEEE80211_RX_CMNTR: received on cooked monitor already * @IEEE80211_RX_BEACON_REPORTED: This frame was already reported * to cfg80211_report_obss_beacon(). - * @IEEE80211_RX_REORDER_TIMER: this frame is released by the - * reorder buffer timeout timer, not the normal RX path * * These flags are used across handling multiple interfaces * for a single frame. @@ -211,10 +209,10 @@ enum ieee80211_packet_rx_flags { enum ieee80211_rx_flags { IEEE80211_RX_CMNTR = BIT(0), IEEE80211_RX_BEACON_REPORTED = BIT(1), - IEEE80211_RX_REORDER_TIMER = BIT(2), }; struct ieee80211_rx_data { + struct napi_struct *napi; struct sk_buff *skb; struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; @@ -1347,8 +1345,6 @@ struct ieee80211_local { struct ieee80211_sub_if_data __rcu *p2p_sdata; - struct napi_struct *napi; - /* virtual monitor interface */ struct ieee80211_sub_if_data __rcu *monitor_sdata; struct cfg80211_chan_def monitor_chandef; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 3c63468b4dfb..dba0a86dee18 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1132,18 +1132,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) } EXPORT_SYMBOL(ieee80211_register_hw); -void ieee80211_napi_add(struct ieee80211_hw *hw, struct napi_struct *napi, - struct net_device *napi_dev, - int (*poll)(struct napi_struct *, int), - int weight) -{ - struct ieee80211_local *local = hw_to_local(hw); - - netif_napi_add(napi_dev, napi, poll, weight); - local->napi = napi; -} -EXPORT_SYMBOL_GPL(ieee80211_napi_add); - void ieee80211_unregister_hw(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index dd6bb2a54d45..817bf22dad5a 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2148,9 +2148,8 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) /* deliver to local stack */ skb->protocol = eth_type_trans(skb, dev); memset(skb->cb, 0, sizeof(skb->cb)); - if (!(rx->flags & IEEE80211_RX_REORDER_TIMER) && - rx->local->napi) - napi_gro_receive(rx->local->napi, skb); + if (rx->napi) + napi_gro_receive(rx->napi, skb); else netif_receive_skb(skb); } @@ -3256,7 +3255,7 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid) /* This is OK -- must be QoS data frame */ .security_idx = tid, .seqno_idx = tid, - .flags = IEEE80211_RX_REORDER_TIMER, + .napi = NULL, /* must be NULL to not have races */ }; struct tid_ampdu_rx *tid_agg_rx; @@ -3433,7 +3432,8 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx, * be called with rcu_read_lock protection. */ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, - struct sk_buff *skb) + struct sk_buff *skb, + struct napi_struct *napi) { struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_sub_if_data *sdata; @@ -3449,6 +3449,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, memset(&rx, 0, sizeof(rx)); rx.skb = skb; rx.local = local; + rx.napi = napi; if (ieee80211_is_data(fc) || ieee80211_is_mgmt(fc)) I802_DEBUG_INC(local->dot11ReceivedFragmentCount); @@ -3550,7 +3551,8 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, * This is the receive path handler. It is called by a low level driver when an * 802.11 MPDU is received from the hardware. */ -void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb) +void ieee80211_rx_napi(struct ieee80211_hw *hw, struct sk_buff *skb, + struct napi_struct *napi) { struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_rate *rate = NULL; @@ -3649,7 +3651,7 @@ void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb) ieee80211_tpt_led_trig_rx(local, ((struct ieee80211_hdr *)skb->data)->frame_control, skb->len); - __ieee80211_rx_handle_packet(hw, skb); + __ieee80211_rx_handle_packet(hw, skb, napi); rcu_read_unlock(); @@ -3657,7 +3659,7 @@ void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb) drop: kfree_skb(skb); } -EXPORT_SYMBOL(ieee80211_rx); +EXPORT_SYMBOL(ieee80211_rx_napi); /* This is a version of the rx handler that can be called from hard irq * context. Post the skb on the queue and schedule the tasklet */ -- cgit v1.2.3 From 0c028b5fd1bd10d5777756e571c6c1971f04062b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 12 Jun 2015 14:33:54 +0200 Subject: mac80211: remove zero-length A-MPDU subframe reporting As there's no driver using this capability and reporting zero-length A-MPDU subframes for radiotap monitoring, remove the capability to free up two RX flags. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 6 +----- net/mac80211/rx.c | 7 +------ 2 files changed, 2 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index ff68b8c4ab35..7417fee18185 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -997,9 +997,6 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * @RX_FLAG_AMPDU_DETAILS: A-MPDU details are known, in particular the reference * number (@ampdu_reference) must be populated and be a distinct number for * each A-MPDU - * @RX_FLAG_AMPDU_REPORT_ZEROLEN: driver reports 0-length subframes - * @RX_FLAG_AMPDU_IS_ZEROLEN: This is a zero-length subframe, for - * monitoring purposes only * @RX_FLAG_AMPDU_LAST_KNOWN: last subframe is known, should be set on all * subframes of a single A-MPDU * @RX_FLAG_AMPDU_IS_LAST: this subframe is the last subframe of the A-MPDU @@ -1039,8 +1036,7 @@ enum mac80211_rx_flags { RX_FLAG_NO_SIGNAL_VAL = BIT(12), RX_FLAG_HT_GF = BIT(13), RX_FLAG_AMPDU_DETAILS = BIT(14), - RX_FLAG_AMPDU_REPORT_ZEROLEN = BIT(15), - RX_FLAG_AMPDU_IS_ZEROLEN = BIT(16), + /* bits 15/16 free */ RX_FLAG_AMPDU_LAST_KNOWN = BIT(17), RX_FLAG_AMPDU_IS_LAST = BIT(18), RX_FLAG_AMPDU_DELIM_CRC_ERROR = BIT(19), diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 817bf22dad5a..9d95cb8e8e95 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -122,8 +122,7 @@ static inline bool should_drop_frame(struct sk_buff *skb, int present_fcs_len, hdr = (void *)(skb->data + rtap_vendor_space); if (status->flag & (RX_FLAG_FAILED_FCS_CRC | - RX_FLAG_FAILED_PLCP_CRC | - RX_FLAG_AMPDU_IS_ZEROLEN)) + RX_FLAG_FAILED_PLCP_CRC)) return true; if (unlikely(skb->len < 16 + present_fcs_len + rtap_vendor_space)) @@ -391,10 +390,6 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, cpu_to_le32(1 << IEEE80211_RADIOTAP_AMPDU_STATUS); put_unaligned_le32(status->ampdu_reference, pos); pos += 4; - if (status->flag & RX_FLAG_AMPDU_REPORT_ZEROLEN) - flags |= IEEE80211_RADIOTAP_AMPDU_REPORT_ZEROLEN; - if (status->flag & RX_FLAG_AMPDU_IS_ZEROLEN) - flags |= IEEE80211_RADIOTAP_AMPDU_IS_ZEROLEN; if (status->flag & RX_FLAG_AMPDU_LAST_KNOWN) flags |= IEEE80211_RADIOTAP_AMPDU_LAST_KNOWN; if (status->flag & RX_FLAG_AMPDU_IS_LAST) -- cgit v1.2.3 From 981d94a80174e4f33bd5015fb49051bfc2eb00d2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 12 Jun 2015 14:39:02 +0200 Subject: mac80211: support device/driver PN check for CCMP/GCMP When there are multiple RX queues, the PN checks in mac80211 cannot be used since packets might be processed out of order on different CPUs. Allow the driver to report that the PN has been checked, drivers that will use multi-queue RX will have to set this flag. For now, the flag is only valid when the frame has been decrypted, in theory that restriction doesn't have to be there, but in practice the hardware will have decrypted the frame already. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 7 ++++- net/mac80211/wpa.c | 83 +++++++++++++++++++++++++++----------------------- 2 files changed, 51 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 7417fee18185..4d3d2686f278 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -973,6 +973,10 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * @RX_FLAG_IV_STRIPPED: The IV/ICV are stripped from this frame. * If this flag is set, the stack cannot do any replay detection * hence the driver or hardware will have to do that. + * @RX_FLAG_PN_VALIDATED: Currently only valid for CCMP/GCMP frames, this + * flag indicates that the PN was verified for replay protection. + * Note that this flag is also currently only supported when a frame + * is also decrypted (ie. @RX_FLAG_DECRYPTED must be set) * @RX_FLAG_FAILED_FCS_CRC: Set this flag if the FCS check failed on * the frame. * @RX_FLAG_FAILED_PLCP_CRC: Set this flag if the PCLP check failed on @@ -1036,7 +1040,8 @@ enum mac80211_rx_flags { RX_FLAG_NO_SIGNAL_VAL = BIT(12), RX_FLAG_HT_GF = BIT(13), RX_FLAG_AMPDU_DETAILS = BIT(14), - /* bits 15/16 free */ + RX_FLAG_PN_VALIDATED = BIT(15), + /* bit 16 free */ RX_FLAG_AMPDU_LAST_KNOWN = BIT(17), RX_FLAG_AMPDU_IS_LAST = BIT(18), RX_FLAG_AMPDU_DELIM_CRC_ERROR = BIT(19), diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 943f7606527e..feb547dc8643 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -516,31 +516,34 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx, return RX_DROP_UNUSABLE; } - ccmp_hdr2pn(pn, skb->data + hdrlen); + if (!(status->flag & RX_FLAG_PN_VALIDATED)) { + ccmp_hdr2pn(pn, skb->data + hdrlen); - queue = rx->security_idx; + queue = rx->security_idx; - if (memcmp(pn, key->u.ccmp.rx_pn[queue], IEEE80211_CCMP_PN_LEN) <= 0) { - key->u.ccmp.replays++; - return RX_DROP_UNUSABLE; - } + if (memcmp(pn, key->u.ccmp.rx_pn[queue], + IEEE80211_CCMP_PN_LEN) <= 0) { + key->u.ccmp.replays++; + return RX_DROP_UNUSABLE; + } - if (!(status->flag & RX_FLAG_DECRYPTED)) { - u8 aad[2 * AES_BLOCK_SIZE]; - u8 b_0[AES_BLOCK_SIZE]; - /* hardware didn't decrypt/verify MIC */ - ccmp_special_blocks(skb, pn, b_0, aad); + if (!(status->flag & RX_FLAG_DECRYPTED)) { + u8 aad[2 * AES_BLOCK_SIZE]; + u8 b_0[AES_BLOCK_SIZE]; + /* hardware didn't decrypt/verify MIC */ + ccmp_special_blocks(skb, pn, b_0, aad); + + if (ieee80211_aes_ccm_decrypt( + key->u.ccmp.tfm, b_0, aad, + skb->data + hdrlen + IEEE80211_CCMP_HDR_LEN, + data_len, + skb->data + skb->len - mic_len, mic_len)) + return RX_DROP_UNUSABLE; + } - if (ieee80211_aes_ccm_decrypt( - key->u.ccmp.tfm, b_0, aad, - skb->data + hdrlen + IEEE80211_CCMP_HDR_LEN, - data_len, - skb->data + skb->len - mic_len, mic_len)) - return RX_DROP_UNUSABLE; + memcpy(key->u.ccmp.rx_pn[queue], pn, IEEE80211_CCMP_PN_LEN); } - memcpy(key->u.ccmp.rx_pn[queue], pn, IEEE80211_CCMP_PN_LEN); - /* Remove CCMP header and MIC */ if (pskb_trim(skb, skb->len - mic_len)) return RX_DROP_UNUSABLE; @@ -739,31 +742,35 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx) return RX_DROP_UNUSABLE; } - gcmp_hdr2pn(pn, skb->data + hdrlen); + if (!(status->flag & RX_FLAG_PN_VALIDATED)) { + gcmp_hdr2pn(pn, skb->data + hdrlen); - queue = rx->security_idx; + queue = rx->security_idx; - if (memcmp(pn, key->u.gcmp.rx_pn[queue], IEEE80211_GCMP_PN_LEN) <= 0) { - key->u.gcmp.replays++; - return RX_DROP_UNUSABLE; - } + if (memcmp(pn, key->u.gcmp.rx_pn[queue], + IEEE80211_GCMP_PN_LEN) <= 0) { + key->u.gcmp.replays++; + return RX_DROP_UNUSABLE; + } - if (!(status->flag & RX_FLAG_DECRYPTED)) { - u8 aad[2 * AES_BLOCK_SIZE]; - u8 j_0[AES_BLOCK_SIZE]; - /* hardware didn't decrypt/verify MIC */ - gcmp_special_blocks(skb, pn, j_0, aad); + if (!(status->flag & RX_FLAG_DECRYPTED)) { + u8 aad[2 * AES_BLOCK_SIZE]; + u8 j_0[AES_BLOCK_SIZE]; + /* hardware didn't decrypt/verify MIC */ + gcmp_special_blocks(skb, pn, j_0, aad); + + if (ieee80211_aes_gcm_decrypt( + key->u.gcmp.tfm, j_0, aad, + skb->data + hdrlen + IEEE80211_GCMP_HDR_LEN, + data_len, + skb->data + skb->len - + IEEE80211_GCMP_MIC_LEN)) + return RX_DROP_UNUSABLE; + } - if (ieee80211_aes_gcm_decrypt( - key->u.gcmp.tfm, j_0, aad, - skb->data + hdrlen + IEEE80211_GCMP_HDR_LEN, - data_len, - skb->data + skb->len - IEEE80211_GCMP_MIC_LEN)) - return RX_DROP_UNUSABLE; + memcpy(key->u.gcmp.rx_pn[queue], pn, IEEE80211_GCMP_PN_LEN); } - memcpy(key->u.gcmp.rx_pn[queue], pn, IEEE80211_GCMP_PN_LEN); - /* Remove GCMP header and MIC */ if (pskb_trim(skb, skb->len - IEEE80211_GCMP_MIC_LEN)) return RX_DROP_UNUSABLE; -- cgit v1.2.3 From b98fb44ffceeac717789e8f2fb3497e6b8c5c65b Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Wed, 10 Jun 2015 20:42:59 +0300 Subject: mac80211: define TDLS wider BW support bits Allow a device to specify support for the TDLS wider-bandwidth feature. Indicate this support during TDLS setup in the ext-capab IE and set an appropriate station flag when our TDLS peer supports it. This feature gives TDLS peers the ability to use a wider channel than the base width of the BSS. For instance VHT capable TDLS peers connected on a 20MHz channel can extend the channel to 80MHz, if regulatory considerations allow it. Do not cap the bandwidth of such stations by the current BSS channel width in mac80211. Signed-off-by: Arik Nemtsov Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 ++++ net/mac80211/cfg.c | 6 ++++++ net/mac80211/debugfs.c | 1 + net/mac80211/sta_info.h | 3 +++ net/mac80211/tdls.c | 18 +++++++++++++----- net/mac80211/vht.c | 8 ++++++-- 6 files changed, 33 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 4d3d2686f278..8f61a230c482 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1887,6 +1887,9 @@ struct ieee80211_txq { * @IEEE80211_HW_SINGLE_SCAN_ON_ALL_BANDS: The HW supports scanning on all bands * in one command, mac80211 doesn't have to run separate scans per band. * + * @IEEE80211_HW_TDLS_WIDER_BW: The device/driver supports wider bandwidth + * than then BSS bandwidth for a TDLS link on the base channel. + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -1919,6 +1922,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_CHANCTX_STA_CSA, IEEE80211_HW_SUPPORTS_CLONED_SKBS, IEEE80211_HW_SINGLE_SCAN_ON_ALL_BANDS, + IEEE80211_HW_TDLS_WIDER_BW, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index b145942a7624..a32575bf0546 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1155,6 +1155,12 @@ static int sta_apply_parameters(struct ieee80211_local *local, params->ext_capab[3] & WLAN_EXT_CAPA4_TDLS_CHAN_SWITCH) set_sta_flag(sta, WLAN_STA_TDLS_CHAN_SWITCH); + if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) && + ieee80211_hw_check(&local->hw, TDLS_WIDER_BW) && + params->ext_capab_len >= 8 && + params->ext_capab[7] & WLAN_EXT_CAPA8_TDLS_WIDE_BW_ENABLED) + set_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW); + if (params->sta_modify_mask & STATION_PARAM_APPLY_UAPSD) { sta->sta.uapsd_queues = params->uapsd_queues; sta->sta.max_sp = params->max_sp; diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 2c79d777f0e4..ced6bf3be8d6 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -122,6 +122,7 @@ static const char *hw_flag_names[NUM_IEEE80211_HW_FLAGS + 1] = { FLAG(CHANCTX_STA_CSA), FLAG(SUPPORTS_CLONED_SKBS), FLAG(SINGLE_SCAN_ON_ALL_BANDS), + FLAG(TDLS_WIDER_BW), /* keep last for the build bug below */ (void *)0x1 diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 9e568927c080..b9c1aaaa01ff 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -53,6 +53,8 @@ * @WLAN_STA_TDLS_CHAN_SWITCH: This TDLS peer supports TDLS channel-switching * @WLAN_STA_TDLS_OFF_CHANNEL: The local STA is currently off-channel with this * TDLS peer + * @WLAN_STA_TDLS_WIDER_BW: This TDLS peer supports working on a wider bw on + * the BSS base channel. * @WLAN_STA_UAPSD: Station requested unscheduled SP while driver was * keeping station in power-save mode, reply when the driver * unblocks the station. @@ -84,6 +86,7 @@ enum ieee80211_sta_info_flags { WLAN_STA_TDLS_INITIATOR, WLAN_STA_TDLS_CHAN_SWITCH, WLAN_STA_TDLS_OFF_CHANNEL, + WLAN_STA_TDLS_WIDER_BW, WLAN_STA_UAPSD, WLAN_STA_SP, WLAN_STA_4ADDR_EVENT, diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index ad31b2dab4f5..fec1b336d03c 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -35,20 +35,28 @@ void ieee80211_tdls_peer_del_work(struct work_struct *wk) mutex_unlock(&local->mtx); } -static void ieee80211_tdls_add_ext_capab(struct ieee80211_local *local, +static void ieee80211_tdls_add_ext_capab(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { - u8 *pos = (void *)skb_put(skb, 7); + struct ieee80211_local *local = sdata->local; bool chan_switch = local->hw.wiphy->features & NL80211_FEATURE_TDLS_CHANNEL_SWITCH; + bool wider_band = ieee80211_hw_check(&local->hw, TDLS_WIDER_BW); + enum ieee80211_band band = ieee80211_get_sdata_band(sdata); + struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band]; + bool vht = sband && sband->vht_cap.vht_supported; + u8 *pos = (void *)skb_put(skb, 10); *pos++ = WLAN_EID_EXT_CAPABILITY; - *pos++ = 5; /* len */ + *pos++ = 8; /* len */ *pos++ = 0x0; *pos++ = 0x0; *pos++ = 0x0; *pos++ = chan_switch ? WLAN_EXT_CAPA4_TDLS_CHAN_SWITCH : 0; *pos++ = WLAN_EXT_CAPA5_TDLS_ENABLED; + *pos++ = 0; + *pos++ = 0; + *pos++ = (vht && wider_band) ? WLAN_EXT_CAPA8_TDLS_WIDE_BW_ENABLED : 0; } static u8 @@ -320,7 +328,7 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata, offset = noffset; } - ieee80211_tdls_add_ext_capab(local, skb); + ieee80211_tdls_add_ext_capab(sdata, skb); /* add the QoS element if we support it */ if (local->hw.queues >= IEEE80211_NUM_ACS && @@ -784,7 +792,7 @@ ieee80211_tdls_build_mgmt_packet_data(struct ieee80211_sub_if_data *sdata, max(sizeof(struct ieee80211_mgmt), sizeof(struct ieee80211_tdls_data)) + 50 + /* supported rates */ - 7 + /* ext capab */ + 10 + /* ext capab */ 26 + /* max(WMM-info, WMM-param) */ 2 + max(sizeof(struct ieee80211_ht_cap), sizeof(struct ieee80211_ht_operation)) + diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index 80694d55db74..f05808d0d80f 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -308,11 +308,15 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; enum ieee80211_sta_rx_bandwidth bw; + enum nl80211_chan_width bss_width = sdata->vif.bss_conf.chandef.width; - bw = ieee80211_chan_width_to_rx_bw(sdata->vif.bss_conf.chandef.width); - bw = min(bw, ieee80211_sta_cap_rx_bw(sta)); + bw = ieee80211_sta_cap_rx_bw(sta); bw = min(bw, sta->cur_max_bandwidth); + /* do not cap the BW of TDLS WIDER_BW peers by the bss */ + if (!test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW)) + bw = min(bw, ieee80211_chan_width_to_rx_bw(bss_width)); + return bw; } -- cgit v1.2.3 From f9a060f4b2003eb7350762e60dfc576447e44bad Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 12 Jun 2015 14:55:34 +0200 Subject: mac80211: add pointer for driver use to key Some drivers may need to store data per key, for example for PN validation. Allow this by adding a pointer to the struct that the driver can assign. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8f61a230c482..484cc14fb947 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1492,8 +1492,10 @@ enum ieee80211_key_flags { * - Temporal Authenticator Rx MIC Key (64 bits) * @icv_len: The ICV length for this key type * @iv_len: The IV length for this key type + * @drv_priv: pointer for driver use */ struct ieee80211_key_conf { + void *drv_priv; atomic64_t tx_pn; u32 cipher; u8 icv_len; -- cgit v1.2.3 From 33d8783c58427683b533664f67f8c4378ed64495 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 23 Jun 2015 17:47:05 +0200 Subject: cfg80211: allow mgmt_frame_register callback to sleep This callback is currently not allowed to sleep, which makes it more difficult to implement proper driver methods in mac80211 than it has to be. Instead of doing asynchronous work here in mac80211, make it possible for the callback to sleep by doing some asynchronous work in cfg80211. This also enables improvements to other drivers, like ath6kl, that would like to sleep in this callback. While at it, also fix the code to call the driver on the implicit unregistration when an interface is removed, and do that also when a P2P-Device wdev is destroyed (otherwise we leak the structs.) Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 +- net/wireless/core.c | 5 ++++ net/wireless/core.h | 5 ++++ net/wireless/mlme.c | 75 +++++++++++++++++++++++++++++++++++++++---------- net/wireless/rdev-ops.h | 2 ++ 5 files changed, 73 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index a741678f24a2..9a529c48f6ca 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2369,8 +2369,7 @@ struct cfg80211_qos_map { * method returns 0.) * * @mgmt_frame_register: Notify driver that a management frame type was - * registered. Note that this callback may not sleep, and cannot run - * concurrently with itself. + * registered. The callback is allowed to sleep. * * @set_antenna: Set antenna configuration (tx_ant, rx_ant) on the device. * Parameters are bitmaps of allowed antennas to use for TX/RX. Drivers may diff --git a/net/wireless/core.c b/net/wireless/core.c index 2a0bbd22854b..3893409dee95 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -407,6 +407,9 @@ use_default_name: INIT_LIST_HEAD(&rdev->bss_list); INIT_WORK(&rdev->scan_done_wk, __cfg80211_scan_done); INIT_WORK(&rdev->sched_scan_results_wk, __cfg80211_sched_scan_results); + INIT_LIST_HEAD(&rdev->mlme_unreg); + spin_lock_init(&rdev->mlme_unreg_lock); + INIT_WORK(&rdev->mlme_unreg_wk, cfg80211_mlme_unreg_wk); INIT_DELAYED_WORK(&rdev->dfs_update_channels_wk, cfg80211_dfs_channels_update_work); #ifdef CONFIG_CFG80211_WEXT @@ -802,6 +805,7 @@ void wiphy_unregister(struct wiphy *wiphy) cancel_delayed_work_sync(&rdev->dfs_update_channels_wk); flush_work(&rdev->destroy_work); flush_work(&rdev->sched_scan_stop_wk); + flush_work(&rdev->mlme_unreg_wk); #ifdef CONFIG_PM if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup) @@ -855,6 +859,7 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev) switch (wdev->iftype) { case NL80211_IFTYPE_P2P_DEVICE: + cfg80211_mlme_purge_registrations(wdev); cfg80211_stop_p2p_device(rdev, wdev); break; default: diff --git a/net/wireless/core.h b/net/wireless/core.h index 311eef26bf88..b9d5bc8c148d 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -59,6 +59,10 @@ struct cfg80211_registered_device { struct list_head beacon_registrations; spinlock_t beacon_registrations_lock; + struct list_head mlme_unreg; + spinlock_t mlme_unreg_lock; + struct work_struct mlme_unreg_wk; + /* protected by RTNL only */ int num_running_ifaces; int num_running_monitor_ifaces; @@ -348,6 +352,7 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, u16 frame_type, const u8 *match_data, int match_len); +void cfg80211_mlme_unreg_wk(struct work_struct *wk); void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid); void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev); int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 7aae329e2b4e..fb44fa3bf4ef 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -2,6 +2,7 @@ * cfg80211 MLME SAP interface * * Copyright (c) 2009, Jouni Malinen + * Copyright (c) 2015 Intel Deutschland GmbH */ #include @@ -389,6 +390,7 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, struct cfg80211_mgmt_registration { struct list_head list; + struct wireless_dev *wdev; u32 nlportid; @@ -399,6 +401,46 @@ struct cfg80211_mgmt_registration { u8 match[]; }; +static void +cfg80211_process_mlme_unregistrations(struct cfg80211_registered_device *rdev) +{ + struct cfg80211_mgmt_registration *reg; + + ASSERT_RTNL(); + + spin_lock_bh(&rdev->mlme_unreg_lock); + while ((reg = list_first_entry_or_null(&rdev->mlme_unreg, + struct cfg80211_mgmt_registration, + list))) { + list_del(®->list); + spin_unlock_bh(&rdev->mlme_unreg_lock); + + if (rdev->ops->mgmt_frame_register) { + u16 frame_type = le16_to_cpu(reg->frame_type); + + rdev_mgmt_frame_register(rdev, reg->wdev, + frame_type, false); + } + + kfree(reg); + + spin_lock_bh(&rdev->mlme_unreg_lock); + } + spin_unlock_bh(&rdev->mlme_unreg_lock); +} + +void cfg80211_mlme_unreg_wk(struct work_struct *wk) +{ + struct cfg80211_registered_device *rdev; + + rdev = container_of(wk, struct cfg80211_registered_device, + mlme_unreg_wk); + + rtnl_lock(); + cfg80211_process_mlme_unregistrations(rdev); + rtnl_unlock(); +} + int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, u16 frame_type, const u8 *match_data, int match_len) @@ -449,11 +491,18 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, nreg->match_len = match_len; nreg->nlportid = snd_portid; nreg->frame_type = cpu_to_le16(frame_type); + nreg->wdev = wdev; list_add(&nreg->list, &wdev->mgmt_registrations); + spin_unlock_bh(&wdev->mgmt_registrations_lock); + + /* process all unregistrations to avoid driver confusion */ + cfg80211_process_mlme_unregistrations(rdev); if (rdev->ops->mgmt_frame_register) rdev_mgmt_frame_register(rdev, wdev, frame_type, true); + return 0; + out: spin_unlock_bh(&wdev->mgmt_registrations_lock); @@ -472,15 +521,12 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid) if (reg->nlportid != nlportid) continue; - if (rdev->ops->mgmt_frame_register) { - u16 frame_type = le16_to_cpu(reg->frame_type); - - rdev_mgmt_frame_register(rdev, wdev, - frame_type, false); - } - list_del(®->list); - kfree(reg); + spin_lock(&rdev->mlme_unreg_lock); + list_add_tail(®->list, &rdev->mlme_unreg); + spin_unlock(&rdev->mlme_unreg_lock); + + schedule_work(&rdev->mlme_unreg_wk); } spin_unlock_bh(&wdev->mgmt_registrations_lock); @@ -496,16 +542,15 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid) void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev) { - struct cfg80211_mgmt_registration *reg, *tmp; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); spin_lock_bh(&wdev->mgmt_registrations_lock); - - list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) { - list_del(®->list); - kfree(reg); - } - + spin_lock(&rdev->mlme_unreg_lock); + list_splice_tail_init(&wdev->mgmt_registrations, &rdev->mlme_unreg); + spin_unlock(&rdev->mlme_unreg_lock); spin_unlock_bh(&wdev->mgmt_registrations_lock); + + cfg80211_process_mlme_unregistrations(rdev); } int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index c6e83a7468c0..c23516d0f807 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -733,6 +733,8 @@ static inline void rdev_mgmt_frame_register(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, u16 frame_type, bool reg) { + might_sleep(); + trace_rdev_mgmt_frame_register(&rdev->wiphy, wdev , frame_type, reg); rdev->ops->mgmt_frame_register(&rdev->wiphy, wdev , frame_type, reg); trace_rdev_return_void(&rdev->wiphy); -- cgit v1.2.3 From 3b8a684bd6cbc13dfd21ca41814c304e9f27ec7f Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 3 Aug 2015 17:24:08 +0200 Subject: drm/atomic-helper: Add an atomice best_encoder callback With legacy helpers all the routing was already set up when calling best_encoder and so could be inspected. But with atomic it's staged, hence we need a new atomic compliant callback for drivers which need to inspect the requested state and can't just decided the best encoder statically. This is needed to fix up i915 dp mst where we need to pick the right encoder depending upon the requested CRTC for the connector. v2: Don't forget to amend the kerneldoc Cc: Chris Wilson Cc: Linus Torvalds Cc: Theodore Ts'o Acked-by: Thierry Reding Reviewed-by: Ander Conselvan de Oliveira Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_atomic_helper.c | 7 ++++++- include/drm/drm_crtc_helper.h | 3 +++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index aac212297b49..8694ca9beee3 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -196,7 +196,12 @@ update_connector_routing(struct drm_atomic_state *state, int conn_idx) } funcs = connector->helper_private; - new_encoder = funcs->best_encoder(connector); + + if (funcs->atomic_best_encoder) + new_encoder = funcs->atomic_best_encoder(connector, + connector_state); + else + new_encoder = funcs->best_encoder(connector); if (!new_encoder) { DRM_DEBUG_ATOMIC("No suitable encoder found for [CONNECTOR:%d:%s]\n", diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h index c8fc187061de..918aa68b5199 100644 --- a/include/drm/drm_crtc_helper.h +++ b/include/drm/drm_crtc_helper.h @@ -168,6 +168,7 @@ struct drm_encoder_helper_funcs { * @get_modes: get mode list for this connector * @mode_valid: is this mode valid on the given connector? (optional) * @best_encoder: return the preferred encoder for this connector + * @atomic_best_encoder: atomic version of @best_encoder * * The helper operations are called by the mid-layer CRTC helper. */ @@ -176,6 +177,8 @@ struct drm_connector_helper_funcs { enum drm_mode_status (*mode_valid)(struct drm_connector *connector, struct drm_display_mode *mode); struct drm_encoder *(*best_encoder)(struct drm_connector *connector); + struct drm_encoder *(*atomic_best_encoder)(struct drm_connector *connector, + struct drm_connector_state *connector_state); }; extern void drm_helper_disable_unused_functions(struct drm_device *dev); -- cgit v1.2.3 From 6dc6db790a67d28e46abefc44ca1a3bd438b2920 Mon Sep 17 00:00:00 2001 From: "Subhransu S. Prusty" Date: Mon, 29 Jun 2015 17:36:44 +0100 Subject: ASoC: topology: Add subsequence in topology Some widgets may need sorting within, So add this support in topology. Signed-off-by: Subhransu S. Prusty Signed-off-by: Mark Brown --- include/uapi/sound/asoc.h | 1 + sound/soc/soc-topology.c | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/sound/asoc.h b/include/uapi/sound/asoc.h index 12215205ab8d..7ae13fbc0a3e 100644 --- a/include/uapi/sound/asoc.h +++ b/include/uapi/sound/asoc.h @@ -347,6 +347,7 @@ struct snd_soc_tplg_dapm_widget { __le32 reg; /* negative reg = no direct dapm */ __le32 shift; /* bits to shift */ __le32 mask; /* non-shifted mask */ + __le32 subseq; /* sort within widget type */ __u32 invert; /* invert the power bit */ __u32 ignore_suspend; /* kept enabled over suspend */ __u16 event_flags; diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 6a547c6dd3a1..9f2b048f1071 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -1351,6 +1351,7 @@ static int soc_tplg_dapm_widget_create(struct soc_tplg *tplg, template.reg = w->reg; template.shift = w->shift; template.mask = w->mask; + template.subseq = w->subseq; template.on_val = w->invert ? 0 : 1; template.off_val = w->invert ? 1 : 0; template.ignore_suspend = w->ignore_suspend; -- cgit v1.2.3 From c3879956957b8de9fd6cbad604e668fd00c6506c Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Mon, 29 Jun 2015 17:36:46 +0100 Subject: ASoC: topology: add private data to manifest The topology file manifest should include a private data field. This allows vendors to specify vendor data in the manifest, like timestamps, hashes, additional information for removing platform configuration out of drivers and making these configurable per platform Signed-off-by: Vinod Koul Signed-off-by: Mark Brown --- include/uapi/sound/asoc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/sound/asoc.h b/include/uapi/sound/asoc.h index 7ae13fbc0a3e..d550c8d40269 100644 --- a/include/uapi/sound/asoc.h +++ b/include/uapi/sound/asoc.h @@ -238,6 +238,7 @@ struct snd_soc_tplg_manifest { __le32 graph_elems; /* number of graph elements */ __le32 dai_elems; /* number of DAI elements */ __le32 dai_link_elems; /* number of DAI link elements */ + struct snd_soc_tplg_private priv; } __attribute__((packed)); /* -- cgit v1.2.3 From 28a87eebcad40101b1b273cbd4f2a02c104f9367 Mon Sep 17 00:00:00 2001 From: Mengdong Lin Date: Wed, 5 Aug 2015 14:41:13 +0100 Subject: ASoC: topology: Update TLV support so we can support more TLV types Currently the TLV topology structure is targeted at only supporting the DB scale data. This patch extends support for the other TLV types so they can be easily added at a later stage. TLV structure is moved to common topology control header since it's a common field for controls and can be processed in a general way. Users must set a proper access flag for a control since it's used to decide if the TLV field is valid and if a TLV callback is needed. Removed the following fields from topology TLV struct: - size/count: type can decide the size. - numid: not needed to initialize TLV for kcontrol. - data: replaced by the type specific struct. Added TLV structure to generic control header and removed TLV structure from mixer control. Signed-off-by: Mengdong Lin Signed-off-by: Liam Girdwood Signed-off-by: Mark Brown --- include/uapi/sound/asoc.h | 19 +++++++++++----- sound/soc/soc-topology.c | 58 +++++++++++++++++++++++++++++++++-------------- 2 files changed, 54 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/uapi/sound/asoc.h b/include/uapi/sound/asoc.h index 2819fc1f8458..aa3a79b42438 100644 --- a/include/uapi/sound/asoc.h +++ b/include/uapi/sound/asoc.h @@ -137,11 +137,19 @@ struct snd_soc_tplg_private { /* * Kcontrol TLV data. */ +struct snd_soc_tplg_tlv_dbscale { + __le32 min; + __le32 step; + __le32 mute; +} __attribute__((packed)); + struct snd_soc_tplg_ctl_tlv { - __le32 size; /* in bytes aligned to 4 */ - __le32 numid; /* control element numeric identification */ - __le32 count; /* number of elem in data array */ - __le32 data[SND_SOC_TPLG_TLV_SIZE]; + __le32 size; /* in bytes of this structure */ + __le32 type; /* SNDRV_CTL_TLVT_*, type of TLV */ + union { + __le32 data[SND_SOC_TPLG_TLV_SIZE]; + struct snd_soc_tplg_tlv_dbscale scale; + }; } __attribute__((packed)); /* @@ -172,7 +180,7 @@ struct snd_soc_tplg_ctl_hdr { char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; __le32 access; struct snd_soc_tplg_kcontrol_ops_id ops; - __le32 tlv_size; /* non zero means control has TLV data */ + struct snd_soc_tplg_ctl_tlv tlv; } __attribute__((packed)); /* @@ -260,7 +268,6 @@ struct snd_soc_tplg_mixer_control { __le32 invert; __le32 num_channels; struct snd_soc_tplg_channel channel[SND_SOC_TPLG_MAX_CHAN]; - struct snd_soc_tplg_ctl_tlv tlv; struct snd_soc_tplg_private priv; } __attribute__((packed)); diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 2c70f30d2d78..31068b8f3db0 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -33,6 +33,7 @@ #include #include #include +#include /* * We make several passes over the data (since it wont necessarily be ordered) @@ -579,28 +580,51 @@ static int soc_tplg_init_kcontrol(struct soc_tplg *tplg, return 0; } + +static int soc_tplg_create_tlv_db_scale(struct soc_tplg *tplg, + struct snd_kcontrol_new *kc, struct snd_soc_tplg_tlv_dbscale *scale) +{ + unsigned int item_len = 2 * sizeof(unsigned int); + unsigned int *p; + + p = kzalloc(item_len + 2 * sizeof(unsigned int), GFP_KERNEL); + if (!p) + return -ENOMEM; + + p[0] = SNDRV_CTL_TLVT_DB_SCALE; + p[1] = item_len; + p[2] = scale->min; + p[3] = (scale->step & TLV_DB_SCALE_MASK) + | (scale->mute ? TLV_DB_SCALE_MUTE : 0); + + kc->tlv.p = (void *)p; + return 0; +} + static int soc_tplg_create_tlv(struct soc_tplg *tplg, - struct snd_kcontrol_new *kc, struct snd_soc_tplg_ctl_tlv *tplg_tlv) + struct snd_kcontrol_new *kc, struct snd_soc_tplg_ctl_hdr *tc) { - struct snd_ctl_tlv *tlv; - int size; + struct snd_soc_tplg_ctl_tlv *tplg_tlv; - if (tplg_tlv->count == 0) + if (!(tc->access & SNDRV_CTL_ELEM_ACCESS_TLV_READWRITE)) return 0; - size = ((tplg_tlv->count + (sizeof(unsigned int) - 1)) & - ~(sizeof(unsigned int) - 1)); - tlv = kzalloc(sizeof(*tlv) + size, GFP_KERNEL); - if (tlv == NULL) - return -ENOMEM; - - dev_dbg(tplg->dev, " created TLV type %d size %d bytes\n", - tplg_tlv->numid, size); + if (tc->access & SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK) { + kc->tlv.c = snd_soc_bytes_tlv_callback; + } else { + tplg_tlv = &tc->tlv; + switch (tplg_tlv->type) { + case SNDRV_CTL_TLVT_DB_SCALE: + return soc_tplg_create_tlv_db_scale(tplg, kc, + &tplg_tlv->scale); - tlv->numid = tplg_tlv->numid; - tlv->length = size; - memcpy(&tlv->tlv[0], tplg_tlv->data, size); - kc->tlv.p = (void *)tlv; + /* TODO: add support for other TLV types */ + default: + dev_dbg(tplg->dev, "Unsupported TLV type %d\n", + tplg_tlv->type); + return -EINVAL; + } + } return 0; } @@ -772,7 +796,7 @@ static int soc_tplg_dmixer_create(struct soc_tplg *tplg, unsigned int count, } /* create any TLV data */ - soc_tplg_create_tlv(tplg, &kc, &mc->tlv); + soc_tplg_create_tlv(tplg, &kc, &mc->hdr); /* register control here */ err = soc_tplg_add_kcontrol(tplg, &kc, -- cgit v1.2.3 From cb88498b36ab01cbe3a0d95cd097e4afdff4c6fd Mon Sep 17 00:00:00 2001 From: Mengdong Lin Date: Wed, 5 Aug 2015 14:41:14 +0100 Subject: ASoC: topology: Add ops support to byte controls UAPI Add UAPI support for setting byte control ops. Rename the ops structure to be more generic so it can be sued by other objects too. Signed-off-by: Mengdong Lin Signed-off-by: Liam Girdwood Signed-off-by: Mark Brown --- include/uapi/sound/asoc.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/sound/asoc.h b/include/uapi/sound/asoc.h index aa3a79b42438..d5281ac8e5eb 100644 --- a/include/uapi/sound/asoc.h +++ b/include/uapi/sound/asoc.h @@ -163,9 +163,11 @@ struct snd_soc_tplg_channel { } __attribute__((packed)); /* - * Kcontrol Operations IDs + * Genericl Operations IDs, for binding Kcontrol or Bytes ext ops + * Kcontrol ops need get/put/info. + * Bytes ext ops need get/put. */ -struct snd_soc_tplg_kcontrol_ops_id { +struct snd_soc_tplg_io_ops { __le32 get; __le32 put; __le32 info; @@ -179,7 +181,7 @@ struct snd_soc_tplg_ctl_hdr { __le32 type; char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; __le32 access; - struct snd_soc_tplg_kcontrol_ops_id ops; + struct snd_soc_tplg_io_ops ops; struct snd_soc_tplg_ctl_tlv tlv; } __attribute__((packed)); @@ -311,6 +313,7 @@ struct snd_soc_tplg_bytes_control { __le32 mask; __le32 base; __le32 num_regs; + struct snd_soc_tplg_io_ops ext_ops; struct snd_soc_tplg_private priv; } __attribute__((packed)); -- cgit v1.2.3 From c7bcf8777a539e64dafc7417c00047aee6eb8909 Mon Sep 17 00:00:00 2001 From: Liam Girdwood Date: Wed, 5 Aug 2015 14:41:15 +0100 Subject: ASoC: topology: Add private data type and bump ABI version to 3 Add ID for standalone private data object types and bump ABI version to 3 in order to userpsace features. Signed-off-by: Liam Girdwood Signed-off-by: Mark Brown --- include/uapi/sound/asoc.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/sound/asoc.h b/include/uapi/sound/asoc.h index d5281ac8e5eb..51b8066a223b 100644 --- a/include/uapi/sound/asoc.h +++ b/include/uapi/sound/asoc.h @@ -77,7 +77,7 @@ #define SND_SOC_TPLG_NUM_TEXTS 16 /* ABI version */ -#define SND_SOC_TPLG_ABI_VERSION 0x2 +#define SND_SOC_TPLG_ABI_VERSION 0x3 /* Max size of TLV data */ #define SND_SOC_TPLG_TLV_SIZE 32 @@ -97,7 +97,8 @@ #define SND_SOC_TPLG_TYPE_PCM 7 #define SND_SOC_TPLG_TYPE_MANIFEST 8 #define SND_SOC_TPLG_TYPE_CODEC_LINK 9 -#define SND_SOC_TPLG_TYPE_MAX SND_SOC_TPLG_TYPE_CODEC_LINK +#define SND_SOC_TPLG_TYPE_PDATA 10 +#define SND_SOC_TPLG_TYPE_MAX SND_SOC_TPLG_TYPE_PDATA /* vendor block IDs - please add new vendor types to end */ #define SND_SOC_TPLG_TYPE_VENDOR_FW 1000 -- cgit v1.2.3 From 4248b0da460839e30eaaad78992b9a1dd3e63e21 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 6 Aug 2015 15:46:20 -0700 Subject: fs, file table: reinit files_stat.max_files after deferred memory initialisation Dave Hansen reported the following; My laptop has been behaving strangely with 4.2-rc2. Once I log in to my X session, I start getting all kinds of strange errors from applications and see this in my dmesg: VFS: file-max limit 8192 reached The problem is that the file-max is calculated before memory is fully initialised and miscalculates how much memory the kernel is using. This patch recalculates file-max after deferred memory initialisation. Note that using memory hotplug infrastructure would not have avoided this problem as the value is not recalculated after memory hot-add. 4.1: files_stat.max_files = 6582781 4.2-rc2: files_stat.max_files = 8192 4.2-rc2 patched: files_stat.max_files = 6562467 Small differences with the patch applied and 4.1 but not enough to matter. Signed-off-by: Mel Gorman Reported-by: Dave Hansen Cc: Nicolai Stange Cc: Dave Hansen Cc: Alex Ng Cc: Fengguang Wu Cc: Peter Zijlstra (Intel) Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dcache.c | 13 +++---------- fs/file_table.c | 24 +++++++++++++++--------- include/linux/fs.h | 5 +++-- init/main.c | 2 +- mm/page_alloc.c | 3 +++ 5 files changed, 25 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/fs/dcache.c b/fs/dcache.c index 5c8ea15e73a5..9b5fe503f6cb 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -3442,22 +3442,15 @@ void __init vfs_caches_init_early(void) inode_init_early(); } -void __init vfs_caches_init(unsigned long mempages) +void __init vfs_caches_init(void) { - unsigned long reserve; - - /* Base hash sizes on available memory, with a reserve equal to - 150% of current kernel size */ - - reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1); - mempages -= reserve; - names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); dcache_init(); inode_init(); - files_init(mempages); + files_init(); + files_maxfiles_init(); mnt_init(); bdev_cache_init(); chrdev_init(); diff --git a/fs/file_table.c b/fs/file_table.c index 7f9d407c7595..ad17e05ebf95 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -25,6 +25,7 @@ #include #include #include +#include #include @@ -308,19 +309,24 @@ void put_filp(struct file *file) } } -void __init files_init(unsigned long mempages) +void __init files_init(void) { - unsigned long n; - filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); + percpu_counter_init(&nr_files, 0, GFP_KERNEL); +} - /* - * One file with associated inode and dcache is very roughly 1K. - * Per default don't use more than 10% of our memory for files. - */ +/* + * One file with associated inode and dcache is very roughly 1K. Per default + * do not use more than 10% of our memory for files. + */ +void __init files_maxfiles_init(void) +{ + unsigned long n; + unsigned long memreserve = (totalram_pages - nr_free_pages()) * 3/2; + + memreserve = min(memreserve, totalram_pages - 1); + n = ((totalram_pages - memreserve) * (PAGE_SIZE / 1024)) / 10; - n = (mempages * (PAGE_SIZE / 1024)) / 10; files_stat.max_files = max_t(unsigned long, n, NR_FILE); - percpu_counter_init(&nr_files, 0, GFP_KERNEL); } diff --git a/include/linux/fs.h b/include/linux/fs.h index cc008c338f5a..84b783f277f7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -55,7 +55,8 @@ struct vm_fault; extern void __init inode_init(void); extern void __init inode_init_early(void); -extern void __init files_init(unsigned long); +extern void __init files_init(void); +extern void __init files_maxfiles_init(void); extern struct files_stat_struct files_stat; extern unsigned long get_max_files(void); @@ -2245,7 +2246,7 @@ extern int ioctl_preallocate(struct file *filp, void __user *argp); /* fs/dcache.c */ extern void __init vfs_caches_init_early(void); -extern void __init vfs_caches_init(unsigned long); +extern void __init vfs_caches_init(void); extern struct kmem_cache *names_cachep; diff --git a/init/main.c b/init/main.c index c5d5626289ce..56506553d4d8 100644 --- a/init/main.c +++ b/init/main.c @@ -656,7 +656,7 @@ asmlinkage __visible void __init start_kernel(void) key_init(); security_init(); dbg_late_init(); - vfs_caches_init(totalram_pages); + vfs_caches_init(); signals_init(); /* rootfs populating might need page-writeback */ page_writeback_init(); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 322628278ae4..cb61f44eb3fc 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1201,6 +1201,9 @@ void __init page_alloc_init_late(void) /* Block until all are initialised */ wait_for_completion(&pgdat_init_all_done_comp); + + /* Reinit limits that are based on free pages after the kernel is up */ + files_maxfiles_init(); } #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ -- cgit v1.2.3 From f4c18e6f7b5bbb5b528b3334115806b0d76f50f9 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Thu, 6 Aug 2015 15:47:08 -0700 Subject: mm: check __PG_HWPOISON separately from PAGE_FLAGS_CHECK_AT_* The race condition addressed in commit add05cecef80 ("mm: soft-offline: don't free target page in successful page migration") was not closed completely, because that can happen not only for soft-offline, but also for hard-offline. Consider that a slab page is about to be freed into buddy pool, and then an uncorrected memory error hits the page just after entering __free_one_page(), then VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP) is triggered, despite the fact that it's not necessary because the data on the affected page is not consumed. To solve it, this patch drops __PG_HWPOISON from page flag checks at allocation/free time. I think it's justified because __PG_HWPOISON flags is defined to prevent the page from being reused, and setting it outside the page's alloc-free cycle is a designed behavior (not a bug.) For recent months, I was annoyed about BUG_ON when soft-offlined page remains on lru cache list for a while, which is avoided by calling put_page() instead of putback_lru_page() in page migration's success path. This means that this patch reverts a major change from commit add05cecef80 about the new refcounting rule of soft-offlined pages, so "reuse window" revives. This will be closed by a subsequent patch. Signed-off-by: Naoya Horiguchi Cc: Andi Kleen Cc: Dean Nelson Cc: Tony Luck Cc: "Kirill A. Shutemov" Cc: Hugh Dickins Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 10 +++++++--- mm/huge_memory.c | 7 +------ mm/migrate.c | 5 ++++- mm/page_alloc.c | 4 ++++ 4 files changed, 16 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index f34e040b34e9..41c93844fb1d 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -631,15 +631,19 @@ static inline void ClearPageSlabPfmemalloc(struct page *page) 1 << PG_private | 1 << PG_private_2 | \ 1 << PG_writeback | 1 << PG_reserved | \ 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ - 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON | \ + 1 << PG_unevictable | __PG_MLOCKED | \ __PG_COMPOUND_LOCK) /* * Flags checked when a page is prepped for return by the page allocator. - * Pages being prepped should not have any flags set. It they are set, + * Pages being prepped should not have these flags set. It they are set, * there has been a kernel bug or struct page corruption. + * + * __PG_HWPOISON is exceptional because it needs to be kept beyond page's + * alloc-free cycle to prevent from reusing the page. */ -#define PAGE_FLAGS_CHECK_AT_PREP ((1 << NR_PAGEFLAGS) - 1) +#define PAGE_FLAGS_CHECK_AT_PREP \ + (((1 << NR_PAGEFLAGS) - 1) & ~__PG_HWPOISON) #define PAGE_FLAGS_PRIVATE \ (1 << PG_private | 1 << PG_private_2) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index c107094f79ba..097c7a4bfbd9 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1676,12 +1676,7 @@ static void __split_huge_page_refcount(struct page *page, /* after clearing PageTail the gup refcount can be released */ smp_mb__after_atomic(); - /* - * retain hwpoison flag of the poisoned tail page: - * fix for the unsuitable process killed on Guest Machine(KVM) - * by the memory-failure. - */ - page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP | __PG_HWPOISON; + page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; page_tail->flags |= (page->flags & ((1L << PG_referenced) | (1L << PG_swapbacked) | diff --git a/mm/migrate.c b/mm/migrate.c index ee401e4e5ef1..f2415be7d93b 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -950,7 +950,10 @@ out: list_del(&page->lru); dec_zone_page_state(page, NR_ISOLATED_ANON + page_is_file_cache(page)); - if (reason != MR_MEMORY_FAILURE) + /* Soft-offlined page shouldn't go through lru cache list */ + if (reason == MR_MEMORY_FAILURE) + put_page(page); + else putback_lru_page(page); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index cb61f44eb3fc..beda41710802 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1296,6 +1296,10 @@ static inline int check_new_page(struct page *page) bad_reason = "non-NULL mapping"; if (unlikely(atomic_read(&page->_count) != 0)) bad_reason = "nonzero _count"; + if (unlikely(page->flags & __PG_HWPOISON)) { + bad_reason = "HWPoisoned (hardware-corrupted)"; + bad_flags = __PG_HWPOISON; + } if (unlikely(page->flags & PAGE_FLAGS_CHECK_AT_PREP)) { bad_reason = "PAGE_FLAGS_CHECK_AT_PREP flag set"; bad_flags = PAGE_FLAGS_CHECK_AT_PREP; -- cgit v1.2.3 From 209e4dbc8dcdb2b1839f18fd1cf07ec7bedadf4d Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 7 Aug 2015 12:31:17 +0200 Subject: drm/vblank: Use u32 consistently for vblank counters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit 99264a61dfcda41d86d0960cf2d4c0fc2758a773 Author: Daniel Vetter Date: Wed Apr 15 19:34:43 2015 +0200 drm/vblank: Fixup and document timestamp update/read barriers I've switched vblank->count from atomic_t to unsigned long and accidentally created an integer comparison bug in drm_vblank_count_and_time since vblanke->count might overflow the u32 local copy and hence the retry loop never succeed. Fix this by consistently using u32. Cc: Michel Dänzer Reported-by: Michel Dänzer Reviewed-by: Thierry Reding Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_irq.c | 2 +- include/drm/drmP.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index f9cc68fbd2a3..b50fa0afd907 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -75,7 +75,7 @@ module_param_named(timestamp_precision_usec, drm_timestamp_precision, int, 0600) module_param_named(timestamp_monotonic, drm_timestamp_monotonic, int, 0600); static void store_vblank(struct drm_device *dev, int crtc, - unsigned vblank_count_inc, + u32 vblank_count_inc, struct timeval *t_vblank) { struct drm_vblank_crtc *vblank = &dev->vblank[crtc]; diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 48db6a56975f..5aa519711e0b 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -691,7 +691,7 @@ struct drm_vblank_crtc { struct timer_list disable_timer; /* delayed disable timer */ /* vblank counter, protected by dev->vblank_time_lock for writes */ - unsigned long count; + u32 count; /* vblank timestamps, protected by dev->vblank_time_lock for writes */ struct timeval time[DRM_VBLANKTIME_RBSIZE]; -- cgit v1.2.3 From da8b43c0e1dcea3bcac5f37ea59934ddaa137aed Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 4 Aug 2015 22:51:07 -0700 Subject: vxlan: combine VXLAN_FLOWBASED into VXLAN_COLLECT_METADATA IFLA_VXLAN_FLOWBASED is useless without IFLA_VXLAN_COLLECT_METADATA, so combine them into single IFLA_VXLAN_COLLECT_METADATA flag. 'flowbased' doesn't convey real meaning of the vxlan tunnel mode. This mode can be used by routing, tc+bpf and ovs. Only ovs is strictly flow based, so 'collect metadata' is a better name for this tunnel mode. Signed-off-by: Alexei Starovoitov Acked-by: Thomas Graf Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 17 ++++++----------- include/net/vxlan.h | 4 +--- include/uapi/linux/if_link.h | 1 - net/openvswitch/vport-vxlan.c | 2 +- 4 files changed, 8 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index e90f7a484e1c..b6731fad19ba 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1141,7 +1141,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, union vxlan_addr *remote_ip; /* For flow based devices, map all packets to VNI 0 */ - if (vs->flags & VXLAN_F_FLOW_BASED) + if (vs->flags & VXLAN_F_COLLECT_METADATA) vni = 0; /* Is this VNI defined? */ @@ -1183,7 +1183,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, skb_reset_network_header(skb); /* In flow-based mode, GBP is carried in dst_metadata */ - if (!(vs->flags & VXLAN_F_FLOW_BASED)) + if (!(vs->flags & VXLAN_F_COLLECT_METADATA)) skb->mark = md->gbp; if (oip6) @@ -2129,7 +2129,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) #endif } - if (vxlan->flags & VXLAN_F_FLOW_BASED && + if (vxlan->flags & VXLAN_F_COLLECT_METADATA && info && info->mode == IP_TUNNEL_INFO_TX) { vxlan_xmit_one(skb, dev, NULL, false); return NETDEV_TX_OK; @@ -2462,7 +2462,6 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { [IFLA_VXLAN_RSC] = { .type = NLA_U8 }, [IFLA_VXLAN_L2MISS] = { .type = NLA_U8 }, [IFLA_VXLAN_L3MISS] = { .type = NLA_U8 }, - [IFLA_VXLAN_FLOWBASED] = { .type = NLA_U8 }, [IFLA_VXLAN_COLLECT_METADATA] = { .type = NLA_U8 }, [IFLA_VXLAN_PORT] = { .type = NLA_U16 }, [IFLA_VXLAN_UDP_CSUM] = { .type = NLA_U8 }, @@ -2814,10 +2813,6 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev, if (data[IFLA_VXLAN_LIMIT]) conf.addrmax = nla_get_u32(data[IFLA_VXLAN_LIMIT]); - if (data[IFLA_VXLAN_FLOWBASED] && - nla_get_u8(data[IFLA_VXLAN_FLOWBASED])) - conf.flags |= VXLAN_F_FLOW_BASED; - if (data[IFLA_VXLAN_COLLECT_METADATA] && nla_get_u8(data[IFLA_VXLAN_COLLECT_METADATA])) conf.flags |= VXLAN_F_COLLECT_METADATA; @@ -2903,7 +2898,7 @@ static size_t vxlan_get_size(const struct net_device *dev) nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_RSC */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_L2MISS */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_L3MISS */ - nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_FLOWBASED */ + nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_COLLECT_METADATA */ nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_AGEING */ nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LIMIT */ nla_total_size(sizeof(struct ifla_vxlan_port_range)) + @@ -2970,8 +2965,8 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) !!(vxlan->flags & VXLAN_F_L2MISS)) || nla_put_u8(skb, IFLA_VXLAN_L3MISS, !!(vxlan->flags & VXLAN_F_L3MISS)) || - nla_put_u8(skb, IFLA_VXLAN_FLOWBASED, - !!(vxlan->flags & VXLAN_F_FLOW_BASED)) || + nla_put_u8(skb, IFLA_VXLAN_COLLECT_METADATA, + !!(vxlan->flags & VXLAN_F_COLLECT_METADATA)) || nla_put_u32(skb, IFLA_VXLAN_AGEING, vxlan->cfg.age_interval) || nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->cfg.addrmax) || nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->cfg.dst_port) || diff --git a/include/net/vxlan.h b/include/net/vxlan.h index eb8d721cdb67..e4534f1b2d8c 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -181,7 +181,6 @@ struct vxlan_dev { #define VXLAN_F_GBP 0x800 #define VXLAN_F_REMCSUM_NOPARTIAL 0x1000 #define VXLAN_F_COLLECT_METADATA 0x2000 -#define VXLAN_F_FLOW_BASED 0x4000 /* Flags that are used in the receive path. These flags must match in * order for a socket to be shareable @@ -190,8 +189,7 @@ struct vxlan_dev { VXLAN_F_UDP_ZERO_CSUM6_RX | \ VXLAN_F_REMCSUM_RX | \ VXLAN_F_REMCSUM_NOPARTIAL | \ - VXLAN_F_COLLECT_METADATA | \ - VXLAN_F_FLOW_BASED) + VXLAN_F_COLLECT_METADATA) struct net_device *vxlan_dev_create(struct net *net, const char *name, u8 name_assign_type, struct vxlan_config *conf); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index ea047480a1f0..f24ec99a2262 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -382,7 +382,6 @@ enum { IFLA_VXLAN_REMCSUM_RX, IFLA_VXLAN_GBP, IFLA_VXLAN_REMCSUM_NOPARTIAL, - IFLA_VXLAN_FLOWBASED, IFLA_VXLAN_COLLECT_METADATA, __IFLA_VXLAN_MAX }; diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 547173336cd3..c6e937e36f8b 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -90,7 +90,7 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms) int err; struct vxlan_config conf = { .no_share = true, - .flags = VXLAN_F_FLOW_BASED | VXLAN_F_COLLECT_METADATA, + .flags = VXLAN_F_COLLECT_METADATA, }; if (!options) { -- cgit v1.2.3 From 1525c386a1f01612c6f3f27241113d7fc8e6d72d Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 6 Aug 2015 01:44:02 -0400 Subject: net: switchdev: change fdb addr for a byte array The address in the switchdev_obj_fdb structure is currently represented as a pointer. Replacing it for a 6-byte array allows switchdev to carry addresses directly read from hardware registers, not stored by the switch chip driver (as in Rocker). Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker.c | 2 +- include/net/switchdev.h | 2 +- net/bridge/br_fdb.c | 2 +- net/switchdev/switchdev.c | 5 +++-- 4 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index b77e0e7307d4..80bb25c5a644 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4543,7 +4543,7 @@ static int rocker_port_fdb_dump(const struct rocker_port *rocker_port, hash_for_each_safe(rocker->fdb_tbl, bkt, tmp, found, entry) { if (found->key.pport != rocker_port->pport) continue; - fdb->addr = found->key.addr; + ether_addr_copy(fdb->addr, found->key.addr); fdb->vid = rocker_port_vlan_to_vid(rocker_port, found->key.vlan_id); err = obj->cb(rocker_port->dev, obj); diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 89da8934519b..e90e1a0fa579 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -70,7 +70,7 @@ struct switchdev_obj { u32 tb_id; } ipv4_fib; struct switchdev_obj_fdb { /* PORT_FDB */ - const unsigned char *addr; + u8 addr[ETH_ALEN]; u16 vid; } fdb; } u; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 9e9875da0a4f..5656b44bf3de 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -136,11 +136,11 @@ static void fdb_del_external_learn(struct net_bridge_fdb_entry *f) struct switchdev_obj obj = { .id = SWITCHDEV_OBJ_PORT_FDB, .u.fdb = { - .addr = f->addr.addr, .vid = f->vlan_id, }, }; + ether_addr_copy(obj.u.fdb.addr, f->addr.addr); switchdev_port_obj_del(f->dst->dev, &obj); } diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 33bafa2e703e..9db87a34f866 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -742,11 +743,11 @@ int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct switchdev_obj obj = { .id = SWITCHDEV_OBJ_PORT_FDB, .u.fdb = { - .addr = addr, .vid = vid, }, }; + ether_addr_copy(obj.u.fdb.addr, addr); return switchdev_port_obj_add(dev, &obj); } EXPORT_SYMBOL_GPL(switchdev_port_fdb_add); @@ -769,11 +770,11 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct switchdev_obj obj = { .id = SWITCHDEV_OBJ_PORT_FDB, .u.fdb = { - .addr = addr, .vid = vid, }, }; + ether_addr_copy(obj.u.fdb.addr, addr); return switchdev_port_obj_del(dev, &obj); } EXPORT_SYMBOL_GPL(switchdev_port_fdb_del); -- cgit v1.2.3 From 890248261a18c7ae22923095dfadea2c0a2a304a Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 6 Aug 2015 01:44:03 -0400 Subject: net: switchdev: support static FDB addresses This patch adds a is_static boolean to the switchdev_obj_fdb structure, in order to set the ndm_state to either NUD_NOARP or NUD_REACHABLE. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- include/net/switchdev.h | 1 + net/switchdev/switchdev.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index e90e1a0fa579..0e296b82aef3 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -72,6 +72,7 @@ struct switchdev_obj { struct switchdev_obj_fdb { /* PORT_FDB */ u8 addr[ETH_ALEN]; u16 vid; + bool is_static; } fdb; } u; }; diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 9db87a34f866..e9d1cacc4060 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -811,7 +811,7 @@ static int switchdev_port_fdb_dump_cb(struct net_device *dev, ndm->ndm_flags = NTF_SELF; ndm->ndm_type = 0; ndm->ndm_ifindex = dev->ifindex; - ndm->ndm_state = NUD_REACHABLE; + ndm->ndm_state = obj->u.fdb.is_static ? NUD_NOARP : NUD_REACHABLE; if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, obj->u.fdb.addr)) goto nla_put_failure; -- cgit v1.2.3 From 55045ddded0f39d84c2ca019508973be8c595a78 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 6 Aug 2015 01:44:04 -0400 Subject: net: dsa: add support for switchdev FDB objects Remove the fdb_{add,del,getnext} function pointer in favor of new port_fdb_{add,del,getnext}. Implement the switchdev_port_obj_{add,del,dump} functions in DSA to support the SWITCHDEV_OBJ_PORT_FDB objects. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6171.c | 3 - drivers/net/dsa/mv88e6352.c | 3 - include/net/dsa.h | 16 ++-- net/dsa/slave.c | 218 +++++++++++++++++++++++--------------------- 4 files changed, 126 insertions(+), 114 deletions(-) (limited to 'include') diff --git a/drivers/net/dsa/mv88e6171.c b/drivers/net/dsa/mv88e6171.c index 1c7808495a9d..cfa21ed1f734 100644 --- a/drivers/net/dsa/mv88e6171.c +++ b/drivers/net/dsa/mv88e6171.c @@ -116,9 +116,6 @@ struct dsa_switch_driver mv88e6171_switch_driver = { .port_join_bridge = mv88e6xxx_join_bridge, .port_leave_bridge = mv88e6xxx_leave_bridge, .port_stp_update = mv88e6xxx_port_stp_update, - .fdb_add = mv88e6xxx_port_fdb_add, - .fdb_del = mv88e6xxx_port_fdb_del, - .fdb_getnext = mv88e6xxx_port_fdb_getnext, }; MODULE_ALIAS("platform:mv88e6171"); diff --git a/drivers/net/dsa/mv88e6352.c b/drivers/net/dsa/mv88e6352.c index af210efecc55..eb4630fec6f1 100644 --- a/drivers/net/dsa/mv88e6352.c +++ b/drivers/net/dsa/mv88e6352.c @@ -341,9 +341,6 @@ struct dsa_switch_driver mv88e6352_switch_driver = { .port_join_bridge = mv88e6xxx_join_bridge, .port_leave_bridge = mv88e6xxx_leave_bridge, .port_stp_update = mv88e6xxx_port_stp_update, - .fdb_add = mv88e6xxx_port_fdb_add, - .fdb_del = mv88e6xxx_port_fdb_del, - .fdb_getnext = mv88e6xxx_port_fdb_getnext, }; MODULE_ALIAS("platform:mv88e6172"); diff --git a/include/net/dsa.h b/include/net/dsa.h index fbca63ba8f73..091d35f77180 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -296,12 +296,16 @@ struct dsa_switch_driver { u32 br_port_mask); int (*port_stp_update)(struct dsa_switch *ds, int port, u8 state); - int (*fdb_add)(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid); - int (*fdb_del)(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid); - int (*fdb_getnext)(struct dsa_switch *ds, int port, - unsigned char *addr, bool *is_static); + + /* + * Forwarding database + */ + int (*port_fdb_add)(struct dsa_switch *ds, int port, u16 vid, + const u8 addr[ETH_ALEN]); + int (*port_fdb_del)(struct dsa_switch *ds, int port, u16 vid, + const u8 addr[ETH_ALEN]); + int (*port_fdb_getnext)(struct dsa_switch *ds, int port, u16 *vid, + u8 addr[ETH_ALEN], bool *is_static); }; void register_switch_driver(struct dsa_switch_driver *type); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 0010c690cc67..1dbdeaab2bb4 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "dsa_priv.h" /* slave mii_bus handling ***************************************************/ @@ -200,105 +201,6 @@ out: return 0; } -static int dsa_slave_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, - const unsigned char *addr, u16 vid, u16 nlm_flags) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->parent; - int ret = -EOPNOTSUPP; - - if (ds->drv->fdb_add) - ret = ds->drv->fdb_add(ds, p->port, addr, vid); - - return ret; -} - -static int dsa_slave_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, - const unsigned char *addr, u16 vid) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->parent; - int ret = -EOPNOTSUPP; - - if (ds->drv->fdb_del) - ret = ds->drv->fdb_del(ds, p->port, addr, vid); - - return ret; -} - -static int dsa_slave_fill_info(struct net_device *dev, struct sk_buff *skb, - const unsigned char *addr, u16 vid, - bool is_static, - u32 portid, u32 seq, int type, - unsigned int flags) -{ - struct nlmsghdr *nlh; - struct ndmsg *ndm; - - nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags); - if (!nlh) - return -EMSGSIZE; - - ndm = nlmsg_data(nlh); - ndm->ndm_family = AF_BRIDGE; - ndm->ndm_pad1 = 0; - ndm->ndm_pad2 = 0; - ndm->ndm_flags = NTF_EXT_LEARNED; - ndm->ndm_type = 0; - ndm->ndm_ifindex = dev->ifindex; - ndm->ndm_state = is_static ? NUD_NOARP : NUD_REACHABLE; - - if (nla_put(skb, NDA_LLADDR, ETH_ALEN, addr)) - goto nla_put_failure; - - if (vid && nla_put_u16(skb, NDA_VLAN, vid)) - goto nla_put_failure; - - nlmsg_end(skb, nlh); - return 0; - -nla_put_failure: - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; -} - -/* Dump information about entries, in response to GETNEIGH */ -static int dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct net_device *dev, - struct net_device *filter_dev, int idx) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->parent; - unsigned char addr[ETH_ALEN] = { 0 }; - int ret; - - if (!ds->drv->fdb_getnext) - return -EOPNOTSUPP; - - for (; ; idx++) { - bool is_static; - - ret = ds->drv->fdb_getnext(ds, p->port, addr, &is_static); - if (ret < 0) - break; - - if (idx < cb->args[0]) - continue; - - ret = dsa_slave_fill_info(dev, skb, addr, 0, - is_static, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - RTM_NEWNEIGH, NLM_F_MULTI); - if (ret < 0) - break; - } - - return idx; -} - static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct dsa_slave_priv *p = netdev_priv(dev); @@ -364,6 +266,115 @@ static int dsa_slave_port_attr_set(struct net_device *dev, return ret; } +static int dsa_slave_port_fdb_add(struct net_device *dev, + struct switchdev_obj *obj) +{ + struct switchdev_obj_fdb *fdb = &obj->u.fdb; + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + int err; + + if (obj->trans == SWITCHDEV_TRANS_PREPARE) + err = ds->drv->port_fdb_add ? 0 : -EOPNOTSUPP; + else if (obj->trans == SWITCHDEV_TRANS_COMMIT) + err = ds->drv->port_fdb_add(ds, p->port, fdb->vid, fdb->addr); + else + err = -EOPNOTSUPP; + + return err; +} + +static int dsa_slave_port_fdb_del(struct net_device *dev, + struct switchdev_obj *obj) +{ + struct switchdev_obj_fdb *fdb = &obj->u.fdb; + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + + if (!ds->drv->port_fdb_del) + return -EOPNOTSUPP; + + return ds->drv->port_fdb_del(ds, p->port, fdb->vid, fdb->addr); +} + +static int dsa_slave_port_fdb_dump(struct net_device *dev, + struct switchdev_obj *obj) +{ + struct switchdev_obj_fdb *fdb = &obj->u.fdb; + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + int err; + + if (!ds->drv->port_fdb_getnext) + return -EOPNOTSUPP; + + memset(fdb, 0, sizeof(*fdb)); + + for (;;) { + err = ds->drv->port_fdb_getnext(ds, p->port, &fdb->vid, + fdb->addr, &fdb->is_static); + if (err) + break; + + err = obj->cb(dev, obj); + if (err) + break; + } + + return err == -ENOENT ? 0 : err; +} + +static int dsa_slave_port_obj_add(struct net_device *dev, + struct switchdev_obj *obj) +{ + int err; + + switch (obj->id) { + case SWITCHDEV_OBJ_PORT_FDB: + err = dsa_slave_port_fdb_add(dev, obj); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static int dsa_slave_port_obj_del(struct net_device *dev, + struct switchdev_obj *obj) +{ + int err; + + switch (obj->id) { + case SWITCHDEV_OBJ_PORT_FDB: + err = dsa_slave_port_fdb_del(dev, obj); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static int dsa_slave_port_obj_dump(struct net_device *dev, + struct switchdev_obj *obj) +{ + int err; + + switch (obj->id) { + case SWITCHDEV_OBJ_PORT_FDB: + err = dsa_slave_port_fdb_dump(dev, obj); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + static int dsa_slave_bridge_port_join(struct net_device *dev, struct net_device *br) { @@ -765,9 +776,9 @@ static const struct net_device_ops dsa_slave_netdev_ops = { .ndo_change_rx_flags = dsa_slave_change_rx_flags, .ndo_set_rx_mode = dsa_slave_set_rx_mode, .ndo_set_mac_address = dsa_slave_set_mac_address, - .ndo_fdb_add = dsa_slave_fdb_add, - .ndo_fdb_del = dsa_slave_fdb_del, - .ndo_fdb_dump = dsa_slave_fdb_dump, + .ndo_fdb_add = switchdev_port_fdb_add, + .ndo_fdb_del = switchdev_port_fdb_del, + .ndo_fdb_dump = switchdev_port_fdb_dump, .ndo_do_ioctl = dsa_slave_ioctl, .ndo_get_iflink = dsa_slave_get_iflink, #ifdef CONFIG_NET_POLL_CONTROLLER @@ -780,6 +791,9 @@ static const struct net_device_ops dsa_slave_netdev_ops = { static const struct switchdev_ops dsa_slave_switchdev_ops = { .switchdev_port_attr_get = dsa_slave_port_attr_get, .switchdev_port_attr_set = dsa_slave_port_attr_set, + .switchdev_port_obj_add = dsa_slave_port_obj_add, + .switchdev_port_obj_del = dsa_slave_port_obj_del, + .switchdev_port_obj_dump = dsa_slave_port_obj_dump, }; static void dsa_slave_adjust_link(struct net_device *dev) -- cgit v1.2.3 From ffe8690c85b8426db7783064724d106702f1b1e8 Mon Sep 17 00:00:00 2001 From: Kaixu Xia Date: Thu, 6 Aug 2015 07:02:32 +0000 Subject: perf: add the necessary core perf APIs when accessing events counters in eBPF programs This patch add three core perf APIs: - perf_event_attrs(): export the struct perf_event_attr from struct perf_event; - perf_event_get(): get the struct perf_event from the given fd; - perf_event_read_local(): read the events counters active on the current CPU; These APIs are needed when accessing events counters in eBPF programs. The API perf_event_read_local() comes from Peter and I add the corresponding SOB. Signed-off-by: Kaixu Xia Signed-off-by: Peter Zijlstra Signed-off-by: David S. Miller --- include/linux/perf_event.h | 10 ++++++ kernel/events/core.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2027809433b3..092a0e8a479a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -641,6 +641,8 @@ extern int perf_event_init_task(struct task_struct *child); extern void perf_event_exit_task(struct task_struct *child); extern void perf_event_free_task(struct task_struct *task); extern void perf_event_delayed_put(struct task_struct *task); +extern struct perf_event *perf_event_get(unsigned int fd); +extern const struct perf_event_attr *perf_event_attrs(struct perf_event *event); extern void perf_event_print_debug(void); extern void perf_pmu_disable(struct pmu *pmu); extern void perf_pmu_enable(struct pmu *pmu); @@ -659,6 +661,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, void *context); extern void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu); +extern u64 perf_event_read_local(struct perf_event *event); extern u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running); @@ -979,6 +982,12 @@ static inline int perf_event_init_task(struct task_struct *child) { return 0; } static inline void perf_event_exit_task(struct task_struct *child) { } static inline void perf_event_free_task(struct task_struct *task) { } static inline void perf_event_delayed_put(struct task_struct *task) { } +static inline struct perf_event *perf_event_get(unsigned int fd) { return ERR_PTR(-EINVAL); } +static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *event) +{ + return ERR_PTR(-EINVAL); +} +static inline u64 perf_event_read_local(struct perf_event *event) { return -EINVAL; } static inline void perf_event_print_debug(void) { } static inline int perf_event_task_disable(void) { return -EINVAL; } static inline int perf_event_task_enable(void) { return -EINVAL; } @@ -1011,6 +1020,7 @@ static inline void perf_event_enable(struct perf_event *event) { } static inline void perf_event_disable(struct perf_event *event) { } static inline int __perf_event_disable(void *info) { return -1; } static inline void perf_event_task_tick(void) { } +static inline int perf_event_release_kernel(struct perf_event *event) { return 0; } #endif #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_NO_HZ_FULL) diff --git a/kernel/events/core.c b/kernel/events/core.c index d3dae3419b99..e2c6a8886d4d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3212,6 +3212,59 @@ static inline u64 perf_event_count(struct perf_event *event) return __perf_event_count(event); } +/* + * NMI-safe method to read a local event, that is an event that + * is: + * - either for the current task, or for this CPU + * - does not have inherit set, for inherited task events + * will not be local and we cannot read them atomically + * - must not have a pmu::count method + */ +u64 perf_event_read_local(struct perf_event *event) +{ + unsigned long flags; + u64 val; + + /* + * Disabling interrupts avoids all counter scheduling (context + * switches, timer based rotation and IPIs). + */ + local_irq_save(flags); + + /* If this is a per-task event, it must be for current */ + WARN_ON_ONCE((event->attach_state & PERF_ATTACH_TASK) && + event->hw.target != current); + + /* If this is a per-CPU event, it must be for this CPU */ + WARN_ON_ONCE(!(event->attach_state & PERF_ATTACH_TASK) && + event->cpu != smp_processor_id()); + + /* + * It must not be an event with inherit set, we cannot read + * all child counters from atomic context. + */ + WARN_ON_ONCE(event->attr.inherit); + + /* + * It must not have a pmu::count method, those are not + * NMI safe. + */ + WARN_ON_ONCE(event->pmu->count); + + /* + * If the event is currently on this CPU, its either a per-task event, + * or local to this CPU. Furthermore it means its ACTIVE (otherwise + * oncpu == -1). + */ + if (event->oncpu == smp_processor_id()) + event->pmu->read(event); + + val = local64_read(&event->count); + local_irq_restore(flags); + + return val; +} + static u64 perf_event_read(struct perf_event *event) { /* @@ -8574,6 +8627,31 @@ void perf_event_delayed_put(struct task_struct *task) WARN_ON_ONCE(task->perf_event_ctxp[ctxn]); } +struct perf_event *perf_event_get(unsigned int fd) +{ + int err; + struct fd f; + struct perf_event *event; + + err = perf_fget_light(fd, &f); + if (err) + return ERR_PTR(err); + + event = f.file->private_data; + atomic_long_inc(&event->refcount); + fdput(f); + + return event; +} + +const struct perf_event_attr *perf_event_attrs(struct perf_event *event) +{ + if (!event) + return ERR_PTR(-EINVAL); + + return &event->attr; +} + /* * inherit a event from parent task to child task: */ -- cgit v1.2.3 From 2a36f0b92eb638dd023870574eb471b1c56be9ad Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Thu, 6 Aug 2015 07:02:33 +0000 Subject: bpf: Make the bpf_prog_array_map more generic All the map backends are of generic nature. In order to avoid adding much special code into the eBPF core, rewrite part of the bpf_prog_array map code and make it more generic. So the new perf_event_array map type can reuse most of code with bpf_prog_array map and add fewer lines of special code. Signed-off-by: Wang Nan Signed-off-by: Kaixu Xia Signed-off-by: David S. Miller --- arch/x86/net/bpf_jit_comp.c | 6 ++-- include/linux/bpf.h | 8 +++-- kernel/bpf/arraymap.c | 80 +++++++++++++++++++++++++++------------------ kernel/bpf/core.c | 2 +- kernel/bpf/syscall.c | 2 +- 5 files changed, 60 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index ec5214f39aa8..70efcd0940f9 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -246,7 +246,7 @@ static void emit_prologue(u8 **pprog) * goto out; * if (++tail_call_cnt > MAX_TAIL_CALL_CNT) * goto out; - * prog = array->prog[index]; + * prog = array->ptrs[index]; * if (prog == NULL) * goto out; * goto *(prog->bpf_func + prologue_size); @@ -284,9 +284,9 @@ static void emit_bpf_tail_call(u8 **pprog) EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ EMIT2_off32(0x89, 0x85, -STACKSIZE + 36); /* mov dword ptr [rbp - 516], eax */ - /* prog = array->prog[index]; */ + /* prog = array->ptrs[index]; */ EMIT4_off32(0x48, 0x8D, 0x84, 0xD6, /* lea rax, [rsi + rdx * 8 + offsetof(...)] */ - offsetof(struct bpf_array, prog)); + offsetof(struct bpf_array, ptrs)); EMIT3(0x48, 0x8B, 0x00); /* mov rax, qword ptr [rax] */ /* if (prog == NULL) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 139d6d2e123f..d495211d63d1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -24,6 +24,10 @@ struct bpf_map_ops { void *(*map_lookup_elem)(struct bpf_map *map, void *key); int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags); int (*map_delete_elem)(struct bpf_map *map, void *key); + + /* funcs called by prog_array and perf_event_array map */ + void *(*map_fd_get_ptr) (struct bpf_map *map, int fd); + void (*map_fd_put_ptr) (void *ptr); }; struct bpf_map { @@ -142,13 +146,13 @@ struct bpf_array { bool owner_jited; union { char value[0] __aligned(8); - struct bpf_prog *prog[0] __aligned(8); + void *ptrs[0] __aligned(8); }; }; #define MAX_TAIL_CALL_CNT 32 u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); -void bpf_prog_array_map_clear(struct bpf_map *map); +void bpf_fd_array_map_clear(struct bpf_map *map); bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); const struct bpf_func_proto *bpf_get_trace_printk_proto(void); diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index cb31229a6fa4..45df6572ecfd 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -150,15 +150,15 @@ static int __init register_array_map(void) } late_initcall(register_array_map); -static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr) +static struct bpf_map *fd_array_map_alloc(union bpf_attr *attr) { - /* only bpf_prog file descriptors can be stored in prog_array map */ + /* only file descriptors can be stored in this type of map */ if (attr->value_size != sizeof(u32)) return ERR_PTR(-EINVAL); return array_map_alloc(attr); } -static void prog_array_map_free(struct bpf_map *map) +static void fd_array_map_free(struct bpf_map *map) { struct bpf_array *array = container_of(map, struct bpf_array, map); int i; @@ -167,21 +167,21 @@ static void prog_array_map_free(struct bpf_map *map) /* make sure it's empty */ for (i = 0; i < array->map.max_entries; i++) - BUG_ON(array->prog[i] != NULL); + BUG_ON(array->ptrs[i] != NULL); kvfree(array); } -static void *prog_array_map_lookup_elem(struct bpf_map *map, void *key) +static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key) { return NULL; } /* only called from syscall */ -static int prog_array_map_update_elem(struct bpf_map *map, void *key, - void *value, u64 map_flags) +static int fd_array_map_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags) { struct bpf_array *array = container_of(map, struct bpf_array, map); - struct bpf_prog *prog, *old_prog; + void *new_ptr, *old_ptr; u32 index = *(u32 *)key, ufd; if (map_flags != BPF_ANY) @@ -191,57 +191,75 @@ static int prog_array_map_update_elem(struct bpf_map *map, void *key, return -E2BIG; ufd = *(u32 *)value; - prog = bpf_prog_get(ufd); - if (IS_ERR(prog)) - return PTR_ERR(prog); - - if (!bpf_prog_array_compatible(array, prog)) { - bpf_prog_put(prog); - return -EINVAL; - } + new_ptr = map->ops->map_fd_get_ptr(map, ufd); + if (IS_ERR(new_ptr)) + return PTR_ERR(new_ptr); - old_prog = xchg(array->prog + index, prog); - if (old_prog) - bpf_prog_put_rcu(old_prog); + old_ptr = xchg(array->ptrs + index, new_ptr); + if (old_ptr) + map->ops->map_fd_put_ptr(old_ptr); return 0; } -static int prog_array_map_delete_elem(struct bpf_map *map, void *key) +static int fd_array_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_array *array = container_of(map, struct bpf_array, map); - struct bpf_prog *old_prog; + void *old_ptr; u32 index = *(u32 *)key; if (index >= array->map.max_entries) return -E2BIG; - old_prog = xchg(array->prog + index, NULL); - if (old_prog) { - bpf_prog_put_rcu(old_prog); + old_ptr = xchg(array->ptrs + index, NULL); + if (old_ptr) { + map->ops->map_fd_put_ptr(old_ptr); return 0; } else { return -ENOENT; } } +static void *prog_fd_array_get_ptr(struct bpf_map *map, int fd) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + struct bpf_prog *prog = bpf_prog_get(fd); + if (IS_ERR(prog)) + return prog; + + if (!bpf_prog_array_compatible(array, prog)) { + bpf_prog_put(prog); + return ERR_PTR(-EINVAL); + } + return prog; +} + +static void prog_fd_array_put_ptr(void *ptr) +{ + struct bpf_prog *prog = ptr; + + bpf_prog_put_rcu(prog); +} + /* decrement refcnt of all bpf_progs that are stored in this map */ -void bpf_prog_array_map_clear(struct bpf_map *map) +void bpf_fd_array_map_clear(struct bpf_map *map) { struct bpf_array *array = container_of(map, struct bpf_array, map); int i; for (i = 0; i < array->map.max_entries; i++) - prog_array_map_delete_elem(map, &i); + fd_array_map_delete_elem(map, &i); } static const struct bpf_map_ops prog_array_ops = { - .map_alloc = prog_array_map_alloc, - .map_free = prog_array_map_free, + .map_alloc = fd_array_map_alloc, + .map_free = fd_array_map_free, .map_get_next_key = array_map_get_next_key, - .map_lookup_elem = prog_array_map_lookup_elem, - .map_update_elem = prog_array_map_update_elem, - .map_delete_elem = prog_array_map_delete_elem, + .map_lookup_elem = fd_array_map_lookup_elem, + .map_update_elem = fd_array_map_update_elem, + .map_delete_elem = fd_array_map_delete_elem, + .map_fd_get_ptr = prog_fd_array_get_ptr, + .map_fd_put_ptr = prog_fd_array_put_ptr, }; static struct bpf_map_type_list prog_array_type __read_mostly = { diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index fafa74161445..67c380cfa9ca 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -450,7 +450,7 @@ select_insn: tail_call_cnt++; - prog = READ_ONCE(array->prog[index]); + prog = READ_ONCE(array->ptrs[index]); if (unlikely(!prog)) goto out; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index a1b14d197a4f..dc9b464fefa9 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -72,7 +72,7 @@ static int bpf_map_release(struct inode *inode, struct file *filp) /* prog_array stores refcnt-ed bpf_prog pointers * release them all when user space closes prog_array_fd */ - bpf_prog_array_map_clear(map); + bpf_fd_array_map_clear(map); bpf_map_put(map); return 0; -- cgit v1.2.3 From ea317b267e9d03a8241893aa176fba7661d07579 Mon Sep 17 00:00:00 2001 From: Kaixu Xia Date: Thu, 6 Aug 2015 07:02:34 +0000 Subject: bpf: Add new bpf map type to store the pointer to struct perf_event Introduce a new bpf map type 'BPF_MAP_TYPE_PERF_EVENT_ARRAY'. This map only stores the pointer to struct perf_event. The user space event FDs from perf_event_open() syscall are converted to the pointer to struct perf_event and stored in map. Signed-off-by: Kaixu Xia Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 1 + kernel/bpf/arraymap.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d495211d63d1..4fc1f4070789 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -10,6 +10,7 @@ #include #include #include +#include struct bpf_map; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2ce13c109b00..a1814e8e53a7 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -114,6 +114,7 @@ enum bpf_map_type { BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PROG_ARRAY, + BPF_MAP_TYPE_PERF_EVENT_ARRAY, }; enum bpf_prog_type { diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 45df6572ecfd..29ace107f236 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -273,3 +273,60 @@ static int __init register_prog_array_map(void) return 0; } late_initcall(register_prog_array_map); + +static void perf_event_array_map_free(struct bpf_map *map) +{ + bpf_fd_array_map_clear(map); + fd_array_map_free(map); +} + +static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd) +{ + struct perf_event *event; + const struct perf_event_attr *attr; + + event = perf_event_get(fd); + if (IS_ERR(event)) + return event; + + attr = perf_event_attrs(event); + if (IS_ERR(attr)) + return (void *)attr; + + if (attr->type != PERF_TYPE_RAW && + attr->type != PERF_TYPE_HARDWARE) { + perf_event_release_kernel(event); + return ERR_PTR(-EINVAL); + } + return event; +} + +static void perf_event_fd_array_put_ptr(void *ptr) +{ + struct perf_event *event = ptr; + + perf_event_release_kernel(event); +} + +static const struct bpf_map_ops perf_event_array_ops = { + .map_alloc = fd_array_map_alloc, + .map_free = perf_event_array_map_free, + .map_get_next_key = array_map_get_next_key, + .map_lookup_elem = fd_array_map_lookup_elem, + .map_update_elem = fd_array_map_update_elem, + .map_delete_elem = fd_array_map_delete_elem, + .map_fd_get_ptr = perf_event_fd_array_get_ptr, + .map_fd_put_ptr = perf_event_fd_array_put_ptr, +}; + +static struct bpf_map_type_list perf_event_array_type __read_mostly = { + .ops = &perf_event_array_ops, + .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, +}; + +static int __init register_perf_event_array_map(void) +{ + bpf_register_map_type(&perf_event_array_type); + return 0; +} +late_initcall(register_perf_event_array_map); -- cgit v1.2.3 From 35578d7984003097af2b1e34502bc943d40c1804 Mon Sep 17 00:00:00 2001 From: Kaixu Xia Date: Thu, 6 Aug 2015 07:02:35 +0000 Subject: bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU conuter According to the perf_event_map_fd and index, the function bpf_perf_event_read() can convert the corresponding map value to the pointer to struct perf_event and return the Hardware PMU counter value. Signed-off-by: Kaixu Xia Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 1 + kernel/bpf/verifier.c | 48 +++++++++++++++++++++++++++++++++--------------- kernel/trace/bpf_trace.c | 31 +++++++++++++++++++++++++++++++ 4 files changed, 66 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4fc1f4070789..f57d7fed9ec3 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -190,6 +190,7 @@ extern const struct bpf_func_proto bpf_map_lookup_elem_proto; extern const struct bpf_func_proto bpf_map_update_elem_proto; extern const struct bpf_func_proto bpf_map_delete_elem_proto; +extern const struct bpf_func_proto bpf_perf_event_read_proto; extern const struct bpf_func_proto bpf_get_prandom_u32_proto; extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; extern const struct bpf_func_proto bpf_tail_call_proto; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a1814e8e53a7..92a48e2d5461 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -271,6 +271,7 @@ enum bpf_func_id { */ BPF_FUNC_skb_get_tunnel_key, BPF_FUNC_skb_set_tunnel_key, + BPF_FUNC_perf_event_read, /* u64 bpf_perf_event_read(&map, index) */ __BPF_FUNC_MAX_ID, }; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index cd307df98cb3..48e1c7192560 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -238,6 +238,14 @@ static const char * const reg_type_str[] = { [CONST_IMM] = "imm", }; +static const struct { + int map_type; + int func_id; +} func_limit[] = { + {BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call}, + {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_read}, +}; + static void print_verifier_state(struct verifier_env *env) { enum bpf_reg_type t; @@ -837,6 +845,28 @@ static int check_func_arg(struct verifier_env *env, u32 regno, return err; } +static int check_map_func_compatibility(struct bpf_map *map, int func_id) +{ + bool bool_map, bool_func; + int i; + + if (!map) + return 0; + + for (i = 0; i <= ARRAY_SIZE(func_limit); i++) { + bool_map = (map->map_type == func_limit[i].map_type); + bool_func = (func_id == func_limit[i].func_id); + /* only when map & func pair match it can continue. + * don't allow any other map type to be passed into + * the special func; + */ + if (bool_map != bool_func) + return -EINVAL; + } + + return 0; +} + static int check_call(struct verifier_env *env, int func_id) { struct verifier_state *state = &env->cur_state; @@ -912,21 +942,9 @@ static int check_call(struct verifier_env *env, int func_id) return -EINVAL; } - if (map && map->map_type == BPF_MAP_TYPE_PROG_ARRAY && - func_id != BPF_FUNC_tail_call) - /* prog_array map type needs extra care: - * only allow to pass it into bpf_tail_call() for now. - * bpf_map_delete_elem() can be allowed in the future, - * while bpf_map_update_elem() must only be done via syscall - */ - return -EINVAL; - - if (func_id == BPF_FUNC_tail_call && - map->map_type != BPF_MAP_TYPE_PROG_ARRAY) - /* don't allow any other map type to be passed into - * bpf_tail_call() - */ - return -EINVAL; + err = check_map_func_compatibility(map, func_id); + if (err) + return err; return 0; } diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 88a041adee90..ef9936df1b04 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -158,6 +158,35 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void) return &bpf_trace_printk_proto; } +static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5) +{ + struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; + struct bpf_array *array = container_of(map, struct bpf_array, map); + struct perf_event *event; + + if (unlikely(index >= array->map.max_entries)) + return -E2BIG; + + event = (struct perf_event *)array->ptrs[index]; + if (!event) + return -ENOENT; + + /* + * we don't know if the function is run successfully by the + * return value. It can be judged in other places, such as + * eBPF programs. + */ + return perf_event_read_local(event); +} + +const struct bpf_func_proto bpf_perf_event_read_proto = { + .func = bpf_perf_event_read, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_ANYTHING, +}; + static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id) { switch (func_id) { @@ -183,6 +212,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func return bpf_get_trace_printk_proto(); case BPF_FUNC_get_smp_processor_id: return &bpf_get_smp_processor_id_proto; + case BPF_FUNC_perf_event_read: + return &bpf_perf_event_read_proto; default: return NULL; } -- cgit v1.2.3 From 51e0e5d8124ece158927a4c2288c0929d3b53aa3 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 10 Aug 2015 21:15:53 +0200 Subject: ieee802154: 6lowpan: remove multiple lowpan per wpan support We currently supports multiple lowpan interfaces per wpan interface. I never saw any use case into such functionality. We drop this feature now because it's much easier do deal with address changes inside the under laying wpan interface. This patch removes the multiple lowpan interface and adds a lowpan_dev netdev pointer into the wpan_dev, if this pointer isn't null the wpan interface belongs to the assigned lowpan interface. Reviewed-by: Stefan Schmidt Tested-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/cfg802154.h | 3 ++ net/ieee802154/6lowpan/6lowpan_i.h | 8 ----- net/ieee802154/6lowpan/core.c | 67 +++++++++++--------------------------- net/ieee802154/6lowpan/rx.c | 38 +++++---------------- 4 files changed, 30 insertions(+), 86 deletions(-) (limited to 'include') diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 382f94b59f2f..e53b6bfda976 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -173,6 +173,9 @@ struct wpan_dev { struct list_head list; struct net_device *netdev; + /* lowpan interface, set when the wpan_dev belongs to one lowpan_dev */ + struct net_device *lowpan_dev; + u32 identifier; /* MAC PIB */ diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h index e50f69da78eb..923b680adb61 100644 --- a/net/ieee802154/6lowpan/6lowpan_i.h +++ b/net/ieee802154/6lowpan/6lowpan_i.h @@ -37,15 +37,9 @@ static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a) } } -struct lowpan_dev_record { - struct net_device *ldev; - struct list_head list; -}; - /* private device info */ struct lowpan_dev_info { struct net_device *real_dev; /* real WPAN device ptr */ - struct mutex dev_list_mtx; /* mutex for list ops */ u16 fragment_tag; }; @@ -55,8 +49,6 @@ lowpan_dev_info *lowpan_dev_info(const struct net_device *dev) return netdev_priv(dev); } -extern struct list_head lowpan_devices; - int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type); void lowpan_net_frag_exit(void); int lowpan_net_frag_init(void); diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index f20a387a1011..a4edee8fdc79 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -52,9 +52,6 @@ #include "6lowpan_i.h" -LIST_HEAD(lowpan_devices); -static int lowpan_open_count; - static struct header_ops lowpan_header_ops = { .create = lowpan_header_create, }; @@ -114,7 +111,6 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct net_device *real_dev; - struct lowpan_dev_record *entry; int ret; ASSERT_RTNL(); @@ -133,31 +129,19 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev, return -EINVAL; } - lowpan_dev_info(dev)->real_dev = real_dev; - mutex_init(&lowpan_dev_info(dev)->dev_list_mtx); - - entry = kzalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) { + if (real_dev->ieee802154_ptr->lowpan_dev) { dev_put(real_dev); - lowpan_dev_info(dev)->real_dev = NULL; - return -ENOMEM; + return -EBUSY; } - entry->ldev = dev; - + lowpan_dev_info(dev)->real_dev = real_dev; /* Set the lowpan hardware address to the wpan hardware address. */ memcpy(dev->dev_addr, real_dev->dev_addr, IEEE802154_ADDR_LEN); - mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx); - INIT_LIST_HEAD(&entry->list); - list_add_tail(&entry->list, &lowpan_devices); - mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx); - ret = register_netdevice(dev); if (ret >= 0) { - if (!lowpan_open_count) - lowpan_rx_init(); - lowpan_open_count++; + real_dev->ieee802154_ptr->lowpan_dev = dev; + lowpan_rx_init(); } return ret; @@ -167,27 +151,12 @@ static void lowpan_dellink(struct net_device *dev, struct list_head *head) { struct lowpan_dev_info *lowpan_dev = lowpan_dev_info(dev); struct net_device *real_dev = lowpan_dev->real_dev; - struct lowpan_dev_record *entry, *tmp; ASSERT_RTNL(); - lowpan_open_count--; - if (!lowpan_open_count) - lowpan_rx_exit(); - - mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx); - list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) { - if (entry->ldev == dev) { - list_del(&entry->list); - kfree(entry); - } - } - mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx); - - mutex_destroy(&lowpan_dev_info(dev)->dev_list_mtx); - - unregister_netdevice_queue(dev, head); - + lowpan_rx_exit(); + real_dev->ieee802154_ptr->lowpan_dev = NULL; + unregister_netdevice(dev); dev_put(real_dev); } @@ -214,19 +183,21 @@ static int lowpan_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); - LIST_HEAD(del_list); - struct lowpan_dev_record *entry, *tmp; if (dev->type != ARPHRD_IEEE802154) goto out; - if (event == NETDEV_UNREGISTER) { - list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) { - if (lowpan_dev_info(entry->ldev)->real_dev == dev) - lowpan_dellink(entry->ldev, &del_list); - } - - unregister_netdevice_many(&del_list); + switch (event) { + case NETDEV_UNREGISTER: + /* Check if wpan interface is unregistered that we + * also delete possible lowpan interfaces which belongs + * to the wpan interface. + */ + if (dev->ieee802154_ptr && dev->ieee802154_ptr->lowpan_dev) + lowpan_dellink(dev->ieee802154_ptr->lowpan_dev, NULL); + break; + default: + break; } out: diff --git a/net/ieee802154/6lowpan/rx.c b/net/ieee802154/6lowpan/rx.c index 4be1d289ab2d..d6f5e8ee6fda 100644 --- a/net/ieee802154/6lowpan/rx.c +++ b/net/ieee802154/6lowpan/rx.c @@ -15,36 +15,14 @@ #include "6lowpan_i.h" -static int lowpan_give_skb_to_devices(struct sk_buff *skb, - struct net_device *dev) +static int lowpan_give_skb_to_device(struct sk_buff *skb, + struct net_device *dev) { - struct lowpan_dev_record *entry; - struct sk_buff *skb_cp; - int stat = NET_RX_SUCCESS; - + skb->dev = dev->ieee802154_ptr->lowpan_dev; skb->protocol = htons(ETH_P_IPV6); skb->pkt_type = PACKET_HOST; - rcu_read_lock(); - list_for_each_entry_rcu(entry, &lowpan_devices, list) - if (lowpan_dev_info(entry->ldev)->real_dev == skb->dev) { - skb_cp = skb_copy(skb, GFP_ATOMIC); - if (!skb_cp) { - kfree_skb(skb); - rcu_read_unlock(); - return NET_RX_DROP; - } - - skb_cp->dev = entry->ldev; - stat = netif_rx(skb_cp); - if (stat == NET_RX_DROP) - break; - } - rcu_read_unlock(); - - consume_skb(skb); - - return stat; + return netif_rx(skb); } static int @@ -109,7 +87,7 @@ static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev, if (skb->data[0] == LOWPAN_DISPATCH_IPV6) { /* Pull off the 1-byte of 6lowpan header. */ skb_pull(skb, 1); - return lowpan_give_skb_to_devices(skb, NULL); + return lowpan_give_skb_to_device(skb, dev); } else { switch (skb->data[0] & 0xe0) { case LOWPAN_DISPATCH_IPHC: /* ipv6 datagram */ @@ -117,7 +95,7 @@ static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev, if (ret < 0) goto drop_skb; - return lowpan_give_skb_to_devices(skb, NULL); + return lowpan_give_skb_to_device(skb, dev); case LOWPAN_DISPATCH_FRAG1: /* first fragment header */ ret = lowpan_frag_rcv(skb, LOWPAN_DISPATCH_FRAG1); if (ret == 1) { @@ -125,7 +103,7 @@ static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev, if (ret < 0) goto drop_skb; - return lowpan_give_skb_to_devices(skb, NULL); + return lowpan_give_skb_to_device(skb, dev); } else if (ret == -1) { return NET_RX_DROP; } else { @@ -138,7 +116,7 @@ static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev, if (ret < 0) goto drop_skb; - return lowpan_give_skb_to_devices(skb, NULL); + return lowpan_give_skb_to_device(skb, dev); } else if (ret == -1) { return NET_RX_DROP; } else { -- cgit v1.2.3 From c91208d819c814e7f418c7a083059cf533ad0396 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 10 Aug 2015 21:15:58 +0200 Subject: ieee802154: add ack request default handling This patch introduce a new mib entry which isn't part of 802.15.4 but useful as default behaviour to set the ack request bit or not if we don't know if the ack request bit should set. This is currently used for stacks like IEEE 802.15.4 6LoWPAN. Reviewed-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/cfg802154.h | 5 +++++ include/net/nl802154.h | 4 ++++ net/ieee802154/6lowpan/tx.c | 2 +- net/ieee802154/nl802154.c | 33 +++++++++++++++++++++++++++++++++ net/ieee802154/rdev-ops.h | 13 +++++++++++++ net/ieee802154/trace.h | 19 +++++++++++++++++++ net/mac802154/cfg.c | 11 +++++++++++ 7 files changed, 86 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index e53b6bfda976..76b1ffaea863 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -63,6 +63,8 @@ struct cfg802154_ops { s8 max_frame_retries); int (*set_lbt_mode)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, bool mode); + int (*set_ackreq_default)(struct wpan_phy *wpan_phy, + struct wpan_dev *wpan_dev, bool ackreq); }; static inline bool @@ -196,6 +198,9 @@ struct wpan_dev { bool lbt; bool promiscuous_mode; + + /* fallback for acknowledgment bit setting */ + bool ackreq; }; #define to_phy(_dev) container_of(_dev, struct wpan_phy, dev) diff --git a/include/net/nl802154.h b/include/net/nl802154.h index b0ab530d28cd..cf2713d8b975 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -52,6 +52,8 @@ enum nl802154_commands { NL802154_CMD_SET_LBT_MODE, + NL802154_CMD_SET_ACKREQ_DEFAULT, + /* add new commands above here */ /* used to define NL802154_CMD_MAX below */ @@ -104,6 +106,8 @@ enum nl802154_attrs { NL802154_ATTR_SUPPORTED_COMMANDS, + NL802154_ATTR_ACKREQ_DEFAULT, + /* add attributes here, update the policy in nl802154.c */ __NL802154_ATTR_AFTER_LAST, diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c index 2597abbf7f4b..1bf4a304b5c4 100644 --- a/net/ieee802154/6lowpan/tx.c +++ b/net/ieee802154/6lowpan/tx.c @@ -224,7 +224,7 @@ static int lowpan_header(struct sk_buff *skb, struct net_device *dev) } else { da.mode = IEEE802154_ADDR_LONG; da.extended_addr = ieee802154_devaddr_from_raw(daddr); - cb->ackreq = wpan_dev->frame_retries >= 0; + cb->ackreq = wpan_dev->ackreq; } return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev, diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 68f24016860c..1b00a14850cb 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -230,6 +230,8 @@ static const struct nla_policy nl802154_policy[NL802154_ATTR_MAX+1] = { [NL802154_ATTR_WPAN_PHY_CAPS] = { .type = NLA_NESTED }, [NL802154_ATTR_SUPPORTED_COMMANDS] = { .type = NLA_NESTED }, + + [NL802154_ATTR_ACKREQ_DEFAULT] = { .type = NLA_U8 }, }; /* message building helper */ @@ -458,6 +460,7 @@ static int nl802154_send_wpan_phy(struct cfg802154_registered_device *rdev, CMD(set_max_csma_backoffs, SET_MAX_CSMA_BACKOFFS); CMD(set_max_frame_retries, SET_MAX_FRAME_RETRIES); CMD(set_lbt_mode, SET_LBT_MODE); + CMD(set_ackreq_default, SET_ACKREQ_DEFAULT); if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_TXPOWER) CMD(set_tx_power, SET_TX_POWER); @@ -656,6 +659,10 @@ nl802154_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags, if (nla_put_u8(msg, NL802154_ATTR_LBT_MODE, wpan_dev->lbt)) goto nla_put_failure; + /* ackreq default behaviour */ + if (nla_put_u8(msg, NL802154_ATTR_ACKREQ_DEFAULT, wpan_dev->ackreq)) + goto nla_put_failure; + genlmsg_end(msg, hdr); return 0; @@ -1042,6 +1049,24 @@ static int nl802154_set_lbt_mode(struct sk_buff *skb, struct genl_info *info) return rdev_set_lbt_mode(rdev, wpan_dev, mode); } +static int +nl802154_set_ackreq_default(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg802154_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wpan_dev *wpan_dev = dev->ieee802154_ptr; + bool ackreq; + + if (netif_running(dev)) + return -EBUSY; + + if (!info->attrs[NL802154_ATTR_ACKREQ_DEFAULT]) + return -EINVAL; + + ackreq = !!nla_get_u8(info->attrs[NL802154_ATTR_ACKREQ_DEFAULT]); + return rdev_set_ackreq_default(rdev, wpan_dev, ackreq); +} + #define NL802154_FLAG_NEED_WPAN_PHY 0x01 #define NL802154_FLAG_NEED_NETDEV 0x02 #define NL802154_FLAG_NEED_RTNL 0x04 @@ -1248,6 +1273,14 @@ static const struct genl_ops nl802154_ops[] = { .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, + { + .cmd = NL802154_CMD_SET_ACKREQ_DEFAULT, + .doit = nl802154_set_ackreq_default, + .policy = nl802154_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL802154_FLAG_NEED_NETDEV | + NL802154_FLAG_NEED_RTNL, + }, }; /* initialisation/exit functions */ diff --git a/net/ieee802154/rdev-ops.h b/net/ieee802154/rdev-ops.h index 8d5960a37195..03b357501cc5 100644 --- a/net/ieee802154/rdev-ops.h +++ b/net/ieee802154/rdev-ops.h @@ -195,4 +195,17 @@ rdev_set_lbt_mode(struct cfg802154_registered_device *rdev, return ret; } +static inline int +rdev_set_ackreq_default(struct cfg802154_registered_device *rdev, + struct wpan_dev *wpan_dev, bool ackreq) +{ + int ret; + + trace_802154_rdev_set_ackreq_default(&rdev->wpan_phy, wpan_dev, + ackreq); + ret = rdev->ops->set_ackreq_default(&rdev->wpan_phy, wpan_dev, ackreq); + trace_802154_rdev_return_int(&rdev->wpan_phy, ret); + return ret; +} + #endif /* __CFG802154_RDEV_OPS */ diff --git a/net/ieee802154/trace.h b/net/ieee802154/trace.h index 4399b7fbaa31..9a471e41ec73 100644 --- a/net/ieee802154/trace.h +++ b/net/ieee802154/trace.h @@ -275,6 +275,25 @@ TRACE_EVENT(802154_rdev_set_lbt_mode, WPAN_DEV_PR_ARG, BOOL_TO_STR(__entry->mode)) ); +TRACE_EVENT(802154_rdev_set_ackreq_default, + TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, + bool ackreq), + TP_ARGS(wpan_phy, wpan_dev, ackreq), + TP_STRUCT__entry( + WPAN_PHY_ENTRY + WPAN_DEV_ENTRY + __field(bool, ackreq) + ), + TP_fast_assign( + WPAN_PHY_ASSIGN; + WPAN_DEV_ASSIGN; + __entry->ackreq = ackreq; + ), + TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT + ", ackreq default: %s", WPAN_PHY_PR_ARG, + WPAN_DEV_PR_ARG, BOOL_TO_STR(__entry->ackreq)) +); + TRACE_EVENT(802154_rdev_return_int, TP_PROTO(struct wpan_phy *wpan_phy, int ret), TP_ARGS(wpan_phy, ret), diff --git a/net/mac802154/cfg.c b/net/mac802154/cfg.c index cecfcda09aac..c865ebb2ace2 100644 --- a/net/mac802154/cfg.c +++ b/net/mac802154/cfg.c @@ -256,6 +256,16 @@ ieee802154_set_lbt_mode(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, return 0; } +static int +ieee802154_set_ackreq_default(struct wpan_phy *wpan_phy, + struct wpan_dev *wpan_dev, bool ackreq) +{ + ASSERT_RTNL(); + + wpan_dev->ackreq = ackreq; + return 0; +} + const struct cfg802154_ops mac802154_config_ops = { .add_virtual_intf_deprecated = ieee802154_add_iface_deprecated, .del_virtual_intf_deprecated = ieee802154_del_iface_deprecated, @@ -273,4 +283,5 @@ const struct cfg802154_ops mac802154_config_ops = { .set_max_csma_backoffs = ieee802154_set_max_csma_backoffs, .set_max_frame_retries = ieee802154_set_max_frame_retries, .set_lbt_mode = ieee802154_set_lbt_mode, + .set_ackreq_default = ieee802154_set_ackreq_default, }; -- cgit v1.2.3 From 158e92185075184ebc5f25bab61fdd598693e28d Mon Sep 17 00:00:00 2001 From: Jakub Pawlowski Date: Fri, 7 Aug 2015 20:22:51 +0200 Subject: Bluetooth: preparation for new connect procedure Currently, when trying to connect to already paired device that just rotated its RPA MAC address, old address would be used and connection would fail. In order to fix that, kernel must scan and receive advertisement with fresh RPA before connecting. This patch adds some fields to hci_conn_params, in preparation to new connect procedure. explicit_connect will be used to override any current auto_connect action, and connect to device when ad is received. HCI_AUTO_CONN_EXPLICIT was added to auto_connect enum. When this value will be used, explicit connect is the only action, and params can be removed after successful connection. HCI_CONN_SCANNING is added to hci_conn flags. When it's set, connect is scan phase. It gets cleared when advertisement is received, and HCI_OP_LE_CREATE_CONN is sent. Signed-off-by: Jakub Pawlowski Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 2a6b0919e23f..c8d2b5a89d08 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -512,9 +512,11 @@ struct hci_conn_params { HCI_AUTO_CONN_DIRECT, HCI_AUTO_CONN_ALWAYS, HCI_AUTO_CONN_LINK_LOSS, + HCI_AUTO_CONN_EXPLICIT, } auto_connect; struct hci_conn *conn; + bool explicit_connect; }; extern struct list_head hci_dev_list; @@ -639,6 +641,7 @@ enum { HCI_CONN_DROP, HCI_CONN_PARAM_REMOVAL_PEND, HCI_CONN_NEW_LINK_KEY, + HCI_CONN_SCANNING, }; static inline bool hci_conn_ssp_enabled(struct hci_conn *conn) -- cgit v1.2.3 From e7d9ab731ac7babaf2e1b7b5e2280f5f555d263f Mon Sep 17 00:00:00 2001 From: Jakub Pawlowski Date: Fri, 7 Aug 2015 20:22:52 +0200 Subject: Bluetooth: add hci_lookup_le_connect This patch adds hci_lookup_le_connect method, that will be used to check wether outgoing le connection attempt is in progress. Signed-off-by: Jakub Pawlowski Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 20 ++++++++++++++++++++ net/bluetooth/hci_conn.c | 5 ++--- net/bluetooth/hci_event.c | 4 ++-- net/bluetooth/hci_request.c | 6 ++---- net/bluetooth/mgmt.c | 2 +- 5 files changed, 27 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index c8d2b5a89d08..f0a9fc1d06e0 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -811,6 +811,26 @@ static inline struct hci_conn *hci_conn_hash_lookup_state(struct hci_dev *hdev, return NULL; } +static inline struct hci_conn *hci_lookup_le_connect(struct hci_dev *hdev) +{ + struct hci_conn_hash *h = &hdev->conn_hash; + struct hci_conn *c; + + rcu_read_lock(); + + list_for_each_entry_rcu(c, &h->list, list) { + if (c->type == LE_LINK && c->state == BT_CONNECT && + !test_bit(HCI_CONN_SCANNING, &c->flags)) { + rcu_read_unlock(); + return c; + } + } + + rcu_read_unlock(); + + return NULL; +} + int hci_disconnect(struct hci_conn *conn, __u8 reason); bool hci_setup_sync(struct hci_conn *conn, __u16 handle); void hci_sco_setup(struct hci_conn *conn, __u8 status); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 2c48bf0b5afb..0b4d919c8d96 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -645,7 +645,7 @@ static void create_le_conn_complete(struct hci_dev *hdev, u8 status, u16 opcode) hci_dev_lock(hdev); - conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); + conn = hci_lookup_le_connect(hdev); if (!conn) goto done; @@ -759,8 +759,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, /* Since the controller supports only one LE connection attempt at a * time, we return -EBUSY if there is any connection attempt running. */ - conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); - if (conn) + if (hci_lookup_le_connect(hdev)) return ERR_PTR(-EBUSY); /* When given an identity address with existing identity diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 218d7dfc342f..128c5b70ee5e 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1059,7 +1059,7 @@ static void hci_cc_le_set_adv_enable(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_set_flag(hdev, HCI_LE_ADV); - conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); + conn = hci_lookup_le_connect(hdev); if (conn) queue_delayed_work(hdev->workqueue, &conn->le_conn_timeout, @@ -4447,7 +4447,7 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) */ hci_dev_clear_flag(hdev, HCI_LE_ADV); - conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); + conn = hci_lookup_le_connect(hdev); if (!conn) { conn = hci_conn_add(hdev, LE_LINK, &ev->bdaddr, ev->role); if (!conn) { diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index d6025d6e6d59..b7369220c9ef 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -317,7 +317,7 @@ static void set_random_addr(struct hci_request *req, bdaddr_t *rpa) * address be updated at the next cycle. */ if (hci_dev_test_flag(hdev, HCI_LE_ADV) || - hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT)) { + hci_lookup_le_connect(hdev)) { BT_DBG("Deferring random address update"); hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); return; @@ -479,7 +479,6 @@ void hci_update_page_scan(struct hci_dev *hdev) void __hci_update_background_scan(struct hci_request *req) { struct hci_dev *hdev = req->hdev; - struct hci_conn *conn; if (!test_bit(HCI_UP, &hdev->flags) || test_bit(HCI_INIT, &hdev->flags) || @@ -529,8 +528,7 @@ void __hci_update_background_scan(struct hci_request *req) * since some controllers are not able to scan and connect at * the same time. */ - conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); - if (conn) + if (hci_lookup_le_connect(hdev)) return; /* If controller is currently scanning, we stop it to ensure we diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 7ab191589541..35418bbe6b15 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -4210,7 +4210,7 @@ static bool trigger_le_scan(struct hci_request *req, u16 interval, u8 *status) /* Don't let discovery abort an outgoing connection attempt * that's using directed advertising. */ - if (hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT)) { + if (hci_lookup_le_connect(hdev)) { *status = MGMT_STATUS_REJECTED; return false; } -- cgit v1.2.3 From f75113a26008980ca13834fb6573145523596776 Mon Sep 17 00:00:00 2001 From: Jakub Pawlowski Date: Fri, 7 Aug 2015 20:22:53 +0200 Subject: Bluetooth: add hci_connect_le_scan Currently, when trying to connect to already paired device that just rotated its RPA MAC address, old address would be used and connection would fail. In order to fix that, kernel must scan and receive advertisement with fresh RPA before connecting. This patch adds hci_connect_le_scan with dependencies, new method that will be used to connect to remote LE devices. Instead of just sending connect request, it adds a device to whitelist. Later patches will make use of this whitelist to send conenct request when advertisement is received, and properly handle timeouts. Signed-off-by: Jakub Pawlowski Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 6 ++ net/bluetooth/hci_conn.c | 174 +++++++++++++++++++++++++++++++++++++++ net/bluetooth/hci_core.c | 33 ++++++++ 3 files changed, 213 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index f0a9fc1d06e0..9e1a59e01fa2 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -846,6 +846,9 @@ void hci_chan_del(struct hci_chan *chan); void hci_chan_list_flush(struct hci_conn *conn); struct hci_chan *hci_chan_lookup_handle(struct hci_dev *hdev, __u16 handle); +struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, + u8 dst_type, u8 sec_level, + u16 conn_timeout, u8 role); struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, u8 sec_level, u16 conn_timeout, u8 role); @@ -1011,6 +1014,9 @@ void hci_conn_params_clear_disabled(struct hci_dev *hdev); struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list, bdaddr_t *addr, u8 addr_type); +struct hci_conn_params *hci_explicit_connect_lookup(struct hci_dev *hdev, + bdaddr_t *addr, + u8 addr_type); void hci_uuids_clear(struct hci_dev *hdev); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 0b4d919c8d96..534feb7956a3 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -64,6 +64,48 @@ static void hci_le_create_connection_cancel(struct hci_conn *conn) hci_send_cmd(conn->hdev, HCI_OP_LE_CREATE_CONN_CANCEL, 0, NULL); } +/* This function requires the caller holds hdev->lock */ +static void hci_connect_le_scan_cleanup(struct hci_conn *conn) +{ + struct hci_conn_params *params; + struct smp_irk *irk; + bdaddr_t *bdaddr; + u8 bdaddr_type; + + bdaddr = &conn->dst; + bdaddr_type = conn->dst_type; + + /* Check if we need to convert to identity address */ + irk = hci_get_irk(conn->hdev, bdaddr, bdaddr_type); + if (irk) { + bdaddr = &irk->bdaddr; + bdaddr_type = irk->addr_type; + } + + params = hci_explicit_connect_lookup(conn->hdev, bdaddr, bdaddr_type); + if (!params) + return; + + /* The connection attempt was doing scan for new RPA, and is + * in scan phase. If params are not associated with any other + * autoconnect action, remove them completely. If they are, just unmark + * them as waiting for connection, by clearing explicit_connect field. + */ + if (params->auto_connect == HCI_AUTO_CONN_EXPLICIT) + hci_conn_params_del(conn->hdev, bdaddr, bdaddr_type); + else + params->explicit_connect = false; +} + +/* This function requires the caller holds hdev->lock */ +static void hci_connect_le_scan_remove(struct hci_conn *conn) +{ + hci_connect_le_scan_cleanup(conn); + + hci_conn_hash_del(conn->hdev, conn); + hci_update_background_scan(conn->hdev); +} + static void hci_acl_create_connection(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; @@ -858,6 +900,138 @@ done: return conn; } +static void hci_connect_le_scan_complete(struct hci_dev *hdev, u8 status, + u16 opcode) +{ + struct hci_conn *conn; + + if (!status) + return; + + BT_ERR("Failed to add device to auto conn whitelist: status 0x%2.2x", + status); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); + if (conn) + hci_le_conn_failed(conn, status); + + hci_dev_unlock(hdev); +} + +static bool is_connected(struct hci_dev *hdev, bdaddr_t *addr, u8 type) +{ + struct hci_conn *conn; + + conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, addr); + if (!conn) + return false; + + if (conn->dst_type != type) + return false; + + if (conn->state != BT_CONNECTED) + return false; + + return true; +} + +/* This function requires the caller holds hdev->lock */ +static int hci_explicit_conn_params_set(struct hci_request *req, + bdaddr_t *addr, u8 addr_type) +{ + struct hci_dev *hdev = req->hdev; + struct hci_conn_params *params; + + if (is_connected(hdev, addr, addr_type)) + return -EISCONN; + + params = hci_conn_params_add(hdev, addr, addr_type); + if (!params) + return -EIO; + + /* If we created new params, or existing params were marked as disabled, + * mark them to be used just once to connect. + */ + if (params->auto_connect == HCI_AUTO_CONN_DISABLED) { + params->auto_connect = HCI_AUTO_CONN_EXPLICIT; + list_del_init(¶ms->action); + list_add(¶ms->action, &hdev->pend_le_conns); + } + + params->explicit_connect = true; + __hci_update_background_scan(req); + + BT_DBG("addr %pMR (type %u) auto_connect %u", addr, addr_type, + params->auto_connect); + + return 0; +} + +/* This function requires the caller holds hdev->lock */ +struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, + u8 dst_type, u8 sec_level, + u16 conn_timeout, u8 role) +{ + struct hci_conn *conn; + struct hci_request req; + int err; + + /* Let's make sure that le is enabled.*/ + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) { + if (lmp_le_capable(hdev)) + return ERR_PTR(-ECONNREFUSED); + + return ERR_PTR(-EOPNOTSUPP); + } + + /* Some devices send ATT messages as soon as the physical link is + * established. To be able to handle these ATT messages, the user- + * space first establishes the connection and then starts the pairing + * process. + * + * So if a hci_conn object already exists for the following connection + * attempt, we simply update pending_sec_level and auth_type fields + * and return the object found. + */ + conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, dst); + if (conn) { + if (conn->pending_sec_level < sec_level) + conn->pending_sec_level = sec_level; + goto done; + } + + BT_DBG("requesting refresh of dst_addr"); + + conn = hci_conn_add(hdev, LE_LINK, dst, role); + if (!conn) + return ERR_PTR(-ENOMEM); + + hci_req_init(&req, hdev); + + if (hci_explicit_conn_params_set(&req, dst, dst_type) < 0) + return ERR_PTR(-EBUSY); + + conn->state = BT_CONNECT; + set_bit(HCI_CONN_SCANNING, &conn->flags); + + err = hci_req_run(&req, hci_connect_le_scan_complete); + if (err && err != -ENODATA) { + hci_conn_del(conn); + return ERR_PTR(err); + } + + conn->dst_type = dst_type; + conn->sec_level = BT_SECURITY_LOW; + conn->pending_sec_level = sec_level; + conn->conn_timeout = conn_timeout; + +done: + hci_conn_hold(conn); + return conn; +} + struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, u8 sec_level, u8 auth_type) { diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index bc43b6490555..adcbc74c2432 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2847,6 +2847,30 @@ struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list, return NULL; } +/* This function requires the caller holds hdev->lock */ +struct hci_conn_params *hci_explicit_connect_lookup(struct hci_dev *hdev, + bdaddr_t *addr, + u8 addr_type) +{ + struct hci_conn_params *param; + + list_for_each_entry(param, &hdev->pend_le_conns, action) { + if (bacmp(¶m->addr, addr) == 0 && + param->addr_type == addr_type && + param->explicit_connect) + return param; + } + + list_for_each_entry(param, &hdev->pend_le_reports, action) { + if (bacmp(¶m->addr, addr) == 0 && + param->addr_type == addr_type && + param->explicit_connect) + return param; + } + + return NULL; +} + /* This function requires the caller holds hdev->lock */ struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type) @@ -2916,6 +2940,15 @@ void hci_conn_params_clear_disabled(struct hci_dev *hdev) list_for_each_entry_safe(params, tmp, &hdev->le_conn_params, list) { if (params->auto_connect != HCI_AUTO_CONN_DISABLED) continue; + + /* If trying to estabilish one time connection to disabled + * device, leave the params, but mark them as just once. + */ + if (params->explicit_connect) { + params->auto_connect = HCI_AUTO_CONN_EXPLICIT; + continue; + } + list_del(¶ms->list); kfree(params); } -- cgit v1.2.3 From 7a76a021cd5a292be875fbc616daf03eab1e6996 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Fri, 7 Aug 2015 09:32:21 -0700 Subject: net-timestamp: Update skb_complete_tx_timestamp comment After "62bccb8 net-timestamp: Make the clone operation stand-alone from phy timestamping" the hwtstamps parameter of skb_complete_tx_timestamp() may no longer be NULL. Signed-off-by: Benjamin Poirier Cc: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d6cdd6e87d53..22b6d9ca1654 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2884,11 +2884,11 @@ static inline bool skb_defer_rx_timestamp(struct sk_buff *skb) * * PHY drivers may accept clones of transmitted packets for * timestamping via their phy_driver.txtstamp method. These drivers - * must call this function to return the skb back to the stack, with - * or without a timestamp. + * must call this function to return the skb back to the stack with a + * timestamp. * * @skb: clone of the the original outgoing packet - * @hwtstamps: hardware time stamps, may be NULL if not available + * @hwtstamps: hardware time stamps * */ void skb_complete_tx_timestamp(struct sk_buff *skb, -- cgit v1.2.3 From a7854037da006a7472c48773e3190db55217ec9b Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 7 Aug 2015 19:40:45 +0300 Subject: bridge: netlink: add support for vlan_filtering attribute This patch adds the ability to toggle the vlan filtering support via netlink. Since we're already running with rtnl in .changelink() we don't need to take any additional locks. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + net/bridge/br_netlink.c | 14 +++++++++++++- net/bridge/br_private.h | 7 +++++++ net/bridge/br_vlan.c | 18 ++++++++++++------ 4 files changed, 33 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index f24ec99a2262..d450be36add2 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -230,6 +230,7 @@ enum { IFLA_BR_AGEING_TIME, IFLA_BR_STP_STATE, IFLA_BR_PRIORITY, + IFLA_BR_VLAN_FILTERING, __IFLA_BR_MAX, }; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 91a2e08c2bb8..6eb683d8e0c5 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -724,6 +724,7 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = { [IFLA_BR_AGEING_TIME] = { .type = NLA_U32 }, [IFLA_BR_STP_STATE] = { .type = NLA_U32 }, [IFLA_BR_PRIORITY] = { .type = NLA_U16 }, + [IFLA_BR_VLAN_FILTERING] = { .type = NLA_U8 }, }; static int br_changelink(struct net_device *brdev, struct nlattr *tb[], @@ -771,6 +772,14 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], br_stp_set_bridge_priority(br, priority); } + if (data[IFLA_BR_VLAN_FILTERING]) { + u8 vlan_filter = nla_get_u8(data[IFLA_BR_VLAN_FILTERING]); + + err = __br_vlan_filter_toggle(br, vlan_filter); + if (err) + return err; + } + return 0; } @@ -782,6 +791,7 @@ static size_t br_get_size(const struct net_device *brdev) nla_total_size(sizeof(u32)) + /* IFLA_BR_AGEING_TIME */ nla_total_size(sizeof(u32)) + /* IFLA_BR_STP_STATE */ nla_total_size(sizeof(u16)) + /* IFLA_BR_PRIORITY */ + nla_total_size(sizeof(u8)) + /* IFLA_BR_VLAN_FILTERING */ 0; } @@ -794,13 +804,15 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) u32 ageing_time = jiffies_to_clock_t(br->ageing_time); u32 stp_enabled = br->stp_enabled; u16 priority = (br->bridge_id.prio[0] << 8) | br->bridge_id.prio[1]; + u8 vlan_enabled = br_vlan_enabled(br); if (nla_put_u32(skb, IFLA_BR_FORWARD_DELAY, forward_delay) || nla_put_u32(skb, IFLA_BR_HELLO_TIME, hello_time) || nla_put_u32(skb, IFLA_BR_MAX_AGE, age_time) || nla_put_u32(skb, IFLA_BR_AGEING_TIME, ageing_time) || nla_put_u32(skb, IFLA_BR_STP_STATE, stp_enabled) || - nla_put_u16(skb, IFLA_BR_PRIORITY, priority)) + nla_put_u16(skb, IFLA_BR_PRIORITY, priority) || + nla_put_u8(skb, IFLA_BR_VLAN_FILTERING, vlan_enabled)) return -EMSGSIZE; return 0; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index e2cb359f9dd3..3d95647039d0 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -614,6 +614,7 @@ int br_vlan_delete(struct net_bridge *br, u16 vid); void br_vlan_flush(struct net_bridge *br); bool br_vlan_find(struct net_bridge *br, u16 vid); void br_recalculate_fwd_mask(struct net_bridge *br); +int __br_vlan_filter_toggle(struct net_bridge *br, unsigned long val); int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val); int br_vlan_set_proto(struct net_bridge *br, unsigned long val); int br_vlan_init(struct net_bridge *br); @@ -771,6 +772,12 @@ static inline int br_vlan_enabled(struct net_bridge *br) { return 0; } + +static inline int __br_vlan_filter_toggle(struct net_bridge *br, + unsigned long val) +{ + return -EOPNOTSUPP; +} #endif struct nf_br_ops { diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 0d41f81838ff..3cef6892c0bb 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -468,21 +468,27 @@ void br_recalculate_fwd_mask(struct net_bridge *br) ~(1u << br->group_addr[5]); } -int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val) +int __br_vlan_filter_toggle(struct net_bridge *br, unsigned long val) { - if (!rtnl_trylock()) - return restart_syscall(); - if (br->vlan_enabled == val) - goto unlock; + return 0; br->vlan_enabled = val; br_manage_promisc(br); recalculate_group_addr(br); br_recalculate_fwd_mask(br); -unlock: + return 0; +} + +int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val) +{ + if (!rtnl_trylock()) + return restart_syscall(); + + __br_vlan_filter_toggle(br, val); rtnl_unlock(); + return 0; } -- cgit v1.2.3 From fb811395cd5a71b9e94a068f524a6f4a21b67bdb Mon Sep 17 00:00:00 2001 From: Rick Jones Date: Fri, 7 Aug 2015 11:10:37 -0700 Subject: net: add explicit logging and stat for neighbour table overflow Add an explicit neighbour table overflow message (ratelimited) and statistic to make diagnosing neighbour table overflows tractable in the wild. Diagnosing a neighbour table overflow can be quite difficult in the wild because there is no explicit dmesg logged. Callers to neighbour code seem to use net_dbg_ratelimit when the neighbour call fails which means the "base message" is not emitted and the callback suppressed messages from the ratelimiting can end-up juxtaposed with unrelated messages. Further, a forced garbage collection will increment a stat on each call whether it was successful in freeing-up a table entry or not, so that statistic is only a hint. So, add a net_info_ratelimited message and explicit statistic to the neighbour code. Signed-off-by: Rick Jones Signed-off-by: David S. Miller --- include/net/neighbour.h | 1 + include/uapi/linux/neighbour.h | 1 + net/core/neighbour.c | 14 ++++++++++---- 3 files changed, 12 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index bd33e66f49aa..8b683841e574 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -125,6 +125,7 @@ struct neigh_statistics { unsigned long forced_gc_runs; /* number of forced GC runs */ unsigned long unres_discards; /* number of unresolved drops */ + unsigned long table_fulls; /* times even gc couldn't help */ }; #define NEIGH_CACHE_STAT_INC(tbl, field) this_cpu_inc((tbl)->stats->field) diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index 2e35c61bbdd1..788655bfa0f3 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -106,6 +106,7 @@ struct ndt_stats { __u64 ndts_rcv_probes_ucast; __u64 ndts_periodic_gc_runs; __u64 ndts_forced_gc_runs; + __u64 ndts_table_fulls; }; enum { diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 84195dacb8b6..2b515ba7e94f 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -274,8 +274,12 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device (entries >= tbl->gc_thresh2 && time_after(now, tbl->last_flush + 5 * HZ))) { if (!neigh_forced_gc(tbl) && - entries >= tbl->gc_thresh3) + entries >= tbl->gc_thresh3) { + net_info_ratelimited("%s: neighbor table overflow!\n", + tbl->id); + NEIGH_CACHE_STAT_INC(tbl, table_fulls); goto out_entries; + } } n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC); @@ -1849,6 +1853,7 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, ndst.ndts_rcv_probes_ucast += st->rcv_probes_ucast; ndst.ndts_periodic_gc_runs += st->periodic_gc_runs; ndst.ndts_forced_gc_runs += st->forced_gc_runs; + ndst.ndts_table_fulls += st->table_fulls; } if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst)) @@ -2717,12 +2722,12 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v) struct neigh_statistics *st = v; if (v == SEQ_START_TOKEN) { - seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards\n"); + seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n"); return 0; } seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx " - "%08lx %08lx %08lx %08lx %08lx\n", + "%08lx %08lx %08lx %08lx %08lx %08lx\n", atomic_read(&tbl->entries), st->allocs, @@ -2739,7 +2744,8 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v) st->periodic_gc_runs, st->forced_gc_runs, - st->unres_discards + st->unres_discards, + st->table_fulls ); return 0; -- cgit v1.2.3 From 2e15ea390e6f4466655066d97e22ec66870a042c Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Fri, 7 Aug 2015 23:51:42 -0700 Subject: ip_gre: Add support to collect tunnel metadata. Following patch create new tunnel flag which enable tunnel metadata collection on given device. Signed-off-by: Pravin B Shelar Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 7 +- include/uapi/linux/if_tunnel.h | 1 + net/ipv4/ip_gre.c | 195 +++++++++++++++++++++++++++++++++++++---- net/ipv4/ip_tunnel.c | 37 ++++++-- net/ipv4/ipip.c | 2 +- net/ipv6/sit.c | 2 +- 6 files changed, 216 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 47984415f5d1..984dbfa15e13 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -82,6 +82,8 @@ struct ip_tunnel_dst { __be32 saddr; }; +struct metadata_dst; + struct ip_tunnel { struct ip_tunnel __rcu *next; struct hlist_node hash_node; @@ -115,6 +117,7 @@ struct ip_tunnel { unsigned int prl_count; /* # of entries in PRL */ int ip_tnl_net_id; struct gro_cells gro_cells; + bool collect_md; }; #define TUNNEL_CSUM __cpu_to_be16(0x01) @@ -149,6 +152,7 @@ struct tnl_ptk_info { struct ip_tunnel_net { struct net_device *fb_tunnel_dev; struct hlist_head tunnels[IP_TNL_HASH_SIZE]; + struct ip_tunnel __rcu *collect_md_tun; }; struct ip_tunnel_encap_ops { @@ -235,7 +239,8 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, __be32 key); int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, - const struct tnl_ptk_info *tpi, bool log_ecn_error); + const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst, + bool log_ecn_error); int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_parm *p); int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index bd3cc11a431f..af4de90ba27d 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -112,6 +112,7 @@ enum { IFLA_GRE_ENCAP_FLAGS, IFLA_GRE_ENCAP_SPORT, IFLA_GRE_ENCAP_DPORT, + IFLA_GRE_COLLECT_METADATA, __IFLA_GRE_MAX, }; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 5fd706473c73..554a760c2cd0 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -47,6 +48,7 @@ #include #include #include +#include #if IS_ENABLED(CONFIG_IPV6) #include @@ -200,9 +202,29 @@ static int ipgre_err(struct sk_buff *skb, u32 info, return PACKET_RCVD; } +static __be64 key_to_tunnel_id(__be32 key) +{ +#ifdef __BIG_ENDIAN + return (__force __be64)((__force u32)key); +#else + return (__force __be64)((__force u64)key << 32); +#endif +} + +/* Returns the least-significant 32 bits of a __be64. */ +static __be32 tunnel_id_to_key(__be64 x) +{ +#ifdef __BIG_ENDIAN + return (__force __be32)x; +#else + return (__force __be32)((__force u64)x >> 32); +#endif +} + static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) { struct net *net = dev_net(skb->dev); + struct metadata_dst *tun_dst = NULL; struct ip_tunnel_net *itn; const struct iphdr *iph; struct ip_tunnel *tunnel; @@ -218,40 +240,162 @@ static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) if (tunnel) { skb_pop_mac_header(skb); - ip_tunnel_rcv(tunnel, skb, tpi, log_ecn_error); + if (tunnel->collect_md) { + struct ip_tunnel_info *info; + + tun_dst = metadata_dst_alloc(0, GFP_ATOMIC); + if (!tun_dst) + return PACKET_REJECT; + + info = &tun_dst->u.tun_info; + info->key.ipv4_src = iph->saddr; + info->key.ipv4_dst = iph->daddr; + info->key.ipv4_tos = iph->tos; + info->key.ipv4_ttl = iph->ttl; + + info->mode = IP_TUNNEL_INFO_RX; + info->key.tun_flags = tpi->flags & + (TUNNEL_CSUM | TUNNEL_KEY); + info->key.tun_id = key_to_tunnel_id(tpi->key); + + info->key.tp_src = 0; + info->key.tp_dst = 0; + } + + ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error); return PACKET_RCVD; } return PACKET_REJECT; } +static void build_header(struct sk_buff *skb, int hdr_len, __be16 flags, + __be16 proto, __be32 key, __be32 seq) +{ + struct gre_base_hdr *greh; + + skb_push(skb, hdr_len); + + skb_reset_transport_header(skb); + greh = (struct gre_base_hdr *)skb->data; + greh->flags = tnl_flags_to_gre_flags(flags); + greh->protocol = proto; + + if (flags & (TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_SEQ)) { + __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); + + if (flags & TUNNEL_SEQ) { + *ptr = seq; + ptr--; + } + if (flags & TUNNEL_KEY) { + *ptr = key; + ptr--; + } + if (flags & TUNNEL_CSUM && + !(skb_shinfo(skb)->gso_type & + (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) { + *ptr = 0; + *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0, + skb->len, 0)); + } + } +} + static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, __be16 proto) { struct ip_tunnel *tunnel = netdev_priv(dev); - struct tnl_ptk_info tpi; - tpi.flags = tunnel->parms.o_flags; - tpi.proto = proto; - tpi.key = tunnel->parms.o_key; if (tunnel->parms.o_flags & TUNNEL_SEQ) tunnel->o_seqno++; - tpi.seq = htonl(tunnel->o_seqno); /* Push GRE header. */ - gre_build_header(skb, &tpi, tunnel->tun_hlen); - - skb_set_inner_protocol(skb, tpi.proto); + build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags, + proto, tunnel->parms.o_key, htonl(tunnel->o_seqno)); + skb_set_inner_protocol(skb, proto); ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol); } +static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct ip_tunnel_info *tun_info; + struct net *net = dev_net(dev); + const struct ip_tunnel_key *key; + struct flowi4 fl; + struct rtable *rt; + int min_headroom; + int tunnel_hlen; + __be16 df, flags; + int err; + + tun_info = skb_tunnel_info(skb, AF_INET); + if (unlikely(!tun_info || tun_info->mode != IP_TUNNEL_INFO_TX)) + goto err_free_skb; + + key = &tun_info->key; + memset(&fl, 0, sizeof(fl)); + fl.daddr = key->ipv4_dst; + fl.saddr = key->ipv4_src; + fl.flowi4_tos = RT_TOS(key->ipv4_tos); + fl.flowi4_mark = skb->mark; + fl.flowi4_proto = IPPROTO_GRE; + + rt = ip_route_output_key(net, &fl); + if (IS_ERR(rt)) + goto err_free_skb; + + tunnel_hlen = ip_gre_calc_hlen(key->tun_flags); + + min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + + tunnel_hlen + sizeof(struct iphdr); + if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { + int head_delta = SKB_DATA_ALIGN(min_headroom - + skb_headroom(skb) + + 16); + err = pskb_expand_head(skb, max_t(int, head_delta, 0), + 0, GFP_ATOMIC); + if (unlikely(err)) + goto err_free_rt; + } + + /* Push Tunnel header. */ + skb = gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)); + if (IS_ERR(skb)) { + skb = NULL; + goto err_free_rt; + } + + flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY); + build_header(skb, tunnel_hlen, flags, htons(ETH_P_TEB), + tunnel_id_to_key(tun_info->key.tun_id), 0); + + df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; + err = iptunnel_xmit(skb->sk, rt, skb, fl.saddr, + key->ipv4_dst, IPPROTO_GRE, + key->ipv4_tos, key->ipv4_ttl, df, false); + iptunnel_xmit_stats(err, &dev->stats, dev->tstats); + return; + +err_free_rt: + ip_rt_put(rt); +err_free_skb: + kfree_skb(skb); + dev->stats.tx_dropped++; +} + static netdev_tx_t ipgre_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *tnl_params; + if (tunnel->collect_md) { + gre_fb_xmit(skb, dev); + return NETDEV_TX_OK; + } + if (dev->header_ops) { /* Need space for new headers */ if (skb_cow_head(skb, dev->needed_headroom - @@ -277,7 +421,6 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb, goto out; __gre_xmit(skb, dev, tnl_params, skb->protocol); - return NETDEV_TX_OK; free_skb: @@ -292,6 +435,11 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb, { struct ip_tunnel *tunnel = netdev_priv(dev); + if (tunnel->collect_md) { + gre_fb_xmit(skb, dev); + return NETDEV_TX_OK; + } + skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM)); if (IS_ERR(skb)) goto out; @@ -300,7 +448,6 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb, goto free_skb; __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB)); - return NETDEV_TX_OK; free_skb: @@ -596,8 +743,10 @@ out: return ipgre_tunnel_validate(tb, data); } -static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[], - struct ip_tunnel_parm *parms) +static void ipgre_netlink_parms(struct net_device *dev, + struct nlattr *data[], + struct nlattr *tb[], + struct ip_tunnel_parm *parms) { memset(parms, 0, sizeof(*parms)); @@ -635,6 +784,12 @@ static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[], if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) parms->iph.frag_off = htons(IP_DF); + + if (data[IFLA_GRE_COLLECT_METADATA]) { + struct ip_tunnel *t = netdev_priv(dev); + + t->collect_md = true; + } } /* This function returns true when ENCAP attributes are present in the nl msg */ @@ -712,7 +867,7 @@ static int ipgre_newlink(struct net *src_net, struct net_device *dev, return err; } - ipgre_netlink_parms(data, tb, &p); + ipgre_netlink_parms(dev, data, tb, &p); return ip_tunnel_newlink(dev, tb, &p); } @@ -730,7 +885,7 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], return err; } - ipgre_netlink_parms(data, tb, &p); + ipgre_netlink_parms(dev, data, tb, &p); return ip_tunnel_changelink(dev, tb, &p); } @@ -765,6 +920,8 @@ static size_t ipgre_get_size(const struct net_device *dev) nla_total_size(2) + /* IFLA_GRE_ENCAP_DPORT */ nla_total_size(2) + + /* IFLA_GRE_COLLECT_METADATA */ + nla_total_size(0) + 0; } @@ -796,6 +953,11 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) t->encap.flags)) goto nla_put_failure; + if (t->collect_md) { + if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA)) + goto nla_put_failure; + } + return 0; nla_put_failure: @@ -817,6 +979,7 @@ static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = { [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 }, [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 }, [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 }, + [IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG }, }; static struct rtnl_link_ops ipgre_link_ops __read_mostly = { @@ -851,7 +1014,7 @@ static struct rtnl_link_ops ipgre_tap_ops __read_mostly = { static int __net_init ipgre_tap_init_net(struct net *net) { - return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, NULL); + return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0"); } static void __net_exit ipgre_tap_exit_net(struct net *net) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 626d9e56a6bd..cbb51f3fac06 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -230,10 +230,13 @@ skip_key_lookup: if (cand) return cand; + t = rcu_dereference(itn->collect_md_tun); + if (t) + return t; + if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP) return netdev_priv(itn->fb_tunnel_dev); - return NULL; } EXPORT_SYMBOL_GPL(ip_tunnel_lookup); @@ -261,11 +264,15 @@ static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t) { struct hlist_head *head = ip_bucket(itn, &t->parms); + if (t->collect_md) + rcu_assign_pointer(itn->collect_md_tun, t); hlist_add_head_rcu(&t->hash_node, head); } -static void ip_tunnel_del(struct ip_tunnel *t) +static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t) { + if (t->collect_md) + rcu_assign_pointer(itn->collect_md_tun, NULL); hlist_del_init_rcu(&t->hash_node); } @@ -419,7 +426,8 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net, } int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, - const struct tnl_ptk_info *tpi, bool log_ecn_error) + const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst, + bool log_ecn_error) { struct pcpu_sw_netstats *tstats; const struct iphdr *iph = ip_hdr(skb); @@ -478,6 +486,9 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, skb->dev = tunnel->dev; } + if (tun_dst) + skb_dst_set(skb, (struct dst_entry *)tun_dst); + gro_cells_receive(&tunnel->gro_cells, skb); return 0; @@ -806,7 +817,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn, struct ip_tunnel_parm *p, bool set_mtu) { - ip_tunnel_del(t); + ip_tunnel_del(itn, t); t->parms.iph.saddr = p->iph.saddr; t->parms.iph.daddr = p->iph.daddr; t->parms.i_key = p->i_key; @@ -967,7 +978,7 @@ void ip_tunnel_dellink(struct net_device *dev, struct list_head *head) itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id); if (itn->fb_tunnel_dev != dev) { - ip_tunnel_del(netdev_priv(dev)); + ip_tunnel_del(itn, netdev_priv(dev)); unregister_netdevice_queue(dev, head); } } @@ -1072,8 +1083,13 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], nt = netdev_priv(dev); itn = net_generic(net, nt->ip_tnl_net_id); - if (ip_tunnel_find(itn, p, dev->type)) - return -EEXIST; + if (nt->collect_md) { + if (rtnl_dereference(itn->collect_md_tun)) + return -EEXIST; + } else { + if (ip_tunnel_find(itn, p, dev->type)) + return -EEXIST; + } nt->net = net; nt->parms = *p; @@ -1089,7 +1105,6 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], dev->mtu = mtu; ip_tunnel_add(itn, nt); - out: return err; } @@ -1163,6 +1178,10 @@ int ip_tunnel_init(struct net_device *dev) iph->version = 4; iph->ihl = 5; + if (tunnel->collect_md) { + dev->features |= NETIF_F_NETNS_LOCAL; + netif_keep_dst(dev); + } return 0; } EXPORT_SYMBOL_GPL(ip_tunnel_init); @@ -1176,7 +1195,7 @@ void ip_tunnel_uninit(struct net_device *dev) itn = net_generic(net, tunnel->ip_tnl_net_id); /* fb_tunnel_dev will be unregisted in net-exit call. */ if (itn->fb_tunnel_dev != dev) - ip_tunnel_del(netdev_priv(dev)); + ip_tunnel_del(itn, netdev_priv(dev)); ip_tunnel_dst_reset_all(tunnel); } diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 254238daf58b..f34c31defafe 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -198,7 +198,7 @@ static int ipip_rcv(struct sk_buff *skb) goto drop; if (iptunnel_pull_header(skb, 0, tpi.proto)) goto drop; - return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error); + return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error); } return -1; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index ac35a28599be..94428fd85b2f 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -742,7 +742,7 @@ static int ipip_rcv(struct sk_buff *skb) goto drop; if (iptunnel_pull_header(skb, 0, tpi.proto)) goto drop; - return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error); + return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error); } return 1; -- cgit v1.2.3 From b2acd1dc3949cd60c571844d495594f05f0351f4 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Fri, 7 Aug 2015 23:51:47 -0700 Subject: openvswitch: Use regular GRE net_device instead of vport Using GRE tunnel meta data collection feature, we can implement OVS GRE vport. This patch removes all of the OVS specific GRE code and make OVS use a ip_gre net_device. Minimal GRE vport is kept to handle compatibility with current userspace application. Signed-off-by: Pravin B Shelar Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/gre.h | 12 +-- net/ipv4/gre_demux.c | 34 ------- net/ipv4/ip_gre.c | 36 +++++++ net/openvswitch/Kconfig | 2 +- net/openvswitch/vport-gre.c | 237 ++++---------------------------------------- 5 files changed, 61 insertions(+), 260 deletions(-) (limited to 'include') diff --git a/include/net/gre.h b/include/net/gre.h index b53182018743..e3e08459bf67 100644 --- a/include/net/gre.h +++ b/include/net/gre.h @@ -33,16 +33,8 @@ struct gre_cisco_protocol { int gre_cisco_register(struct gre_cisco_protocol *proto); int gre_cisco_unregister(struct gre_cisco_protocol *proto); -void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, - int hdr_len); - -static inline struct sk_buff *gre_handle_offloads(struct sk_buff *skb, - bool csum) -{ - return iptunnel_handle_offloads(skb, csum, - csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE); -} - +struct net_device *gretap_fb_dev_create(struct net *net, const char *name, + u8 name_assign_type); static inline int ip_gre_calc_hlen(__be16 o_flags) { diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 4a7b5b2a1ce3..77562e0ac66b 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -61,40 +61,6 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version) } EXPORT_SYMBOL_GPL(gre_del_protocol); -void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, - int hdr_len) -{ - struct gre_base_hdr *greh; - - skb_push(skb, hdr_len); - - skb_reset_transport_header(skb); - greh = (struct gre_base_hdr *)skb->data; - greh->flags = tnl_flags_to_gre_flags(tpi->flags); - greh->protocol = tpi->proto; - - if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) { - __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); - - if (tpi->flags&TUNNEL_SEQ) { - *ptr = tpi->seq; - ptr--; - } - if (tpi->flags&TUNNEL_KEY) { - *ptr = tpi->key; - ptr--; - } - if (tpi->flags&TUNNEL_CSUM && - !(skb_shinfo(skb)->gso_type & - (SKB_GSO_GRE|SKB_GSO_GRE_CSUM))) { - *ptr = 0; - *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0, - skb->len, 0)); - } - } -} -EXPORT_SYMBOL_GPL(gre_build_header); - static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, bool *csum_err) { diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 554a760c2cd0..49d140200d03 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -318,6 +318,13 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol); } +static struct sk_buff *gre_handle_offloads(struct sk_buff *skb, + bool csum) +{ + return iptunnel_handle_offloads(skb, csum, + csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE); +} + static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel_info *tun_info; @@ -1012,6 +1019,35 @@ static struct rtnl_link_ops ipgre_tap_ops __read_mostly = { .get_link_net = ip_tunnel_get_link_net, }; +struct net_device *gretap_fb_dev_create(struct net *net, const char *name, + u8 name_assign_type) +{ + struct nlattr *tb[IFLA_MAX + 1]; + struct net_device *dev; + struct ip_tunnel *t; + int err; + + memset(&tb, 0, sizeof(tb)); + + dev = rtnl_create_link(net, name, name_assign_type, + &ipgre_tap_ops, tb); + if (IS_ERR(dev)) + return dev; + + /* Configure flow based GRE device. */ + t = netdev_priv(dev); + t->collect_md = true; + + err = ipgre_newlink(net, dev, tb, NULL); + if (err < 0) + goto out; + return dev; +out: + free_netdev(dev); + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(gretap_fb_dev_create); + static int __net_init ipgre_tap_init_net(struct net *net) { return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0"); diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index 15840401a2ce..422dc0567de9 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -34,7 +34,7 @@ config OPENVSWITCH config OPENVSWITCH_GRE tristate "Open vSwitch GRE tunneling support" depends on OPENVSWITCH - depends on NET_IPGRE_DEMUX + depends on NET_IPGRE default OPENVSWITCH ---help--- If you say Y here, then the Open vSwitch will be able create GRE diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index b87656c66aaf..871801d2ac23 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -45,235 +45,43 @@ #include "datapath.h" #include "vport.h" +#include "vport-netdev.h" static struct vport_ops ovs_gre_vport_ops; -/* Returns the least-significant 32 bits of a __be64. */ -static __be32 be64_get_low32(__be64 x) +static struct vport *gre_tnl_create(const struct vport_parms *parms) { -#ifdef __BIG_ENDIAN - return (__force __be32)x; -#else - return (__force __be32)((__force u64)x >> 32); -#endif -} - -static __be16 filter_tnl_flags(__be16 flags) -{ - return flags & (TUNNEL_CSUM | TUNNEL_KEY); -} - -static struct sk_buff *__build_header(struct sk_buff *skb, - int tunnel_hlen) -{ - struct tnl_ptk_info tpi; - const struct ip_tunnel_key *tun_key; - - tun_key = &OVS_CB(skb)->egress_tun_info->key; - - skb = gre_handle_offloads(skb, !!(tun_key->tun_flags & TUNNEL_CSUM)); - if (IS_ERR(skb)) - return skb; - - tpi.flags = filter_tnl_flags(tun_key->tun_flags); - tpi.proto = htons(ETH_P_TEB); - tpi.key = be64_get_low32(tun_key->tun_id); - tpi.seq = 0; - gre_build_header(skb, &tpi, tunnel_hlen); - - return skb; -} - -static __be64 key_to_tunnel_id(__be32 key, __be32 seq) -{ -#ifdef __BIG_ENDIAN - return (__force __be64)((__force u64)seq << 32 | (__force u32)key); -#else - return (__force __be64)((__force u64)key << 32 | (__force u32)seq); -#endif -} - -/* Called with rcu_read_lock and BH disabled. */ -static int gre_rcv(struct sk_buff *skb, - const struct tnl_ptk_info *tpi) -{ - struct ip_tunnel_info tun_info; - struct ovs_net *ovs_net; - struct vport *vport; - __be64 key; - - ovs_net = net_generic(dev_net(skb->dev), ovs_net_id); - vport = rcu_dereference(ovs_net->vport_net.gre_vport); - if (unlikely(!vport)) - return PACKET_REJECT; - - key = key_to_tunnel_id(tpi->key, tpi->seq); - ip_tunnel_info_init(&tun_info, ip_hdr(skb), 0, 0, key, - filter_tnl_flags(tpi->flags), NULL, 0); - - ovs_vport_receive(vport, skb, &tun_info); - return PACKET_RCVD; -} - -/* Called with rcu_read_lock and BH disabled. */ -static int gre_err(struct sk_buff *skb, u32 info, - const struct tnl_ptk_info *tpi) -{ - struct ovs_net *ovs_net; + struct net *net = ovs_dp_get_net(parms->dp); + struct net_device *dev; struct vport *vport; - ovs_net = net_generic(dev_net(skb->dev), ovs_net_id); - vport = rcu_dereference(ovs_net->vport_net.gre_vport); - - if (unlikely(!vport)) - return PACKET_REJECT; - else - return PACKET_RCVD; -} - -static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) -{ - struct net *net = ovs_dp_get_net(vport->dp); - const struct ip_tunnel_key *tun_key; - struct flowi4 fl; - struct rtable *rt; - int min_headroom; - int tunnel_hlen; - __be16 df; - int err; - - if (unlikely(!OVS_CB(skb)->egress_tun_info)) { - err = -EINVAL; - goto err_free_skb; - } - - tun_key = &OVS_CB(skb)->egress_tun_info->key; - rt = ovs_tunnel_route_lookup(net, tun_key, skb->mark, &fl, IPPROTO_GRE); - if (IS_ERR(rt)) { - err = PTR_ERR(rt); - goto err_free_skb; - } - - tunnel_hlen = ip_gre_calc_hlen(tun_key->tun_flags); - - min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len - + tunnel_hlen + sizeof(struct iphdr) - + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); - if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { - int head_delta = SKB_DATA_ALIGN(min_headroom - - skb_headroom(skb) + - 16); - err = pskb_expand_head(skb, max_t(int, head_delta, 0), - 0, GFP_ATOMIC); - if (unlikely(err)) - goto err_free_rt; - } - - skb = vlan_hwaccel_push_inside(skb); - if (unlikely(!skb)) { - err = -ENOMEM; - goto err_free_rt; - } - - /* Push Tunnel header. */ - skb = __build_header(skb, tunnel_hlen); - if (IS_ERR(skb)) { - err = PTR_ERR(skb); - skb = NULL; - goto err_free_rt; + vport = ovs_vport_alloc(0, &ovs_gre_vport_ops, parms); + if (IS_ERR(vport)) + return vport; + + rtnl_lock(); + dev = gretap_fb_dev_create(net, parms->name, NET_NAME_USER); + if (IS_ERR(dev)) { + rtnl_unlock(); + ovs_vport_free(vport); + return ERR_CAST(dev); } - df = tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? - htons(IP_DF) : 0; - - skb->ignore_df = 1; - - return iptunnel_xmit(skb->sk, rt, skb, fl.saddr, - tun_key->ipv4_dst, IPPROTO_GRE, - tun_key->ipv4_tos, tun_key->ipv4_ttl, df, false); -err_free_rt: - ip_rt_put(rt); -err_free_skb: - kfree_skb(skb); - return err; -} - -static struct gre_cisco_protocol gre_protocol = { - .handler = gre_rcv, - .err_handler = gre_err, - .priority = 1, -}; - -static int gre_ports; -static int gre_init(void) -{ - int err; - - gre_ports++; - if (gre_ports > 1) - return 0; - - err = gre_cisco_register(&gre_protocol); - if (err) - pr_warn("cannot register gre protocol handler\n"); - - return err; -} - -static void gre_exit(void) -{ - gre_ports--; - if (gre_ports > 0) - return; - - gre_cisco_unregister(&gre_protocol); -} + dev_change_flags(dev, dev->flags | IFF_UP); + rtnl_unlock(); -static const char *gre_get_name(const struct vport *vport) -{ - return vport_priv(vport); + return vport; } static struct vport *gre_create(const struct vport_parms *parms) { - struct net *net = ovs_dp_get_net(parms->dp); - struct ovs_net *ovs_net; struct vport *vport; - int err; - - err = gre_init(); - if (err) - return ERR_PTR(err); - - ovs_net = net_generic(net, ovs_net_id); - if (ovsl_dereference(ovs_net->vport_net.gre_vport)) { - vport = ERR_PTR(-EEXIST); - goto error; - } - vport = ovs_vport_alloc(IFNAMSIZ, &ovs_gre_vport_ops, parms); + vport = gre_tnl_create(parms); if (IS_ERR(vport)) - goto error; - - strncpy(vport_priv(vport), parms->name, IFNAMSIZ); - rcu_assign_pointer(ovs_net->vport_net.gre_vport, vport); - return vport; - -error: - gre_exit(); - return vport; -} - -static void gre_tnl_destroy(struct vport *vport) -{ - struct net *net = ovs_dp_get_net(vport->dp); - struct ovs_net *ovs_net; - - ovs_net = net_generic(net, ovs_net_id); + return vport; - RCU_INIT_POINTER(ovs_net->vport_net.gre_vport, NULL); - ovs_vport_deferred_free(vport); - gre_exit(); + return ovs_netdev_link(vport, parms->name); } static int gre_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, @@ -288,10 +96,9 @@ static int gre_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, static struct vport_ops ovs_gre_vport_ops = { .type = OVS_VPORT_TYPE_GRE, .create = gre_create, - .destroy = gre_tnl_destroy, - .get_name = gre_get_name, - .send = gre_tnl_send, + .send = ovs_netdev_send, .get_egress_tun_info = gre_get_egress_tun_info, + .destroy = ovs_netdev_tunnel_destroy, .owner = THIS_MODULE, }; -- cgit v1.2.3 From 9f57c67c379d88a10e8ad676426fee5ae7341b14 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Fri, 7 Aug 2015 23:51:52 -0700 Subject: gre: Remove support for sharing GRE protocol hook. Support for sharing GREPROTO_CISCO port was added so that OVS gre port and kernel GRE devices can co-exist. After flow-based tunneling patches OVS GRE protocol processing is completely moved to ip_gre module. so there is no need for GRE protocol hook. Following patch consolidates GRE protocol related functions into ip_gre module. Signed-off-by: Pravin B Shelar Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/gre.h | 80 ++----------------- net/ipv4/gre_demux.c | 201 +---------------------------------------------- net/ipv4/ip_gre.c | 215 +++++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 206 insertions(+), 290 deletions(-) (limited to 'include') diff --git a/include/net/gre.h b/include/net/gre.h index e3e08459bf67..97eafdc47eea 100644 --- a/include/net/gre.h +++ b/include/net/gre.h @@ -4,6 +4,12 @@ #include #include +struct gre_base_hdr { + __be16 flags; + __be16 protocol; +}; +#define GRE_HEADER_SECTION 4 + #define GREPROTO_CISCO 0 #define GREPROTO_PPTP 1 #define GREPROTO_MAX 2 @@ -14,83 +20,9 @@ struct gre_protocol { void (*err_handler)(struct sk_buff *skb, u32 info); }; -struct gre_base_hdr { - __be16 flags; - __be16 protocol; -}; -#define GRE_HEADER_SECTION 4 - int gre_add_protocol(const struct gre_protocol *proto, u8 version); int gre_del_protocol(const struct gre_protocol *proto, u8 version); -struct gre_cisco_protocol { - int (*handler)(struct sk_buff *skb, const struct tnl_ptk_info *tpi); - int (*err_handler)(struct sk_buff *skb, u32 info, - const struct tnl_ptk_info *tpi); - u8 priority; -}; - -int gre_cisco_register(struct gre_cisco_protocol *proto); -int gre_cisco_unregister(struct gre_cisco_protocol *proto); - struct net_device *gretap_fb_dev_create(struct net *net, const char *name, u8 name_assign_type); - -static inline int ip_gre_calc_hlen(__be16 o_flags) -{ - int addend = 4; - - if (o_flags&TUNNEL_CSUM) - addend += 4; - if (o_flags&TUNNEL_KEY) - addend += 4; - if (o_flags&TUNNEL_SEQ) - addend += 4; - return addend; -} - -static inline __be16 gre_flags_to_tnl_flags(__be16 flags) -{ - __be16 tflags = 0; - - if (flags & GRE_CSUM) - tflags |= TUNNEL_CSUM; - if (flags & GRE_ROUTING) - tflags |= TUNNEL_ROUTING; - if (flags & GRE_KEY) - tflags |= TUNNEL_KEY; - if (flags & GRE_SEQ) - tflags |= TUNNEL_SEQ; - if (flags & GRE_STRICT) - tflags |= TUNNEL_STRICT; - if (flags & GRE_REC) - tflags |= TUNNEL_REC; - if (flags & GRE_VERSION) - tflags |= TUNNEL_VERSION; - - return tflags; -} - -static inline __be16 tnl_flags_to_gre_flags(__be16 tflags) -{ - __be16 flags = 0; - - if (tflags & TUNNEL_CSUM) - flags |= GRE_CSUM; - if (tflags & TUNNEL_ROUTING) - flags |= GRE_ROUTING; - if (tflags & TUNNEL_KEY) - flags |= GRE_KEY; - if (tflags & TUNNEL_SEQ) - flags |= GRE_SEQ; - if (tflags & TUNNEL_STRICT) - flags |= GRE_STRICT; - if (tflags & TUNNEL_REC) - flags |= GRE_REC; - if (tflags & TUNNEL_VERSION) - flags |= GRE_VERSION; - - return flags; -} - #endif diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 77562e0ac66b..d9c552a721fc 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -31,7 +31,6 @@ #include static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly; -static struct gre_cisco_protocol __rcu *gre_cisco_proto_list[GRE_IP_PROTO_MAX]; int gre_add_protocol(const struct gre_protocol *proto, u8 version) { @@ -61,163 +60,6 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version) } EXPORT_SYMBOL_GPL(gre_del_protocol); -static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, - bool *csum_err) -{ - const struct gre_base_hdr *greh; - __be32 *options; - int hdr_len; - - if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr)))) - return -EINVAL; - - greh = (struct gre_base_hdr *)skb_transport_header(skb); - if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) - return -EINVAL; - - tpi->flags = gre_flags_to_tnl_flags(greh->flags); - hdr_len = ip_gre_calc_hlen(tpi->flags); - - if (!pskb_may_pull(skb, hdr_len)) - return -EINVAL; - - greh = (struct gre_base_hdr *)skb_transport_header(skb); - tpi->proto = greh->protocol; - - options = (__be32 *)(greh + 1); - if (greh->flags & GRE_CSUM) { - if (skb_checksum_simple_validate(skb)) { - *csum_err = true; - return -EINVAL; - } - - skb_checksum_try_convert(skb, IPPROTO_GRE, 0, - null_compute_pseudo); - - options++; - } - - if (greh->flags & GRE_KEY) { - tpi->key = *options; - options++; - } else - tpi->key = 0; - - if (unlikely(greh->flags & GRE_SEQ)) { - tpi->seq = *options; - options++; - } else - tpi->seq = 0; - - /* WCCP version 1 and 2 protocol decoding. - * - Change protocol to IP - * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header - */ - if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) { - tpi->proto = htons(ETH_P_IP); - if ((*(u8 *)options & 0xF0) != 0x40) { - hdr_len += 4; - if (!pskb_may_pull(skb, hdr_len)) - return -EINVAL; - } - } - - return iptunnel_pull_header(skb, hdr_len, tpi->proto); -} - -static int gre_cisco_rcv(struct sk_buff *skb) -{ - struct tnl_ptk_info tpi; - int i; - bool csum_err = false; - -#ifdef CONFIG_NET_IPGRE_BROADCAST - if (ipv4_is_multicast(ip_hdr(skb)->daddr)) { - /* Looped back packet, drop it! */ - if (rt_is_output_route(skb_rtable(skb))) - goto drop; - } -#endif - - if (parse_gre_header(skb, &tpi, &csum_err) < 0) - goto drop; - - rcu_read_lock(); - for (i = 0; i < GRE_IP_PROTO_MAX; i++) { - struct gre_cisco_protocol *proto; - int ret; - - proto = rcu_dereference(gre_cisco_proto_list[i]); - if (!proto) - continue; - ret = proto->handler(skb, &tpi); - if (ret == PACKET_RCVD) { - rcu_read_unlock(); - return 0; - } - } - rcu_read_unlock(); - - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); -drop: - kfree_skb(skb); - return 0; -} - -static void gre_cisco_err(struct sk_buff *skb, u32 info) -{ - /* All the routers (except for Linux) return only - * 8 bytes of packet payload. It means, that precise relaying of - * ICMP in the real Internet is absolutely infeasible. - * - * Moreover, Cisco "wise men" put GRE key to the third word - * in GRE header. It makes impossible maintaining even soft - * state for keyed - * GRE tunnels with enabled checksum. Tell them "thank you". - * - * Well, I wonder, rfc1812 was written by Cisco employee, - * what the hell these idiots break standards established - * by themselves??? - */ - - const int type = icmp_hdr(skb)->type; - const int code = icmp_hdr(skb)->code; - struct tnl_ptk_info tpi; - bool csum_err = false; - int i; - - if (parse_gre_header(skb, &tpi, &csum_err)) { - if (!csum_err) /* ignore csum errors. */ - return; - } - - if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { - ipv4_update_pmtu(skb, dev_net(skb->dev), info, - skb->dev->ifindex, 0, IPPROTO_GRE, 0); - return; - } - if (type == ICMP_REDIRECT) { - ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0, - IPPROTO_GRE, 0); - return; - } - - rcu_read_lock(); - for (i = 0; i < GRE_IP_PROTO_MAX; i++) { - struct gre_cisco_protocol *proto; - - proto = rcu_dereference(gre_cisco_proto_list[i]); - if (!proto) - continue; - - if (proto->err_handler(skb, info, &tpi) == PACKET_RCVD) - goto out; - - } -out: - rcu_read_unlock(); -} - static int gre_rcv(struct sk_buff *skb) { const struct gre_protocol *proto; @@ -268,60 +110,19 @@ static const struct net_protocol net_gre_protocol = { .netns_ok = 1, }; -static const struct gre_protocol ipgre_protocol = { - .handler = gre_cisco_rcv, - .err_handler = gre_cisco_err, -}; - -int gre_cisco_register(struct gre_cisco_protocol *newp) -{ - struct gre_cisco_protocol **proto = (struct gre_cisco_protocol **) - &gre_cisco_proto_list[newp->priority]; - - return (cmpxchg(proto, NULL, newp) == NULL) ? 0 : -EBUSY; -} -EXPORT_SYMBOL_GPL(gre_cisco_register); - -int gre_cisco_unregister(struct gre_cisco_protocol *del_proto) -{ - struct gre_cisco_protocol **proto = (struct gre_cisco_protocol **) - &gre_cisco_proto_list[del_proto->priority]; - int ret; - - ret = (cmpxchg(proto, del_proto, NULL) == del_proto) ? 0 : -EINVAL; - - if (ret) - return ret; - - synchronize_net(); - return 0; -} -EXPORT_SYMBOL_GPL(gre_cisco_unregister); - static int __init gre_init(void) { pr_info("GRE over IPv4 demultiplexor driver\n"); if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) { pr_err("can't add protocol\n"); - goto err; + return -EAGAIN; } - - if (gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) { - pr_info("%s: can't add ipgre handler\n", __func__); - goto err_gre; - } - return 0; -err_gre: - inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); -err: - return -EAGAIN; } static void __exit gre_exit(void) { - gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 49d140200d03..fb44d693796e 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -123,8 +123,127 @@ static int ipgre_tunnel_init(struct net_device *dev); static int ipgre_net_id __read_mostly; static int gre_tap_net_id __read_mostly; -static int ipgre_err(struct sk_buff *skb, u32 info, - const struct tnl_ptk_info *tpi) +static int ip_gre_calc_hlen(__be16 o_flags) +{ + int addend = 4; + + if (o_flags & TUNNEL_CSUM) + addend += 4; + if (o_flags & TUNNEL_KEY) + addend += 4; + if (o_flags & TUNNEL_SEQ) + addend += 4; + return addend; +} + +static __be16 gre_flags_to_tnl_flags(__be16 flags) +{ + __be16 tflags = 0; + + if (flags & GRE_CSUM) + tflags |= TUNNEL_CSUM; + if (flags & GRE_ROUTING) + tflags |= TUNNEL_ROUTING; + if (flags & GRE_KEY) + tflags |= TUNNEL_KEY; + if (flags & GRE_SEQ) + tflags |= TUNNEL_SEQ; + if (flags & GRE_STRICT) + tflags |= TUNNEL_STRICT; + if (flags & GRE_REC) + tflags |= TUNNEL_REC; + if (flags & GRE_VERSION) + tflags |= TUNNEL_VERSION; + + return tflags; +} + +static __be16 tnl_flags_to_gre_flags(__be16 tflags) +{ + __be16 flags = 0; + + if (tflags & TUNNEL_CSUM) + flags |= GRE_CSUM; + if (tflags & TUNNEL_ROUTING) + flags |= GRE_ROUTING; + if (tflags & TUNNEL_KEY) + flags |= GRE_KEY; + if (tflags & TUNNEL_SEQ) + flags |= GRE_SEQ; + if (tflags & TUNNEL_STRICT) + flags |= GRE_STRICT; + if (tflags & TUNNEL_REC) + flags |= GRE_REC; + if (tflags & TUNNEL_VERSION) + flags |= GRE_VERSION; + + return flags; +} + +static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, + bool *csum_err) +{ + const struct gre_base_hdr *greh; + __be32 *options; + int hdr_len; + + if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr)))) + return -EINVAL; + + greh = (struct gre_base_hdr *)skb_transport_header(skb); + if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) + return -EINVAL; + + tpi->flags = gre_flags_to_tnl_flags(greh->flags); + hdr_len = ip_gre_calc_hlen(tpi->flags); + + if (!pskb_may_pull(skb, hdr_len)) + return -EINVAL; + + greh = (struct gre_base_hdr *)skb_transport_header(skb); + tpi->proto = greh->protocol; + + options = (__be32 *)(greh + 1); + if (greh->flags & GRE_CSUM) { + if (skb_checksum_simple_validate(skb)) { + *csum_err = true; + return -EINVAL; + } + + skb_checksum_try_convert(skb, IPPROTO_GRE, 0, + null_compute_pseudo); + options++; + } + + if (greh->flags & GRE_KEY) { + tpi->key = *options; + options++; + } else { + tpi->key = 0; + } + if (unlikely(greh->flags & GRE_SEQ)) { + tpi->seq = *options; + options++; + } else { + tpi->seq = 0; + } + /* WCCP version 1 and 2 protocol decoding. + * - Change protocol to IP + * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header + */ + if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) { + tpi->proto = htons(ETH_P_IP); + if ((*(u8 *)options & 0xF0) != 0x40) { + hdr_len += 4; + if (!pskb_may_pull(skb, hdr_len)) + return -EINVAL; + } + } + return iptunnel_pull_header(skb, hdr_len, tpi->proto); +} + +static void ipgre_err(struct sk_buff *skb, u32 info, + const struct tnl_ptk_info *tpi) { /* All the routers (except for Linux) return only @@ -150,14 +269,14 @@ static int ipgre_err(struct sk_buff *skb, u32 info, switch (type) { default: case ICMP_PARAMETERPROB: - return PACKET_RCVD; + return; case ICMP_DEST_UNREACH: switch (code) { case ICMP_SR_FAILED: case ICMP_PORT_UNREACH: /* Impossible event. */ - return PACKET_RCVD; + return; default: /* All others are translated to HOST_UNREACH. rfc2003 contains "deep thoughts" about NET_UNREACH, @@ -166,9 +285,10 @@ static int ipgre_err(struct sk_buff *skb, u32 info, break; } break; + case ICMP_TIME_EXCEEDED: if (code != ICMP_EXC_TTL) - return PACKET_RCVD; + return; break; case ICMP_REDIRECT: @@ -185,21 +305,60 @@ static int ipgre_err(struct sk_buff *skb, u32 info, iph->daddr, iph->saddr, tpi->key); if (!t) - return PACKET_REJECT; + return; if (t->parms.iph.daddr == 0 || ipv4_is_multicast(t->parms.iph.daddr)) - return PACKET_RCVD; + return; if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) - return PACKET_RCVD; + return; if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) t->err_count++; else t->err_count = 1; t->err_time = jiffies; - return PACKET_RCVD; +} + +static void gre_err(struct sk_buff *skb, u32 info) +{ + /* All the routers (except for Linux) return only + * 8 bytes of packet payload. It means, that precise relaying of + * ICMP in the real Internet is absolutely infeasible. + * + * Moreover, Cisco "wise men" put GRE key to the third word + * in GRE header. It makes impossible maintaining even soft + * state for keyed + * GRE tunnels with enabled checksum. Tell them "thank you". + * + * Well, I wonder, rfc1812 was written by Cisco employee, + * what the hell these idiots break standards established + * by themselves??? + */ + + const int type = icmp_hdr(skb)->type; + const int code = icmp_hdr(skb)->code; + struct tnl_ptk_info tpi; + bool csum_err = false; + + if (parse_gre_header(skb, &tpi, &csum_err)) { + if (!csum_err) /* ignore csum errors. */ + return; + } + + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { + ipv4_update_pmtu(skb, dev_net(skb->dev), info, + skb->dev->ifindex, 0, IPPROTO_GRE, 0); + return; + } + if (type == ICMP_REDIRECT) { + ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0, + IPPROTO_GRE, 0); + return; + } + + ipgre_err(skb, info, &tpi); } static __be64 key_to_tunnel_id(__be32 key) @@ -268,6 +427,31 @@ static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) return PACKET_REJECT; } +static int gre_rcv(struct sk_buff *skb) +{ + struct tnl_ptk_info tpi; + bool csum_err = false; + +#ifdef CONFIG_NET_IPGRE_BROADCAST + if (ipv4_is_multicast(ip_hdr(skb)->daddr)) { + /* Looped back packet, drop it! */ + if (rt_is_output_route(skb_rtable(skb))) + goto drop; + } +#endif + + if (parse_gre_header(skb, &tpi, &csum_err) < 0) + goto drop; + + if (ipgre_rcv(skb, &tpi) == PACKET_RCVD) + return 0; + + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); +drop: + kfree_skb(skb); + return 0; +} + static void build_header(struct sk_buff *skb, int hdr_len, __be16 flags, __be16 proto, __be32 key, __be32 seq) { @@ -684,10 +868,9 @@ static int ipgre_tunnel_init(struct net_device *dev) return ip_tunnel_init(dev); } -static struct gre_cisco_protocol ipgre_protocol = { - .handler = ipgre_rcv, - .err_handler = ipgre_err, - .priority = 0, +static const struct gre_protocol ipgre_protocol = { + .handler = gre_rcv, + .err_handler = gre_err, }; static int __net_init ipgre_init_net(struct net *net) @@ -1080,7 +1263,7 @@ static int __init ipgre_init(void) if (err < 0) goto pnet_tap_faied; - err = gre_cisco_register(&ipgre_protocol); + err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO); if (err < 0) { pr_info("%s: can't add protocol\n", __func__); goto add_proto_failed; @@ -1099,7 +1282,7 @@ static int __init ipgre_init(void) tap_ops_failed: rtnl_link_unregister(&ipgre_link_ops); rtnl_link_failed: - gre_cisco_unregister(&ipgre_protocol); + gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); add_proto_failed: unregister_pernet_device(&ipgre_tap_net_ops); pnet_tap_faied: @@ -1111,7 +1294,7 @@ static void __exit ipgre_fini(void) { rtnl_link_unregister(&ipgre_tap_ops); rtnl_link_unregister(&ipgre_link_ops); - gre_cisco_unregister(&ipgre_protocol); + gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); unregister_pernet_device(&ipgre_tap_net_ops); unregister_pernet_device(&ipgre_net_ops); } -- cgit v1.2.3 From 42a7b32b73d6bf22e4bdd7bf68746e2d71f4cd8d Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 10 Aug 2015 16:58:11 -0600 Subject: xfrm: Add oif to dst lookups Rules can be installed that direct route lookups to specific tables based on oif. Plumb the oif through the xfrm lookups so it gets set in the flow struct and passed to the resolver routines. Signed-off-by: David Ahern Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 7 +++++-- net/ipv4/xfrm4_policy.c | 11 ++++++----- net/ipv6/xfrm6_policy.c | 7 ++++--- net/xfrm/xfrm_policy.c | 24 ++++++++++++++---------- 4 files changed, 29 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index f0ee97eec24d..312e3fee9ccf 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -285,10 +285,13 @@ struct xfrm_policy_afinfo { unsigned short family; struct dst_ops *dst_ops; void (*garbage_collect)(struct net *net); - struct dst_entry *(*dst_lookup)(struct net *net, int tos, + struct dst_entry *(*dst_lookup)(struct net *net, + int tos, int oif, const xfrm_address_t *saddr, const xfrm_address_t *daddr); - int (*get_saddr)(struct net *net, xfrm_address_t *saddr, xfrm_address_t *daddr); + int (*get_saddr)(struct net *net, int oif, + xfrm_address_t *saddr, + xfrm_address_t *daddr); void (*decode_session)(struct sk_buff *skb, struct flowi *fl, int reverse); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index bff69746e05f..55b3c0f4dde5 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -19,7 +19,7 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo; static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, - int tos, + int tos, int oif, const xfrm_address_t *saddr, const xfrm_address_t *daddr) { @@ -28,6 +28,7 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, memset(fl4, 0, sizeof(*fl4)); fl4->daddr = daddr->a4; fl4->flowi4_tos = tos; + fl4->flowi4_oif = oif; if (saddr) fl4->saddr = saddr->a4; @@ -38,22 +39,22 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, return ERR_CAST(rt); } -static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, +static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, int oif, const xfrm_address_t *saddr, const xfrm_address_t *daddr) { struct flowi4 fl4; - return __xfrm4_dst_lookup(net, &fl4, tos, saddr, daddr); + return __xfrm4_dst_lookup(net, &fl4, tos, oif, saddr, daddr); } -static int xfrm4_get_saddr(struct net *net, +static int xfrm4_get_saddr(struct net *net, int oif, xfrm_address_t *saddr, xfrm_address_t *daddr) { struct dst_entry *dst; struct flowi4 fl4; - dst = __xfrm4_dst_lookup(net, &fl4, 0, NULL, daddr); + dst = __xfrm4_dst_lookup(net, &fl4, 0, oif, NULL, daddr); if (IS_ERR(dst)) return -EHOSTUNREACH; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index ed0583c1b9fc..a74013d3eceb 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -26,7 +26,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo; -static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, +static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif, const xfrm_address_t *saddr, const xfrm_address_t *daddr) { @@ -35,6 +35,7 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int err; memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_oif = oif; memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr)); if (saddr) memcpy(&fl6.saddr, saddr, sizeof(fl6.saddr)); @@ -50,13 +51,13 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, return dst; } -static int xfrm6_get_saddr(struct net *net, +static int xfrm6_get_saddr(struct net *net, int oif, xfrm_address_t *saddr, xfrm_address_t *daddr) { struct dst_entry *dst; struct net_device *dev; - dst = xfrm6_dst_lookup(net, 0, NULL, daddr); + dst = xfrm6_dst_lookup(net, 0, oif, NULL, daddr); if (IS_ERR(dst)) return -EHOSTUNREACH; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 18cead7645be..94af3d065785 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -115,7 +115,8 @@ static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo) rcu_read_unlock(); } -static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, +static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, + int tos, int oif, const xfrm_address_t *saddr, const xfrm_address_t *daddr, int family) @@ -127,14 +128,15 @@ static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, if (unlikely(afinfo == NULL)) return ERR_PTR(-EAFNOSUPPORT); - dst = afinfo->dst_lookup(net, tos, saddr, daddr); + dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr); xfrm_policy_put_afinfo(afinfo); return dst; } -static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos, +static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, + int tos, int oif, xfrm_address_t *prev_saddr, xfrm_address_t *prev_daddr, int family) @@ -153,7 +155,7 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos, daddr = x->coaddr; } - dst = __xfrm_dst_lookup(net, tos, saddr, daddr, family); + dst = __xfrm_dst_lookup(net, tos, oif, saddr, daddr, family); if (!IS_ERR(dst)) { if (prev_saddr != saddr) @@ -1373,15 +1375,15 @@ int __xfrm_sk_clone_policy(struct sock *sk) } static int -xfrm_get_saddr(struct net *net, xfrm_address_t *local, xfrm_address_t *remote, - unsigned short family) +xfrm_get_saddr(struct net *net, int oif, xfrm_address_t *local, + xfrm_address_t *remote, unsigned short family) { int err; struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); if (unlikely(afinfo == NULL)) return -EINVAL; - err = afinfo->get_saddr(net, local, remote); + err = afinfo->get_saddr(net, oif, local, remote); xfrm_policy_put_afinfo(afinfo); return err; } @@ -1410,7 +1412,9 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl, remote = &tmpl->id.daddr; local = &tmpl->saddr; if (xfrm_addr_any(local, tmpl->encap_family)) { - error = xfrm_get_saddr(net, &tmp, remote, tmpl->encap_family); + error = xfrm_get_saddr(net, fl->flowi_oif, + &tmp, remote, + tmpl->encap_family); if (error) goto fail; local = &tmp; @@ -1690,8 +1694,8 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { family = xfrm[i]->props.family; - dst = xfrm_dst_lookup(xfrm[i], tos, &saddr, &daddr, - family); + dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif, + &saddr, &daddr, family); err = PTR_ERR(dst); if (IS_ERR(dst)) goto put_states; -- cgit v1.2.3 From cdf0969763e020923abe28fddc605add572febc2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 11 Aug 2015 12:00:37 -0700 Subject: Revert "Merge branch 'mv88e6xxx-switchdev-fdb'" This reverts commit f1d5ca434413b20cd3f8c18ff2b634b7782149a5, reversing changes made to 4933d85c5173832ebd261756522095837583c458. I applied v2 instead of v3. Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6171.c | 6 +- drivers/net/dsa/mv88e6352.c | 6 +- drivers/net/dsa/mv88e6xxx.c | 223 +++++++++++------------------------ drivers/net/dsa/mv88e6xxx.h | 31 ++--- drivers/net/ethernet/rocker/rocker.c | 2 +- include/net/dsa.h | 16 +-- include/net/switchdev.h | 3 +- net/bridge/br_fdb.c | 2 +- net/dsa/slave.c | 218 ++++++++++++++++------------------ net/switchdev/switchdev.c | 7 +- 10 files changed, 197 insertions(+), 317 deletions(-) (limited to 'include') diff --git a/drivers/net/dsa/mv88e6171.c b/drivers/net/dsa/mv88e6171.c index 735f04cd83ee..1c7808495a9d 100644 --- a/drivers/net/dsa/mv88e6171.c +++ b/drivers/net/dsa/mv88e6171.c @@ -116,9 +116,9 @@ struct dsa_switch_driver mv88e6171_switch_driver = { .port_join_bridge = mv88e6xxx_join_bridge, .port_leave_bridge = mv88e6xxx_leave_bridge, .port_stp_update = mv88e6xxx_port_stp_update, - .port_fdb_add = mv88e6xxx_port_fdb_add, - .port_fdb_del = mv88e6xxx_port_fdb_del, - .port_fdb_getnext = mv88e6xxx_port_fdb_getnext, + .fdb_add = mv88e6xxx_port_fdb_add, + .fdb_del = mv88e6xxx_port_fdb_del, + .fdb_getnext = mv88e6xxx_port_fdb_getnext, }; MODULE_ALIAS("platform:mv88e6171"); diff --git a/drivers/net/dsa/mv88e6352.c b/drivers/net/dsa/mv88e6352.c index a18f7c83d4cb..7e935852e192 100644 --- a/drivers/net/dsa/mv88e6352.c +++ b/drivers/net/dsa/mv88e6352.c @@ -343,9 +343,9 @@ struct dsa_switch_driver mv88e6352_switch_driver = { .port_join_bridge = mv88e6xxx_join_bridge, .port_leave_bridge = mv88e6xxx_leave_bridge, .port_stp_update = mv88e6xxx_port_stp_update, - .port_fdb_add = mv88e6xxx_port_fdb_add, - .port_fdb_del = mv88e6xxx_port_fdb_del, - .port_fdb_getnext = mv88e6xxx_port_fdb_getnext, + .fdb_add = mv88e6xxx_port_fdb_add, + .fdb_del = mv88e6xxx_port_fdb_del, + .fdb_getnext = mv88e6xxx_port_fdb_getnext, }; MODULE_ALIAS("platform:mv88e6172"); diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index 9c6781de533b..109452056eff 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -964,7 +964,7 @@ static int _mv88e6xxx_atu_cmd(struct dsa_switch *ds, int fid, u16 cmd) { int ret; - ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_ATU_FID, fid); + ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, 0x01, fid); if (ret < 0) return ret; @@ -1091,7 +1091,7 @@ int mv88e6xxx_join_bridge(struct dsa_switch *ds, int port, u32 br_port_mask) ps->bridge_mask[fid] = br_port_mask; if (fid != ps->fid[port]) { - clear_bit(ps->fid[port], ps->fid_bitmap); + ps->fid_mask |= 1 << ps->fid[port]; ps->fid[port] = fid; ret = _mv88e6xxx_update_bridge_config(ds, fid); } @@ -1125,16 +1125,9 @@ int mv88e6xxx_leave_bridge(struct dsa_switch *ds, int port, u32 br_port_mask) mutex_lock(&ps->smi_mutex); - newfid = find_next_zero_bit(ps->fid_bitmap, VLAN_N_VID, 1); - if (unlikely(newfid > ps->num_ports)) { - netdev_err(ds->ports[port], "all first %d FIDs are used\n", - ps->num_ports); - ret = -ENOSPC; - goto unlock; - } - + newfid = __ffs(ps->fid_mask); ps->fid[port] = newfid; - set_bit(newfid, ps->fid_bitmap); + ps->fid_mask &= ~(1 << newfid); ps->bridge_mask[fid] &= ~(1 << port); ps->bridge_mask[newfid] = 1 << port; @@ -1142,7 +1135,6 @@ int mv88e6xxx_leave_bridge(struct dsa_switch *ds, int port, u32 br_port_mask) if (!ret) ret = _mv88e6xxx_update_bridge_config(ds, newfid); -unlock: mutex_unlock(&ps->smi_mutex); return ret; @@ -1182,8 +1174,8 @@ int mv88e6xxx_port_stp_update(struct dsa_switch *ds, int port, u8 state) return 0; } -static int _mv88e6xxx_atu_mac_write(struct dsa_switch *ds, - const u8 addr[ETH_ALEN]) +static int __mv88e6xxx_write_addr(struct dsa_switch *ds, + const unsigned char *addr) { int i, ret; @@ -1198,7 +1190,7 @@ static int _mv88e6xxx_atu_mac_write(struct dsa_switch *ds, return 0; } -static int _mv88e6xxx_atu_mac_read(struct dsa_switch *ds, u8 addr[ETH_ALEN]) +static int __mv88e6xxx_read_addr(struct dsa_switch *ds, unsigned char *addr) { int i, ret; @@ -1214,190 +1206,109 @@ static int _mv88e6xxx_atu_mac_read(struct dsa_switch *ds, u8 addr[ETH_ALEN]) return 0; } -static int _mv88e6xxx_atu_load(struct dsa_switch *ds, - struct mv88e6xxx_atu_entry *entry) -{ - u16 reg = 0; - int ret; - - ret = _mv88e6xxx_atu_wait(ds); - if (ret < 0) - return ret; - - ret = _mv88e6xxx_atu_mac_write(ds, entry->mac); - if (ret < 0) - return ret; - - if (entry->state != GLOBAL_ATU_DATA_STATE_UNUSED) { - unsigned int mask, shift; - - if (entry->trunk) { - reg |= GLOBAL_ATU_DATA_TRUNK; - mask = GLOBAL_ATU_DATA_TRUNK_ID_MASK; - shift = GLOBAL_ATU_DATA_TRUNK_ID_SHIFT; - } else { - mask = GLOBAL_ATU_DATA_PORT_VECTOR_MASK; - shift = GLOBAL_ATU_DATA_PORT_VECTOR_SHIFT; - } - - reg |= (entry->portv_trunkid << shift) & mask; - } - - reg |= entry->state & GLOBAL_ATU_DATA_STATE_MASK; - - ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_ATU_DATA, reg); - if (ret < 0) - return ret; - - return _mv88e6xxx_atu_cmd(ds, entry->fid, GLOBAL_ATU_OP_LOAD_DB); -} - -static int _mv88e6xxx_atu_getnext(struct dsa_switch *ds, u16 fid, - const u8 addr[ETH_ALEN], - struct mv88e6xxx_atu_entry *entry) +static int __mv88e6xxx_port_fdb_cmd(struct dsa_switch *ds, int port, + const unsigned char *addr, int state) { - struct mv88e6xxx_atu_entry next = { 0 }; + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + u8 fid = ps->fid[port]; int ret; - next.fid = fid; - ret = _mv88e6xxx_atu_wait(ds); if (ret < 0) return ret; - ret = _mv88e6xxx_atu_mac_write(ds, addr); + ret = __mv88e6xxx_write_addr(ds, addr); if (ret < 0) return ret; - ret = _mv88e6xxx_atu_cmd(ds, fid, GLOBAL_ATU_OP_GET_NEXT_DB); - if (ret < 0) - return ret; - - ret = _mv88e6xxx_atu_mac_read(ds, next.mac); - if (ret < 0) - return ret; - - ret = _mv88e6xxx_reg_read(ds, REG_GLOBAL, GLOBAL_ATU_DATA); - if (ret < 0) - return ret; - - next.state = ret & GLOBAL_ATU_DATA_STATE_MASK; - if (next.state != GLOBAL_ATU_DATA_STATE_UNUSED) { - unsigned int mask, shift; - - if (ret & GLOBAL_ATU_DATA_TRUNK) { - next.trunk = true; - mask = GLOBAL_ATU_DATA_TRUNK_ID_MASK; - shift = GLOBAL_ATU_DATA_TRUNK_ID_SHIFT; - } else { - next.trunk = false; - mask = GLOBAL_ATU_DATA_PORT_VECTOR_MASK; - shift = GLOBAL_ATU_DATA_PORT_VECTOR_SHIFT; - } - - next.portv_trunkid = (ret & mask) >> shift; - } - - *entry = next; - return 0; -} - -static int _mv88e6xxx_port_vid_to_fid(struct dsa_switch *ds, int port, u16 vid) -{ - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - - if (vid == 0) - return ps->fid[port]; - - return -ENOENT; -} - -static int _mv88e6xxx_port_fdb_load(struct dsa_switch *ds, int port, u16 vid, - const u8 addr[ETH_ALEN], u8 state) -{ - struct mv88e6xxx_atu_entry entry = { 0 }; - int ret; - - ret = _mv88e6xxx_port_vid_to_fid(ds, port, vid); - if (ret < 0) + ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_ATU_DATA, + (0x10 << port) | state); + if (ret) return ret; - entry.fid = ret; - entry.state = state; - ether_addr_copy(entry.mac, addr); - if (state != GLOBAL_ATU_DATA_STATE_UNUSED) { - entry.trunk = false; - entry.portv_trunkid = BIT(port); - } + ret = _mv88e6xxx_atu_cmd(ds, fid, GLOBAL_ATU_OP_LOAD_DB); - return _mv88e6xxx_atu_load(ds, &entry); + return ret; } -int mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port, u16 vid, - const u8 addr[ETH_ALEN]) +int mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - u8 state = is_multicast_ether_addr(addr) ? + int state = is_multicast_ether_addr(addr) ? GLOBAL_ATU_DATA_STATE_MC_STATIC : GLOBAL_ATU_DATA_STATE_UC_STATIC; + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int ret; mutex_lock(&ps->smi_mutex); - ret = _mv88e6xxx_port_fdb_load(ds, port, vid, addr, state); + ret = __mv88e6xxx_port_fdb_cmd(ds, port, addr, state); mutex_unlock(&ps->smi_mutex); return ret; } -int mv88e6xxx_port_fdb_del(struct dsa_switch *ds, int port, u16 vid, - const u8 addr[ETH_ALEN]) +int mv88e6xxx_port_fdb_del(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - u8 state = GLOBAL_ATU_DATA_STATE_UNUSED; int ret; mutex_lock(&ps->smi_mutex); - ret = _mv88e6xxx_port_fdb_load(ds, port, vid, addr, state); + ret = __mv88e6xxx_port_fdb_cmd(ds, port, addr, + GLOBAL_ATU_DATA_STATE_UNUSED); mutex_unlock(&ps->smi_mutex); return ret; } -int mv88e6xxx_port_fdb_getnext(struct dsa_switch *ds, int port, u16 *vid, - u8 addr[ETH_ALEN], bool *is_static) +static int __mv88e6xxx_port_getnext(struct dsa_switch *ds, int port, + unsigned char *addr, bool *is_static) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - struct mv88e6xxx_atu_entry next; - u16 fid; - int ret; + u8 fid = ps->fid[port]; + int ret, state; - mutex_lock(&ps->smi_mutex); + ret = _mv88e6xxx_atu_wait(ds); + if (ret < 0) + return ret; - ret = _mv88e6xxx_port_vid_to_fid(ds, port, *vid); + ret = __mv88e6xxx_write_addr(ds, addr); if (ret < 0) - goto unlock; - fid = ret; + return ret; do { - if (is_broadcast_ether_addr(addr)) { - ret = -ENOENT; - goto unlock; - } + ret = _mv88e6xxx_atu_cmd(ds, fid, GLOBAL_ATU_OP_GET_NEXT_DB); + if (ret < 0) + return ret; - ret = _mv88e6xxx_atu_getnext(ds, fid, addr, &next); + ret = _mv88e6xxx_reg_read(ds, REG_GLOBAL, GLOBAL_ATU_DATA); if (ret < 0) - goto unlock; + return ret; + state = ret & GLOBAL_ATU_DATA_STATE_MASK; + if (state == GLOBAL_ATU_DATA_STATE_UNUSED) + return -ENOENT; + } while (!(((ret >> 4) & 0xff) & (1 << port))); - ether_addr_copy(addr, next.mac); + ret = __mv88e6xxx_read_addr(ds, addr); + if (ret < 0) + return ret; - if (next.state == GLOBAL_ATU_DATA_STATE_UNUSED) - continue; - } while (next.trunk || (next.portv_trunkid & BIT(port)) == 0); + *is_static = state == (is_multicast_ether_addr(addr) ? + GLOBAL_ATU_DATA_STATE_MC_STATIC : + GLOBAL_ATU_DATA_STATE_UC_STATIC); + + return 0; +} + +/* get next entry for port */ +int mv88e6xxx_port_fdb_getnext(struct dsa_switch *ds, int port, + unsigned char *addr, bool *is_static) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + int ret; - *is_static = next.state == (is_multicast_ether_addr(addr) ? - GLOBAL_ATU_DATA_STATE_MC_STATIC : - GLOBAL_ATU_DATA_STATE_UC_STATIC); -unlock: + mutex_lock(&ps->smi_mutex); + ret = __mv88e6xxx_port_getnext(ds, port, addr, is_static); mutex_unlock(&ps->smi_mutex); return ret; @@ -1641,9 +1552,9 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) * ports, and allow each of the 'real' ports to only talk to * the upstream port. */ - fid = port + 1; + fid = __ffs(ps->fid_mask); ps->fid[port] = fid; - set_bit(fid, ps->fid_bitmap); + ps->fid_mask &= ~(1 << fid); if (!dsa_is_cpu_port(ds, port)) ps->bridge_mask[fid] = 1 << port; @@ -1740,7 +1651,7 @@ static int mv88e6xxx_atu_show_db(struct seq_file *s, struct dsa_switch *ds, unsigned char addr[6]; int ret, data, state; - ret = _mv88e6xxx_atu_mac_write(ds, bcast); + ret = __mv88e6xxx_write_addr(ds, bcast); if (ret < 0) return ret; @@ -1755,7 +1666,7 @@ static int mv88e6xxx_atu_show_db(struct seq_file *s, struct dsa_switch *ds, state = data & GLOBAL_ATU_DATA_STATE_MASK; if (state == GLOBAL_ATU_DATA_STATE_UNUSED) break; - ret = _mv88e6xxx_atu_mac_read(ds, addr); + ret = __mv88e6xxx_read_addr(ds, addr); if (ret < 0) return ret; mv88e6xxx_atu_show_entry(s, dbnum, addr, data); @@ -1942,6 +1853,8 @@ int mv88e6xxx_setup_common(struct dsa_switch *ds) ps->id = REG_READ(REG_PORT(0), PORT_SWITCH_ID) & 0xfff0; + ps->fid_mask = (1 << DSA_MAX_PORTS) - 1; + INIT_WORK(&ps->bridge_work, mv88e6xxx_bridge_work); name = kasprintf(GFP_KERNEL, "dsa%d", ds->index); diff --git a/drivers/net/dsa/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx.h index a94c0cbb3813..8b017d65b691 100644 --- a/drivers/net/dsa/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx.h @@ -11,8 +11,6 @@ #ifndef __MV88E6XXX_H #define __MV88E6XXX_H -#include - #ifndef UINT64_MAX #define UINT64_MAX (u64)(~((u64)0)) #endif @@ -171,7 +169,6 @@ #define GLOBAL_MAC_01 0x01 #define GLOBAL_MAC_23 0x02 #define GLOBAL_MAC_45 0x03 -#define GLOBAL_ATU_FID 0x01 /* 6097 6165 6351 6352 */ #define GLOBAL_CONTROL 0x04 #define GLOBAL_CONTROL_SW_RESET BIT(15) #define GLOBAL_CONTROL_PPU_ENABLE BIT(14) @@ -206,8 +203,6 @@ #define GLOBAL_ATU_OP_GET_CLR_VIOLATION ((7 << 12) | GLOBAL_ATU_OP_BUSY) #define GLOBAL_ATU_DATA 0x0c #define GLOBAL_ATU_DATA_TRUNK BIT(15) -#define GLOBAL_ATU_DATA_TRUNK_ID_MASK 0x00f0 -#define GLOBAL_ATU_DATA_TRUNK_ID_SHIFT 4 #define GLOBAL_ATU_DATA_PORT_VECTOR_MASK 0x3ff0 #define GLOBAL_ATU_DATA_PORT_VECTOR_SHIFT 4 #define GLOBAL_ATU_DATA_STATE_MASK 0x0f @@ -318,14 +313,6 @@ #define GLOBAL2_QOS_WEIGHT 0x1c #define GLOBAL2_MISC 0x1d -struct mv88e6xxx_atu_entry { - u16 fid; - u8 state; - bool trunk; - u16 portv_trunkid; - u8 mac[ETH_ALEN]; -}; - struct mv88e6xxx_priv_state { /* When using multi-chip addressing, this mutex protects * access to the indirect access registers. (In single-chip @@ -364,9 +351,9 @@ struct mv88e6xxx_priv_state { /* hw bridging */ - DECLARE_BITMAP(fid_bitmap, VLAN_N_VID); /* FIDs 1 to 4095 available */ - u16 fid[DSA_MAX_PORTS]; /* per (non-bridged) port FID */ - u16 bridge_mask[DSA_MAX_PORTS]; /* br groups (indexed by FID) */ + u32 fid_mask; + u8 fid[DSA_MAX_PORTS]; + u16 bridge_mask[DSA_MAX_PORTS]; unsigned long port_state_update_mask; u8 port_state[DSA_MAX_PORTS]; @@ -426,15 +413,15 @@ int mv88e6xxx_set_eee(struct dsa_switch *ds, int port, int mv88e6xxx_join_bridge(struct dsa_switch *ds, int port, u32 br_port_mask); int mv88e6xxx_leave_bridge(struct dsa_switch *ds, int port, u32 br_port_mask); int mv88e6xxx_port_stp_update(struct dsa_switch *ds, int port, u8 state); +int mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid); +int mv88e6xxx_port_fdb_del(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid); +int mv88e6xxx_port_fdb_getnext(struct dsa_switch *ds, int port, + unsigned char *addr, bool *is_static); int mv88e6xxx_phy_page_read(struct dsa_switch *ds, int port, int page, int reg); int mv88e6xxx_phy_page_write(struct dsa_switch *ds, int port, int page, int reg, int val); -int mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port, u16 vid, - const u8 addr[ETH_ALEN]); -int mv88e6xxx_port_fdb_del(struct dsa_switch *ds, int port, u16 vid, - const u8 addr[ETH_ALEN]); -int mv88e6xxx_port_fdb_getnext(struct dsa_switch *ds, int port, u16 *vid, - u8 addr[ETH_ALEN], bool *is_static); extern struct dsa_switch_driver mv88e6131_switch_driver; extern struct dsa_switch_driver mv88e6123_61_65_switch_driver; diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 80bb25c5a644..b77e0e7307d4 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4543,7 +4543,7 @@ static int rocker_port_fdb_dump(const struct rocker_port *rocker_port, hash_for_each_safe(rocker->fdb_tbl, bkt, tmp, found, entry) { if (found->key.pport != rocker_port->pport) continue; - ether_addr_copy(fdb->addr, found->key.addr); + fdb->addr = found->key.addr; fdb->vid = rocker_port_vlan_to_vid(rocker_port, found->key.vlan_id); err = obj->cb(rocker_port->dev, obj); diff --git a/include/net/dsa.h b/include/net/dsa.h index 091d35f77180..fbca63ba8f73 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -296,16 +296,12 @@ struct dsa_switch_driver { u32 br_port_mask); int (*port_stp_update)(struct dsa_switch *ds, int port, u8 state); - - /* - * Forwarding database - */ - int (*port_fdb_add)(struct dsa_switch *ds, int port, u16 vid, - const u8 addr[ETH_ALEN]); - int (*port_fdb_del)(struct dsa_switch *ds, int port, u16 vid, - const u8 addr[ETH_ALEN]); - int (*port_fdb_getnext)(struct dsa_switch *ds, int port, u16 *vid, - u8 addr[ETH_ALEN], bool *is_static); + int (*fdb_add)(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid); + int (*fdb_del)(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid); + int (*fdb_getnext)(struct dsa_switch *ds, int port, + unsigned char *addr, bool *is_static); }; void register_switch_driver(struct dsa_switch_driver *type); diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 0e296b82aef3..89da8934519b 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -70,9 +70,8 @@ struct switchdev_obj { u32 tb_id; } ipv4_fib; struct switchdev_obj_fdb { /* PORT_FDB */ - u8 addr[ETH_ALEN]; + const unsigned char *addr; u16 vid; - bool is_static; } fdb; } u; }; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 5656b44bf3de..9e9875da0a4f 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -136,11 +136,11 @@ static void fdb_del_external_learn(struct net_bridge_fdb_entry *f) struct switchdev_obj obj = { .id = SWITCHDEV_OBJ_PORT_FDB, .u.fdb = { + .addr = f->addr.addr, .vid = f->vlan_id, }, }; - ether_addr_copy(obj.u.fdb.addr, f->addr.addr); switchdev_port_obj_del(f->dst->dev, &obj); } diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 1dbdeaab2bb4..0010c690cc67 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -19,7 +19,6 @@ #include #include #include -#include #include "dsa_priv.h" /* slave mii_bus handling ***************************************************/ @@ -201,6 +200,105 @@ out: return 0; } +static int dsa_slave_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, u16 vid, u16 nlm_flags) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + int ret = -EOPNOTSUPP; + + if (ds->drv->fdb_add) + ret = ds->drv->fdb_add(ds, p->port, addr, vid); + + return ret; +} + +static int dsa_slave_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, u16 vid) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + int ret = -EOPNOTSUPP; + + if (ds->drv->fdb_del) + ret = ds->drv->fdb_del(ds, p->port, addr, vid); + + return ret; +} + +static int dsa_slave_fill_info(struct net_device *dev, struct sk_buff *skb, + const unsigned char *addr, u16 vid, + bool is_static, + u32 portid, u32 seq, int type, + unsigned int flags) +{ + struct nlmsghdr *nlh; + struct ndmsg *ndm; + + nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags); + if (!nlh) + return -EMSGSIZE; + + ndm = nlmsg_data(nlh); + ndm->ndm_family = AF_BRIDGE; + ndm->ndm_pad1 = 0; + ndm->ndm_pad2 = 0; + ndm->ndm_flags = NTF_EXT_LEARNED; + ndm->ndm_type = 0; + ndm->ndm_ifindex = dev->ifindex; + ndm->ndm_state = is_static ? NUD_NOARP : NUD_REACHABLE; + + if (nla_put(skb, NDA_LLADDR, ETH_ALEN, addr)) + goto nla_put_failure; + + if (vid && nla_put_u16(skb, NDA_VLAN, vid)) + goto nla_put_failure; + + nlmsg_end(skb, nlh); + return 0; + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +/* Dump information about entries, in response to GETNEIGH */ +static int dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, + struct net_device *dev, + struct net_device *filter_dev, int idx) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + unsigned char addr[ETH_ALEN] = { 0 }; + int ret; + + if (!ds->drv->fdb_getnext) + return -EOPNOTSUPP; + + for (; ; idx++) { + bool is_static; + + ret = ds->drv->fdb_getnext(ds, p->port, addr, &is_static); + if (ret < 0) + break; + + if (idx < cb->args[0]) + continue; + + ret = dsa_slave_fill_info(dev, skb, addr, 0, + is_static, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNEIGH, NLM_F_MULTI); + if (ret < 0) + break; + } + + return idx; +} + static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct dsa_slave_priv *p = netdev_priv(dev); @@ -266,115 +364,6 @@ static int dsa_slave_port_attr_set(struct net_device *dev, return ret; } -static int dsa_slave_port_fdb_add(struct net_device *dev, - struct switchdev_obj *obj) -{ - struct switchdev_obj_fdb *fdb = &obj->u.fdb; - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->parent; - int err; - - if (obj->trans == SWITCHDEV_TRANS_PREPARE) - err = ds->drv->port_fdb_add ? 0 : -EOPNOTSUPP; - else if (obj->trans == SWITCHDEV_TRANS_COMMIT) - err = ds->drv->port_fdb_add(ds, p->port, fdb->vid, fdb->addr); - else - err = -EOPNOTSUPP; - - return err; -} - -static int dsa_slave_port_fdb_del(struct net_device *dev, - struct switchdev_obj *obj) -{ - struct switchdev_obj_fdb *fdb = &obj->u.fdb; - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->parent; - - if (!ds->drv->port_fdb_del) - return -EOPNOTSUPP; - - return ds->drv->port_fdb_del(ds, p->port, fdb->vid, fdb->addr); -} - -static int dsa_slave_port_fdb_dump(struct net_device *dev, - struct switchdev_obj *obj) -{ - struct switchdev_obj_fdb *fdb = &obj->u.fdb; - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->parent; - int err; - - if (!ds->drv->port_fdb_getnext) - return -EOPNOTSUPP; - - memset(fdb, 0, sizeof(*fdb)); - - for (;;) { - err = ds->drv->port_fdb_getnext(ds, p->port, &fdb->vid, - fdb->addr, &fdb->is_static); - if (err) - break; - - err = obj->cb(dev, obj); - if (err) - break; - } - - return err == -ENOENT ? 0 : err; -} - -static int dsa_slave_port_obj_add(struct net_device *dev, - struct switchdev_obj *obj) -{ - int err; - - switch (obj->id) { - case SWITCHDEV_OBJ_PORT_FDB: - err = dsa_slave_port_fdb_add(dev, obj); - break; - default: - err = -EOPNOTSUPP; - break; - } - - return err; -} - -static int dsa_slave_port_obj_del(struct net_device *dev, - struct switchdev_obj *obj) -{ - int err; - - switch (obj->id) { - case SWITCHDEV_OBJ_PORT_FDB: - err = dsa_slave_port_fdb_del(dev, obj); - break; - default: - err = -EOPNOTSUPP; - break; - } - - return err; -} - -static int dsa_slave_port_obj_dump(struct net_device *dev, - struct switchdev_obj *obj) -{ - int err; - - switch (obj->id) { - case SWITCHDEV_OBJ_PORT_FDB: - err = dsa_slave_port_fdb_dump(dev, obj); - break; - default: - err = -EOPNOTSUPP; - break; - } - - return err; -} - static int dsa_slave_bridge_port_join(struct net_device *dev, struct net_device *br) { @@ -776,9 +765,9 @@ static const struct net_device_ops dsa_slave_netdev_ops = { .ndo_change_rx_flags = dsa_slave_change_rx_flags, .ndo_set_rx_mode = dsa_slave_set_rx_mode, .ndo_set_mac_address = dsa_slave_set_mac_address, - .ndo_fdb_add = switchdev_port_fdb_add, - .ndo_fdb_del = switchdev_port_fdb_del, - .ndo_fdb_dump = switchdev_port_fdb_dump, + .ndo_fdb_add = dsa_slave_fdb_add, + .ndo_fdb_del = dsa_slave_fdb_del, + .ndo_fdb_dump = dsa_slave_fdb_dump, .ndo_do_ioctl = dsa_slave_ioctl, .ndo_get_iflink = dsa_slave_get_iflink, #ifdef CONFIG_NET_POLL_CONTROLLER @@ -791,9 +780,6 @@ static const struct net_device_ops dsa_slave_netdev_ops = { static const struct switchdev_ops dsa_slave_switchdev_ops = { .switchdev_port_attr_get = dsa_slave_port_attr_get, .switchdev_port_attr_set = dsa_slave_port_attr_set, - .switchdev_port_obj_add = dsa_slave_port_obj_add, - .switchdev_port_obj_del = dsa_slave_port_obj_del, - .switchdev_port_obj_dump = dsa_slave_port_obj_dump, }; static void dsa_slave_adjust_link(struct net_device *dev) diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index e9d1cacc4060..33bafa2e703e 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -743,11 +742,11 @@ int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct switchdev_obj obj = { .id = SWITCHDEV_OBJ_PORT_FDB, .u.fdb = { + .addr = addr, .vid = vid, }, }; - ether_addr_copy(obj.u.fdb.addr, addr); return switchdev_port_obj_add(dev, &obj); } EXPORT_SYMBOL_GPL(switchdev_port_fdb_add); @@ -770,11 +769,11 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct switchdev_obj obj = { .id = SWITCHDEV_OBJ_PORT_FDB, .u.fdb = { + .addr = addr, .vid = vid, }, }; - ether_addr_copy(obj.u.fdb.addr, addr); return switchdev_port_obj_del(dev, &obj); } EXPORT_SYMBOL_GPL(switchdev_port_fdb_del); @@ -811,7 +810,7 @@ static int switchdev_port_fdb_dump_cb(struct net_device *dev, ndm->ndm_flags = NTF_SELF; ndm->ndm_type = 0; ndm->ndm_ifindex = dev->ifindex; - ndm->ndm_state = obj->u.fdb.is_static ? NUD_NOARP : NUD_REACHABLE; + ndm->ndm_state = NUD_REACHABLE; if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, obj->u.fdb.addr)) goto nla_put_failure; -- cgit v1.2.3 From 2a778e1b58990e15de5cba4badec1fa7ecb87e80 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 10 Aug 2015 09:09:49 -0400 Subject: net: dsa: change FDB routines prototypes Change the prototype of port_getnext to include a vid parameter. This is necessary to introduce the support for VLAN. Also rename the fdb_{add,del,getnext} function pointers to port_fdb_{add,del,getnext} since they are specific to a given port. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6171.c | 6 +++--- drivers/net/dsa/mv88e6352.c | 6 +++--- drivers/net/dsa/mv88e6xxx.c | 2 +- drivers/net/dsa/mv88e6xxx.h | 2 +- include/net/dsa.h | 17 +++++++++++------ net/dsa/slave.c | 16 +++++++++------- 6 files changed, 28 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/drivers/net/dsa/mv88e6171.c b/drivers/net/dsa/mv88e6171.c index 1c7808495a9d..735f04cd83ee 100644 --- a/drivers/net/dsa/mv88e6171.c +++ b/drivers/net/dsa/mv88e6171.c @@ -116,9 +116,9 @@ struct dsa_switch_driver mv88e6171_switch_driver = { .port_join_bridge = mv88e6xxx_join_bridge, .port_leave_bridge = mv88e6xxx_leave_bridge, .port_stp_update = mv88e6xxx_port_stp_update, - .fdb_add = mv88e6xxx_port_fdb_add, - .fdb_del = mv88e6xxx_port_fdb_del, - .fdb_getnext = mv88e6xxx_port_fdb_getnext, + .port_fdb_add = mv88e6xxx_port_fdb_add, + .port_fdb_del = mv88e6xxx_port_fdb_del, + .port_fdb_getnext = mv88e6xxx_port_fdb_getnext, }; MODULE_ALIAS("platform:mv88e6171"); diff --git a/drivers/net/dsa/mv88e6352.c b/drivers/net/dsa/mv88e6352.c index 7e935852e192..a18f7c83d4cb 100644 --- a/drivers/net/dsa/mv88e6352.c +++ b/drivers/net/dsa/mv88e6352.c @@ -343,9 +343,9 @@ struct dsa_switch_driver mv88e6352_switch_driver = { .port_join_bridge = mv88e6xxx_join_bridge, .port_leave_bridge = mv88e6xxx_leave_bridge, .port_stp_update = mv88e6xxx_port_stp_update, - .fdb_add = mv88e6xxx_port_fdb_add, - .fdb_del = mv88e6xxx_port_fdb_del, - .fdb_getnext = mv88e6xxx_port_fdb_getnext, + .port_fdb_add = mv88e6xxx_port_fdb_add, + .port_fdb_del = mv88e6xxx_port_fdb_del, + .port_fdb_getnext = mv88e6xxx_port_fdb_getnext, }; MODULE_ALIAS("platform:mv88e6172"); diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index 0cc83785d194..d68e3fdd6c99 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -1310,7 +1310,7 @@ static int __mv88e6xxx_port_getnext(struct dsa_switch *ds, int port, /* get next entry for port */ int mv88e6xxx_port_fdb_getnext(struct dsa_switch *ds, int port, - unsigned char *addr, bool *is_static) + unsigned char *addr, u16 *vid, bool *is_static) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int ret; diff --git a/drivers/net/dsa/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx.h index 200327b7ea7d..55a6190ce159 100644 --- a/drivers/net/dsa/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx.h @@ -421,7 +421,7 @@ int mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port, int mv88e6xxx_port_fdb_del(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid); int mv88e6xxx_port_fdb_getnext(struct dsa_switch *ds, int port, - unsigned char *addr, bool *is_static); + unsigned char *addr, u16 *vid, bool *is_static); int mv88e6xxx_phy_page_read(struct dsa_switch *ds, int port, int page, int reg); int mv88e6xxx_phy_page_write(struct dsa_switch *ds, int port, int page, int reg, int val); diff --git a/include/net/dsa.h b/include/net/dsa.h index fbca63ba8f73..6356f437e911 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -296,12 +296,17 @@ struct dsa_switch_driver { u32 br_port_mask); int (*port_stp_update)(struct dsa_switch *ds, int port, u8 state); - int (*fdb_add)(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid); - int (*fdb_del)(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid); - int (*fdb_getnext)(struct dsa_switch *ds, int port, - unsigned char *addr, bool *is_static); + + /* + * Forwarding database + */ + int (*port_fdb_add)(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid); + int (*port_fdb_del)(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid); + int (*port_fdb_getnext)(struct dsa_switch *ds, int port, + unsigned char *addr, u16 *vid, + bool *is_static); }; void register_switch_driver(struct dsa_switch_driver *type); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 0010c690cc67..3d341b694ecf 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -208,8 +208,8 @@ static int dsa_slave_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct dsa_switch *ds = p->parent; int ret = -EOPNOTSUPP; - if (ds->drv->fdb_add) - ret = ds->drv->fdb_add(ds, p->port, addr, vid); + if (ds->drv->port_fdb_add) + ret = ds->drv->port_fdb_add(ds, p->port, addr, vid); return ret; } @@ -222,8 +222,8 @@ static int dsa_slave_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct dsa_switch *ds = p->parent; int ret = -EOPNOTSUPP; - if (ds->drv->fdb_del) - ret = ds->drv->fdb_del(ds, p->port, addr, vid); + if (ds->drv->port_fdb_del) + ret = ds->drv->port_fdb_del(ds, p->port, addr, vid); return ret; } @@ -272,22 +272,24 @@ static int dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; unsigned char addr[ETH_ALEN] = { 0 }; + u16 vid = 0; int ret; - if (!ds->drv->fdb_getnext) + if (!ds->drv->port_fdb_getnext) return -EOPNOTSUPP; for (; ; idx++) { bool is_static; - ret = ds->drv->fdb_getnext(ds, p->port, addr, &is_static); + ret = ds->drv->port_fdb_getnext(ds, p->port, addr, &vid, + &is_static); if (ret < 0) break; if (idx < cb->args[0]) continue; - ret = dsa_slave_fill_info(dev, skb, addr, 0, + ret = dsa_slave_fill_info(dev, skb, addr, vid, is_static, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, -- cgit v1.2.3 From ce80e7bc57e25062c361de8fb6444129a63bac6d Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 10 Aug 2015 09:09:52 -0400 Subject: net: switchdev: support static FDB addresses This patch adds an ndm_state member to the switchdev_obj_fdb structure, in order to support static FDB addresses. Set Rocker ndm_state to NUD_REACHABLE. Signed-off-by: Vivien Didelot Acked-by: Scott Feldman Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker.c | 1 + include/net/switchdev.h | 1 + net/switchdev/switchdev.c | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index b77e0e7307d4..af050759eb44 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4544,6 +4544,7 @@ static int rocker_port_fdb_dump(const struct rocker_port *rocker_port, if (found->key.pport != rocker_port->pport) continue; fdb->addr = found->key.addr; + fdb->ndm_state = NUD_REACHABLE; fdb->vid = rocker_port_vlan_to_vid(rocker_port, found->key.vlan_id); err = obj->cb(rocker_port->dev, obj); diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 89da8934519b..319baab3b48e 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -72,6 +72,7 @@ struct switchdev_obj { struct switchdev_obj_fdb { /* PORT_FDB */ const unsigned char *addr; u16 vid; + u16 ndm_state; } fdb; } u; }; diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 33bafa2e703e..16c1c43980a1 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -810,7 +810,7 @@ static int switchdev_port_fdb_dump_cb(struct net_device *dev, ndm->ndm_flags = NTF_SELF; ndm->ndm_type = 0; ndm->ndm_ifindex = dev->ifindex; - ndm->ndm_state = NUD_REACHABLE; + ndm->ndm_state = obj->u.fdb.ndm_state; if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, obj->u.fdb.addr)) goto nla_put_failure; -- cgit v1.2.3 From b72f6f51dc5abce94c1b5ee0186e9407ea0f919f Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Tue, 11 Aug 2015 21:44:08 +0200 Subject: 6lowpan: add generic 6lowpan netdev private data This patch introduced the 6lowpan netdev private data struct. We name it lowpan_priv and it's placed at the beginning of netdev private data. All lowpan interfaces should allocate this room at first of netdev private data. 6LoWPAN LL private data can be allocate by additional netdev private data, e.g. dev->priv_size should be "sizeof(struct lowpan_priv) + sizeof(LL_LOWPAN_PRIVATE_DATA)". Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/6lowpan.h | 23 +++++++++++++++++++++++ net/6lowpan/Makefile | 2 +- net/6lowpan/core.c | 20 ++++++++++++++++++++ net/bluetooth/6lowpan.c | 9 ++++++--- net/ieee802154/6lowpan/6lowpan_i.h | 3 ++- net/ieee802154/6lowpan/core.c | 4 +++- 6 files changed, 55 insertions(+), 6 deletions(-) create mode 100644 net/6lowpan/core.c (limited to 'include') diff --git a/include/net/6lowpan.h b/include/net/6lowpan.h index dc03d77ad23b..a2f59ec98d24 100644 --- a/include/net/6lowpan.h +++ b/include/net/6lowpan.h @@ -197,6 +197,27 @@ #define LOWPAN_NHC_UDP_CS_P_11 0xF3 /* source & dest = 0xF0B + 4bit inline */ #define LOWPAN_NHC_UDP_CS_C 0x04 /* checksum elided */ +#define LOWPAN_PRIV_SIZE(llpriv_size) \ + (sizeof(struct lowpan_priv) + llpriv_size) + +enum lowpan_lltypes { + LOWPAN_LLTYPE_BTLE, + LOWPAN_LLTYPE_IEEE802154, +}; + +struct lowpan_priv { + enum lowpan_lltypes lltype; + + /* must be last */ + u8 priv[0] __aligned(sizeof(void *)); +}; + +static inline +struct lowpan_priv *lowpan_priv(const struct net_device *dev) +{ + return netdev_priv(dev); +} + #ifdef DEBUG /* print data in line */ static inline void raw_dump_inline(const char *caller, char *msg, @@ -372,6 +393,8 @@ lowpan_uncompress_size(const struct sk_buff *skb, u16 *dgram_offset) return skb->len + uncomp_header - ret; } +void lowpan_netdev_setup(struct net_device *dev, enum lowpan_lltypes lltype); + int lowpan_header_decompress(struct sk_buff *skb, struct net_device *dev, const u8 *saddr, const u8 saddr_type, diff --git a/net/6lowpan/Makefile b/net/6lowpan/Makefile index eb8baa72adc8..c6ffc55ee0d7 100644 --- a/net/6lowpan/Makefile +++ b/net/6lowpan/Makefile @@ -1,6 +1,6 @@ obj-$(CONFIG_6LOWPAN) += 6lowpan.o -6lowpan-y := iphc.o nhc.o +6lowpan-y := core.o iphc.o nhc.o #rfc6282 nhcs obj-$(CONFIG_6LOWPAN_NHC_DEST) += nhc_dest.o diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c new file mode 100644 index 000000000000..ed0eec9b41a1 --- /dev/null +++ b/net/6lowpan/core.c @@ -0,0 +1,20 @@ +/* This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Authors: + * (C) 2015 Pengutronix, Alexander Aring + */ + +#include + +void lowpan_netdev_setup(struct net_device *dev, enum lowpan_lltypes lltype) +{ + lowpan_priv(dev)->lltype = lltype; +} +EXPORT_SYMBOL(lowpan_netdev_setup); diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 24ed5b02cefc..131e79cde350 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -85,7 +85,7 @@ struct lowpan_dev { static inline struct lowpan_dev *lowpan_dev(const struct net_device *netdev) { - return netdev_priv(netdev); + return (struct lowpan_dev *)lowpan_priv(netdev)->priv; } static inline void peer_add(struct lowpan_dev *dev, struct lowpan_peer *peer) @@ -848,8 +848,9 @@ static int setup_netdev(struct l2cap_chan *chan, struct lowpan_dev **dev) struct net_device *netdev; int err = 0; - netdev = alloc_netdev(sizeof(struct lowpan_dev), IFACE_NAME_TEMPLATE, - NET_NAME_UNKNOWN, netdev_setup); + netdev = alloc_netdev(LOWPAN_PRIV_SIZE(sizeof(struct lowpan_dev)), + IFACE_NAME_TEMPLATE, NET_NAME_UNKNOWN, + netdev_setup); if (!netdev) return -ENOMEM; @@ -869,6 +870,8 @@ static int setup_netdev(struct l2cap_chan *chan, struct lowpan_dev **dev) list_add_rcu(&(*dev)->list, &bt_6lowpan_devices); spin_unlock(&devices_lock); + lowpan_netdev_setup(netdev, LOWPAN_LLTYPE_BTLE); + err = register_netdev(netdev); if (err < 0) { BT_INFO("register_netdev failed %d", err); diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h index 923b680adb61..ea339fa94c27 100644 --- a/net/ieee802154/6lowpan/6lowpan_i.h +++ b/net/ieee802154/6lowpan/6lowpan_i.h @@ -5,6 +5,7 @@ #include #include +#include struct lowpan_create_arg { u16 tag; @@ -46,7 +47,7 @@ struct lowpan_dev_info { static inline struct lowpan_dev_info *lowpan_dev_info(const struct net_device *dev) { - return netdev_priv(dev); + return (struct lowpan_dev_info *)lowpan_priv(dev)->priv; } int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type); diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index a4edee8fdc79..180e9f5f86c3 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -138,6 +138,8 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev, /* Set the lowpan hardware address to the wpan hardware address. */ memcpy(dev->dev_addr, real_dev->dev_addr, IEEE802154_ADDR_LEN); + lowpan_netdev_setup(dev, LOWPAN_LLTYPE_IEEE802154); + ret = register_netdevice(dev); if (ret >= 0) { real_dev->ieee802154_ptr->lowpan_dev = dev; @@ -162,7 +164,7 @@ static void lowpan_dellink(struct net_device *dev, struct list_head *head) static struct rtnl_link_ops lowpan_link_ops __read_mostly = { .kind = "lowpan", - .priv_size = sizeof(struct lowpan_dev_info), + .priv_size = LOWPAN_PRIV_SIZE(sizeof(struct lowpan_dev_info)), .setup = lowpan_setup, .newlink = lowpan_newlink, .dellink = lowpan_dellink, -- cgit v1.2.3 From e037239e5e7b61007763984aa35a8329596d8c88 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 10 Aug 2015 15:28:49 -0400 Subject: drm/radeon: add new OLAND pci id Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- include/drm/drm_pciids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index 45c39a37f924..8bc073d297db 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -172,6 +172,7 @@ {0x1002, 0x6610, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6611, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6613, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x6617, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6620, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6621, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6623, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ -- cgit v1.2.3 From 4b58c37bb9d4282446f7a0194dbc44325787ac8c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 8 Jul 2015 15:41:48 +0300 Subject: mac80211: remove ieee80211_aes_cmac_calculate_k1_k2() The iwlwifi driver was the only driver that used this, but as it turns out it never needed it, so we can remove it. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 13 ------------- net/mac80211/aes_cmac.c | 17 ----------------- 2 files changed, 30 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 484cc14fb947..e3314e516681 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4330,19 +4330,6 @@ void ieee80211_get_tkip_rx_p1k(struct ieee80211_key_conf *keyconf, void ieee80211_get_tkip_p2k(struct ieee80211_key_conf *keyconf, struct sk_buff *skb, u8 *p2k); -/** - * ieee80211_aes_cmac_calculate_k1_k2 - calculate the AES-CMAC sub keys - * - * This function computes the two AES-CMAC sub-keys, based on the - * previously installed master key. - * - * @keyconf: the parameter passed with the set key - * @k1: a buffer to be filled with the 1st sub-key - * @k2: a buffer to be filled with the 2nd sub-key - */ -void ieee80211_aes_cmac_calculate_k1_k2(struct ieee80211_key_conf *keyconf, - u8 *k1, u8 *k2); - /** * ieee80211_get_key_tx_seq - get key TX sequence counter * diff --git a/net/mac80211/aes_cmac.c b/net/mac80211/aes_cmac.c index 4192806be3d3..bdf0790d89cc 100644 --- a/net/mac80211/aes_cmac.c +++ b/net/mac80211/aes_cmac.c @@ -145,20 +145,3 @@ void ieee80211_aes_cmac_key_free(struct crypto_cipher *tfm) { crypto_free_cipher(tfm); } - -void ieee80211_aes_cmac_calculate_k1_k2(struct ieee80211_key_conf *keyconf, - u8 *k1, u8 *k2) -{ - u8 l[AES_BLOCK_SIZE] = {}; - struct ieee80211_key *key = - container_of(keyconf, struct ieee80211_key, conf); - - crypto_cipher_encrypt_one(key->u.aes_cmac.tfm, l, l); - - memcpy(k1, l, AES_BLOCK_SIZE); - gf_mulx(k1); - - memcpy(k2, k1, AES_BLOCK_SIZE); - gf_mulx(k2); -} -EXPORT_SYMBOL(ieee80211_aes_cmac_calculate_k1_k2); -- cgit v1.2.3 From 4c96b7dc0d393f12c17e0d81db15aa4a820a6ab3 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Wed, 12 Aug 2015 17:06:26 -0500 Subject: Add a matching set of device_ functions for determining mac/phy OF has some helper functions for parsing MAC and PHY settings. In cases where the platform is providing this information rather than the device itself, there needs to be similar functions for ACPI. These functions are slightly modified versions of the ones in of_net which can use information provided via DT or ACPI. Signed-off-by: Jeremy Linton Signed-off-by: David S. Miller --- drivers/base/property.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/property.h | 4 +++ 2 files changed, 77 insertions(+) (limited to 'include') diff --git a/drivers/base/property.c b/drivers/base/property.c index f3f6d167f3f1..2e8cd147f02d 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -16,6 +16,8 @@ #include #include #include +#include +#include /** * device_add_property_set - Add a collection of properties to a device object. @@ -533,3 +535,74 @@ bool device_dma_is_coherent(struct device *dev) return coherent; } EXPORT_SYMBOL_GPL(device_dma_is_coherent); + +/** + * device_get_phy_mode - Get phy mode for given device_node + * @dev: Pointer to the given device + * + * The function gets phy interface string from property 'phy-mode' or + * 'phy-connection-type', and return its index in phy_modes table, or errno in + * error case. + */ +int device_get_phy_mode(struct device *dev) +{ + const char *pm; + int err, i; + + err = device_property_read_string(dev, "phy-mode", &pm); + if (err < 0) + err = device_property_read_string(dev, + "phy-connection-type", &pm); + if (err < 0) + return err; + + for (i = 0; i < PHY_INTERFACE_MODE_MAX; i++) + if (!strcasecmp(pm, phy_modes(i))) + return i; + + return -ENODEV; +} +EXPORT_SYMBOL_GPL(device_get_phy_mode); + +static void *device_get_mac_addr(struct device *dev, + const char *name, char *addr, + int alen) +{ + int ret = device_property_read_u8_array(dev, name, addr, alen); + + if (ret == 0 && is_valid_ether_addr(addr)) + return addr; + return NULL; +} + +/** + * Search the device tree for the best MAC address to use. 'mac-address' is + * checked first, because that is supposed to contain to "most recent" MAC + * address. If that isn't set, then 'local-mac-address' is checked next, + * because that is the default address. If that isn't set, then the obsolete + * 'address' is checked, just in case we're using an old device tree. + * + * Note that the 'address' property is supposed to contain a virtual address of + * the register set, but some DTS files have redefined that property to be the + * MAC address. + * + * All-zero MAC addresses are rejected, because those could be properties that + * exist in the device tree, but were not set by U-Boot. For example, the + * DTS could define 'mac-address' and 'local-mac-address', with zero MAC + * addresses. Some older U-Boots only initialized 'local-mac-address'. In + * this case, the real MAC is in 'local-mac-address', and 'mac-address' exists + * but is all zeros. +*/ +void *device_get_mac_address(struct device *dev, char *addr, int alen) +{ + addr = device_get_mac_addr(dev, "mac-address", addr, alen); + if (addr) + return addr; + + addr = device_get_mac_addr(dev, "local-mac-address", addr, alen); + if (addr) + return addr; + + return device_get_mac_addr(dev, "address", addr, alen); +} +EXPORT_SYMBOL(device_get_mac_address); diff --git a/include/linux/property.h b/include/linux/property.h index 76ebde9c11d4..a59c6ee566c2 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -166,4 +166,8 @@ void device_add_property_set(struct device *dev, struct property_set *pset); bool device_dma_is_coherent(struct device *dev); +int device_get_phy_mode(struct device *dev); + +void *device_get_mac_address(struct device *dev, char *addr, int alen); + #endif /* _LINUX_PROPERTY_H_ */ -- cgit v1.2.3 From 35103d11173b8fea874183f8aa508ae71234d299 Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Thu, 13 Aug 2015 10:39:01 -0400 Subject: net: ipv6 sysctl option to ignore routes when nexthop link is down Like the ipv4 patch with a similar title, this adds a sysctl to allow the user to change routing behavior based on whether or not the interface associated with the nexthop was an up or down link. The default setting preserves the current behavior, but anyone that enables it will notice that nexthops on down interfaces will no longer be selected: net.ipv6.conf.all.ignore_routes_with_linkdown = 0 net.ipv6.conf.default.ignore_routes_with_linkdown = 0 net.ipv6.conf.lo.ignore_routes_with_linkdown = 0 ... When the above sysctls are set, not only will link status be reported to userspace, but an indication that a nexthop is dead and will not be used is also reported. 1000::/8 via 7000::2 dev p7p1 metric 1024 dead linkdown pref medium 1000::/8 via 8000::2 dev p8p1 metric 1024 pref medium 7000::/8 dev p7p1 proto kernel metric 256 dead linkdown pref medium 8000::/8 dev p8p1 proto kernel metric 256 pref medium 9000::/8 via 8000::2 dev p8p1 metric 2048 pref medium 9000::/8 via 7000::2 dev p7p1 metric 1024 dead linkdown pref medium fe80::/64 dev p7p1 proto kernel metric 256 dead linkdown pref medium fe80::/64 dev p8p1 proto kernel metric 256 pref medium This also adds devconf support and notification when sysctl values change. v2: drop use of rt6i_nhflags since it is not needed right now Signed-off-by: Andy Gospodarek Signed-off-by: Dinesh Dutt Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 + include/uapi/linux/ipv6.h | 1 + net/ipv6/addrconf.c | 105 +++++++++++++++++++++++++++++++++++++++++++++- net/ipv6/route.c | 11 ++++- 4 files changed, 116 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index cb9dcad72372..f1f32af6d9b9 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -31,6 +31,7 @@ struct ipv6_devconf { __s32 accept_ra_defrtr; __s32 accept_ra_min_hop_limit; __s32 accept_ra_pinfo; + __s32 ignore_routes_with_linkdown; #ifdef CONFIG_IPV6_ROUTER_PREF __s32 accept_ra_rtr_pref; __s32 rtr_probe_interval; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 80f3b74446a1..38b4fef20219 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -173,6 +173,7 @@ enum { DEVCONF_STABLE_SECRET, DEVCONF_USE_OIF_ADDRS_ONLY, DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT, + DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN, DEVCONF_MAX }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 53e3a9d756b0..5dfbac72f1ab 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -214,6 +214,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .initialized = false, }, .use_oif_addrs_only = 0, + .ignore_routes_with_linkdown = 0, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -257,6 +258,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .initialized = false, }, .use_oif_addrs_only = 0, + .ignore_routes_with_linkdown = 0, }; /* Check if a valid qdisc is available */ @@ -472,6 +474,9 @@ static int inet6_netconf_msgsize_devconf(int type) if (type == -1 || type == NETCONFA_PROXY_NEIGH) size += nla_total_size(4); + if (type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) + size += nla_total_size(4); + return size; } @@ -508,6 +513,11 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, nla_put_s32(skb, NETCONFA_PROXY_NEIGH, devconf->proxy_ndp) < 0) goto nla_put_failure; + if ((type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) && + nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, + devconf->ignore_routes_with_linkdown) < 0) + goto nla_put_failure; + nlmsg_end(skb, nlh); return 0; @@ -544,6 +554,7 @@ static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = { [NETCONFA_IFINDEX] = { .len = sizeof(int) }, [NETCONFA_FORWARDING] = { .len = sizeof(int) }, [NETCONFA_PROXY_NEIGH] = { .len = sizeof(int) }, + [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN] = { .len = sizeof(int) }, }; static int inet6_netconf_get_devconf(struct sk_buff *in_skb, @@ -766,6 +777,63 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) rt6_purge_dflt_routers(net); return 1; } + +static void addrconf_linkdown_change(struct net *net, __s32 newf) +{ + struct net_device *dev; + struct inet6_dev *idev; + + for_each_netdev(net, dev) { + idev = __in6_dev_get(dev); + if (idev) { + int changed = (!idev->cnf.ignore_routes_with_linkdown) ^ (!newf); + + idev->cnf.ignore_routes_with_linkdown = newf; + if (changed) + inet6_netconf_notify_devconf(dev_net(dev), + NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, + dev->ifindex, + &idev->cnf); + } + } +} + +static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf) +{ + struct net *net; + int old; + + if (!rtnl_trylock()) + return restart_syscall(); + + net = (struct net *)table->extra2; + old = *p; + *p = newf; + + if (p == &net->ipv6.devconf_dflt->ignore_routes_with_linkdown) { + if ((!newf) ^ (!old)) + inet6_netconf_notify_devconf(net, + NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, + NETCONFA_IFINDEX_DEFAULT, + net->ipv6.devconf_dflt); + rtnl_unlock(); + return 0; + } + + if (p == &net->ipv6.devconf_all->ignore_routes_with_linkdown) { + net->ipv6.devconf_dflt->ignore_routes_with_linkdown = newf; + addrconf_linkdown_change(net, newf); + if ((!newf) ^ (!old)) + inet6_netconf_notify_devconf(net, + NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, + NETCONFA_IFINDEX_ALL, + net->ipv6.devconf_all); + } + rtnl_unlock(); + + return 1; +} + #endif /* Nobody refers to this ifaddr, destroy it */ @@ -4616,6 +4684,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc; array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local; array[DEVCONF_ACCEPT_RA_MTU] = cnf->accept_ra_mtu; + array[DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN] = cnf->ignore_routes_with_linkdown; /* we omit DEVCONF_STABLE_SECRET for now */ array[DEVCONF_USE_OIF_ADDRS_ONLY] = cnf->use_oif_addrs_only; } @@ -5338,6 +5407,34 @@ out: return err; } +static +int addrconf_sysctl_ignore_routes_with_linkdown(struct ctl_table *ctl, + int write, + void __user *buffer, + size_t *lenp, + loff_t *ppos) +{ + int *valp = ctl->data; + int val = *valp; + loff_t pos = *ppos; + struct ctl_table lctl; + int ret; + + /* ctl->data points to idev->cnf.ignore_routes_when_linkdown + * we should not modify it until we get the rtnl lock. + */ + lctl = *ctl; + lctl.data = &val; + + ret = proc_dointvec(&lctl, write, buffer, lenp, ppos); + + if (write) + ret = addrconf_fixup_linkdown(ctl, valp, val); + if (ret) + *ppos = pos; + return ret; +} + static struct addrconf_sysctl_table { struct ctl_table_header *sysctl_header; @@ -5629,7 +5726,13 @@ static struct addrconf_sysctl_table .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, - + }, + { + .procname = "ignore_routes_with_linkdown", + .data = &ipv6_devconf.ignore_routes_with_linkdown, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = addrconf_sysctl_ignore_routes_with_linkdown, }, { /* sentinel */ diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 370f72785385..1c0217e61357 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -665,6 +665,12 @@ static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict, { int m; bool match_do_rr = false; + struct inet6_dev *idev = rt->rt6i_idev; + struct net_device *dev = rt->dst.dev; + + if (dev && !netif_carrier_ok(dev) && + idev->cnf.ignore_routes_with_linkdown) + goto out; if (rt6_check_expired(rt)) goto out; @@ -2887,8 +2893,11 @@ static int rt6_fill_node(struct net *net, else rtm->rtm_type = RTN_UNICAST; rtm->rtm_flags = 0; - if (!netif_carrier_ok(rt->dst.dev)) + if (!netif_carrier_ok(rt->dst.dev)) { rtm->rtm_flags |= RTNH_F_LINKDOWN; + if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown) + rtm->rtm_flags |= RTNH_F_DEAD; + } rtm->rtm_scope = RT_SCOPE_UNIVERSE; rtm->rtm_protocol = rt->rt6i_protocol; if (rt->rt6i_flags & RTF_DYNAMIC) -- cgit v1.2.3 From 111495361598205967f1be4e07d4726b0f762d60 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 13 Aug 2015 12:52:17 -0400 Subject: net: dsa: add support for switchdev VLAN objects Add new functions in DSA drivers to access hardware VLAN entries through SWITCHDEV_OBJ_PORT_VLAN objects: - port_pvid_get() and vlan_getnext() to dump a VLAN - port_vlan_del() to exclude a port from a VLAN - port_pvid_set() and port_vlan_add() to join a port to a VLAN The DSA infrastructure will ensure that each VLAN of the given range does not already belong to another bridge. If it does, it will fallback to software VLAN and won't program the hardware. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- include/net/dsa.h | 11 ++++ net/dsa/slave.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 169 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 6356f437e911..bd9b76502458 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -297,6 +297,17 @@ struct dsa_switch_driver { int (*port_stp_update)(struct dsa_switch *ds, int port, u8 state); + /* + * VLAN support + */ + int (*port_pvid_get)(struct dsa_switch *ds, int port, u16 *pvid); + int (*port_pvid_set)(struct dsa_switch *ds, int port, u16 pvid); + int (*port_vlan_add)(struct dsa_switch *ds, int port, u16 vid, + bool untagged); + int (*port_vlan_del)(struct dsa_switch *ds, int port, u16 vid); + int (*vlan_getnext)(struct dsa_switch *ds, u16 *vid, + unsigned long *ports, unsigned long *untagged); + /* * Forwarding database */ diff --git a/net/dsa/slave.c b/net/dsa/slave.c index aa0266f7d0ce..373ff315030d 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -200,6 +200,152 @@ out: return 0; } +static int dsa_bridge_check_vlan_range(struct dsa_switch *ds, + const struct net_device *bridge, + u16 vid_begin, u16 vid_end) +{ + struct dsa_slave_priv *p; + struct net_device *dev, *vlan_br; + DECLARE_BITMAP(members, DSA_MAX_PORTS); + DECLARE_BITMAP(untagged, DSA_MAX_PORTS); + u16 vid; + int member, err; + + if (!ds->drv->vlan_getnext || !vid_begin) + return -EOPNOTSUPP; + + vid = vid_begin - 1; + + do { + err = ds->drv->vlan_getnext(ds, &vid, members, untagged); + if (err) + break; + + if (vid > vid_end) + break; + + member = find_first_bit(members, DSA_MAX_PORTS); + if (member == DSA_MAX_PORTS) + continue; + + dev = ds->ports[member]; + p = netdev_priv(dev); + vlan_br = p->bridge_dev; + if (vlan_br == bridge) + continue; + + netdev_dbg(vlan_br, "hardware VLAN %d already in use\n", vid); + return -EOPNOTSUPP; + } while (vid < vid_end); + + return err == -ENOENT ? 0 : err; +} + +static int dsa_slave_port_vlan_add(struct net_device *dev, + struct switchdev_obj *obj) +{ + struct switchdev_obj_vlan *vlan = &obj->u.vlan; + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + u16 vid; + int err; + + switch (obj->trans) { + case SWITCHDEV_TRANS_PREPARE: + if (!ds->drv->port_vlan_add || !ds->drv->port_pvid_set) + return -EOPNOTSUPP; + + /* If the requested port doesn't belong to the same bridge as + * the VLAN members, fallback to software VLAN (hopefully). + */ + err = dsa_bridge_check_vlan_range(ds, p->bridge_dev, + vlan->vid_begin, + vlan->vid_end); + if (err) + return err; + break; + case SWITCHDEV_TRANS_COMMIT: + for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) { + err = ds->drv->port_vlan_add(ds, p->port, vid, + vlan->flags & + BRIDGE_VLAN_INFO_UNTAGGED); + if (!err && vlan->flags & BRIDGE_VLAN_INFO_PVID) + err = ds->drv->port_pvid_set(ds, p->port, vid); + if (err) + return err; + } + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static int dsa_slave_port_vlan_del(struct net_device *dev, + struct switchdev_obj *obj) +{ + struct switchdev_obj_vlan *vlan = &obj->u.vlan; + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + u16 vid; + int err; + + if (!ds->drv->port_vlan_del) + return -EOPNOTSUPP; + + for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) { + err = ds->drv->port_vlan_del(ds, p->port, vid); + if (err) + return err; + } + + return 0; +} + +static int dsa_slave_port_vlan_dump(struct net_device *dev, + struct switchdev_obj *obj) +{ + struct switchdev_obj_vlan *vlan = &obj->u.vlan; + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + DECLARE_BITMAP(members, DSA_MAX_PORTS); + DECLARE_BITMAP(untagged, DSA_MAX_PORTS); + u16 pvid, vid = 0; + int err; + + if (!ds->drv->vlan_getnext || !ds->drv->port_pvid_get) + return -EOPNOTSUPP; + + err = ds->drv->port_pvid_get(ds, p->port, &pvid); + if (err) + return err; + + for (;;) { + err = ds->drv->vlan_getnext(ds, &vid, members, untagged); + if (err) + break; + + if (!test_bit(p->port, members)) + continue; + + memset(vlan, 0, sizeof(*vlan)); + vlan->vid_begin = vlan->vid_end = vid; + + if (vid == pvid) + vlan->flags |= BRIDGE_VLAN_INFO_PVID; + + if (test_bit(p->port, untagged)) + vlan->flags |= BRIDGE_VLAN_INFO_UNTAGGED; + + err = obj->cb(dev, obj); + if (err) + break; + } + + return err == -ENOENT ? 0 : err; +} + static int dsa_slave_port_fdb_add(struct net_device *dev, struct switchdev_obj *obj) { @@ -341,6 +487,9 @@ static int dsa_slave_port_obj_add(struct net_device *dev, case SWITCHDEV_OBJ_PORT_FDB: err = dsa_slave_port_fdb_add(dev, obj); break; + case SWITCHDEV_OBJ_PORT_VLAN: + err = dsa_slave_port_vlan_add(dev, obj); + break; default: err = -EOPNOTSUPP; break; @@ -358,6 +507,9 @@ static int dsa_slave_port_obj_del(struct net_device *dev, case SWITCHDEV_OBJ_PORT_FDB: err = dsa_slave_port_fdb_del(dev, obj); break; + case SWITCHDEV_OBJ_PORT_VLAN: + err = dsa_slave_port_vlan_del(dev, obj); + break; default: err = -EOPNOTSUPP; break; @@ -375,6 +527,9 @@ static int dsa_slave_port_obj_dump(struct net_device *dev, case SWITCHDEV_OBJ_PORT_FDB: err = dsa_slave_port_fdb_dump(dev, obj); break; + case SWITCHDEV_OBJ_PORT_VLAN: + err = dsa_slave_port_vlan_dump(dev, obj); + break; default: err = -EOPNOTSUPP; break; @@ -794,6 +949,9 @@ static const struct net_device_ops dsa_slave_netdev_ops = { .ndo_netpoll_cleanup = dsa_slave_netpoll_cleanup, .ndo_poll_controller = dsa_slave_poll_controller, #endif + .ndo_bridge_getlink = switchdev_port_bridge_getlink, + .ndo_bridge_setlink = switchdev_port_bridge_setlink, + .ndo_bridge_dellink = switchdev_port_bridge_dellink, }; static const struct switchdev_ops dsa_slave_switchdev_ops = { -- cgit v1.2.3 From 4e3c89920cd3a6cfce22c6f537690747c26128dd Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 13 Aug 2015 14:59:00 -0600 Subject: net: Introduce VRF related flags and helpers Add a VRF_MASTER flag for interfaces and helper functions for determining if a device is a VRF_MASTER. Add link attribute for passing VRF_TABLE id. Add vrf_ptr to netdevice. Add various macros for determining if a device is a VRF device, the index of the master VRF device and table associated with VRF device. Signed-off-by: Shrijeet Mukherjee Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/netdevice.h | 20 +++++++ include/net/vrf.h | 139 +++++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/if_link.h | 9 +++ 3 files changed, 168 insertions(+) create mode 100644 include/net/vrf.h (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 607b5f41f46f..f7a6ef2fae3a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1289,6 +1289,7 @@ enum netdev_priv_flags { IFF_XMIT_DST_RELEASE_PERM = 1<<22, IFF_IPVLAN_MASTER = 1<<23, IFF_IPVLAN_SLAVE = 1<<24, + IFF_VRF_MASTER = 1<<25, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1316,6 +1317,7 @@ enum netdev_priv_flags { #define IFF_XMIT_DST_RELEASE_PERM IFF_XMIT_DST_RELEASE_PERM #define IFF_IPVLAN_MASTER IFF_IPVLAN_MASTER #define IFF_IPVLAN_SLAVE IFF_IPVLAN_SLAVE +#define IFF_VRF_MASTER IFF_VRF_MASTER /** * struct net_device - The DEVICE structure. @@ -1432,6 +1434,7 @@ enum netdev_priv_flags { * @dn_ptr: DECnet specific data * @ip6_ptr: IPv6 specific data * @ax25_ptr: AX.25 specific data + * @vrf_ptr: VRF specific data * @ieee80211_ptr: IEEE 802.11 specific data, assign before registering * * @last_rx: Time of last Rx @@ -1650,6 +1653,7 @@ struct net_device { struct dn_dev __rcu *dn_ptr; struct inet6_dev __rcu *ip6_ptr; void *ax25_ptr; + struct net_vrf_dev __rcu *vrf_ptr; struct wireless_dev *ieee80211_ptr; struct wpan_dev *ieee802154_ptr; #if IS_ENABLED(CONFIG_MPLS_ROUTING) @@ -3808,6 +3812,22 @@ static inline bool netif_supports_nofcs(struct net_device *dev) return dev->priv_flags & IFF_SUPP_NOFCS; } +static inline bool netif_is_vrf(const struct net_device *dev) +{ + return dev->priv_flags & IFF_VRF_MASTER; +} + +static inline bool netif_index_is_vrf(struct net *net, int ifindex) +{ + struct net_device *dev = dev_get_by_index_rcu(net, ifindex); + bool rc = false; + + if (dev) + rc = netif_is_vrf(dev); + + return rc; +} + /* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */ static inline void netif_keep_dst(struct net_device *dev) { diff --git a/include/net/vrf.h b/include/net/vrf.h new file mode 100644 index 000000000000..0484d29d4589 --- /dev/null +++ b/include/net/vrf.h @@ -0,0 +1,139 @@ +/* + * include/net/net_vrf.h - adds vrf dev structure definitions + * Copyright (c) 2015 Cumulus Networks + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __LINUX_NET_VRF_H +#define __LINUX_NET_VRF_H + +struct net_vrf_dev { + struct rcu_head rcu; + int ifindex; /* ifindex of master dev */ + u32 tb_id; /* table id for VRF */ +}; + +struct slave { + struct list_head list; + struct net_device *dev; +}; + +struct slave_queue { + struct list_head all_slaves; + int num_slaves; +}; + +struct net_vrf { + struct slave_queue queue; + struct rtable *rth; + u32 tb_id; +}; + + +#if IS_ENABLED(CONFIG_NET_VRF) +/* called with rcu_read_lock() */ +static inline int vrf_master_ifindex_rcu(const struct net_device *dev) +{ + struct net_vrf_dev *vrf_ptr; + int ifindex = 0; + + if (!dev) + return 0; + + if (netif_is_vrf(dev)) + ifindex = dev->ifindex; + else { + vrf_ptr = rcu_dereference(dev->vrf_ptr); + if (vrf_ptr) + ifindex = vrf_ptr->ifindex; + } + + return ifindex; +} + +/* called with rcu_read_lock */ +static inline int vrf_dev_table_rcu(const struct net_device *dev) +{ + int tb_id = 0; + + if (dev) { + struct net_vrf_dev *vrf_ptr; + + vrf_ptr = rcu_dereference(dev->vrf_ptr); + if (vrf_ptr) + tb_id = vrf_ptr->tb_id; + } + return tb_id; +} + +static inline int vrf_dev_table(const struct net_device *dev) +{ + int tb_id; + + rcu_read_lock(); + tb_id = vrf_dev_table_rcu(dev); + rcu_read_unlock(); + + return tb_id; +} + +/* called with rtnl */ +static inline int vrf_dev_table_rtnl(const struct net_device *dev) +{ + int tb_id = 0; + + if (dev) { + struct net_vrf_dev *vrf_ptr; + + vrf_ptr = rtnl_dereference(dev->vrf_ptr); + if (vrf_ptr) + tb_id = vrf_ptr->tb_id; + } + return tb_id; +} + +/* caller has already checked netif_is_vrf(dev) */ +static inline struct rtable *vrf_dev_get_rth(const struct net_device *dev) +{ + struct rtable *rth = ERR_PTR(-ENETUNREACH); + struct net_vrf *vrf = netdev_priv(dev); + + if (vrf) { + rth = vrf->rth; + atomic_inc(&rth->dst.__refcnt); + } + return rth; +} + +#else +static inline int vrf_master_ifindex_rcu(const struct net_device *dev) +{ + return 0; +} + +static inline int vrf_dev_table_rcu(const struct net_device *dev) +{ + return 0; +} + +static inline int vrf_dev_table(const struct net_device *dev) +{ + return 0; +} + +static inline int vrf_dev_table_rtnl(const struct net_device *dev) +{ + return 0; +} + +static inline struct rtable *vrf_dev_get_rth(const struct net_device *dev) +{ + return ERR_PTR(-ENETUNREACH); +} +#endif + +#endif /* __LINUX_NET_VRF_H */ diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index d450be36add2..313c305fd1ad 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -341,6 +341,15 @@ enum macvlan_macaddr_mode { #define MACVLAN_FLAG_NOPROMISC 1 +/* VRF section */ +enum { + IFLA_VRF_UNSPEC, + IFLA_VRF_TABLE, + __IFLA_VRF_MAX +}; + +#define IFLA_VRF_MAX (__IFLA_VRF_MAX - 1) + /* IPVLAN section */ enum { IFLA_IPVLAN_UNSPEC, -- cgit v1.2.3 From 613d09b30f8b589d5a9b49775054c8865db95d1c Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 13 Aug 2015 14:59:02 -0600 Subject: net: Use VRF device index for lookups on TX As with ingress use the index of VRF master device for route lookups on egress. However, the oif should only be used to direct the lookups to a specific table. Routes in the table are not based on the VRF device but rather interfaces that are part of the VRF so do not consider the oif for lookups within the table. The FLOWI_FLAG_VRFSRC is used to control this latter part. Signed-off-by: Shrijeet Mukherjee Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/flow.h | 1 + include/net/route.h | 3 +++ net/ipv4/fib_trie.c | 7 +++++-- net/ipv4/icmp.c | 4 ++++ net/ipv4/route.c | 5 +++++ 5 files changed, 18 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/flow.h b/include/net/flow.h index 3098ae33a178..f305588fc162 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -33,6 +33,7 @@ struct flowi_common { __u8 flowic_flags; #define FLOWI_FLAG_ANYSRC 0x01 #define FLOWI_FLAG_KNOWN_NH 0x02 +#define FLOWI_FLAG_VRFSRC 0x04 __u32 flowic_secid; struct flowi_tunnel flowic_tun_key; }; diff --git a/include/net/route.h b/include/net/route.h index 2d45f419477f..94189d4bd899 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -251,6 +251,9 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 if (inet_sk(sk)->transparent) flow_flags |= FLOWI_FLAG_ANYSRC; + if (netif_index_is_vrf(sock_net(sk), oif)) + flow_flags |= FLOWI_FLAG_VRFSRC; + flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, protocol, flow_flags, dst, src, dport, sport); } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 37c4bb89a708..1243c79cb5b0 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1423,8 +1423,11 @@ found: nh->nh_flags & RTNH_F_LINKDOWN && !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE)) continue; - if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif) - continue; + if (!(flp->flowi4_flags & FLOWI_FLAG_VRFSRC)) { + if (flp->flowi4_oif && + flp->flowi4_oif != nh->nh_oif) + continue; + } if (!(fib_flags & FIB_LOOKUP_NOREF)) atomic_inc(&fi->fib_clntref); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index c0556f1e4bf0..1164fc4ce3bc 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -96,6 +96,7 @@ #include #include #include +#include /* * Build xmit assembly blocks @@ -425,6 +426,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) fl4.flowi4_mark = mark; fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); fl4.flowi4_proto = IPPROTO_ICMP; + fl4.flowi4_oif = vrf_master_ifindex_rcu(skb->dev) ? : skb->dev->ifindex; security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) @@ -458,6 +460,8 @@ static struct rtable *icmp_route_lookup(struct net *net, fl4->flowi4_proto = IPPROTO_ICMP; fl4->fl4_icmp_type = type; fl4->fl4_icmp_code = code; + fl4->flowi4_oif = vrf_master_ifindex_rcu(skb_in->dev) ? : skb_in->dev->ifindex; + security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4)); rt = __ip_route_output_key(net, fl4); if (IS_ERR(rt)) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c26ff1f7067d..2c89d294b669 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2131,6 +2131,11 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) fl4->saddr = inet_select_addr(dev_out, 0, RT_SCOPE_HOST); } + if (netif_is_vrf(dev_out) && + !(fl4->flowi4_flags & FLOWI_FLAG_VRFSRC)) { + rth = vrf_dev_get_rth(dev_out); + goto out; + } } if (!fl4->daddr) { -- cgit v1.2.3 From 15be405eb2ea943ac5fa2aab7d0ba282e9ef1301 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 13 Aug 2015 14:59:04 -0600 Subject: net: Add inet_addr lookup by table Currently inet_addr_type and inet_dev_addr_type expect local addresses to be in the local table. With the VRF device local routes for devices associated with a VRF will be in the table associated with the VRF. Provide an alternate inet_addr lookup to use a specific table rather than defaulting to the local table. Signed-off-by: Shrijeet Mukherjee Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/route.h | 1 + net/ipv4/fib_frontend.c | 22 +++++++++++++++------- 2 files changed, 16 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/route.h b/include/net/route.h index 94189d4bd899..6ba681f0b98d 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -189,6 +189,7 @@ void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk); void ip_rt_send_redirect(struct sk_buff *skb); unsigned int inet_addr_type(struct net *net, __be32 addr); +unsigned int inet_addr_type_table(struct net *net, __be32 addr, int tb_id); unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, __be32 addr); void ip_rt_multicast_event(struct in_device *); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index d8ced1d89f1b..b11321a8e58d 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -212,12 +212,12 @@ void fib_flush_external(struct net *net) */ static inline unsigned int __inet_dev_addr_type(struct net *net, const struct net_device *dev, - __be32 addr) + __be32 addr, int tb_id) { struct flowi4 fl4 = { .daddr = addr }; struct fib_result res; unsigned int ret = RTN_BROADCAST; - struct fib_table *local_table; + struct fib_table *table; if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr)) return RTN_BROADCAST; @@ -226,10 +226,10 @@ static inline unsigned int __inet_dev_addr_type(struct net *net, rcu_read_lock(); - local_table = fib_get_table(net, RT_TABLE_LOCAL); - if (local_table) { + table = fib_get_table(net, tb_id); + if (table) { ret = RTN_UNICAST; - if (!fib_table_lookup(local_table, &fl4, &res, FIB_LOOKUP_NOREF)) { + if (!fib_table_lookup(table, &fl4, &res, FIB_LOOKUP_NOREF)) { if (!dev || dev == res.fi->fib_dev) ret = res.type; } @@ -239,16 +239,24 @@ static inline unsigned int __inet_dev_addr_type(struct net *net, return ret; } +unsigned int inet_addr_type_table(struct net *net, __be32 addr, int tb_id) +{ + return __inet_dev_addr_type(net, NULL, addr, tb_id); +} +EXPORT_SYMBOL(inet_addr_type_table); + unsigned int inet_addr_type(struct net *net, __be32 addr) { - return __inet_dev_addr_type(net, NULL, addr); + return __inet_dev_addr_type(net, NULL, addr, RT_TABLE_LOCAL); } EXPORT_SYMBOL(inet_addr_type); unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, __be32 addr) { - return __inet_dev_addr_type(net, dev, addr); + int rt_table = vrf_dev_table(dev) ? : RT_TABLE_LOCAL; + + return __inet_dev_addr_type(net, dev, addr, rt_table); } EXPORT_SYMBOL(inet_dev_addr_type); -- cgit v1.2.3 From 30bbaa19500559d7625c65632195413f639b3b97 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 13 Aug 2015 14:59:05 -0600 Subject: net: Fix up inet_addr_type checks Currently inet_addr_type and inet_dev_addr_type expect local addresses to be in the local table. With the VRF device local routes for devices associated with a VRF will be in the table associated with the VRF. Provide an alternate inet_addr lookup to use a specific table rather than defaulting to the local table. inet_addr_type_dev_table keeps the same semantics as inet_addr_type but if the passed in device is enslaved to a VRF then the table for that VRF is used for the lookup. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/route.h | 3 +++ net/ipv4/af_inet.c | 13 ++++++++++++- net/ipv4/arp.c | 15 +++++++++------ net/ipv4/fib_frontend.c | 25 ++++++++++++++++++++++--- net/ipv4/fib_semantics.c | 6 ++++-- net/ipv4/icmp.c | 5 +++-- 6 files changed, 53 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/route.h b/include/net/route.h index 6ba681f0b98d..6dda2c1bf8c6 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -192,6 +192,9 @@ unsigned int inet_addr_type(struct net *net, __be32 addr); unsigned int inet_addr_type_table(struct net *net, __be32 addr, int tb_id); unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, __be32 addr); +unsigned int inet_addr_type_dev_table(struct net *net, + const struct net_device *dev, + __be32 addr); void ip_rt_multicast_event(struct in_device *); int ip_rt_ioctl(struct net *, unsigned int cmd, void __user *arg); void ip_rt_get_source(u8 *src, struct sk_buff *skb, struct rtable *rt); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index cc4e498a0ccf..c8b855882fa5 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -119,6 +119,7 @@ #ifdef CONFIG_IP_MROUTE #include #endif +#include /* The inetsw table contains everything that inet_create needs to @@ -427,6 +428,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct net *net = sock_net(sk); unsigned short snum; int chk_addr_ret; + int tb_id = RT_TABLE_LOCAL; int err; /* If the socket has its own bind function then use it. (RAW) */ @@ -448,7 +450,16 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; } - chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr); + if (sk->sk_bound_dev_if) { + struct net_device *dev; + + rcu_read_lock(); + dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); + if (dev) + tb_id = vrf_dev_table_rcu(dev) ? : tb_id; + rcu_read_unlock(); + } + chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id); /* Not specified by any standard per-se, however it breaks too * many applications when removed. It is unfortunate since diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 34a308573f4b..30409b75e925 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -233,7 +233,7 @@ static int arp_constructor(struct neighbour *neigh) return -EINVAL; } - neigh->type = inet_addr_type(dev_net(dev), addr); + neigh->type = inet_addr_type_dev_table(dev_net(dev), dev, addr); parms = in_dev->arp_parms; __neigh_parms_put(neigh->parms); @@ -343,7 +343,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) switch (IN_DEV_ARP_ANNOUNCE(in_dev)) { default: case 0: /* By default announce any local IP */ - if (skb && inet_addr_type(dev_net(dev), + if (skb && inet_addr_type_dev_table(dev_net(dev), dev, ip_hdr(skb)->saddr) == RTN_LOCAL) saddr = ip_hdr(skb)->saddr; break; @@ -351,7 +351,8 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) if (!skb) break; saddr = ip_hdr(skb)->saddr; - if (inet_addr_type(dev_net(dev), saddr) == RTN_LOCAL) { + if (inet_addr_type_dev_table(dev_net(dev), dev, + saddr) == RTN_LOCAL) { /* saddr should be known to target */ if (inet_addr_onlink(in_dev, target, saddr)) break; @@ -751,7 +752,7 @@ static int arp_process(struct sock *sk, struct sk_buff *skb) /* Special case: IPv4 duplicate address detection packet (RFC2131) */ if (sip == 0) { if (arp->ar_op == htons(ARPOP_REQUEST) && - inet_addr_type(net, tip) == RTN_LOCAL && + inet_addr_type_dev_table(net, dev, tip) == RTN_LOCAL && !arp_ignore(in_dev, sip, tip)) arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha, dev->dev_addr, sha); @@ -811,16 +812,18 @@ static int arp_process(struct sock *sk, struct sk_buff *skb) n = __neigh_lookup(&arp_tbl, &sip, dev, 0); if (IN_DEV_ARP_ACCEPT(in_dev)) { + unsigned int addr_type = inet_addr_type_dev_table(net, dev, sip); + /* Unsolicited ARP is not accepted by default. It is possible, that this option should be enabled for some devices (strip is candidate) */ is_garp = arp->ar_op == htons(ARPOP_REQUEST) && tip == sip && - inet_addr_type(net, sip) == RTN_UNICAST; + addr_type == RTN_UNICAST; if (!n && ((arp->ar_op == htons(ARPOP_REPLY) && - inet_addr_type(net, sip) == RTN_UNICAST) || is_garp)) + addr_type == RTN_UNICAST) || is_garp)) n = __neigh_lookup(&arp_tbl, &sip, dev, 1); } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index b11321a8e58d..c55723ec4c3e 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -260,6 +260,19 @@ unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, } EXPORT_SYMBOL(inet_dev_addr_type); +/* inet_addr_type with dev == NULL but using the table from a dev + * if one is associated + */ +unsigned int inet_addr_type_dev_table(struct net *net, + const struct net_device *dev, + __be32 addr) +{ + int rt_table = vrf_dev_table(dev) ? : RT_TABLE_LOCAL; + + return __inet_dev_addr_type(net, NULL, addr, rt_table); +} +EXPORT_SYMBOL(inet_addr_type_dev_table); + __be32 fib_compute_spec_dst(struct sk_buff *skb) { struct net_device *dev = skb->dev; @@ -510,9 +523,12 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt, addr = sk_extract_addr(&rt->rt_gateway); if (rt->rt_gateway.sa_family == AF_INET && addr) { + unsigned int addr_type; + cfg->fc_gw = addr; + addr_type = inet_addr_type_table(net, addr, cfg->fc_table); if (rt->rt_flags & RTF_GATEWAY && - inet_addr_type(net, addr) == RTN_UNICAST) + addr_type == RTN_UNICAST) cfg->fc_scope = RT_SCOPE_UNIVERSE; } @@ -984,11 +1000,14 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim) fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim); } if (!(ok & LOCAL_OK)) { + unsigned int addr_type; + fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim); /* Check, that this local address finally disappeared. */ - if (gone && - inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) { + addr_type = inet_addr_type_dev_table(dev_net(dev), dev, + ifa->ifa_local); + if (gone && addr_type != RTN_LOCAL) { /* And the last, but not the least thing. * We must flush stray FIB entries. * diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 558e196bae0f..410ddb67221e 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -670,16 +670,18 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, struct fib_result res; if (nh->nh_flags & RTNH_F_ONLINK) { + unsigned int addr_type; if (cfg->fc_scope >= RT_SCOPE_LINK) return -EINVAL; - if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST) - return -EINVAL; dev = __dev_get_by_index(net, nh->nh_oif); if (!dev) return -ENODEV; if (!(dev->flags & IFF_UP)) return -ENETDOWN; + addr_type = inet_addr_type_dev_table(net, dev, nh->nh_gw); + if (addr_type != RTN_UNICAST) + return -EINVAL; if (!netif_carrier_ok(dev)) nh->nh_flags |= RTNH_F_LINKDOWN; nh->nh_dev = dev; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 1164fc4ce3bc..c6f1ce149ffb 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -484,7 +484,8 @@ static struct rtable *icmp_route_lookup(struct net *net, if (err) goto relookup_failed; - if (inet_addr_type(net, fl4_dec.saddr) == RTN_LOCAL) { + if (inet_addr_type_dev_table(net, skb_in->dev, + fl4_dec.saddr) == RTN_LOCAL) { rt2 = __ip_route_output_key(net, &fl4_dec); if (IS_ERR(rt2)) err = PTR_ERR(rt2); @@ -833,7 +834,7 @@ static bool icmp_unreach(struct sk_buff *skb) */ if (!net->ipv4.sysctl_icmp_ignore_bogus_error_responses && - inet_addr_type(net, iph->daddr) == RTN_BROADCAST) { + inet_addr_type_dev_table(net, skb->dev, iph->daddr) == RTN_BROADCAST) { net_warn_ratelimited("%pI4 sent an invalid ICMP type %u, code %u error to a broadcast: %pI4 on %s\n", &ip_hdr(skb)->saddr, icmph->type, icmph->code, -- cgit v1.2.3 From 2377799c084d86d22074cd4acd20edc32024d669 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 13 Jul 2015 12:17:25 +0200 Subject: average: provide macro to create static EWMA Having the EWMA parameters stored in the runtime struct imposes memory requirements for the constant values that could just be inlined in the code. This particularly makes sense if there are a lot of such structs, for example in mac80211 in the station table where each station has a number of these in an array, and there can be many stations. Provide a macro DECLARE_EWMA() that declares the necessary struct and inline functions to access it with the parameters hard-coded; using this also means the user no longer needs to 'select AVERAGE' as it's entirely self-contained. In the mac80211 case, on x86-64, this actually slightly *reduces* code size, while also saving 80 bytes of runtime memory per sta. Signed-off-by: Johannes Berg --- include/linux/average.h | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'include') diff --git a/include/linux/average.h b/include/linux/average.h index c6028fd742c1..60f8226c5652 100644 --- a/include/linux/average.h +++ b/include/linux/average.h @@ -27,4 +27,43 @@ static inline unsigned long ewma_read(const struct ewma *avg) return avg->internal >> avg->factor; } +#define DECLARE_EWMA(name, _factor, _weight) \ + struct ewma_##name { \ + unsigned long internal; \ + }; \ + static inline void ewma_##name##_init(struct ewma_##name *e) \ + { \ + BUILD_BUG_ON(!__builtin_constant_p(_factor)); \ + BUILD_BUG_ON(!__builtin_constant_p(_weight)); \ + BUILD_BUG_ON_NOT_POWER_OF_2(_factor); \ + BUILD_BUG_ON_NOT_POWER_OF_2(_weight); \ + e->internal = 0; \ + } \ + static inline unsigned long \ + ewma_##name##_read(struct ewma_##name *e) \ + { \ + BUILD_BUG_ON(!__builtin_constant_p(_factor)); \ + BUILD_BUG_ON(!__builtin_constant_p(_weight)); \ + BUILD_BUG_ON_NOT_POWER_OF_2(_factor); \ + BUILD_BUG_ON_NOT_POWER_OF_2(_weight); \ + return e->internal >> ilog2(_factor); \ + } \ + static inline void ewma_##name##_add(struct ewma_##name *e, \ + unsigned long val) \ + { \ + unsigned long internal = ACCESS_ONCE(e->internal); \ + unsigned long weight = ilog2(_weight); \ + unsigned long factor = ilog2(_factor); \ + \ + BUILD_BUG_ON(!__builtin_constant_p(_factor)); \ + BUILD_BUG_ON(!__builtin_constant_p(_weight)); \ + BUILD_BUG_ON_NOT_POWER_OF_2(_factor); \ + BUILD_BUG_ON_NOT_POWER_OF_2(_weight); \ + \ + ACCESS_ONCE(e->internal) = internal ? \ + (((internal << weight) - internal) + \ + (val << factor)) >> weight : \ + (val << factor); \ + } + #endif /* _LINUX_AVERAGE_H */ -- cgit v1.2.3 From 8f9c98df949333f08b74e5df1caacf7e2c5e8552 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 19 Jul 2015 16:09:12 +0300 Subject: mac80211: fix BIT position for TDLS WIDE extended cap The bit was not according to ieee80211 specification. Fix that. Reviewed-by: Arik Nemtsov Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index b9c7897dc566..cfa906f28b7a 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2074,8 +2074,8 @@ enum ieee80211_tdls_actioncode { #define WLAN_EXT_CAPA5_TDLS_PROHIBITED BIT(6) #define WLAN_EXT_CAPA5_TDLS_CH_SW_PROHIBITED BIT(7) +#define WLAN_EXT_CAPA8_TDLS_WIDE_BW_ENABLED BIT(5) #define WLAN_EXT_CAPA8_OPMODE_NOTIF BIT(6) -#define WLAN_EXT_CAPA8_TDLS_WIDE_BW_ENABLED BIT(7) /* TDLS specific payload type in the LLC/SNAP header */ #define WLAN_TDLS_SNAP_RFTYPE 0x2 -- cgit v1.2.3 From fa8187c96471c49419c25d4ec3299d17d3f274b2 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 13 Aug 2015 19:01:06 +0200 Subject: net: declare new net_device priv_flag IFF_NO_QUEUE This private net_device flag can be set by drivers to inform that a device runs fine without a qdisc attached. This was formerly done by setting tx_queue_len to zero. Signed-off-by: Phil Sutter Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f7a6ef2fae3a..d131755516ca 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1262,6 +1262,7 @@ struct net_device_ops { * @IFF_LIVE_ADDR_CHANGE: device supports hardware address * change when it's running * @IFF_MACVLAN: Macvlan device + * @IFF_NO_QUEUE: device can run without qdisc attached */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1290,6 +1291,7 @@ enum netdev_priv_flags { IFF_IPVLAN_MASTER = 1<<23, IFF_IPVLAN_SLAVE = 1<<24, IFF_VRF_MASTER = 1<<25, + IFF_NO_QUEUE = 1<<26, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1318,6 +1320,7 @@ enum netdev_priv_flags { #define IFF_IPVLAN_MASTER IFF_IPVLAN_MASTER #define IFF_IPVLAN_SLAVE IFF_IPVLAN_SLAVE #define IFF_VRF_MASTER IFF_VRF_MASTER +#define IFF_NO_QUEUE IFF_NO_QUEUE /** * struct net_device - The DEVICE structure. -- cgit v1.2.3 From fbaff3ef859a86dc3df2128d2de9f8a6e255a967 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Thu, 13 Aug 2015 18:34:03 -0700 Subject: net: fix endian check warning in etherdevice.h Sparse builds have been warning for a really long time now that etherdevice.h has a conversion that is unsafe. include/linux/etherdevice.h:79:32: warning: restricted __be16 degrades to integer This code change fixes the issue and generates the exact same assembly before/after (checked on x86_64) Fixes: 2c722fe1c821 (etherdevice: Optimize a few is__ether_addr functions) Signed-off-by: Jesse Brandeburg CC: Joe Perches Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 9012f8775208..eb049c622208 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -76,7 +76,7 @@ static inline bool is_link_local_ether_addr(const u8 *addr) #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) return (((*(const u32 *)addr) ^ (*(const u32 *)b)) | - ((a[2] ^ b[2]) & m)) == 0; + (__force int)((a[2] ^ b[2]) & m)) == 0; #else return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | ((a[2] ^ b[2]) & m)) == 0; #endif -- cgit v1.2.3 From a1c234f95cae2d293047bb6c36e7a4840dbac815 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Fri, 14 Aug 2015 16:40:40 +0200 Subject: lwtunnel: rename ip lwtunnel attributes We already have IFLA_IPTUN_ netlink attributes. The IP_TUN_ attributes look very similar, yet they serve very different purpose. This is confusing for anyone trying to implement a user space tool supporting lwt. As the IP_TUN_ attributes are used only for the lightweight tunnels, prefix them with LWTUNNEL_IP_ instead to make their purpose clear. Also, it's more logical to have them in lwtunnel.h together with the encap enum. Fixes: 3093fbe7ff4b ("route: Per route IP tunnel metadata via lightweight tunnel") Signed-off-by: Jiri Benc Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/uapi/linux/lwtunnel.h | 14 +++++++ include/uapi/linux/rtnetlink.h | 15 -------- net/ipv4/ip_tunnel_core.c | 86 +++++++++++++++++++++--------------------- 3 files changed, 57 insertions(+), 58 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h index 31377bbea3f8..3bf223bc2367 100644 --- a/include/uapi/linux/lwtunnel.h +++ b/include/uapi/linux/lwtunnel.h @@ -12,5 +12,19 @@ enum lwtunnel_encap_types { #define LWTUNNEL_ENCAP_MAX (__LWTUNNEL_ENCAP_MAX - 1) +enum lwtunnel_ip_t { + LWTUNNEL_IP_UNSPEC, + LWTUNNEL_IP_ID, + LWTUNNEL_IP_DST, + LWTUNNEL_IP_SRC, + LWTUNNEL_IP_TTL, + LWTUNNEL_IP_TOS, + LWTUNNEL_IP_SPORT, + LWTUNNEL_IP_DPORT, + LWTUNNEL_IP_FLAGS, + __LWTUNNEL_IP_MAX, +}; + +#define LWTUNNEL_IP_MAX (__LWTUNNEL_IP_MAX - 1) #endif /* _UAPI_LWTUNNEL_H_ */ diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 47d24cb3fbc1..0d3d3cc43356 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -286,21 +286,6 @@ enum rt_class_t { /* Routing message attributes */ -enum ip_tunnel_t { - IP_TUN_UNSPEC, - IP_TUN_ID, - IP_TUN_DST, - IP_TUN_SRC, - IP_TUN_TTL, - IP_TUN_TOS, - IP_TUN_SPORT, - IP_TUN_DPORT, - IP_TUN_FLAGS, - __IP_TUN_MAX, -}; - -#define IP_TUN_MAX (__IP_TUN_MAX - 1) - enum rtattr_type_t { RTA_UNSPEC, RTA_DST, diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 5512f4e4ec1b..fd6319681c50 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -192,15 +192,15 @@ struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, } EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64); -static const struct nla_policy ip_tun_policy[IP_TUN_MAX + 1] = { - [IP_TUN_ID] = { .type = NLA_U64 }, - [IP_TUN_DST] = { .type = NLA_U32 }, - [IP_TUN_SRC] = { .type = NLA_U32 }, - [IP_TUN_TTL] = { .type = NLA_U8 }, - [IP_TUN_TOS] = { .type = NLA_U8 }, - [IP_TUN_SPORT] = { .type = NLA_U16 }, - [IP_TUN_DPORT] = { .type = NLA_U16 }, - [IP_TUN_FLAGS] = { .type = NLA_U16 }, +static const struct nla_policy ip_tun_policy[LWTUNNEL_IP_MAX + 1] = { + [LWTUNNEL_IP_ID] = { .type = NLA_U64 }, + [LWTUNNEL_IP_DST] = { .type = NLA_U32 }, + [LWTUNNEL_IP_SRC] = { .type = NLA_U32 }, + [LWTUNNEL_IP_TTL] = { .type = NLA_U8 }, + [LWTUNNEL_IP_TOS] = { .type = NLA_U8 }, + [LWTUNNEL_IP_SPORT] = { .type = NLA_U16 }, + [LWTUNNEL_IP_DPORT] = { .type = NLA_U16 }, + [LWTUNNEL_IP_FLAGS] = { .type = NLA_U16 }, }; static int ip_tun_build_state(struct net_device *dev, struct nlattr *attr, @@ -208,10 +208,10 @@ static int ip_tun_build_state(struct net_device *dev, struct nlattr *attr, { struct ip_tunnel_info *tun_info; struct lwtunnel_state *new_state; - struct nlattr *tb[IP_TUN_MAX + 1]; + struct nlattr *tb[LWTUNNEL_IP_MAX + 1]; int err; - err = nla_parse_nested(tb, IP_TUN_MAX, attr, ip_tun_policy); + err = nla_parse_nested(tb, LWTUNNEL_IP_MAX, attr, ip_tun_policy); if (err < 0) return err; @@ -223,29 +223,29 @@ static int ip_tun_build_state(struct net_device *dev, struct nlattr *attr, tun_info = lwt_tun_info(new_state); - if (tb[IP_TUN_ID]) - tun_info->key.tun_id = nla_get_u64(tb[IP_TUN_ID]); + if (tb[LWTUNNEL_IP_ID]) + tun_info->key.tun_id = nla_get_u64(tb[LWTUNNEL_IP_ID]); - if (tb[IP_TUN_DST]) - tun_info->key.ipv4_dst = nla_get_be32(tb[IP_TUN_DST]); + if (tb[LWTUNNEL_IP_DST]) + tun_info->key.ipv4_dst = nla_get_be32(tb[LWTUNNEL_IP_DST]); - if (tb[IP_TUN_SRC]) - tun_info->key.ipv4_src = nla_get_be32(tb[IP_TUN_SRC]); + if (tb[LWTUNNEL_IP_SRC]) + tun_info->key.ipv4_src = nla_get_be32(tb[LWTUNNEL_IP_SRC]); - if (tb[IP_TUN_TTL]) - tun_info->key.ipv4_ttl = nla_get_u8(tb[IP_TUN_TTL]); + if (tb[LWTUNNEL_IP_TTL]) + tun_info->key.ipv4_ttl = nla_get_u8(tb[LWTUNNEL_IP_TTL]); - if (tb[IP_TUN_TOS]) - tun_info->key.ipv4_tos = nla_get_u8(tb[IP_TUN_TOS]); + if (tb[LWTUNNEL_IP_TOS]) + tun_info->key.ipv4_tos = nla_get_u8(tb[LWTUNNEL_IP_TOS]); - if (tb[IP_TUN_SPORT]) - tun_info->key.tp_src = nla_get_be16(tb[IP_TUN_SPORT]); + if (tb[LWTUNNEL_IP_SPORT]) + tun_info->key.tp_src = nla_get_be16(tb[LWTUNNEL_IP_SPORT]); - if (tb[IP_TUN_DPORT]) - tun_info->key.tp_dst = nla_get_be16(tb[IP_TUN_DPORT]); + if (tb[LWTUNNEL_IP_DPORT]) + tun_info->key.tp_dst = nla_get_be16(tb[LWTUNNEL_IP_DPORT]); - if (tb[IP_TUN_FLAGS]) - tun_info->key.tun_flags = nla_get_u16(tb[IP_TUN_FLAGS]); + if (tb[LWTUNNEL_IP_FLAGS]) + tun_info->key.tun_flags = nla_get_u16(tb[LWTUNNEL_IP_FLAGS]); tun_info->mode = IP_TUNNEL_INFO_TX; tun_info->options = NULL; @@ -261,14 +261,14 @@ static int ip_tun_fill_encap_info(struct sk_buff *skb, { struct ip_tunnel_info *tun_info = lwt_tun_info(lwtstate); - if (nla_put_u64(skb, IP_TUN_ID, tun_info->key.tun_id) || - nla_put_be32(skb, IP_TUN_DST, tun_info->key.ipv4_dst) || - nla_put_be32(skb, IP_TUN_SRC, tun_info->key.ipv4_src) || - nla_put_u8(skb, IP_TUN_TOS, tun_info->key.ipv4_tos) || - nla_put_u8(skb, IP_TUN_TTL, tun_info->key.ipv4_ttl) || - nla_put_u16(skb, IP_TUN_SPORT, tun_info->key.tp_src) || - nla_put_u16(skb, IP_TUN_DPORT, tun_info->key.tp_dst) || - nla_put_u16(skb, IP_TUN_FLAGS, tun_info->key.tun_flags)) + if (nla_put_u64(skb, LWTUNNEL_IP_ID, tun_info->key.tun_id) || + nla_put_be32(skb, LWTUNNEL_IP_DST, tun_info->key.ipv4_dst) || + nla_put_be32(skb, LWTUNNEL_IP_SRC, tun_info->key.ipv4_src) || + nla_put_u8(skb, LWTUNNEL_IP_TOS, tun_info->key.ipv4_tos) || + nla_put_u8(skb, LWTUNNEL_IP_TTL, tun_info->key.ipv4_ttl) || + nla_put_u16(skb, LWTUNNEL_IP_SPORT, tun_info->key.tp_src) || + nla_put_u16(skb, LWTUNNEL_IP_DPORT, tun_info->key.tp_dst) || + nla_put_u16(skb, LWTUNNEL_IP_FLAGS, tun_info->key.tun_flags)) return -ENOMEM; return 0; @@ -276,14 +276,14 @@ static int ip_tun_fill_encap_info(struct sk_buff *skb, static int ip_tun_encap_nlsize(struct lwtunnel_state *lwtstate) { - return nla_total_size(8) /* IP_TUN_ID */ - + nla_total_size(4) /* IP_TUN_DST */ - + nla_total_size(4) /* IP_TUN_SRC */ - + nla_total_size(1) /* IP_TUN_TOS */ - + nla_total_size(1) /* IP_TUN_TTL */ - + nla_total_size(2) /* IP_TUN_SPORT */ - + nla_total_size(2) /* IP_TUN_DPORT */ - + nla_total_size(2); /* IP_TUN_FLAGS */ + return nla_total_size(8) /* LWTUNNEL_IP_ID */ + + nla_total_size(4) /* LWTUNNEL_IP_DST */ + + nla_total_size(4) /* LWTUNNEL_IP_SRC */ + + nla_total_size(1) /* LWTUNNEL_IP_TOS */ + + nla_total_size(1) /* LWTUNNEL_IP_TTL */ + + nla_total_size(2) /* LWTUNNEL_IP_SPORT */ + + nla_total_size(2) /* LWTUNNEL_IP_DPORT */ + + nla_total_size(2); /* LWTUNNEL_IP_FLAGS */ } static const struct lwtunnel_encap_ops ip_tun_lwt_ops = { -- cgit v1.2.3 From 47dceb8ecdc1c3ad1818dfea3d659a05b74c3fc2 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 14 Aug 2015 22:31:34 -0400 Subject: packet: add classic BPF fanout mode Add fanout mode PACKET_FANOUT_CBPF that accepts a classic BPF program to select a socket. This avoids having to keep adding special case fanout modes. One example use case is application layer load balancing. The QUIC protocol, for instance, encodes a connection ID in UDP payload. Also add socket option SOL_PACKET/PACKET_FANOUT_DATA that updates data associated with the socket group. Fanout mode PACKET_FANOUT_CBPF is the only user so far. Signed-off-by: Willem de Bruijn Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/if_packet.h | 2 + net/packet/af_packet.c | 99 +++++++++++++++++++++++++++++++++++++++++- net/packet/internal.h | 5 ++- 3 files changed, 104 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index d3d715f8c88f..a4bb16fa822e 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -55,6 +55,7 @@ struct sockaddr_ll { #define PACKET_TX_HAS_OFF 19 #define PACKET_QDISC_BYPASS 20 #define PACKET_ROLLOVER_STATS 21 +#define PACKET_FANOUT_DATA 22 #define PACKET_FANOUT_HASH 0 #define PACKET_FANOUT_LB 1 @@ -62,6 +63,7 @@ struct sockaddr_ll { #define PACKET_FANOUT_ROLLOVER 3 #define PACKET_FANOUT_RND 4 #define PACKET_FANOUT_QM 5 +#define PACKET_FANOUT_CBPF 6 #define PACKET_FANOUT_FLAG_ROLLOVER 0x1000 #define PACKET_FANOUT_FLAG_DEFRAG 0x8000 diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index b5afe538bb88..8869d07013e6 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -92,6 +92,7 @@ #ifdef CONFIG_INET #include #endif +#include #include "internal.h" @@ -1410,6 +1411,22 @@ static unsigned int fanout_demux_qm(struct packet_fanout *f, return skb_get_queue_mapping(skb) % num; } +static unsigned int fanout_demux_bpf(struct packet_fanout *f, + struct sk_buff *skb, + unsigned int num) +{ + struct bpf_prog *prog; + unsigned int ret = 0; + + rcu_read_lock(); + prog = rcu_dereference(f->bpf_prog); + if (prog) + ret = BPF_PROG_RUN(prog, skb) % num; + rcu_read_unlock(); + + return ret; +} + static bool fanout_has_flag(struct packet_fanout *f, u16 flag) { return f->flags & (flag >> 8); @@ -1454,6 +1471,9 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, case PACKET_FANOUT_ROLLOVER: idx = fanout_demux_rollover(f, skb, 0, false, num); break; + case PACKET_FANOUT_CBPF: + idx = fanout_demux_bpf(f, skb, num); + break; } if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER)) @@ -1502,6 +1522,74 @@ static bool match_fanout_group(struct packet_type *ptype, struct sock *sk) return false; } +static void fanout_init_data(struct packet_fanout *f) +{ + switch (f->type) { + case PACKET_FANOUT_LB: + atomic_set(&f->rr_cur, 0); + break; + case PACKET_FANOUT_CBPF: + RCU_INIT_POINTER(f->bpf_prog, NULL); + break; + } +} + +static void __fanout_set_data_bpf(struct packet_fanout *f, struct bpf_prog *new) +{ + struct bpf_prog *old; + + spin_lock(&f->lock); + old = rcu_dereference_protected(f->bpf_prog, lockdep_is_held(&f->lock)); + rcu_assign_pointer(f->bpf_prog, new); + spin_unlock(&f->lock); + + if (old) { + synchronize_net(); + bpf_prog_destroy(old); + } +} + +static int fanout_set_data_cbpf(struct packet_sock *po, char __user *data, + unsigned int len) +{ + struct bpf_prog *new; + struct sock_fprog fprog; + int ret; + + if (sock_flag(&po->sk, SOCK_FILTER_LOCKED)) + return -EPERM; + if (len != sizeof(fprog)) + return -EINVAL; + if (copy_from_user(&fprog, data, len)) + return -EFAULT; + + ret = bpf_prog_create_from_user(&new, &fprog, NULL); + if (ret) + return ret; + + __fanout_set_data_bpf(po->fanout, new); + return 0; +} + +static int fanout_set_data(struct packet_sock *po, char __user *data, + unsigned int len) +{ + switch (po->fanout->type) { + case PACKET_FANOUT_CBPF: + return fanout_set_data_cbpf(po, data, len); + default: + return -EINVAL; + }; +} + +static void fanout_release_data(struct packet_fanout *f) +{ + switch (f->type) { + case PACKET_FANOUT_CBPF: + __fanout_set_data_bpf(f, NULL); + }; +} + static int fanout_add(struct sock *sk, u16 id, u16 type_flags) { struct packet_sock *po = pkt_sk(sk); @@ -1519,6 +1607,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) case PACKET_FANOUT_CPU: case PACKET_FANOUT_RND: case PACKET_FANOUT_QM: + case PACKET_FANOUT_CBPF: break; default: return -EINVAL; @@ -1561,10 +1650,10 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) match->id = id; match->type = type; match->flags = flags; - atomic_set(&match->rr_cur, 0); INIT_LIST_HEAD(&match->list); spin_lock_init(&match->lock); atomic_set(&match->sk_ref, 0); + fanout_init_data(match); match->prot_hook.type = po->prot_hook.type; match->prot_hook.dev = po->prot_hook.dev; match->prot_hook.func = packet_rcv_fanout; @@ -1610,6 +1699,7 @@ static void fanout_release(struct sock *sk) if (atomic_dec_and_test(&f->sk_ref)) { list_del(&f->list); dev_remove_pack(&f->prot_hook); + fanout_release_data(f); kfree(f); } mutex_unlock(&fanout_mutex); @@ -3529,6 +3619,13 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv return fanout_add(sk, val & 0xffff, val >> 16); } + case PACKET_FANOUT_DATA: + { + if (!po->fanout) + return -EINVAL; + + return fanout_set_data(po, optval, optlen); + } case PACKET_TX_HAS_OFF: { unsigned int val; diff --git a/net/packet/internal.h b/net/packet/internal.h index e20b3e8829b8..9ee46314b7d7 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -79,7 +79,10 @@ struct packet_fanout { u16 id; u8 type; u8 flags; - atomic_t rr_cur; + union { + atomic_t rr_cur; + struct bpf_prog __rcu *bpf_prog; + }; struct list_head list; struct sock *arr[PACKET_FANOUT_MAX]; spinlock_t lock; -- cgit v1.2.3 From f2e520956a1ab636698f8160194c9b8ac0989aab Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 14 Aug 2015 22:31:35 -0400 Subject: packet: add extended BPF fanout mode Add fanout mode PACKET_FANOUT_EBPF that accepts an en extended BPF program to select a socket. Update the internal eBPF program by passing to socket option SOL_PACKET/PACKET_FANOUT_DATA a file descriptor returned by bpf(). Signed-off-by: Willem de Bruijn Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/if_packet.h | 1 + net/packet/af_packet.c | 31 +++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index a4bb16fa822e..9e7edfd8141e 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -64,6 +64,7 @@ struct sockaddr_ll { #define PACKET_FANOUT_RND 4 #define PACKET_FANOUT_QM 5 #define PACKET_FANOUT_CBPF 6 +#define PACKET_FANOUT_EBPF 7 #define PACKET_FANOUT_FLAG_ROLLOVER 0x1000 #define PACKET_FANOUT_FLAG_DEFRAG 0x8000 diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8869d07013e6..7b8e39a22387 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1472,6 +1472,7 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, idx = fanout_demux_rollover(f, skb, 0, false, num); break; case PACKET_FANOUT_CBPF: + case PACKET_FANOUT_EBPF: idx = fanout_demux_bpf(f, skb, num); break; } @@ -1529,6 +1530,7 @@ static void fanout_init_data(struct packet_fanout *f) atomic_set(&f->rr_cur, 0); break; case PACKET_FANOUT_CBPF: + case PACKET_FANOUT_EBPF: RCU_INIT_POINTER(f->bpf_prog, NULL); break; } @@ -1571,12 +1573,39 @@ static int fanout_set_data_cbpf(struct packet_sock *po, char __user *data, return 0; } +static int fanout_set_data_ebpf(struct packet_sock *po, char __user *data, + unsigned int len) +{ + struct bpf_prog *new; + u32 fd; + + if (sock_flag(&po->sk, SOCK_FILTER_LOCKED)) + return -EPERM; + if (len != sizeof(fd)) + return -EINVAL; + if (copy_from_user(&fd, data, len)) + return -EFAULT; + + new = bpf_prog_get(fd); + if (IS_ERR(new)) + return PTR_ERR(new); + if (new->type != BPF_PROG_TYPE_SOCKET_FILTER) { + bpf_prog_put(new); + return -EINVAL; + } + + __fanout_set_data_bpf(po->fanout, new); + return 0; +} + static int fanout_set_data(struct packet_sock *po, char __user *data, unsigned int len) { switch (po->fanout->type) { case PACKET_FANOUT_CBPF: return fanout_set_data_cbpf(po, data, len); + case PACKET_FANOUT_EBPF: + return fanout_set_data_ebpf(po, data, len); default: return -EINVAL; }; @@ -1586,6 +1615,7 @@ static void fanout_release_data(struct packet_fanout *f) { switch (f->type) { case PACKET_FANOUT_CBPF: + case PACKET_FANOUT_EBPF: __fanout_set_data_bpf(f, NULL); }; } @@ -1608,6 +1638,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) case PACKET_FANOUT_RND: case PACKET_FANOUT_QM: case PACKET_FANOUT_CBPF: + case PACKET_FANOUT_EBPF: break; default: return -EINVAL; -- cgit v1.2.3 From 6fa1bcab6be6e9bd93f80e345c7e9a4ec7861df9 Mon Sep 17 00:00:00 2001 From: Achiad Shochat Date: Sun, 16 Aug 2015 16:04:50 +0300 Subject: net/mlx5e: Ethtool link speed setting fixes - Port speed settings are applied by the device only upon port admin status transition from DOWN to UP. So we enforce this transition regardless of the port's current operation state (which may be occasionally DOWN if for example the network cable is disconnected). - Fix the PORT_UP/DOWN device interface enum - Set the local_port bit in the device PAOS register - EXPORT the PAOS (Port Administrative and Operational Status) register set/query access functions. Signed-off-by: Achiad Shochat Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 25 ++++++---------------- drivers/net/ethernet/mellanox/mlx5/core/port.c | 14 ++++++++---- include/linux/mlx5/driver.h | 11 +++++----- 3 files changed, 23 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 975828521913..77158b31eb0b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -628,7 +628,7 @@ static int mlx5e_set_settings(struct net_device *netdev, u32 link_modes; u32 speed; u32 eth_proto_cap, eth_proto_admin; - u8 port_status; + enum mlx5_port_status ps; int err; speed = ethtool_cmd_speed(cmd); @@ -662,24 +662,13 @@ static int mlx5e_set_settings(struct net_device *netdev, if (link_modes == eth_proto_admin) goto out; - err = mlx5_set_port_proto(mdev, link_modes, MLX5_PTYS_EN); - if (err) { - netdev_err(netdev, "%s: set port eth proto admin failed: %d\n", - __func__, err); - goto out; - } + mlx5_query_port_admin_status(mdev, &ps); + if (ps == MLX5_PORT_UP) + mlx5_set_port_admin_status(mdev, MLX5_PORT_DOWN); + mlx5_set_port_proto(mdev, link_modes, MLX5_PTYS_EN); + if (ps == MLX5_PORT_UP) + mlx5_set_port_admin_status(mdev, MLX5_PORT_UP); - err = mlx5_query_port_status(mdev, &port_status); - if (err) - goto out; - - if (port_status == MLX5_PORT_DOWN) - return 0; - - err = mlx5_set_port_status(mdev, MLX5_PORT_DOWN); - if (err) - goto out; - err = mlx5_set_port_status(mdev, MLX5_PORT_UP); out: return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 70147999f657..f8db52bd8f18 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -216,22 +216,25 @@ int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin, } EXPORT_SYMBOL_GPL(mlx5_set_port_proto); -int mlx5_set_port_status(struct mlx5_core_dev *dev, - enum mlx5_port_status status) +int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, + enum mlx5_port_status status) { u32 in[MLX5_ST_SZ_DW(paos_reg)]; u32 out[MLX5_ST_SZ_DW(paos_reg)]; memset(in, 0, sizeof(in)); + MLX5_SET(paos_reg, in, local_port, 1); MLX5_SET(paos_reg, in, admin_status, status); MLX5_SET(paos_reg, in, ase, 1); return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_PAOS, 0, 1); } +EXPORT_SYMBOL_GPL(mlx5_set_port_admin_status); -int mlx5_query_port_status(struct mlx5_core_dev *dev, u8 *status) +int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, + enum mlx5_port_status *status) { u32 in[MLX5_ST_SZ_DW(paos_reg)]; u32 out[MLX5_ST_SZ_DW(paos_reg)]; @@ -239,14 +242,17 @@ int mlx5_query_port_status(struct mlx5_core_dev *dev, u8 *status) memset(in, 0, sizeof(in)); + MLX5_SET(paos_reg, in, local_port, 1); + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_PAOS, 0, 0); if (err) return err; - *status = MLX5_GET(paos_reg, out, oper_status); + *status = MLX5_GET(paos_reg, out, admin_status); return err; } +EXPORT_SYMBOL_GPL(mlx5_query_port_admin_status); static void mlx5_query_port_mtu(struct mlx5_core_dev *dev, int *admin_mtu, int *max_mtu, int *oper_mtu, u8 port) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 2039546b0ec6..4b5d7fc88d0f 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -152,8 +152,8 @@ enum mlx5_dev_event { }; enum mlx5_port_status { - MLX5_PORT_UP = 1 << 1, - MLX5_PORT_DOWN = 1 << 2, + MLX5_PORT_UP = 1, + MLX5_PORT_DOWN = 2, }; struct mlx5_uuar_info { @@ -761,9 +761,10 @@ int mlx5_query_port_proto_oper(struct mlx5_core_dev *dev, u8 local_port); int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin, int proto_mask); -int mlx5_set_port_status(struct mlx5_core_dev *dev, - enum mlx5_port_status status); -int mlx5_query_port_status(struct mlx5_core_dev *dev, u8 *status); +int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, + enum mlx5_port_status status); +int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, + enum mlx5_port_status *status); int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu, u8 port); void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, u8 port); -- cgit v1.2.3 From 3c2d18ef22df1bdccfb11a5b85b29e4e61b9d9c6 Mon Sep 17 00:00:00 2001 From: Achiad Shochat Date: Sun, 16 Aug 2015 16:04:51 +0300 Subject: net/mlx5e: Support ethtool get/set_pauseparam Only rx/tx pause settings. Autoneg setting is currently not supported. Signed-off-by: Achiad Shochat Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 38 ++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/port.c | 42 ++++++++++++++++++++++ include/linux/mlx5/driver.h | 5 +++ 3 files changed, 85 insertions(+) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 77158b31eb0b..bce912688ca8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -820,6 +820,42 @@ static int mlx5e_set_tunable(struct net_device *dev, return err; } +static void mlx5e_get_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pauseparam) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + err = mlx5_query_port_pause(mdev, &pauseparam->rx_pause, + &pauseparam->tx_pause); + if (err) { + netdev_err(netdev, "%s: mlx5_query_port_pause failed:0x%x\n", + __func__, err); + } +} + +static int mlx5e_set_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pauseparam) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + if (pauseparam->autoneg) + return -EINVAL; + + err = mlx5_set_port_pause(mdev, + pauseparam->rx_pause ? 1 : 0, + pauseparam->tx_pause ? 1 : 0); + if (err) { + netdev_err(netdev, "%s: mlx5_set_port_pause failed:0x%x\n", + __func__, err); + } + + return err; +} + const struct ethtool_ops mlx5e_ethtool_ops = { .get_drvinfo = mlx5e_get_drvinfo, .get_link = ethtool_op_get_link, @@ -841,4 +877,6 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .get_rxnfc = mlx5e_get_rxnfc, .get_tunable = mlx5e_get_tunable, .set_tunable = mlx5e_set_tunable, + .get_pauseparam = mlx5e_get_pauseparam, + .set_pauseparam = mlx5e_set_pauseparam, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index f8db52bd8f18..821caaab9bfb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -334,3 +334,45 @@ int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev, return 0; } EXPORT_SYMBOL_GPL(mlx5_query_port_vl_hw_cap); + +int mlx5_set_port_pause(struct mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause) +{ + u32 in[MLX5_ST_SZ_DW(pfcc_reg)]; + u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; + int err; + + memset(in, 0, sizeof(in)); + MLX5_SET(pfcc_reg, in, local_port, 1); + MLX5_SET(pfcc_reg, in, pptx, tx_pause); + MLX5_SET(pfcc_reg, in, pprx, rx_pause); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PFCC, 0, 1); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_set_port_pause); + +int mlx5_query_port_pause(struct mlx5_core_dev *dev, + u32 *rx_pause, u32 *tx_pause) +{ + u32 in[MLX5_ST_SZ_DW(pfcc_reg)]; + u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; + int err; + + memset(in, 0, sizeof(in)); + MLX5_SET(pfcc_reg, in, local_port, 1); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PFCC, 0, 0); + if (err) + return err; + + if (rx_pause) + *rx_pause = MLX5_GET(pfcc_reg, out, pprx); + + if (tx_pause) + *tx_pause = MLX5_GET(pfcc_reg, out, pptx); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_pause); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 4b5d7fc88d0f..8b6d6f2154a4 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -103,6 +103,7 @@ enum { MLX5_REG_PMTU = 0x5003, MLX5_REG_PTYS = 0x5004, MLX5_REG_PAOS = 0x5006, + MLX5_REG_PFCC = 0x5007, MLX5_REG_PPCNT = 0x5008, MLX5_REG_PMAOS = 0x5012, MLX5_REG_PUDE = 0x5009, @@ -774,6 +775,10 @@ void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu, int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev, u8 *vl_hw_cap, u8 local_port); +int mlx5_set_port_pause(struct mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause); +int mlx5_query_port_pause(struct mlx5_core_dev *dev, + u32 *rx_pause, u32 *tx_pause); + int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, -- cgit v1.2.3 From 2f52bdcf6ba1bd81597b1505a222430676548b3a Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 16 Aug 2015 07:49:20 -0600 Subject: net: Updates to netif_index_is_vrf As Eric noted netif_index_is_vrf is not called with rcu_read_lock held, so wrap the dev_get_by_index_rcu in rcu_read_lock and unlock. If VRF is not enabled or oif is 0 skip the device lookup. In both cases index cannot be the VRF master. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/netdevice.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d131755516ca..8a530f930754 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3822,12 +3822,22 @@ static inline bool netif_is_vrf(const struct net_device *dev) static inline bool netif_index_is_vrf(struct net *net, int ifindex) { - struct net_device *dev = dev_get_by_index_rcu(net, ifindex); bool rc = false; +#if IS_ENABLED(CONFIG_NET_VRF) + struct net_device *dev; + + if (ifindex == 0) + return false; + + rcu_read_lock(); + + dev = dev_get_by_index_rcu(net, ifindex); if (dev) rc = netif_is_vrf(dev); + rcu_read_unlock(); +#endif return rc; } -- cgit v1.2.3 From 808d28c468a89e8680396d98aea96024b6f5afdc Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 16 Aug 2015 10:26:49 -0600 Subject: net: Fix docbook warning for IFF_VRF_MASTER enum kbuild test robot reported: tree: git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git master head: d52736e24fe2e927c26817256f8d1a3c8b5d51a0 commit: 4e3c89920cd3a6cfce22c6f537690747c26128dd [751/762] net: Introduce VRF related flags and helpers reproduce: make htmldocs >> Warning(include/linux/netdevice.h:1293): Enum value 'IFF_VRF_MASTER' not described in enum 'netdev_priv_flags' Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8a530f930754..4bd177faa90c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1262,6 +1262,7 @@ struct net_device_ops { * @IFF_LIVE_ADDR_CHANGE: device supports hardware address * change when it's running * @IFF_MACVLAN: Macvlan device + * @IFF_VRF_MASTER: device is a VRF master * @IFF_NO_QUEUE: device can run without qdisc attached */ enum netdev_priv_flags { -- cgit v1.2.3 From dc028da54ed353edd44dca88b7eb19fd5126c354 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 16 Aug 2015 17:13:27 -0600 Subject: inet: Move VRF table lookup to inlined function Table lookup compiles out when VRF is not enabled. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/vrf.h | 24 ++++++++++++++++++++++++ net/ipv4/af_inet.c | 10 +--------- 2 files changed, 25 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/vrf.h b/include/net/vrf.h index 0484d29d4589..40e3793c7a05 100644 --- a/include/net/vrf.h +++ b/include/net/vrf.h @@ -81,6 +81,25 @@ static inline int vrf_dev_table(const struct net_device *dev) return tb_id; } +static inline int vrf_dev_table_ifindex(struct net *net, int ifindex) +{ + struct net_device *dev; + int tb_id = 0; + + if (!ifindex) + return 0; + + rcu_read_lock(); + + dev = dev_get_by_index_rcu(net, ifindex); + if (dev) + tb_id = vrf_dev_table_rcu(dev); + + rcu_read_unlock(); + + return tb_id; +} + /* called with rtnl */ static inline int vrf_dev_table_rtnl(const struct net_device *dev) { @@ -125,6 +144,11 @@ static inline int vrf_dev_table(const struct net_device *dev) return 0; } +static inline int vrf_dev_table_ifindex(struct net *net, int ifindex) +{ + return 0; +} + static inline int vrf_dev_table_rtnl(const struct net_device *dev) { return 0; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index c8b855882fa5..675e88cac2b4 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -450,15 +450,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; } - if (sk->sk_bound_dev_if) { - struct net_device *dev; - - rcu_read_lock(); - dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); - if (dev) - tb_id = vrf_dev_table_rcu(dev) ? : tb_id; - rcu_read_unlock(); - } + tb_id = vrf_dev_table_ifindex(net, sk->sk_bound_dev_if) ? : tb_id; chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id); /* Not specified by any standard per-se, however it breaks too -- cgit v1.2.3 From 2536862311d2276454ddef9dc36d6551a4b400fd Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 17 Aug 2015 13:42:24 -0700 Subject: lwt: Add support to redirect dst.input This patch adds the capability to redirect dst input in the same way that dst output is redirected by LWT. Also, save the original dst.input and and dst.out when setting up lwtunnel redirection. These can be called by the client as a pass- through. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/lwtunnel.h | 30 ++++++++++++++++++++++++++- net/core/lwtunnel.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/route.c | 8 +++++++- net/ipv6/route.c | 8 +++++++- 4 files changed, 98 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index 33bd30963a95..e25b60eb262d 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -11,12 +11,15 @@ #define LWTUNNEL_HASH_SIZE (1 << LWTUNNEL_HASH_BITS) /* lw tunnel state flags */ -#define LWTUNNEL_STATE_OUTPUT_REDIRECT 0x1 +#define LWTUNNEL_STATE_OUTPUT_REDIRECT BIT(0) +#define LWTUNNEL_STATE_INPUT_REDIRECT BIT(1) struct lwtunnel_state { __u16 type; __u16 flags; atomic_t refcnt; + int (*orig_output)(struct sock *sk, struct sk_buff *skb); + int (*orig_input)(struct sk_buff *); int len; __u8 data[0]; }; @@ -25,6 +28,7 @@ struct lwtunnel_encap_ops { int (*build_state)(struct net_device *dev, struct nlattr *encap, struct lwtunnel_state **ts); int (*output)(struct sock *sk, struct sk_buff *skb); + int (*input)(struct sk_buff *skb); int (*fill_encap)(struct sk_buff *skb, struct lwtunnel_state *lwtstate); int (*get_encap_size)(struct lwtunnel_state *lwtstate); @@ -58,6 +62,13 @@ static inline bool lwtunnel_output_redirect(struct lwtunnel_state *lwtstate) return false; } +static inline bool lwtunnel_input_redirect(struct lwtunnel_state *lwtstate) +{ + if (lwtstate && (lwtstate->flags & LWTUNNEL_STATE_INPUT_REDIRECT)) + return true; + + return false; +} int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op, unsigned int num); int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op, @@ -72,6 +83,8 @@ struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len); int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b); int lwtunnel_output(struct sock *sk, struct sk_buff *skb); int lwtunnel_output6(struct sock *sk, struct sk_buff *skb); +int lwtunnel_input(struct sk_buff *skb); +int lwtunnel_input6(struct sk_buff *skb); #else @@ -90,6 +103,11 @@ static inline bool lwtunnel_output_redirect(struct lwtunnel_state *lwtstate) return false; } +static inline bool lwtunnel_input_redirect(struct lwtunnel_state *lwtstate) +{ + return false; +} + static inline int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op, unsigned int num) { @@ -142,6 +160,16 @@ static inline int lwtunnel_output6(struct sock *sk, struct sk_buff *skb) return -EOPNOTSUPP; } +static inline int lwtunnel_input(struct sk_buff *skb) +{ + return -EOPNOTSUPP; +} + +static inline int lwtunnel_input6(struct sk_buff *skb) +{ + return -EOPNOTSUPP; +} + #endif #endif /* __NET_LWTUNNEL_H */ diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 5d6d8e3d450a..3331585174d9 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -241,3 +241,58 @@ int lwtunnel_output(struct sock *sk, struct sk_buff *skb) return __lwtunnel_output(sk, skb, lwtstate); } EXPORT_SYMBOL(lwtunnel_output); + +int __lwtunnel_input(struct sk_buff *skb, + struct lwtunnel_state *lwtstate) +{ + const struct lwtunnel_encap_ops *ops; + int ret = -EINVAL; + + if (!lwtstate) + goto drop; + + if (lwtstate->type == LWTUNNEL_ENCAP_NONE || + lwtstate->type > LWTUNNEL_ENCAP_MAX) + return 0; + + ret = -EOPNOTSUPP; + rcu_read_lock(); + ops = rcu_dereference(lwtun_encaps[lwtstate->type]); + if (likely(ops && ops->input)) + ret = ops->input(skb); + rcu_read_unlock(); + + if (ret == -EOPNOTSUPP) + goto drop; + + return ret; + +drop: + kfree_skb(skb); + + return ret; +} + +int lwtunnel_input6(struct sk_buff *skb) +{ + struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); + struct lwtunnel_state *lwtstate = NULL; + + if (rt) + lwtstate = rt->rt6i_lwtstate; + + return __lwtunnel_input(skb, lwtstate); +} +EXPORT_SYMBOL(lwtunnel_input6); + +int lwtunnel_input(struct sk_buff *skb) +{ + struct rtable *rt = (struct rtable *)skb_dst(skb); + struct lwtunnel_state *lwtstate = NULL; + + if (rt) + lwtstate = rt->rt_lwtstate; + + return __lwtunnel_input(skb, lwtstate); +} +EXPORT_SYMBOL(lwtunnel_input); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 2c89d294b669..2403e85107f0 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1631,8 +1631,14 @@ static int __mkroute_input(struct sk_buff *skb, rth->dst.output = ip_output; rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag); - if (lwtunnel_output_redirect(rth->rt_lwtstate)) + if (lwtunnel_output_redirect(rth->rt_lwtstate)) { + rth->rt_lwtstate->orig_output = rth->dst.output; rth->dst.output = lwtunnel_output; + } + if (lwtunnel_input_redirect(rth->rt_lwtstate)) { + rth->rt_lwtstate->orig_input = rth->dst.input; + rth->dst.input = lwtunnel_input; + } skb_dst_set(skb, &rth->dst); out: err = 0; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 1c0217e61357..c3733049715e 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1785,8 +1785,14 @@ int ip6_route_add(struct fib6_config *cfg) if (err) goto out; rt->rt6i_lwtstate = lwtstate_get(lwtstate); - if (lwtunnel_output_redirect(rt->rt6i_lwtstate)) + if (lwtunnel_output_redirect(rt->rt6i_lwtstate)) { + rt->rt6i_lwtstate->orig_output = rt->dst.output; rt->dst.output = lwtunnel_output6; + } + if (lwtunnel_input_redirect(rt->rt6i_lwtstate)) { + rt->rt6i_lwtstate->orig_input = rt->dst.input; + rt->dst.input = lwtunnel_input6; + } } ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); -- cgit v1.2.3 From 4b048d6d9d0b0b90e1e94f2393796bbf1fa8df4e Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 17 Aug 2015 13:42:25 -0700 Subject: net: Change pseudohdr argument of inet_proto_csum_replace* to be a bool inet_proto_csum_replace4,2,16 take a pseudohdr argument which indicates the checksum field carries a pseudo header. This argument should be a boolean instead of an int. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/checksum.h | 6 +++--- net/core/filter.c | 2 +- net/core/utils.c | 4 ++-- net/ipv4/netfilter/ipt_ECN.c | 2 +- net/ipv4/netfilter/nf_nat_l3proto_ipv4.c | 4 ++-- net/ipv4/netfilter/nf_nat_proto_icmp.c | 2 +- net/ipv6/netfilter/nf_nat_l3proto_ipv6.c | 4 ++-- net/ipv6/netfilter/nf_nat_proto_icmpv6.c | 2 +- net/netfilter/nf_conntrack_seqadj.c | 9 +++++---- net/netfilter/nf_nat_proto_dccp.c | 2 +- net/netfilter/nf_nat_proto_tcp.c | 2 +- net/netfilter/nf_nat_proto_udp.c | 2 +- net/netfilter/nf_nat_proto_udplite.c | 2 +- net/netfilter/nf_synproxy_core.c | 2 +- net/netfilter/xt_TCPMSS.c | 8 ++++---- net/netfilter/xt_TCPOPTSTRIP.c | 2 +- net/openvswitch/actions.c | 12 ++++++------ net/sched/act_nat.c | 7 ++++--- 18 files changed, 38 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/include/net/checksum.h b/include/net/checksum.h index 2d1d73cb773e..619f3445d57e 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -140,14 +140,14 @@ static inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new) struct sk_buff; void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, - __be32 from, __be32 to, int pseudohdr); + __be32 from, __be32 to, bool pseudohdr); void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, const __be32 *from, const __be32 *to, - int pseudohdr); + bool pseudohdr); static inline void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb, __be16 from, __be16 to, - int pseudohdr) + bool pseudohdr) { inet_proto_csum_replace4(sum, skb, (__force __be32)from, (__force __be32)to, pseudohdr); diff --git a/net/core/filter.c b/net/core/filter.c index f8184222465e..83f08cefeab7 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1349,7 +1349,7 @@ const struct bpf_func_proto bpf_l3_csum_replace_proto = { static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) { struct sk_buff *skb = (struct sk_buff *) (long) r1; - u32 is_pseudo = BPF_IS_PSEUDO_HEADER(flags); + bool is_pseudo = !!BPF_IS_PSEUDO_HEADER(flags); int offset = (int) r2; __sum16 sum, *ptr; diff --git a/net/core/utils.c b/net/core/utils.c index a7732a068043..cd7d202f340e 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -301,7 +301,7 @@ out: EXPORT_SYMBOL(in6_pton); void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, - __be32 from, __be32 to, int pseudohdr) + __be32 from, __be32 to, bool pseudohdr) { if (skb->ip_summed != CHECKSUM_PARTIAL) { csum_replace4(sum, from, to); @@ -318,7 +318,7 @@ EXPORT_SYMBOL(inet_proto_csum_replace4); void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, const __be32 *from, const __be32 *to, - int pseudohdr) + bool pseudohdr) { __be32 diff[] = { ~from[0], ~from[1], ~from[2], ~from[3], diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 4bf3dc49ad1e..270765236f5e 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -72,7 +72,7 @@ set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo) tcph->cwr = einfo->proto.tcp.cwr; inet_proto_csum_replace2(&tcph->check, skb, - oldval, ((__be16 *)tcph)[6], 0); + oldval, ((__be16 *)tcph)[6], false); return true; } diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index e59cc05c09e9..22f4579b0c2a 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -120,7 +120,7 @@ static void nf_nat_ipv4_csum_update(struct sk_buff *skb, oldip = iph->daddr; newip = t->dst.u3.ip; } - inet_proto_csum_replace4(check, skb, oldip, newip, 1); + inet_proto_csum_replace4(check, skb, oldip, newip, true); } static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb, @@ -151,7 +151,7 @@ static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb, } } else inet_proto_csum_replace2(check, skb, - htons(oldlen), htons(datalen), 1); + htons(oldlen), htons(datalen), true); } #if IS_ENABLED(CONFIG_NF_CT_NETLINK) diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c index 4557b4ab8342..7b98baa13ede 100644 --- a/net/ipv4/netfilter/nf_nat_proto_icmp.c +++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c @@ -67,7 +67,7 @@ icmp_manip_pkt(struct sk_buff *skb, hdr = (struct icmphdr *)(skb->data + hdroff); inet_proto_csum_replace2(&hdr->checksum, skb, - hdr->un.echo.id, tuple->src.u.icmp.id, 0); + hdr->un.echo.id, tuple->src.u.icmp.id, false); hdr->un.echo.id = tuple->src.u.icmp.id; return true; } diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index e76900e0aa92..70fbaed49edb 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -124,7 +124,7 @@ static void nf_nat_ipv6_csum_update(struct sk_buff *skb, newip = &t->dst.u3.in6; } inet_proto_csum_replace16(check, skb, oldip->s6_addr32, - newip->s6_addr32, 1); + newip->s6_addr32, true); } static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb, @@ -155,7 +155,7 @@ static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb, } } else inet_proto_csum_replace2(check, skb, - htons(oldlen), htons(datalen), 1); + htons(oldlen), htons(datalen), true); } #if IS_ENABLED(CONFIG_NF_CT_NETLINK) diff --git a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c index 2205e8eeeacf..57593b00c5b4 100644 --- a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c @@ -73,7 +73,7 @@ icmpv6_manip_pkt(struct sk_buff *skb, hdr->icmp6_type == ICMPV6_ECHO_REPLY) { inet_proto_csum_replace2(&hdr->icmp6_cksum, skb, hdr->icmp6_identifier, - tuple->src.u.icmp.id, 0); + tuple->src.u.icmp.id, false); hdr->icmp6_identifier = tuple->src.u.icmp.id; } return true; diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c index ce3e840c8704..dff0f0cc59e4 100644 --- a/net/netfilter/nf_conntrack_seqadj.c +++ b/net/netfilter/nf_conntrack_seqadj.c @@ -103,9 +103,9 @@ static void nf_ct_sack_block_adjust(struct sk_buff *skb, ntohl(sack->end_seq), ntohl(new_end_seq)); inet_proto_csum_replace4(&tcph->check, skb, - sack->start_seq, new_start_seq, 0); + sack->start_seq, new_start_seq, false); inet_proto_csum_replace4(&tcph->check, skb, - sack->end_seq, new_end_seq, 0); + sack->end_seq, new_end_seq, false); sack->start_seq = new_start_seq; sack->end_seq = new_end_seq; sackoff += sizeof(*sack); @@ -193,8 +193,9 @@ int nf_ct_seq_adjust(struct sk_buff *skb, newseq = htonl(ntohl(tcph->seq) + seqoff); newack = htonl(ntohl(tcph->ack_seq) - ackoff); - inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0); - inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0); + inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, false); + inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, + false); pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n", ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), diff --git a/net/netfilter/nf_nat_proto_dccp.c b/net/netfilter/nf_nat_proto_dccp.c index b8067b53ff3a..15c47b246d0d 100644 --- a/net/netfilter/nf_nat_proto_dccp.c +++ b/net/netfilter/nf_nat_proto_dccp.c @@ -69,7 +69,7 @@ dccp_manip_pkt(struct sk_buff *skb, l3proto->csum_update(skb, iphdroff, &hdr->dccph_checksum, tuple, maniptype); inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport, - 0); + false); return true; } diff --git a/net/netfilter/nf_nat_proto_tcp.c b/net/netfilter/nf_nat_proto_tcp.c index 37f5505f4529..4f8820fc5148 100644 --- a/net/netfilter/nf_nat_proto_tcp.c +++ b/net/netfilter/nf_nat_proto_tcp.c @@ -70,7 +70,7 @@ tcp_manip_pkt(struct sk_buff *skb, return true; l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype); - inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0); + inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, false); return true; } diff --git a/net/netfilter/nf_nat_proto_udp.c b/net/netfilter/nf_nat_proto_udp.c index b0ede2f0d8bc..b1e627227b6e 100644 --- a/net/netfilter/nf_nat_proto_udp.c +++ b/net/netfilter/nf_nat_proto_udp.c @@ -57,7 +57,7 @@ udp_manip_pkt(struct sk_buff *skb, l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype); inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, - 0); + false); if (!hdr->check) hdr->check = CSUM_MANGLED_0; } diff --git a/net/netfilter/nf_nat_proto_udplite.c b/net/netfilter/nf_nat_proto_udplite.c index 368f14e01e75..58340c97bd83 100644 --- a/net/netfilter/nf_nat_proto_udplite.c +++ b/net/netfilter/nf_nat_proto_udplite.c @@ -56,7 +56,7 @@ udplite_manip_pkt(struct sk_buff *skb, } l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype); - inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, 0); + inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, false); if (!hdr->check) hdr->check = CSUM_MANGLED_0; diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index d7f168527903..14f8b43ec5a7 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -225,7 +225,7 @@ unsigned int synproxy_tstamp_adjust(struct sk_buff *skb, synproxy->tsoff); } inet_proto_csum_replace4(&th->check, skb, - old, *ptr, 0); + old, *ptr, false); return 1; } optoff += op[1]; diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 8c3190e2fc6a..8c02501a530f 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -144,7 +144,7 @@ tcpmss_mangle_packet(struct sk_buff *skb, inet_proto_csum_replace2(&tcph->check, skb, htons(oldmss), htons(newmss), - 0); + false); return 0; } } @@ -185,18 +185,18 @@ tcpmss_mangle_packet(struct sk_buff *skb, memmove(opt + TCPOLEN_MSS, opt, len - sizeof(struct tcphdr)); inet_proto_csum_replace2(&tcph->check, skb, - htons(len), htons(len + TCPOLEN_MSS), 1); + htons(len), htons(len + TCPOLEN_MSS), true); opt[0] = TCPOPT_MSS; opt[1] = TCPOLEN_MSS; opt[2] = (newmss & 0xff00) >> 8; opt[3] = newmss & 0x00ff; - inet_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), 0); + inet_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), false); oldval = ((__be16 *)tcph)[6]; tcph->doff += TCPOLEN_MSS/4; inet_proto_csum_replace2(&tcph->check, skb, - oldval, ((__be16 *)tcph)[6], 0); + oldval, ((__be16 *)tcph)[6], false); return TCPOLEN_MSS; } diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c index 625fa1d636a0..eb92bffff11c 100644 --- a/net/netfilter/xt_TCPOPTSTRIP.c +++ b/net/netfilter/xt_TCPOPTSTRIP.c @@ -80,7 +80,7 @@ tcpoptstrip_mangle_packet(struct sk_buff *skb, n <<= 8; } inet_proto_csum_replace2(&tcph->check, skb, htons(o), - htons(n), 0); + htons(n), false); } memset(opt + i, TCPOPT_NOP, optl); } diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 14da52ddd327..4f4200717bef 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -284,14 +284,14 @@ static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh, if (nh->protocol == IPPROTO_TCP) { if (likely(transport_len >= sizeof(struct tcphdr))) inet_proto_csum_replace4(&tcp_hdr(skb)->check, skb, - addr, new_addr, 1); + addr, new_addr, true); } else if (nh->protocol == IPPROTO_UDP) { if (likely(transport_len >= sizeof(struct udphdr))) { struct udphdr *uh = udp_hdr(skb); if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) { inet_proto_csum_replace4(&uh->check, skb, - addr, new_addr, 1); + addr, new_addr, true); if (!uh->check) uh->check = CSUM_MANGLED_0; } @@ -316,14 +316,14 @@ static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto, if (l4_proto == NEXTHDR_TCP) { if (likely(transport_len >= sizeof(struct tcphdr))) inet_proto_csum_replace16(&tcp_hdr(skb)->check, skb, - addr, new_addr, 1); + addr, new_addr, true); } else if (l4_proto == NEXTHDR_UDP) { if (likely(transport_len >= sizeof(struct udphdr))) { struct udphdr *uh = udp_hdr(skb); if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) { inet_proto_csum_replace16(&uh->check, skb, - addr, new_addr, 1); + addr, new_addr, true); if (!uh->check) uh->check = CSUM_MANGLED_0; } @@ -331,7 +331,7 @@ static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto, } else if (l4_proto == NEXTHDR_ICMP) { if (likely(transport_len >= sizeof(struct icmp6hdr))) inet_proto_csum_replace16(&icmp6_hdr(skb)->icmp6_cksum, - skb, addr, new_addr, 1); + skb, addr, new_addr, true); } } @@ -498,7 +498,7 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key, static void set_tp_port(struct sk_buff *skb, __be16 *port, __be16 new_port, __sum16 *check) { - inet_proto_csum_replace2(check, skb, *port, new_port, 0); + inet_proto_csum_replace2(check, skb, *port, new_port, false); *port = new_port; } diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 5be0b3c1c5b0..b7c4ead8b5a8 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -162,7 +162,8 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a, goto drop; tcph = (void *)(skb_network_header(skb) + ihl); - inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, 1); + inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, + true); break; } case IPPROTO_UDP: @@ -178,7 +179,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a, udph = (void *)(skb_network_header(skb) + ihl); if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { inet_proto_csum_replace4(&udph->check, skb, addr, - new_addr, 1); + new_addr, true); if (!udph->check) udph->check = CSUM_MANGLED_0; } @@ -231,7 +232,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a, iph->saddr = new_addr; inet_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr, - 0); + false); break; } default: -- cgit v1.2.3 From abc5d1ff3e8f9b4a9d274818459b123e31981dc9 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 17 Aug 2015 13:42:26 -0700 Subject: net: Add inet_proto_csum_replace_by_diff utility function This function updates a checksum field value and skb->csum based on a value which is the difference between the old and new checksum. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/checksum.h | 2 ++ net/core/utils.c | 13 +++++++++++++ 2 files changed, 15 insertions(+) (limited to 'include') diff --git a/include/net/checksum.h b/include/net/checksum.h index 619f3445d57e..9fcaedf994ee 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -144,6 +144,8 @@ void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, const __be32 *from, const __be32 *to, bool pseudohdr); +void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, + __wsum diff, bool pseudohdr); static inline void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb, __be16 from, __be16 to, diff --git a/net/core/utils.c b/net/core/utils.c index cd7d202f340e..3dffce953c39 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -336,6 +336,19 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, } EXPORT_SYMBOL(inet_proto_csum_replace16); +void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, + __wsum diff, bool pseudohdr) +{ + if (skb->ip_summed != CHECKSUM_PARTIAL) { + *sum = csum_fold(csum_add(diff, ~csum_unfold(*sum))); + if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) + skb->csum = ~csum_add(diff, ~skb->csum); + } else if (pseudohdr) { + *sum = ~csum_fold(csum_add(diff, csum_unfold(*sum))); + } +} +EXPORT_SYMBOL(inet_proto_csum_replace_by_diff); + struct __net_random_once_work { struct work_struct work; struct static_key *key; -- cgit v1.2.3 From 65d7ab8de582bc668e3dabb6ff48f750098a6e78 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 17 Aug 2015 13:42:27 -0700 Subject: net: Identifier Locator Addressing module Adding new module name ila. This implements ILA translation. Light weight tunnel redirection is used to perform the translation in the data path. This is configured by the "ip -6 route" command using the "encap ila " option, where is the value to set in destination locator of the packet. e.g. ip -6 route add 3333:0:0:1:5555:0:1:0/128 \ encap ila 2001:0:0:1 via 2401:db00:20:911a:face:0:25:0 Sets a route where 3333:0:0:1 will be overwritten by 2001:0:0:1 on output. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/uapi/linux/ila.h | 15 +++ include/uapi/linux/lwtunnel.h | 1 + net/ipv6/Kconfig | 19 ++++ net/ipv6/Makefile | 1 + net/ipv6/ila.c | 216 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 252 insertions(+) create mode 100644 include/uapi/linux/ila.h create mode 100644 net/ipv6/ila.c (limited to 'include') diff --git a/include/uapi/linux/ila.h b/include/uapi/linux/ila.h new file mode 100644 index 000000000000..7ed9e670814e --- /dev/null +++ b/include/uapi/linux/ila.h @@ -0,0 +1,15 @@ +/* ila.h - ILA Interface */ + +#ifndef _UAPI_LINUX_ILA_H +#define _UAPI_LINUX_ILA_H + +enum { + ILA_ATTR_UNSPEC, + ILA_ATTR_LOCATOR, /* u64 */ + + __ILA_ATTR_MAX, +}; + +#define ILA_ATTR_MAX (__ILA_ATTR_MAX - 1) + +#endif /* _UAPI_LINUX_ILA_H */ diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h index 3bf223bc2367..aa84ca396bcb 100644 --- a/include/uapi/linux/lwtunnel.h +++ b/include/uapi/linux/lwtunnel.h @@ -7,6 +7,7 @@ enum lwtunnel_encap_types { LWTUNNEL_ENCAP_NONE, LWTUNNEL_ENCAP_MPLS, LWTUNNEL_ENCAP_IP, + LWTUNNEL_ENCAP_ILA, __LWTUNNEL_ENCAP_MAX, }; diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 643f61339e7b..983bb999738c 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -92,6 +92,25 @@ config IPV6_MIP6 If unsure, say N. +config IPV6_ILA + tristate "IPv6: Identifier Locator Addressing (ILA)" + select LWTUNNEL + ---help--- + Support for IPv6 Identifier Locator Addressing (ILA). + + ILA is a mechanism to do network virtualization without + encapsulation. The basic concept of ILA is that we split an + IPv6 address into a 64 bit locator and 64 bit identifier. The + identifier is the identity of an entity in communication + ("who") and the locator expresses the location of the + entity ("where"). + + ILA can be configured using the "encap ila" option with + "ip -6 route" command. ILA is described in + https://tools.ietf.org/html/draft-herbert-nvo3-ila-00. + + If unsure, say N. + config INET6_XFRM_TUNNEL tristate select INET6_TUNNEL diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 0f3f1999719a..2c900c7b7eb1 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o obj-$(CONFIG_IPV6_MIP6) += mip6.o +obj-$(CONFIG_IPV6_ILA) += ila.o obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_IPV6_VTI) += ip6_vti.o diff --git a/net/ipv6/ila.c b/net/ipv6/ila.c new file mode 100644 index 000000000000..2540ab4b76d1 --- /dev/null +++ b/net/ipv6/ila.c @@ -0,0 +1,216 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct ila_params { + __be64 locator; +}; + +static inline struct ila_params *ila_params_lwtunnel( + struct lwtunnel_state *lwstate) +{ + return (struct ila_params *)lwstate->data; +} + +static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to) +{ + __be32 diff[] = { + ~from[0], ~from[1], to[0], to[1], + }; + + return csum_partial(diff, sizeof(diff), 0); +} + +static inline __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p) +{ + return compute_csum_diff8((__be32 *)&ip6h->daddr, + (__be32 *)&p->locator); +} + +static void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p) +{ + __wsum diff; + struct ipv6hdr *ip6h = ipv6_hdr(skb); + size_t nhoff = sizeof(struct ipv6hdr); + + /* First update checksum */ + switch (ip6h->nexthdr) { + case NEXTHDR_TCP: + if (likely(pskb_may_pull(skb, nhoff + sizeof(struct tcphdr)))) { + struct tcphdr *th = (struct tcphdr *) + (skb_network_header(skb) + nhoff); + + diff = get_csum_diff(ip6h, p); + inet_proto_csum_replace_by_diff(&th->check, skb, + diff, true); + } + break; + case NEXTHDR_UDP: + if (likely(pskb_may_pull(skb, nhoff + sizeof(struct udphdr)))) { + struct udphdr *uh = (struct udphdr *) + (skb_network_header(skb) + nhoff); + + if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) { + diff = get_csum_diff(ip6h, p); + inet_proto_csum_replace_by_diff(&uh->check, skb, + diff, true); + if (!uh->check) + uh->check = CSUM_MANGLED_0; + } + } + break; + case NEXTHDR_ICMP: + if (likely(pskb_may_pull(skb, + nhoff + sizeof(struct icmp6hdr)))) { + struct icmp6hdr *ih = (struct icmp6hdr *) + (skb_network_header(skb) + nhoff); + + diff = get_csum_diff(ip6h, p); + inet_proto_csum_replace_by_diff(&ih->icmp6_cksum, skb, + diff, true); + } + break; + } + + /* Now change destination address */ + *(__be64 *)&ip6h->daddr = p->locator; +} + +static int ila_output(struct sock *sk, struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct rt6_info *rt6 = NULL; + + if (skb->protocol != htons(ETH_P_IPV6)) + goto drop; + + rt6 = (struct rt6_info *)dst; + + update_ipv6_locator(skb, ila_params_lwtunnel(rt6->rt6i_lwtstate)); + + return rt6->rt6i_lwtstate->orig_output(sk, skb); + +drop: + kfree_skb(skb); + return -EINVAL; +} + +static int ila_input(struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct rt6_info *rt6 = NULL; + + if (skb->protocol != htons(ETH_P_IPV6)) + goto drop; + + rt6 = (struct rt6_info *)dst; + + update_ipv6_locator(skb, ila_params_lwtunnel(rt6->rt6i_lwtstate)); + + return rt6->rt6i_lwtstate->orig_input(skb); + +drop: + kfree_skb(skb); + return -EINVAL; +} + +static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { + [ILA_ATTR_LOCATOR] = { .type = NLA_U64, }, +}; + +static int ila_build_state(struct net_device *dev, struct nlattr *nla, + struct lwtunnel_state **ts) +{ + struct ila_params *p; + struct nlattr *tb[ILA_ATTR_MAX + 1]; + size_t encap_len = sizeof(*p); + struct lwtunnel_state *newts; + int ret; + + ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, + ila_nl_policy); + if (ret < 0) + return ret; + + if (!tb[ILA_ATTR_LOCATOR]) + return -EINVAL; + + newts = lwtunnel_state_alloc(encap_len); + if (!newts) + return -ENOMEM; + + newts->len = encap_len; + p = ila_params_lwtunnel(newts); + + p->locator = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]); + + newts->type = LWTUNNEL_ENCAP_ILA; + newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT | + LWTUNNEL_STATE_INPUT_REDIRECT; + + *ts = newts; + + return 0; +} + +static int ila_fill_encap_info(struct sk_buff *skb, + struct lwtunnel_state *lwtstate) +{ + struct ila_params *p = ila_params_lwtunnel(lwtstate); + + if (nla_put_u64(skb, ILA_ATTR_LOCATOR, (__force u64)p->locator)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static int ila_encap_nlsize(struct lwtunnel_state *lwtstate) +{ + /* No encapsulation overhead */ + return 0; +} + +static int ila_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) +{ + struct ila_params *a_p = ila_params_lwtunnel(a); + struct ila_params *b_p = ila_params_lwtunnel(b); + + return (a_p->locator != b_p->locator); +} + +static const struct lwtunnel_encap_ops ila_encap_ops = { + .build_state = ila_build_state, + .output = ila_output, + .input = ila_input, + .fill_encap = ila_fill_encap_info, + .get_encap_size = ila_encap_nlsize, + .cmp_encap = ila_encap_cmp, +}; + +static int __init ila_init(void) +{ + return lwtunnel_encap_add_ops(&ila_encap_ops, LWTUNNEL_ENCAP_ILA); +} + +static void __exit ila_fini(void) +{ + lwtunnel_encap_del_ops(&ila_encap_ops, LWTUNNEL_ENCAP_ILA); +} + +module_init(ila_init); +module_exit(ila_fini); +MODULE_AUTHOR("Tom Herbert "); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 74f4e0cc61080f63f28e8d519bdf437957e64217 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 14 Aug 2015 00:21:45 +0200 Subject: bcma: switch GPIO portions to use GPIOLIB_IRQCHIP This switches the BCMA GPIO driver to use GPIOLIB_IRQCHIP to handle its interrupts instead of rolling its own copy of the irqdomain handling etc. Signed-off-by: Linus Walleij Signed-off-by: Hauke Mehrtens Signed-off-by: Kalle Valo --- drivers/bcma/Kconfig | 2 +- drivers/bcma/driver_gpio.c | 92 ++++++++++------------------- include/linux/bcma/bcma_driver_chipcommon.h | 1 - 3 files changed, 31 insertions(+), 64 deletions(-) (limited to 'include') diff --git a/drivers/bcma/Kconfig b/drivers/bcma/Kconfig index be5fffb6da24..023d448ed3fa 100644 --- a/drivers/bcma/Kconfig +++ b/drivers/bcma/Kconfig @@ -92,7 +92,7 @@ config BCMA_DRIVER_GMAC_CMN config BCMA_DRIVER_GPIO bool "BCMA GPIO driver" depends on BCMA && GPIOLIB - select IRQ_DOMAIN if BCMA_HOST_SOC + select GPIOLIB_IRQCHIP if BCMA_HOST_SOC help Driver to provide access to the GPIO pins of the bcma bus. diff --git a/drivers/bcma/driver_gpio.c b/drivers/bcma/driver_gpio.c index 5f6018e7cd4c..504899a72966 100644 --- a/drivers/bcma/driver_gpio.c +++ b/drivers/bcma/driver_gpio.c @@ -8,10 +8,8 @@ * Licensed under the GNU/GPL. See COPYING for details. */ -#include -#include +#include #include -#include #include #include @@ -79,19 +77,11 @@ static void bcma_gpio_free(struct gpio_chip *chip, unsigned gpio) } #if IS_BUILTIN(CONFIG_BCM47XX) || IS_BUILTIN(CONFIG_ARCH_BCM_5301X) -static int bcma_gpio_to_irq(struct gpio_chip *chip, unsigned gpio) -{ - struct bcma_drv_cc *cc = bcma_gpio_get_cc(chip); - - if (cc->core->bus->hosttype == BCMA_HOSTTYPE_SOC) - return irq_find_mapping(cc->irq_domain, gpio); - else - return -EINVAL; -} static void bcma_gpio_irq_unmask(struct irq_data *d) { - struct bcma_drv_cc *cc = irq_data_get_irq_chip_data(d); + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct bcma_drv_cc *cc = bcma_gpio_get_cc(gc); int gpio = irqd_to_hwirq(d); u32 val = bcma_chipco_gpio_in(cc, BIT(gpio)); @@ -101,7 +91,8 @@ static void bcma_gpio_irq_unmask(struct irq_data *d) static void bcma_gpio_irq_mask(struct irq_data *d) { - struct bcma_drv_cc *cc = irq_data_get_irq_chip_data(d); + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct bcma_drv_cc *cc = bcma_gpio_get_cc(gc); int gpio = irqd_to_hwirq(d); bcma_chipco_gpio_intmask(cc, BIT(gpio), 0); @@ -116,6 +107,7 @@ static struct irq_chip bcma_gpio_irq_chip = { static irqreturn_t bcma_gpio_irq_handler(int irq, void *dev_id) { struct bcma_drv_cc *cc = dev_id; + struct gpio_chip *gc = &cc->gpio; u32 val = bcma_cc_read32(cc, BCMA_CC_GPIOIN); u32 mask = bcma_cc_read32(cc, BCMA_CC_GPIOIRQ); u32 pol = bcma_cc_read32(cc, BCMA_CC_GPIOPOL); @@ -125,81 +117,58 @@ static irqreturn_t bcma_gpio_irq_handler(int irq, void *dev_id) if (!irqs) return IRQ_NONE; - for_each_set_bit(gpio, &irqs, cc->gpio.ngpio) - generic_handle_irq(bcma_gpio_to_irq(&cc->gpio, gpio)); + for_each_set_bit(gpio, &irqs, gc->ngpio) + generic_handle_irq(irq_find_mapping(gc->irqdomain, gpio)); bcma_chipco_gpio_polarity(cc, irqs, val & irqs); return IRQ_HANDLED; } -static int bcma_gpio_irq_domain_init(struct bcma_drv_cc *cc) +static int bcma_gpio_irq_init(struct bcma_drv_cc *cc) { struct gpio_chip *chip = &cc->gpio; - int gpio, hwirq, err; + int hwirq, err; if (cc->core->bus->hosttype != BCMA_HOSTTYPE_SOC) return 0; - cc->irq_domain = irq_domain_add_linear(NULL, chip->ngpio, - &irq_domain_simple_ops, cc); - if (!cc->irq_domain) { - err = -ENODEV; - goto err_irq_domain; - } - for (gpio = 0; gpio < chip->ngpio; gpio++) { - int irq = irq_create_mapping(cc->irq_domain, gpio); - - irq_set_chip_data(irq, cc); - irq_set_chip_and_handler(irq, &bcma_gpio_irq_chip, - handle_simple_irq); - } - hwirq = bcma_core_irq(cc->core, 0); err = request_irq(hwirq, bcma_gpio_irq_handler, IRQF_SHARED, "gpio", cc); if (err) - goto err_req_irq; + return err; bcma_chipco_gpio_intmask(cc, ~0, 0); bcma_cc_set32(cc, BCMA_CC_IRQMASK, BCMA_CC_IRQ_GPIO); - return 0; - -err_req_irq: - for (gpio = 0; gpio < chip->ngpio; gpio++) { - int irq = irq_find_mapping(cc->irq_domain, gpio); - - irq_dispose_mapping(irq); + err = gpiochip_irqchip_add(chip, + &bcma_gpio_irq_chip, + 0, + handle_simple_irq, + IRQ_TYPE_NONE); + if (err) { + free_irq(hwirq, cc); + return err; } - irq_domain_remove(cc->irq_domain); -err_irq_domain: - return err; + + return 0; } -static void bcma_gpio_irq_domain_exit(struct bcma_drv_cc *cc) +static void bcma_gpio_irq_exit(struct bcma_drv_cc *cc) { - struct gpio_chip *chip = &cc->gpio; - int gpio; - if (cc->core->bus->hosttype != BCMA_HOSTTYPE_SOC) return; bcma_cc_mask32(cc, BCMA_CC_IRQMASK, ~BCMA_CC_IRQ_GPIO); free_irq(bcma_core_irq(cc->core, 0), cc); - for (gpio = 0; gpio < chip->ngpio; gpio++) { - int irq = irq_find_mapping(cc->irq_domain, gpio); - - irq_dispose_mapping(irq); - } - irq_domain_remove(cc->irq_domain); } #else -static int bcma_gpio_irq_domain_init(struct bcma_drv_cc *cc) +static int bcma_gpio_irq_init(struct bcma_drv_cc *cc) { return 0; } -static void bcma_gpio_irq_domain_exit(struct bcma_drv_cc *cc) +static void bcma_gpio_irq_exit(struct bcma_drv_cc *cc) { } #endif @@ -218,9 +187,8 @@ int bcma_gpio_init(struct bcma_drv_cc *cc) chip->set = bcma_gpio_set_value; chip->direction_input = bcma_gpio_direction_input; chip->direction_output = bcma_gpio_direction_output; -#if IS_BUILTIN(CONFIG_BCM47XX) || IS_BUILTIN(CONFIG_ARCH_BCM_5301X) - chip->to_irq = bcma_gpio_to_irq; -#endif + chip->owner = THIS_MODULE; + chip->dev = bcma_bus_get_host_dev(bus); #if IS_BUILTIN(CONFIG_OF) if (cc->core->bus->hosttype == BCMA_HOSTTYPE_SOC) chip->of_node = cc->core->dev.of_node; @@ -248,13 +216,13 @@ int bcma_gpio_init(struct bcma_drv_cc *cc) else chip->base = -1; - err = bcma_gpio_irq_domain_init(cc); + err = gpiochip_add(chip); if (err) return err; - err = gpiochip_add(chip); + err = bcma_gpio_irq_init(cc); if (err) { - bcma_gpio_irq_domain_exit(cc); + gpiochip_remove(chip); return err; } @@ -263,7 +231,7 @@ int bcma_gpio_init(struct bcma_drv_cc *cc) int bcma_gpio_unregister(struct bcma_drv_cc *cc) { - bcma_gpio_irq_domain_exit(cc); + bcma_gpio_irq_exit(cc); gpiochip_remove(&cc->gpio); return 0; } diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h index 6cceedf65ca2..cf038431a5cc 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -640,7 +640,6 @@ struct bcma_drv_cc { spinlock_t gpio_lock; #ifdef CONFIG_BCMA_DRIVER_GPIO struct gpio_chip gpio; - struct irq_domain *irq_domain; #endif }; -- cgit v1.2.3 From 60045cbfc0b291dae8dd5b929d67b87c5ea954d4 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 17 Aug 2015 23:52:51 +0200 Subject: net: dsa: Add dsa_is_dsa_port() helper Add an inline helper for determining is a port is a DSA port. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx.c | 5 ++--- include/net/dsa.h | 5 +++++ 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index 332f2c8090d0..486e9792fc05 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -1926,8 +1926,7 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) * full duplex. */ reg = _mv88e6xxx_reg_read(ds, REG_PORT(port), PORT_PCS_CTRL); - if (dsa_is_cpu_port(ds, port) || - ds->dsa_port_mask & (1 << port)) { + if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) { reg |= PORT_PCS_CTRL_FORCE_LINK | PORT_PCS_CTRL_LINK_UP | PORT_PCS_CTRL_DUPLEX_FULL | @@ -1992,7 +1991,7 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) || mv88e6xxx_6095_family(ds) || mv88e6xxx_6065_family(ds) || mv88e6xxx_6320_family(ds)) { - if (ds->dsa_port_mask & (1 << port)) + if (dsa_is_dsa_port(ds, port)) reg |= PORT_CONTROL_FRAME_MODE_DSA; if (port == dsa_upstream_port(ds)) reg |= PORT_CONTROL_FORWARD_UNKNOWN | diff --git a/include/net/dsa.h b/include/net/dsa.h index bd9b76502458..b34d812bc5d0 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -171,6 +171,11 @@ static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p) return !!(ds->index == ds->dst->cpu_switch && p == ds->dst->cpu_port); } +static inline bool dsa_is_dsa_port(struct dsa_switch *ds, int p) +{ + return !!((ds->dsa_port_mask) & (1 << p)); +} + static inline bool dsa_is_port_initialized(struct dsa_switch *ds, int p) { return ds->phys_port_mask & (1 << p) && ds->ports[p]; -- cgit v1.2.3 From df383e6240ef222703648072dafd2a1ae21b0d2a Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Tue, 18 Aug 2015 18:41:13 +0200 Subject: lwtunnel: fix memory leak The built lwtunnel_state struct has to be freed after comparison. Fixes: 571e722676fe3 ("ipv4: support for fib route lwtunnel encap attributes") Signed-off-by: Jiri Benc Acked-by: Roopa Prabhu Signed-off-by: David S. Miller --- include/net/lwtunnel.h | 7 ++++++- net/ipv4/fib_semantics.c | 10 ++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index e25b60eb262d..34fd8f70c2ca 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -36,6 +36,11 @@ struct lwtunnel_encap_ops { }; #ifdef CONFIG_LWTUNNEL +static inline void lwtstate_free(struct lwtunnel_state *lws) +{ + kfree(lws); +} + static inline struct lwtunnel_state * lwtstate_get(struct lwtunnel_state *lws) { @@ -51,7 +56,7 @@ static inline void lwtstate_put(struct lwtunnel_state *lws) return; if (atomic_dec_and_test(&lws->refcnt)) - kfree(lws); + lwtstate_free(lws); } static inline bool lwtunnel_output_redirect(struct lwtunnel_state *lwtstate) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index c8025851dac7..d5253071f71f 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -539,7 +539,7 @@ int fib_encap_match(struct net *net, u16 encap_type, { struct lwtunnel_state *lwtstate; struct net_device *dev = NULL; - int ret; + int ret, result = 0; if (encap_type == LWTUNNEL_ENCAP_NONE) return 0; @@ -548,10 +548,12 @@ int fib_encap_match(struct net *net, u16 encap_type, dev = __dev_get_by_index(net, oif); ret = lwtunnel_build_state(dev, encap_type, encap, &lwtstate); - if (!ret) - return lwtunnel_cmp_encap(lwtstate, nh->nh_lwtstate); + if (!ret) { + result = lwtunnel_cmp_encap(lwtstate, nh->nh_lwtstate); + lwtstate_free(lwtstate); + } - return 0; + return result; } int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) -- cgit v1.2.3 From db5dbec5ef2d4565bb8d42709802de66b06f9965 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 18 Aug 2015 20:28:02 +0300 Subject: vrf: drop unused num_slaves member slave_queue has a num_slaves member which is unused, drop it. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- drivers/net/vrf.c | 2 -- include/net/vrf.h | 1 - 2 files changed, 3 deletions(-) (limited to 'include') diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index cd4bc77f2e04..3d7da0c6f827 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -335,13 +335,11 @@ static struct slave *__vrf_find_slave_dev(struct slave_queue *queue, static void __vrf_remove_slave(struct slave_queue *queue, struct slave *slave) { list_del(&slave->list); - queue->num_slaves--; } static void __vrf_insert_slave(struct slave_queue *queue, struct slave *slave) { list_add(&slave->list, &queue->all_slaves); - queue->num_slaves++; } static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev) diff --git a/include/net/vrf.h b/include/net/vrf.h index 40e3793c7a05..3bb4af462ed6 100644 --- a/include/net/vrf.h +++ b/include/net/vrf.h @@ -24,7 +24,6 @@ struct slave { struct slave_queue { struct list_head all_slaves; - int num_slaves; }; struct net_vrf { -- cgit v1.2.3 From 824e7383e92815cb591793c74cc836aa5165f7f8 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Wed, 19 Aug 2015 15:46:17 +0800 Subject: lwtunnel: Fix the sparse warnings in fib_encap_match MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When CONFIG_LWTUNNEL config is not enabled, the lwtstate_free() is not declared in lwtunnel.h at all. However, even in this case, the function is still referenced in fib_semantics.c so that there appears the following sparse warnings: net/ipv4/fib_semantics.c:553:17: error: undefined identifier 'lwtstate_free' CC net/ipv4/fib_semantics.o net/ipv4/fib_semantics.c: In function ‘fib_encap_match’: net/ipv4/fib_semantics.c:553:3: error: implicit declaration of function ‘lwtstate_free’ [-Werror=implicit-function-declaration] cc1: some warnings being treated as errors make[1]: *** [net/ipv4/fib_semantics.o] Error 1 make: *** [net/ipv4/fib_semantics.o] Error 2 To eliminate the error, we define an empty function for lwtstate_free() in lwtunnel.h when CONFIG_LWTUNNEL is disabled. Fixes: df383e6240ef ("lwtunnel: fix memory leak") Cc: Jiri Benc Reported-by: kbuild test robot Signed-off-by: Ying Xue Acked-by: Jiri Benc Signed-off-by: David S. Miller --- include/net/lwtunnel.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index 34fd8f70c2ca..cfee53916ba5 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -93,6 +93,10 @@ int lwtunnel_input6(struct sk_buff *skb); #else +static inline void lwtstate_free(struct lwtunnel_state *lws) +{ +} + static inline struct lwtunnel_state * lwtstate_get(struct lwtunnel_state *lws) { -- cgit v1.2.3 From 18041e31743d278b6323518d20a2ef656c3cc689 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 18 Aug 2015 21:40:16 +0300 Subject: vrf: vrf_master_ifindex_rcu is not always called with rcu read lock While running net-next I hit this: [ 634.073119] =============================== [ 634.073150] [ INFO: suspicious RCU usage. ] [ 634.073182] 4.2.0-rc6+ #45 Not tainted [ 634.073213] ------------------------------- [ 634.073244] include/net/vrf.h:38 suspicious rcu_dereference_check() usage! [ 634.073274] other info that might help us debug this: [ 634.073307] rcu_scheduler_active = 1, debug_locks = 1 [ 634.073338] 2 locks held by swapper/0/0: [ 634.073369] #0: (((&n->timer))){+.-...}, at: [] call_timer_fn+0x5/0x480 [ 634.073412] #1: (slock-AF_INET){+.-...}, at: [] icmp_send+0x155/0x5f0 [ 634.073450] stack backtrace: [ 634.073483] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.2.0-rc6+ #45 [ 634.073514] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 634.073545] 0000000000000000 0593ba8242d9ace4 ffff88002fc03b48 ffffffff81803f1b [ 634.073612] 0000000000000000 ffffffff81e12500 ffff88002fc03b78 ffffffff811003c5 [ 634.073642] 0000000000000000 ffff88002ec4e600 ffffffff81f00f80 ffff88002fc03cf0 [ 634.073669] Call Trace: [ 634.073694] [] dump_stack+0x4c/0x65 [ 634.073728] [] lockdep_rcu_suspicious+0xc5/0x100 [ 634.073763] [] icmp_route_lookup+0x176/0x5c0 [ 634.073793] [] ? icmp_send+0x35b/0x5f0 [ 634.073818] [] ? icmp_send+0x2d4/0x5f0 [ 634.073844] [] icmp_send+0x42e/0x5f0 [ 634.073873] [] ipv4_link_failure+0x22/0xa0 [ 634.073899] [] arp_error_report+0x3a/0x80 [ 634.073926] [] ? neigh_lookup+0x2c0/0x2c0 [ 634.073952] [] neigh_invalidate+0x8e/0x110 [ 634.073984] [] neigh_timer_handler+0x1ae/0x290 [ 634.074013] [] ? neigh_lookup+0x2c0/0x2c0 [ 634.074013] [] call_timer_fn+0xb3/0x480 [ 634.074013] [] ? call_timer_fn+0x5/0x480 [ 634.074013] [] ? neigh_lookup+0x2c0/0x2c0 [ 634.074013] [] run_timer_softirq+0x20c/0x430 [ 634.074013] [] __do_softirq+0xde/0x630 [ 634.074013] [] irq_exit+0x117/0x120 [ 634.074013] [] smp_apic_timer_interrupt+0x46/0x60 [ 634.074013] [] apic_timer_interrupt+0x70/0x80 [ 634.074013] [] ? native_safe_halt+0x6/0x10 [ 634.074013] [] ? trace_hardirqs_on+0xd/0x10 [ 634.074013] [] default_idle+0x23/0x200 [ 634.074013] [] arch_cpu_idle+0xf/0x20 [ 634.074013] [] default_idle_call+0x2a/0x40 [ 634.074013] [] cpu_startup_entry+0x39c/0x4c0 [ 634.074013] [] rest_init+0x13d/0x150 [ 634.074013] [] start_kernel+0x4a8/0x4c9 [ 634.074013] [] ? early_idt_handler_array+0x120/0x120 [ 634.074013] [] x86_64_start_reservations+0x2a/0x2c [ 634.074013] [] x86_64_start_kernel+0x14a/0x16d It would seem vrf_master_ifindex_rcu() can be called without RCU held in other contexts as well so introduce a new helper which acquires rcu and returns the ifindex. Also add curly braces around both the "if" and "else" parts as per the style guide. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/net/vrf.h | 20 ++++++++++++++++++-- net/ipv4/icmp.c | 4 ++-- 2 files changed, 20 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/vrf.h b/include/net/vrf.h index 3bb4af462ed6..5bfb16237fd7 100644 --- a/include/net/vrf.h +++ b/include/net/vrf.h @@ -43,9 +43,9 @@ static inline int vrf_master_ifindex_rcu(const struct net_device *dev) if (!dev) return 0; - if (netif_is_vrf(dev)) + if (netif_is_vrf(dev)) { ifindex = dev->ifindex; - else { + } else { vrf_ptr = rcu_dereference(dev->vrf_ptr); if (vrf_ptr) ifindex = vrf_ptr->ifindex; @@ -54,6 +54,17 @@ static inline int vrf_master_ifindex_rcu(const struct net_device *dev) return ifindex; } +static inline int vrf_master_ifindex(const struct net_device *dev) +{ + int ifindex; + + rcu_read_lock(); + ifindex = vrf_master_ifindex_rcu(dev); + rcu_read_unlock(); + + return ifindex; +} + /* called with rcu_read_lock */ static inline int vrf_dev_table_rcu(const struct net_device *dev) { @@ -133,6 +144,11 @@ static inline int vrf_master_ifindex_rcu(const struct net_device *dev) return 0; } +static inline int vrf_master_ifindex(const struct net_device *dev) +{ + return 0; +} + static inline int vrf_dev_table_rcu(const struct net_device *dev) { return 0; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index c6f1ce149ffb..f16488efa1c8 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -426,7 +426,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) fl4.flowi4_mark = mark; fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); fl4.flowi4_proto = IPPROTO_ICMP; - fl4.flowi4_oif = vrf_master_ifindex_rcu(skb->dev) ? : skb->dev->ifindex; + fl4.flowi4_oif = vrf_master_ifindex(skb->dev) ? : skb->dev->ifindex; security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) @@ -460,7 +460,7 @@ static struct rtable *icmp_route_lookup(struct net *net, fl4->flowi4_proto = IPPROTO_ICMP; fl4->fl4_icmp_type = type; fl4->fl4_icmp_code = code; - fl4->flowi4_oif = vrf_master_ifindex_rcu(skb_in->dev) ? : skb_in->dev->ifindex; + fl4->flowi4_oif = vrf_master_ifindex(skb_in->dev) ? : skb_in->dev->ifindex; security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4)); rt = __ip_route_output_key(net, fl4); -- cgit v1.2.3 From f4e774f55fe0bb568a0877b2eb9e1b4b5a6f5cbc Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 19 Aug 2015 09:46:22 +0200 Subject: average: remove out-of-line implementation Since all users are now converted to the inline implementation, remove the out-of-line implementation entirely. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/linux/average.h | 24 ------------------- lib/Kconfig | 10 -------- lib/average.c | 64 ------------------------------------------------- 3 files changed, 98 deletions(-) delete mode 100644 lib/average.c (limited to 'include') diff --git a/include/linux/average.h b/include/linux/average.h index 60f8226c5652..d04aa58280de 100644 --- a/include/linux/average.h +++ b/include/linux/average.h @@ -3,30 +3,6 @@ /* Exponentially weighted moving average (EWMA) */ -/* For more documentation see lib/average.c */ - -struct ewma { - unsigned long internal; - unsigned long factor; - unsigned long weight; -}; - -extern void ewma_init(struct ewma *avg, unsigned long factor, - unsigned long weight); - -extern struct ewma *ewma_add(struct ewma *avg, unsigned long val); - -/** - * ewma_read() - Get average value - * @avg: Average structure - * - * Returns the average value held in @avg. - */ -static inline unsigned long ewma_read(const struct ewma *avg) -{ - return avg->internal >> avg->factor; -} - #define DECLARE_EWMA(name, _factor, _weight) \ struct ewma_##name { \ unsigned long internal; \ diff --git a/lib/Kconfig b/lib/Kconfig index 3a2ef67db6c7..278890dd1049 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -460,16 +460,6 @@ config ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE config LRU_CACHE tristate -config AVERAGE - bool "Averaging functions" - help - This option is provided for the case where no in-kernel-tree - modules require averaging functions, but a module built outside - the kernel tree does. Such modules that use library averaging - functions require Y here. - - If unsure, say N. - config CLZ_TAB bool diff --git a/lib/average.c b/lib/average.c deleted file mode 100644 index 114d1beae0c7..000000000000 --- a/lib/average.c +++ /dev/null @@ -1,64 +0,0 @@ -/* - * lib/average.c - * - * This source code is licensed under the GNU General Public License, - * Version 2. See the file COPYING for more details. - */ - -#include -#include -#include -#include -#include - -/** - * DOC: Exponentially Weighted Moving Average (EWMA) - * - * These are generic functions for calculating Exponentially Weighted Moving - * Averages (EWMA). We keep a structure with the EWMA parameters and a scaled - * up internal representation of the average value to prevent rounding errors. - * The factor for scaling up and the exponential weight (or decay rate) have to - * be specified thru the init fuction. The structure should not be accessed - * directly but only thru the helper functions. - */ - -/** - * ewma_init() - Initialize EWMA parameters - * @avg: Average structure - * @factor: Factor to use for the scaled up internal value. The maximum value - * of averages can be ULONG_MAX/(factor*weight). For performance reasons - * factor has to be a power of 2. - * @weight: Exponential weight, or decay rate. This defines how fast the - * influence of older values decreases. For performance reasons weight has - * to be a power of 2. - * - * Initialize the EWMA parameters for a given struct ewma @avg. - */ -void ewma_init(struct ewma *avg, unsigned long factor, unsigned long weight) -{ - WARN_ON(!is_power_of_2(weight) || !is_power_of_2(factor)); - - avg->weight = ilog2(weight); - avg->factor = ilog2(factor); - avg->internal = 0; -} -EXPORT_SYMBOL(ewma_init); - -/** - * ewma_add() - Exponentially weighted moving average (EWMA) - * @avg: Average structure - * @val: Current value - * - * Add a sample to the average. - */ -struct ewma *ewma_add(struct ewma *avg, unsigned long val) -{ - unsigned long internal = ACCESS_ONCE(avg->internal); - - ACCESS_ONCE(avg->internal) = internal ? - (((internal << avg->weight) - internal) + - (val << avg->factor)) >> avg->weight : - (val << avg->factor); - return avg; -} -EXPORT_SYMBOL(ewma_add); -- cgit v1.2.3