From b8b8b16352cd90c6083033fd4487f04fae935c18 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Wed, 20 Sep 2017 16:15:05 -0500 Subject: rtlwifi: rtl8821ae: Fix connection lost problem In commit 40b368af4b75 ("rtlwifi: Fix alignment issues"), the read of REG_DBI_READ was changed from 16 to 8 bits. For unknown reasonsi this change results in reduced stability for the wireless connection. This regression was located using bisection. Fixes: 40b368af4b75 ("rtlwifi: Fix alignment issues") Reported-and-tested-by: James Cameron Signed-off-by: Larry Finger Cc: Stable # 4.11+ Cc: Ping-Ke Shih Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c index 4f73012978e9..1d431d4bf6d2 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c @@ -1122,7 +1122,7 @@ static u8 _rtl8821ae_dbi_read(struct rtl_priv *rtlpriv, u16 addr) } if (0 == tmp) { read_addr = REG_DBI_RDATA + addr % 4; - ret = rtl_read_byte(rtlpriv, read_addr); + ret = rtl_read_word(rtlpriv, read_addr); } return ret; } -- cgit v1.2.3 From dd2349121bb1b8ff688c3ca6a2a0bea9d8c142ca Mon Sep 17 00:00:00 2001 From: Kevin Cernekee Date: Sat, 16 Sep 2017 21:08:24 -0700 Subject: brcmfmac: Add check for short event packets The length of the data in the received skb is currently passed into brcmf_fweh_process_event() as packet_len, but this value is not checked. event_packet should be followed by DATALEN bytes of additional event data. Ensure that the received packet actually contains at least DATALEN bytes of additional data, to avoid copying uninitialized memory into event->data. Cc: # v3.8 Suggested-by: Mattias Nissler Signed-off-by: Kevin Cernekee Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c index 4eb1e1ce9ace..ef72baf6dd96 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c @@ -429,7 +429,8 @@ void brcmf_fweh_process_event(struct brcmf_pub *drvr, if (code != BRCMF_E_IF && !fweh->evt_handler[code]) return; - if (datalen > BRCMF_DCMD_MAXLEN) + if (datalen > BRCMF_DCMD_MAXLEN || + datalen + sizeof(*event_packet) > packet_len) return; if (in_interrupt()) -- cgit v1.2.3 From c503dd38f850be28867ef7a42d9abe5ade81a9bd Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 22 Sep 2017 23:29:12 +0200 Subject: brcmsmac: make some local variables 'static const' to reduce stack size With KASAN and a couple of other patches applied, this driver is one of the few remaining ones that actually use more than 2048 bytes of kernel stack: broadcom/brcm80211/brcmsmac/phy/phy_n.c: In function 'wlc_phy_workarounds_nphy_gainctrl': broadcom/brcm80211/brcmsmac/phy/phy_n.c:16065:1: warning: the frame size of 3264 bytes is larger than 2048 bytes [-Wframe-larger-than=] broadcom/brcm80211/brcmsmac/phy/phy_n.c: In function 'wlc_phy_workarounds_nphy': broadcom/brcm80211/brcmsmac/phy/phy_n.c:17138:1: warning: the frame size of 2864 bytes is larger than 2048 bytes [-Wframe-larger-than=] Here, I'm reducing the stack size by marking as many local variables as 'static const' as I can without changing the actual code. This is the first of three patches to improve the stack usage in this driver. It would be good to have this backported to stabl kernels to get all drivers in 'allmodconfig' below the 2048 byte limit so we can turn on the frame warning again globally, but I realize that the patch is larger than the normal limit for stable backports. The other two patches do not need to be backported. Cc: Acked-by: Arend van Spriel Signed-off-by: Arnd Bergmann Signed-off-by: Kalle Valo --- .../broadcom/brcm80211/brcmsmac/phy/phy_n.c | 197 ++++++++++----------- 1 file changed, 97 insertions(+), 100 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c index b3aab2fe96eb..ef685465f80a 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c @@ -14764,8 +14764,8 @@ static void wlc_phy_ipa_restore_tx_digi_filts_nphy(struct brcms_phy *pi) } static void -wlc_phy_set_rfseq_nphy(struct brcms_phy *pi, u8 cmd, u8 *events, u8 *dlys, - u8 len) +wlc_phy_set_rfseq_nphy(struct brcms_phy *pi, u8 cmd, const u8 *events, + const u8 *dlys, u8 len) { u32 t1_offset, t2_offset; u8 ctr; @@ -15240,16 +15240,16 @@ static void wlc_phy_workarounds_nphy_gainctrl_2057_rev5(struct brcms_phy *pi) static void wlc_phy_workarounds_nphy_gainctrl_2057_rev6(struct brcms_phy *pi) { u16 currband; - s8 lna1G_gain_db_rev7[] = { 9, 14, 19, 24 }; - s8 *lna1_gain_db = NULL; - s8 *lna1_gain_db_2 = NULL; - s8 *lna2_gain_db = NULL; - s8 tiaA_gain_db_rev7[] = { -9, -6, -3, 0, 3, 3, 3, 3, 3, 3 }; - s8 *tia_gain_db; - s8 tiaA_gainbits_rev7[] = { 0, 1, 2, 3, 4, 4, 4, 4, 4, 4 }; - s8 *tia_gainbits; - u16 rfseqA_init_gain_rev7[] = { 0x624f, 0x624f }; - u16 *rfseq_init_gain; + static const s8 lna1G_gain_db_rev7[] = { 9, 14, 19, 24 }; + const s8 *lna1_gain_db = NULL; + const s8 *lna1_gain_db_2 = NULL; + const s8 *lna2_gain_db = NULL; + static const s8 tiaA_gain_db_rev7[] = { -9, -6, -3, 0, 3, 3, 3, 3, 3, 3 }; + const s8 *tia_gain_db; + static const s8 tiaA_gainbits_rev7[] = { 0, 1, 2, 3, 4, 4, 4, 4, 4, 4 }; + const s8 *tia_gainbits; + static const u16 rfseqA_init_gain_rev7[] = { 0x624f, 0x624f }; + const u16 *rfseq_init_gain; u16 init_gaincode; u16 clip1hi_gaincode; u16 clip1md_gaincode = 0; @@ -15310,10 +15310,9 @@ static void wlc_phy_workarounds_nphy_gainctrl_2057_rev6(struct brcms_phy *pi) if ((freq <= 5080) || (freq == 5825)) { - s8 lna1A_gain_db_rev7[] = { 11, 16, 20, 24 }; - s8 lna1A_gain_db_2_rev7[] = { - 11, 17, 22, 25}; - s8 lna2A_gain_db_rev7[] = { -1, 6, 10, 14 }; + static const s8 lna1A_gain_db_rev7[] = { 11, 16, 20, 24 }; + static const s8 lna1A_gain_db_2_rev7[] = { 11, 17, 22, 25}; + static const s8 lna2A_gain_db_rev7[] = { -1, 6, 10, 14 }; crsminu_th = 0x3e; lna1_gain_db = lna1A_gain_db_rev7; @@ -15321,10 +15320,9 @@ static void wlc_phy_workarounds_nphy_gainctrl_2057_rev6(struct brcms_phy *pi) lna2_gain_db = lna2A_gain_db_rev7; } else if ((freq >= 5500) && (freq <= 5700)) { - s8 lna1A_gain_db_rev7[] = { 11, 17, 21, 25 }; - s8 lna1A_gain_db_2_rev7[] = { - 12, 18, 22, 26}; - s8 lna2A_gain_db_rev7[] = { 1, 8, 12, 16 }; + static const s8 lna1A_gain_db_rev7[] = { 11, 17, 21, 25 }; + static const s8 lna1A_gain_db_2_rev7[] = { 12, 18, 22, 26}; + static const s8 lna2A_gain_db_rev7[] = { 1, 8, 12, 16 }; crsminu_th = 0x45; clip1md_gaincode_B = 0x14; @@ -15335,10 +15333,9 @@ static void wlc_phy_workarounds_nphy_gainctrl_2057_rev6(struct brcms_phy *pi) lna2_gain_db = lna2A_gain_db_rev7; } else { - s8 lna1A_gain_db_rev7[] = { 12, 18, 22, 26 }; - s8 lna1A_gain_db_2_rev7[] = { - 12, 18, 22, 26}; - s8 lna2A_gain_db_rev7[] = { -1, 6, 10, 14 }; + static const s8 lna1A_gain_db_rev7[] = { 12, 18, 22, 26 }; + static const s8 lna1A_gain_db_2_rev7[] = { 12, 18, 22, 26}; + static const s8 lna2A_gain_db_rev7[] = { -1, 6, 10, 14 }; crsminu_th = 0x41; lna1_gain_db = lna1A_gain_db_rev7; @@ -15450,65 +15447,65 @@ static void wlc_phy_workarounds_nphy_gainctrl(struct brcms_phy *pi) NPHY_RFSEQ_CMD_CLR_HIQ_DIS, NPHY_RFSEQ_CMD_SET_HPF_BW }; - u8 rfseq_updategainu_dlys[] = { 10, 30, 1 }; - s8 lna1G_gain_db[] = { 7, 11, 16, 23 }; - s8 lna1G_gain_db_rev4[] = { 8, 12, 17, 25 }; - s8 lna1G_gain_db_rev5[] = { 9, 13, 18, 26 }; - s8 lna1G_gain_db_rev6[] = { 8, 13, 18, 25 }; - s8 lna1G_gain_db_rev6_224B0[] = { 10, 14, 19, 27 }; - s8 lna1A_gain_db[] = { 7, 11, 17, 23 }; - s8 lna1A_gain_db_rev4[] = { 8, 12, 18, 23 }; - s8 lna1A_gain_db_rev5[] = { 6, 10, 16, 21 }; - s8 lna1A_gain_db_rev6[] = { 6, 10, 16, 21 }; - s8 *lna1_gain_db = NULL; - s8 lna2G_gain_db[] = { -5, 6, 10, 14 }; - s8 lna2G_gain_db_rev5[] = { -3, 7, 11, 16 }; - s8 lna2G_gain_db_rev6[] = { -5, 6, 10, 14 }; - s8 lna2G_gain_db_rev6_224B0[] = { -5, 6, 10, 15 }; - s8 lna2A_gain_db[] = { -6, 2, 6, 10 }; - s8 lna2A_gain_db_rev4[] = { -5, 2, 6, 10 }; - s8 lna2A_gain_db_rev5[] = { -7, 0, 4, 8 }; - s8 lna2A_gain_db_rev6[] = { -7, 0, 4, 8 }; - s8 *lna2_gain_db = NULL; - s8 tiaG_gain_db[] = { + static const u8 rfseq_updategainu_dlys[] = { 10, 30, 1 }; + static const s8 lna1G_gain_db[] = { 7, 11, 16, 23 }; + static const s8 lna1G_gain_db_rev4[] = { 8, 12, 17, 25 }; + static const s8 lna1G_gain_db_rev5[] = { 9, 13, 18, 26 }; + static const s8 lna1G_gain_db_rev6[] = { 8, 13, 18, 25 }; + static const s8 lna1G_gain_db_rev6_224B0[] = { 10, 14, 19, 27 }; + static const s8 lna1A_gain_db[] = { 7, 11, 17, 23 }; + static const s8 lna1A_gain_db_rev4[] = { 8, 12, 18, 23 }; + static const s8 lna1A_gain_db_rev5[] = { 6, 10, 16, 21 }; + static const s8 lna1A_gain_db_rev6[] = { 6, 10, 16, 21 }; + const s8 *lna1_gain_db = NULL; + static const s8 lna2G_gain_db[] = { -5, 6, 10, 14 }; + static const s8 lna2G_gain_db_rev5[] = { -3, 7, 11, 16 }; + static const s8 lna2G_gain_db_rev6[] = { -5, 6, 10, 14 }; + static const s8 lna2G_gain_db_rev6_224B0[] = { -5, 6, 10, 15 }; + static const s8 lna2A_gain_db[] = { -6, 2, 6, 10 }; + static const s8 lna2A_gain_db_rev4[] = { -5, 2, 6, 10 }; + static const s8 lna2A_gain_db_rev5[] = { -7, 0, 4, 8 }; + static const s8 lna2A_gain_db_rev6[] = { -7, 0, 4, 8 }; + const s8 *lna2_gain_db = NULL; + static const s8 tiaG_gain_db[] = { 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A }; - s8 tiaA_gain_db[] = { + static const s8 tiaA_gain_db[] = { 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13 }; - s8 tiaA_gain_db_rev4[] = { + static const s8 tiaA_gain_db_rev4[] = { 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d }; - s8 tiaA_gain_db_rev5[] = { + static const s8 tiaA_gain_db_rev5[] = { 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d }; - s8 tiaA_gain_db_rev6[] = { + static const s8 tiaA_gain_db_rev6[] = { 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d }; - s8 *tia_gain_db; - s8 tiaG_gainbits[] = { + const s8 *tia_gain_db; + static const s8 tiaG_gainbits[] = { 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03 }; - s8 tiaA_gainbits[] = { + static const s8 tiaA_gainbits[] = { 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06 }; - s8 tiaA_gainbits_rev4[] = { + static const s8 tiaA_gainbits_rev4[] = { 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04 }; - s8 tiaA_gainbits_rev5[] = { + static const s8 tiaA_gainbits_rev5[] = { 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04 }; - s8 tiaA_gainbits_rev6[] = { + static const s8 tiaA_gainbits_rev6[] = { 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04 }; - s8 *tia_gainbits; - s8 lpf_gain_db[] = { 0x00, 0x06, 0x0c, 0x12, 0x12, 0x12 }; - s8 lpf_gainbits[] = { 0x00, 0x01, 0x02, 0x03, 0x03, 0x03 }; - u16 rfseqG_init_gain[] = { 0x613f, 0x613f, 0x613f, 0x613f }; - u16 rfseqG_init_gain_rev4[] = { 0x513f, 0x513f, 0x513f, 0x513f }; - u16 rfseqG_init_gain_rev5[] = { 0x413f, 0x413f, 0x413f, 0x413f }; - u16 rfseqG_init_gain_rev5_elna[] = { + const s8 *tia_gainbits; + static const s8 lpf_gain_db[] = { 0x00, 0x06, 0x0c, 0x12, 0x12, 0x12 }; + static const s8 lpf_gainbits[] = { 0x00, 0x01, 0x02, 0x03, 0x03, 0x03 }; + static const u16 rfseqG_init_gain[] = { 0x613f, 0x613f, 0x613f, 0x613f }; + static const u16 rfseqG_init_gain_rev4[] = { 0x513f, 0x513f, 0x513f, 0x513f }; + static const u16 rfseqG_init_gain_rev5[] = { 0x413f, 0x413f, 0x413f, 0x413f }; + static const u16 rfseqG_init_gain_rev5_elna[] = { 0x013f, 0x013f, 0x013f, 0x013f }; - u16 rfseqG_init_gain_rev6[] = { 0x513f, 0x513f }; - u16 rfseqG_init_gain_rev6_224B0[] = { 0x413f, 0x413f }; - u16 rfseqG_init_gain_rev6_elna[] = { 0x113f, 0x113f }; - u16 rfseqA_init_gain[] = { 0x516f, 0x516f, 0x516f, 0x516f }; - u16 rfseqA_init_gain_rev4[] = { 0x614f, 0x614f, 0x614f, 0x614f }; - u16 rfseqA_init_gain_rev4_elna[] = { + static const u16 rfseqG_init_gain_rev6[] = { 0x513f, 0x513f }; + static const u16 rfseqG_init_gain_rev6_224B0[] = { 0x413f, 0x413f }; + static const u16 rfseqG_init_gain_rev6_elna[] = { 0x113f, 0x113f }; + static const u16 rfseqA_init_gain[] = { 0x516f, 0x516f, 0x516f, 0x516f }; + static const u16 rfseqA_init_gain_rev4[] = { 0x614f, 0x614f, 0x614f, 0x614f }; + static const u16 rfseqA_init_gain_rev4_elna[] = { 0x314f, 0x314f, 0x314f, 0x314f }; - u16 rfseqA_init_gain_rev5[] = { 0x714f, 0x714f, 0x714f, 0x714f }; - u16 rfseqA_init_gain_rev6[] = { 0x714f, 0x714f }; - u16 *rfseq_init_gain; + static const u16 rfseqA_init_gain_rev5[] = { 0x714f, 0x714f, 0x714f, 0x714f }; + static const u16 rfseqA_init_gain_rev6[] = { 0x714f, 0x714f }; + const u16 *rfseq_init_gain; u16 initG_gaincode = 0x627e; u16 initG_gaincode_rev4 = 0x527e; u16 initG_gaincode_rev5 = 0x427e; @@ -15538,10 +15535,10 @@ static void wlc_phy_workarounds_nphy_gainctrl(struct brcms_phy *pi) u16 clip1mdA_gaincode_rev6 = 0x2084; u16 clip1md_gaincode = 0; u16 clip1loG_gaincode = 0x0074; - u16 clip1loG_gaincode_rev5[] = { + static const u16 clip1loG_gaincode_rev5[] = { 0x0062, 0x0064, 0x006a, 0x106a, 0x106c, 0x1074, 0x107c, 0x207c }; - u16 clip1loG_gaincode_rev6[] = { + static const u16 clip1loG_gaincode_rev6[] = { 0x106a, 0x106c, 0x1074, 0x107c, 0x007e, 0x107e, 0x207e, 0x307e }; u16 clip1loG_gaincode_rev6_224B0 = 0x1074; @@ -16066,7 +16063,7 @@ static void wlc_phy_workarounds_nphy_gainctrl(struct brcms_phy *pi) static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) { - u8 rfseq_rx2tx_events[] = { + static const u8 rfseq_rx2tx_events[] = { NPHY_RFSEQ_CMD_NOP, NPHY_RFSEQ_CMD_RXG_FBW, NPHY_RFSEQ_CMD_TR_SWITCH, @@ -16076,7 +16073,7 @@ static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) NPHY_RFSEQ_CMD_EXT_PA }; u8 rfseq_rx2tx_dlys[] = { 8, 6, 6, 2, 4, 60, 1 }; - u8 rfseq_tx2rx_events[] = { + static const u8 rfseq_tx2rx_events[] = { NPHY_RFSEQ_CMD_NOP, NPHY_RFSEQ_CMD_EXT_PA, NPHY_RFSEQ_CMD_TX_GAIN, @@ -16085,8 +16082,8 @@ static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) NPHY_RFSEQ_CMD_RXG_FBW, NPHY_RFSEQ_CMD_CLR_HIQ_DIS }; - u8 rfseq_tx2rx_dlys[] = { 8, 6, 2, 4, 4, 6, 1 }; - u8 rfseq_tx2rx_events_rev3[] = { + static const u8 rfseq_tx2rx_dlys[] = { 8, 6, 2, 4, 4, 6, 1 }; + static const u8 rfseq_tx2rx_events_rev3[] = { NPHY_REV3_RFSEQ_CMD_EXT_PA, NPHY_REV3_RFSEQ_CMD_INT_PA_PU, NPHY_REV3_RFSEQ_CMD_TX_GAIN, @@ -16096,7 +16093,7 @@ static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) NPHY_REV3_RFSEQ_CMD_CLR_HIQ_DIS, NPHY_REV3_RFSEQ_CMD_END }; - u8 rfseq_tx2rx_dlys_rev3[] = { 8, 4, 2, 2, 4, 4, 6, 1 }; + static const u8 rfseq_tx2rx_dlys_rev3[] = { 8, 4, 2, 2, 4, 4, 6, 1 }; u8 rfseq_rx2tx_events_rev3[] = { NPHY_REV3_RFSEQ_CMD_NOP, NPHY_REV3_RFSEQ_CMD_RXG_FBW, @@ -16110,7 +16107,7 @@ static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) }; u8 rfseq_rx2tx_dlys_rev3[] = { 8, 6, 6, 4, 4, 18, 42, 1, 1 }; - u8 rfseq_rx2tx_events_rev3_ipa[] = { + static const u8 rfseq_rx2tx_events_rev3_ipa[] = { NPHY_REV3_RFSEQ_CMD_NOP, NPHY_REV3_RFSEQ_CMD_RXG_FBW, NPHY_REV3_RFSEQ_CMD_TR_SWITCH, @@ -16121,15 +16118,15 @@ static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) NPHY_REV3_RFSEQ_CMD_INT_PA_PU, NPHY_REV3_RFSEQ_CMD_END }; - u8 rfseq_rx2tx_dlys_rev3_ipa[] = { 8, 6, 6, 4, 4, 16, 43, 1, 1 }; - u16 rfseq_rx2tx_dacbufpu_rev7[] = { 0x10f, 0x10f }; + static const u8 rfseq_rx2tx_dlys_rev3_ipa[] = { 8, 6, 6, 4, 4, 16, 43, 1, 1 }; + static const u16 rfseq_rx2tx_dacbufpu_rev7[] = { 0x10f, 0x10f }; s16 alpha0, alpha1, alpha2; s16 beta0, beta1, beta2; u32 leg_data_weights, ht_data_weights, nss1_data_weights, stbc_data_weights; u8 chan_freq_range = 0; - u16 dac_control = 0x0002; + static const u16 dac_control = 0x0002; u16 aux_adc_vmid_rev7_core0[] = { 0x8e, 0x96, 0x96, 0x96 }; u16 aux_adc_vmid_rev7_core1[] = { 0x8f, 0x9f, 0x9f, 0x96 }; u16 aux_adc_vmid_rev4[] = { 0xa2, 0xb4, 0xb4, 0x89 }; @@ -16139,8 +16136,8 @@ static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) u16 aux_adc_gain_rev4[] = { 0x02, 0x02, 0x02, 0x00 }; u16 aux_adc_gain_rev3[] = { 0x02, 0x02, 0x02, 0x00 }; u16 *aux_adc_gain; - u16 sk_adc_vmid[] = { 0xb4, 0xb4, 0xb4, 0x24 }; - u16 sk_adc_gain[] = { 0x02, 0x02, 0x02, 0x02 }; + static const u16 sk_adc_vmid[] = { 0xb4, 0xb4, 0xb4, 0x24 }; + static const u16 sk_adc_gain[] = { 0x02, 0x02, 0x02, 0x02 }; s32 min_nvar_val = 0x18d; s32 min_nvar_offset_6mbps = 20; u8 pdetrange; @@ -16151,9 +16148,9 @@ static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) u16 rfseq_rx2tx_lpf_h_hpc_rev7 = 0x77; u16 rfseq_tx2rx_lpf_h_hpc_rev7 = 0x77; u16 rfseq_pktgn_lpf_h_hpc_rev7 = 0x77; - u16 rfseq_htpktgn_lpf_hpc_rev7[] = { 0x77, 0x11, 0x11 }; - u16 rfseq_pktgn_lpf_hpc_rev7[] = { 0x11, 0x11 }; - u16 rfseq_cckpktgn_lpf_hpc_rev7[] = { 0x11, 0x11 }; + static const u16 rfseq_htpktgn_lpf_hpc_rev7[] = { 0x77, 0x11, 0x11 }; + static const u16 rfseq_pktgn_lpf_hpc_rev7[] = { 0x11, 0x11 }; + static const u16 rfseq_cckpktgn_lpf_hpc_rev7[] = { 0x11, 0x11 }; u16 ipalvlshift_3p3_war_en = 0; u16 rccal_bcap_val, rccal_scap_val; u16 rccal_tx20_11b_bcap = 0; @@ -24291,13 +24288,13 @@ static void wlc_phy_update_txcal_ladder_nphy(struct brcms_phy *pi, u16 core) u16 bbmult; u16 tblentry; - struct nphy_txiqcal_ladder ladder_lo[] = { + static const struct nphy_txiqcal_ladder ladder_lo[] = { {3, 0}, {4, 0}, {6, 0}, {9, 0}, {13, 0}, {18, 0}, {25, 0}, {25, 1}, {25, 2}, {25, 3}, {25, 4}, {25, 5}, {25, 6}, {25, 7}, {35, 7}, {50, 7}, {71, 7}, {100, 7} }; - struct nphy_txiqcal_ladder ladder_iq[] = { + static const struct nphy_txiqcal_ladder ladder_iq[] = { {3, 0}, {4, 0}, {6, 0}, {9, 0}, {13, 0}, {18, 0}, {25, 0}, {35, 0}, {50, 0}, {71, 0}, {100, 0}, {100, 1}, {100, 2}, {100, 3}, {100, 4}, {100, 5}, {100, 6}, {100, 7} @@ -25773,67 +25770,67 @@ wlc_phy_cal_txiqlo_nphy(struct brcms_phy *pi, struct nphy_txgains target_gain, u16 cal_gain[2]; struct nphy_iqcal_params cal_params[2]; u32 tbl_len; - void *tbl_ptr; + const void *tbl_ptr; bool ladder_updated[2]; u8 mphase_cal_lastphase = 0; int bcmerror = 0; bool phyhang_avoid_state = false; - u16 tbl_tx_iqlo_cal_loft_ladder_20[] = { + static const u16 tbl_tx_iqlo_cal_loft_ladder_20[] = { 0x0300, 0x0500, 0x0700, 0x0900, 0x0d00, 0x1100, 0x1900, 0x1901, 0x1902, 0x1903, 0x1904, 0x1905, 0x1906, 0x1907, 0x2407, 0x3207, 0x4607, 0x6407 }; - u16 tbl_tx_iqlo_cal_iqimb_ladder_20[] = { + static const u16 tbl_tx_iqlo_cal_iqimb_ladder_20[] = { 0x0200, 0x0300, 0x0600, 0x0900, 0x0d00, 0x1100, 0x1900, 0x2400, 0x3200, 0x4600, 0x6400, 0x6401, 0x6402, 0x6403, 0x6404, 0x6405, 0x6406, 0x6407 }; - u16 tbl_tx_iqlo_cal_loft_ladder_40[] = { + static const u16 tbl_tx_iqlo_cal_loft_ladder_40[] = { 0x0200, 0x0300, 0x0400, 0x0700, 0x0900, 0x0c00, 0x1200, 0x1201, 0x1202, 0x1203, 0x1204, 0x1205, 0x1206, 0x1207, 0x1907, 0x2307, 0x3207, 0x4707 }; - u16 tbl_tx_iqlo_cal_iqimb_ladder_40[] = { + static const u16 tbl_tx_iqlo_cal_iqimb_ladder_40[] = { 0x0100, 0x0200, 0x0400, 0x0700, 0x0900, 0x0c00, 0x1200, 0x1900, 0x2300, 0x3200, 0x4700, 0x4701, 0x4702, 0x4703, 0x4704, 0x4705, 0x4706, 0x4707 }; - u16 tbl_tx_iqlo_cal_startcoefs[] = { + static const u16 tbl_tx_iqlo_cal_startcoefs[] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; - u16 tbl_tx_iqlo_cal_cmds_fullcal[] = { + static const u16 tbl_tx_iqlo_cal_cmds_fullcal[] = { 0x8123, 0x8264, 0x8086, 0x8245, 0x8056, 0x9123, 0x9264, 0x9086, 0x9245, 0x9056 }; - u16 tbl_tx_iqlo_cal_cmds_recal[] = { + static const u16 tbl_tx_iqlo_cal_cmds_recal[] = { 0x8101, 0x8253, 0x8053, 0x8234, 0x8034, 0x9101, 0x9253, 0x9053, 0x9234, 0x9034 }; - u16 tbl_tx_iqlo_cal_startcoefs_nphyrev3[] = { + static const u16 tbl_tx_iqlo_cal_startcoefs_nphyrev3[] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; - u16 tbl_tx_iqlo_cal_cmds_fullcal_nphyrev3[] = { + static const u16 tbl_tx_iqlo_cal_cmds_fullcal_nphyrev3[] = { 0x8434, 0x8334, 0x8084, 0x8267, 0x8056, 0x8234, 0x9434, 0x9334, 0x9084, 0x9267, 0x9056, 0x9234 }; - u16 tbl_tx_iqlo_cal_cmds_recal_nphyrev3[] = { + static const u16 tbl_tx_iqlo_cal_cmds_recal_nphyrev3[] = { 0x8423, 0x8323, 0x8073, 0x8256, 0x8045, 0x8223, 0x9423, 0x9323, 0x9073, 0x9256, 0x9045, 0x9223 }; -- cgit v1.2.3 From baf41bc35f2bdb953da532645fd82009c2d12acf Mon Sep 17 00:00:00 2001 From: Shaul Triebitz Date: Wed, 13 Sep 2017 16:46:14 +0300 Subject: iwlwifi: mvm: do not print security error in monitor mode In monitor mode we are not expected to decrypt encrypted packets (not having the keys). Hence we are expected to get an unknown rx security status. Keeping the print in monitor mode causes a print for each captured packet flooding the dmesg. Signed-off-by: Shaul Triebitz Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 7 +++++++ drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 3 +++ drivers/net/wireless/intel/iwlwifi/mvm/rx.c | 4 +++- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 4 +++- 4 files changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 3bcaa82f59b2..a9ac872226fd 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -1077,6 +1077,7 @@ static void iwl_mvm_restart_cleanup(struct iwl_mvm *mvm) mvm->vif_count = 0; mvm->rx_ba_sessions = 0; mvm->fwrt.dump.conf = FW_DBG_INVALID; + mvm->monitor_on = false; /* keep statistics ticking */ iwl_mvm_accu_radio_stats(mvm); @@ -1437,6 +1438,9 @@ static int iwl_mvm_mac_add_interface(struct ieee80211_hw *hw, mvm->p2p_device_vif = vif; } + if (vif->type == NL80211_IFTYPE_MONITOR) + mvm->monitor_on = true; + iwl_mvm_vif_dbgfs_register(mvm, vif); goto out_unlock; @@ -1526,6 +1530,9 @@ static void iwl_mvm_mac_remove_interface(struct ieee80211_hw *hw, iwl_mvm_power_update_mac(mvm); iwl_mvm_mac_ctxt_remove(mvm, vif); + if (vif->type == NL80211_IFTYPE_MONITOR) + mvm->monitor_on = false; + out_release: mutex_unlock(&mvm->mutex); } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 83303bac0e4b..d75da37a79f3 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -1015,6 +1015,9 @@ struct iwl_mvm { bool drop_bcn_ap_mode; struct delayed_work cs_tx_unblock_dwork; + + /* does a monitor vif exist (only one can exist hence bool) */ + bool monitor_on; #ifdef CONFIG_ACPI struct iwl_mvm_sar_profile sar_profiles[IWL_MVM_SAR_PROFILE_NUM]; struct iwl_mvm_geo_profile geo_profiles[IWL_NUM_GEO_PROFILES]; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c index 184c749766f2..2d14a58cbdd7 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c @@ -244,7 +244,9 @@ static u32 iwl_mvm_set_mac80211_rx_flag(struct iwl_mvm *mvm, return 0; default: - IWL_ERR(mvm, "Unhandled alg: 0x%x\n", rx_pkt_status); + /* Expected in monitor (not having the keys) */ + if (!mvm->monitor_on) + IWL_ERR(mvm, "Unhandled alg: 0x%x\n", rx_pkt_status); } return 0; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 77f77bc5d083..248699c2c4bf 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -277,7 +277,9 @@ static int iwl_mvm_rx_crypto(struct iwl_mvm *mvm, struct ieee80211_hdr *hdr, stats->flag |= RX_FLAG_DECRYPTED; return 0; default: - IWL_ERR(mvm, "Unhandled alg: 0x%x\n", status); + /* Expected in monitor (not having the keys) */ + if (!mvm->monitor_on) + IWL_ERR(mvm, "Unhandled alg: 0x%x\n", status); } return 0; -- cgit v1.2.3 From 1efc3843a4ee1331bc20df685a79b47fa0f547d2 Mon Sep 17 00:00:00 2001 From: Golan Ben Ami Date: Tue, 12 Sep 2017 12:32:25 +0300 Subject: iwlwifi: stop dbgc recording before stopping DMA Today we stop the device and the DMA without stopping the dbgc recording before. This causes host crashes when the DMA rate is high. Stop dbgc recording when clearing the fw debug configuration to fix this. Signed-off-by: Golan Ben Ami Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 7 ++----- drivers/net/wireless/intel/iwlwifi/fw/dbg.h | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index 6afc7a799892..f5dd7d83cd0a 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -1086,7 +1086,7 @@ void iwl_fw_error_dump_wk(struct work_struct *work) if (fwrt->trans->cfg->device_family == IWL_DEVICE_FAMILY_7000) { /* stop recording */ - iwl_set_bits_prph(fwrt->trans, MON_BUFF_SAMPLE_CTL, 0x100); + iwl_fw_dbg_stop_recording(fwrt); iwl_fw_error_dump(fwrt); @@ -1104,10 +1104,7 @@ void iwl_fw_error_dump_wk(struct work_struct *work) u32 in_sample = iwl_read_prph(fwrt->trans, DBGC_IN_SAMPLE); u32 out_ctrl = iwl_read_prph(fwrt->trans, DBGC_OUT_CTRL); - /* stop recording */ - iwl_write_prph(fwrt->trans, DBGC_IN_SAMPLE, 0); - udelay(100); - iwl_write_prph(fwrt->trans, DBGC_OUT_CTRL, 0); + iwl_fw_dbg_stop_recording(fwrt); /* wait before we collect the data till the DBGC stop */ udelay(500); diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h index 0f810ea89d31..9c889a32fe24 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h @@ -68,6 +68,8 @@ #include #include #include "runtime.h" +#include "iwl-prph.h" +#include "iwl-io.h" #include "file.h" #include "error-dump.h" @@ -194,8 +196,21 @@ _iwl_fw_dbg_trigger_simple_stop(struct iwl_fw_runtime *fwrt, iwl_fw_dbg_get_trigger((fwrt)->fw,\ (trig))) +static inline void iwl_fw_dbg_stop_recording(struct iwl_fw_runtime *fwrt) +{ + if (fwrt->trans->cfg->device_family == IWL_DEVICE_FAMILY_7000) { + iwl_set_bits_prph(fwrt->trans, MON_BUFF_SAMPLE_CTL, 0x100); + } else { + iwl_write_prph(fwrt->trans, DBGC_IN_SAMPLE, 0); + udelay(100); + iwl_write_prph(fwrt->trans, DBGC_OUT_CTRL, 0); + } +} + static inline void iwl_fw_dump_conf_clear(struct iwl_fw_runtime *fwrt) { + iwl_fw_dbg_stop_recording(fwrt); + fwrt->dump.conf = FW_DBG_INVALID; } -- cgit v1.2.3 From 1442a9a9f2e441b15393c2d89286303b103a57e8 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Mon, 18 Sep 2017 14:39:26 +0300 Subject: iwlwifi: mvm: return -ENODATA when reading the temperature with the FW down It seems that libsensors treats -EIO as a special non-recoverable failure when it tries to read the temperature while the firmware is not running. To solve that, change the error code to a milder -ENODATA. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=196941 Fixes: c221daf219b1 ("iwlwifi: mvm: add registration to thermal zone") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/tt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c index 4d907f60bce9..1232f63278eb 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c @@ -631,7 +631,7 @@ static int iwl_mvm_tzone_get_temp(struct thermal_zone_device *device, if (!iwl_mvm_firmware_running(mvm) || mvm->fwrt.cur_fw_img != IWL_UCODE_REGULAR) { - ret = -EIO; + ret = -ENODATA; goto out; } -- cgit v1.2.3 From d8c73e455d7b973d1346bb5632b4a41819b090c9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 21 Sep 2017 11:03:50 +0200 Subject: iwlwifi: nvm-parse: unify channel flags printing The current channel flags printing is very strange and messy, in LAR we sometimes print the channel number and sometimes the frequency, in both we print a calculated value (whether ad-hoc is supported or not) etc. Unify all this to * print the channel number, not the frequency * remove the band print (2.4/5.2 GHz, it's obvious) * remove the calculated Ad-Hoc print Doing all of this also gets the length of the string to a max of 101 characters, which is below the max of 110 for tracing, and thus avoids the warning that came up on certain channels with certain flag combinations. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c | 98 +++++++++------------- 1 file changed, 39 insertions(+), 59 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index 3014beef4873..35638404c24e 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -206,8 +206,36 @@ enum iwl_nvm_channel_flags { NVM_CHANNEL_DC_HIGH = BIT(12), }; +static inline void iwl_nvm_print_channel_flags(struct device *dev, u32 level, + int chan, u16 flags) +{ #define CHECK_AND_PRINT_I(x) \ - ((ch_flags & NVM_CHANNEL_##x) ? # x " " : "") + ((flags & NVM_CHANNEL_##x) ? " " #x : "") + + if (!(flags & NVM_CHANNEL_VALID)) { + IWL_DEBUG_DEV(dev, level, "Ch. %d: 0x%x: No traffic\n", + chan, flags); + return; + } + + /* Note: already can print up to 101 characters, 110 is the limit! */ + IWL_DEBUG_DEV(dev, level, + "Ch. %d: 0x%x:%s%s%s%s%s%s%s%s%s%s%s%s\n", + chan, flags, + CHECK_AND_PRINT_I(VALID), + CHECK_AND_PRINT_I(IBSS), + CHECK_AND_PRINT_I(ACTIVE), + CHECK_AND_PRINT_I(RADAR), + CHECK_AND_PRINT_I(INDOOR_ONLY), + CHECK_AND_PRINT_I(GO_CONCURRENT), + CHECK_AND_PRINT_I(UNIFORM), + CHECK_AND_PRINT_I(20MHZ), + CHECK_AND_PRINT_I(40MHZ), + CHECK_AND_PRINT_I(80MHZ), + CHECK_AND_PRINT_I(160MHZ), + CHECK_AND_PRINT_I(DC_HIGH)); +#undef CHECK_AND_PRINT_I +} static u32 iwl_get_channel_flags(u8 ch_num, int ch_idx, bool is_5ghz, u16 nvm_flags, const struct iwl_cfg *cfg) @@ -302,12 +330,8 @@ static int iwl_init_channel_map(struct device *dev, const struct iwl_cfg *cfg, * supported, hence we still want to add them to * the list of supported channels to cfg80211. */ - IWL_DEBUG_EEPROM(dev, - "Ch. %d Flags %x [%sGHz] - No traffic\n", - nvm_chan[ch_idx], - ch_flags, - (ch_idx >= num_2ghz_channels) ? - "5.2" : "2.4"); + iwl_nvm_print_channel_flags(dev, IWL_DL_EEPROM, + nvm_chan[ch_idx], ch_flags); continue; } @@ -337,27 +361,10 @@ static int iwl_init_channel_map(struct device *dev, const struct iwl_cfg *cfg, else channel->flags = 0; - IWL_DEBUG_EEPROM(dev, - "Ch. %d [%sGHz] flags 0x%x %s%s%s%s%s%s%s%s%s%s%s%s(%ddBm): Ad-Hoc %ssupported\n", - channel->hw_value, - is_5ghz ? "5.2" : "2.4", - ch_flags, - CHECK_AND_PRINT_I(VALID), - CHECK_AND_PRINT_I(IBSS), - CHECK_AND_PRINT_I(ACTIVE), - CHECK_AND_PRINT_I(RADAR), - CHECK_AND_PRINT_I(INDOOR_ONLY), - CHECK_AND_PRINT_I(GO_CONCURRENT), - CHECK_AND_PRINT_I(UNIFORM), - CHECK_AND_PRINT_I(20MHZ), - CHECK_AND_PRINT_I(40MHZ), - CHECK_AND_PRINT_I(80MHZ), - CHECK_AND_PRINT_I(160MHZ), - CHECK_AND_PRINT_I(DC_HIGH), - channel->max_power, - ((ch_flags & NVM_CHANNEL_IBSS) && - !(ch_flags & NVM_CHANNEL_RADAR)) - ? "" : "not "); + iwl_nvm_print_channel_flags(dev, IWL_DL_EEPROM, + channel->hw_value, ch_flags); + IWL_DEBUG_EEPROM(dev, "Ch. %d: %ddBm\n", + channel->hw_value, channel->max_power); } return n_channels; @@ -873,12 +880,8 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, new_rule = false; if (!(ch_flags & NVM_CHANNEL_VALID)) { - IWL_DEBUG_DEV(dev, IWL_DL_LAR, - "Ch. %d Flags %x [%sGHz] - No traffic\n", - nvm_chan[ch_idx], - ch_flags, - (ch_idx >= NUM_2GHZ_CHANNELS) ? - "5.2" : "2.4"); + iwl_nvm_print_channel_flags(dev, IWL_DL_LAR, + nvm_chan[ch_idx], ch_flags); continue; } @@ -914,31 +917,8 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, prev_center_freq = center_freq; prev_reg_rule_flags = reg_rule_flags; - IWL_DEBUG_DEV(dev, IWL_DL_LAR, - "Ch. %d [%sGHz] %s%s%s%s%s%s%s%s%s%s%s%s(0x%02x)\n", - center_freq, - band == NL80211_BAND_5GHZ ? "5.2" : "2.4", - CHECK_AND_PRINT_I(VALID), - CHECK_AND_PRINT_I(IBSS), - CHECK_AND_PRINT_I(ACTIVE), - CHECK_AND_PRINT_I(RADAR), - CHECK_AND_PRINT_I(INDOOR_ONLY), - CHECK_AND_PRINT_I(GO_CONCURRENT), - CHECK_AND_PRINT_I(UNIFORM), - CHECK_AND_PRINT_I(20MHZ), - CHECK_AND_PRINT_I(40MHZ), - CHECK_AND_PRINT_I(80MHZ), - CHECK_AND_PRINT_I(160MHZ), - CHECK_AND_PRINT_I(DC_HIGH), - ch_flags); - IWL_DEBUG_DEV(dev, IWL_DL_LAR, - "Ch. %d [%sGHz] reg_flags 0x%x: %s\n", - center_freq, - band == NL80211_BAND_5GHZ ? "5.2" : "2.4", - reg_rule_flags, - ((ch_flags & NVM_CHANNEL_ACTIVE) && - !(ch_flags & NVM_CHANNEL_RADAR)) - ? "Ad-Hoc" : ""); + iwl_nvm_print_channel_flags(dev, IWL_DL_LAR, + nvm_chan[ch_idx], ch_flags); } regd->n_reg_rules = valid_rules; -- cgit v1.2.3 From 44fd09dad5d2b78efbabbbbf623774e561e36cca Mon Sep 17 00:00:00 2001 From: Chaya Rachel Ivgi Date: Mon, 4 Sep 2017 14:40:06 +0300 Subject: iwlwifi: nvm: set the correct offsets to 3168 series The driver currently handles two NVM formats, one for 7000 family and below, and one for 8000 family and above. The 3168 series uses something in between, so currently the driver uses incorrect offsets for it. Fix the incorrect offsets. Fixes: c4836b056d83 ("iwlwifi: Add PCI IDs for the new 3168 series") Signed-off-by: Chaya Rachel Ivgi Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/cfg/7000.c | 1 + drivers/net/wireless/intel/iwlwifi/cfg/8000.c | 2 +- drivers/net/wireless/intel/iwlwifi/cfg/9000.c | 2 +- drivers/net/wireless/intel/iwlwifi/cfg/a000.c | 2 +- .../net/wireless/intel/iwlwifi/fw/api/nvm-reg.h | 2 ++ drivers/net/wireless/intel/iwlwifi/iwl-config.h | 16 +++++++-- drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c | 39 +++++++++++++--------- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/nvm.c | 21 ++++++++---- 9 files changed, 59 insertions(+), 28 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/7000.c b/drivers/net/wireless/intel/iwlwifi/cfg/7000.c index 45e2efc70d19..ce741beec1fc 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/7000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/7000.c @@ -309,6 +309,7 @@ const struct iwl_cfg iwl3168_2ac_cfg = { .nvm_calib_ver = IWL3168_TX_POWER_VERSION, .pwr_tx_backoffs = iwl7265_pwr_tx_backoffs, .dccm_len = IWL7265_DCCM_LEN, + .nvm_type = IWL_NVM_SDP, }; const struct iwl_cfg iwl7265_2ac_cfg = { diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/8000.c b/drivers/net/wireless/intel/iwlwifi/cfg/8000.c index 2e6c52664cee..c2a5936ccede 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/8000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/8000.c @@ -164,7 +164,7 @@ static const struct iwl_tt_params iwl8000_tt_params = { .default_nvm_file_C_step = DEFAULT_NVM_FILE_FAMILY_8000C, \ .thermal_params = &iwl8000_tt_params, \ .apmg_not_supported = true, \ - .ext_nvm = true, \ + .nvm_type = IWL_NVM_EXT, \ .dbgc_supported = true #define IWL_DEVICE_8000 \ diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/9000.c b/drivers/net/wireless/intel/iwlwifi/cfg/9000.c index 2babe0a1f18b..e8b5ff42f5a8 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/9000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/9000.c @@ -148,7 +148,7 @@ static const struct iwl_tt_params iwl9000_tt_params = { .vht_mu_mimo_supported = true, \ .mac_addr_from_csr = true, \ .rf_id = true, \ - .ext_nvm = true, \ + .nvm_type = IWL_NVM_EXT, \ .dbgc_supported = true const struct iwl_cfg iwl9160_2ac_cfg = { diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/a000.c b/drivers/net/wireless/intel/iwlwifi/cfg/a000.c index 76ba1f8bc72f..a440140ed8dd 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/a000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/a000.c @@ -133,7 +133,7 @@ static const struct iwl_ht_params iwl_a000_ht_params = { .use_tfh = true, \ .rf_id = true, \ .gen2 = true, \ - .ext_nvm = true, \ + .nvm_type = IWL_NVM_EXT, \ .dbgc_supported = true const struct iwl_cfg iwla000_2ac_cfg_hr = { diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h b/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h index 00bc7a25dece..3fd07bc80f54 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h @@ -108,6 +108,7 @@ enum iwl_nvm_access_target { * @NVM_SECTION_TYPE_REGULATORY: regulatory section * @NVM_SECTION_TYPE_CALIBRATION: calibration section * @NVM_SECTION_TYPE_PRODUCTION: production section + * @NVM_SECTION_TYPE_REGULATORY_SDP: regulatory section used by 3168 series * @NVM_SECTION_TYPE_MAC_OVERRIDE: MAC override section * @NVM_SECTION_TYPE_PHY_SKU: PHY SKU section * @NVM_MAX_NUM_SECTIONS: number of sections @@ -117,6 +118,7 @@ enum iwl_nvm_section_type { NVM_SECTION_TYPE_REGULATORY = 3, NVM_SECTION_TYPE_CALIBRATION = 4, NVM_SECTION_TYPE_PRODUCTION = 5, + NVM_SECTION_TYPE_REGULATORY_SDP = 8, NVM_SECTION_TYPE_MAC_OVERRIDE = 11, NVM_SECTION_TYPE_PHY_SKU = 12, NVM_MAX_NUM_SECTIONS = 13, diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index 3e057b539d5b..71cb1ecde0f7 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -108,6 +108,18 @@ enum iwl_led_mode { IWL_LED_DISABLE, }; +/** + * enum iwl_nvm_type - nvm formats + * @IWL_NVM: the regular format + * @IWL_NVM_EXT: extended NVM format + * @IWL_NVM_SDP: NVM format used by 3168 series + */ +enum iwl_nvm_type { + IWL_NVM, + IWL_NVM_EXT, + IWL_NVM_SDP, +}; + /* * This is the threshold value of plcp error rate per 100mSecs. It is * used to set and check for the validity of plcp_delta. @@ -320,7 +332,7 @@ struct iwl_pwr_tx_backoff { * @integrated: discrete or integrated * @gen2: a000 and on transport operation * @cdb: CDB support - * @ext_nvm: extended NVM format + * @nvm_type: see &enum iwl_nvm_type * * We enable the driver to be backward compatible wrt. hardware features. * API differences in uCode shouldn't be handled here but through TLVs @@ -342,6 +354,7 @@ struct iwl_cfg { const struct iwl_tt_params *thermal_params; enum iwl_device_family device_family; enum iwl_led_mode led_mode; + enum iwl_nvm_type nvm_type; u32 max_data_size; u32 max_inst_size; netdev_features_t features; @@ -369,7 +382,6 @@ struct iwl_cfg { use_tfh:1, gen2:1, cdb:1, - ext_nvm:1, dbgc_supported:1; u8 valid_tx_ant; u8 valid_rx_ant; diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index 35638404c24e..c3a5d8ccc95e 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -77,7 +77,7 @@ #include "iwl-csr.h" /* NVM offsets (in words) definitions */ -enum wkp_nvm_offsets { +enum nvm_offsets { /* NVM HW-Section offset (in words) definitions */ SUBSYSTEM_ID = 0x0A, HW_ADDR = 0x15, @@ -92,7 +92,10 @@ enum wkp_nvm_offsets { /* NVM calibration section offset (in words) definitions */ NVM_CALIB_SECTION = 0x2B8, - XTAL_CALIB = 0x316 - NVM_CALIB_SECTION + XTAL_CALIB = 0x316 - NVM_CALIB_SECTION, + + /* NVM REGULATORY -Section offset (in words) definitions */ + NVM_CHANNELS_SDP = 0, }; enum ext_nvm_offsets { @@ -243,7 +246,7 @@ static u32 iwl_get_channel_flags(u8 ch_num, int ch_idx, bool is_5ghz, u32 flags = IEEE80211_CHAN_NO_HT40; u32 last_5ghz_ht = LAST_5GHZ_HT; - if (cfg->ext_nvm) + if (cfg->nvm_type == IWL_NVM_EXT) last_5ghz_ht = LAST_5GHZ_HT_FAMILY_8000; if (!is_5ghz && (nvm_flags & NVM_CHANNEL_40MHZ)) { @@ -296,7 +299,7 @@ static int iwl_init_channel_map(struct device *dev, const struct iwl_cfg *cfg, int num_of_ch, num_2ghz_channels; const u8 *nvm_chan; - if (!cfg->ext_nvm) { + if (cfg->nvm_type != IWL_NVM_EXT) { num_of_ch = IWL_NUM_CHANNELS; nvm_chan = &iwl_nvm_channels[0]; num_2ghz_channels = NUM_2GHZ_CHANNELS; @@ -491,7 +494,7 @@ IWL_EXPORT_SYMBOL(iwl_init_sbands); static int iwl_get_sku(const struct iwl_cfg *cfg, const __le16 *nvm_sw, const __le16 *phy_sku) { - if (!cfg->ext_nvm) + if (cfg->nvm_type != IWL_NVM_EXT) return le16_to_cpup(nvm_sw + SKU); return le32_to_cpup((__le32 *)(phy_sku + SKU_FAMILY_8000)); @@ -499,7 +502,7 @@ static int iwl_get_sku(const struct iwl_cfg *cfg, const __le16 *nvm_sw, static int iwl_get_nvm_version(const struct iwl_cfg *cfg, const __le16 *nvm_sw) { - if (!cfg->ext_nvm) + if (cfg->nvm_type != IWL_NVM_EXT) return le16_to_cpup(nvm_sw + NVM_VERSION); else return le32_to_cpup((__le32 *)(nvm_sw + @@ -509,7 +512,7 @@ static int iwl_get_nvm_version(const struct iwl_cfg *cfg, const __le16 *nvm_sw) static int iwl_get_radio_cfg(const struct iwl_cfg *cfg, const __le16 *nvm_sw, const __le16 *phy_sku) { - if (!cfg->ext_nvm) + if (cfg->nvm_type != IWL_NVM_EXT) return le16_to_cpup(nvm_sw + RADIO_CFG); return le32_to_cpup((__le32 *)(phy_sku + RADIO_CFG_FAMILY_EXT_NVM)); @@ -520,7 +523,7 @@ static int iwl_get_n_hw_addrs(const struct iwl_cfg *cfg, const __le16 *nvm_sw) { int n_hw_addr; - if (!cfg->ext_nvm) + if (cfg->nvm_type != IWL_NVM_EXT) return le16_to_cpup(nvm_sw + N_HW_ADDRS); n_hw_addr = le32_to_cpup((__le32 *)(nvm_sw + N_HW_ADDRS_FAMILY_8000)); @@ -532,7 +535,7 @@ static void iwl_set_radio_cfg(const struct iwl_cfg *cfg, struct iwl_nvm_data *data, u32 radio_cfg) { - if (!cfg->ext_nvm) { + if (cfg->nvm_type != IWL_NVM_EXT) { data->radio_cfg_type = NVM_RF_CFG_TYPE_MSK(radio_cfg); data->radio_cfg_step = NVM_RF_CFG_STEP_MSK(radio_cfg); data->radio_cfg_dash = NVM_RF_CFG_DASH_MSK(radio_cfg); @@ -641,7 +644,7 @@ static int iwl_set_hw_address(struct iwl_trans *trans, { if (cfg->mac_addr_from_csr) { iwl_set_hw_address_from_csr(trans, data); - } else if (!cfg->ext_nvm) { + } else if (cfg->nvm_type != IWL_NVM_EXT) { const u8 *hw_addr = (const u8 *)(nvm_hw + HW_ADDR); /* The byte order is little endian 16 bit, meaning 214365 */ @@ -713,7 +716,7 @@ iwl_parse_nvm_data(struct iwl_trans *trans, const struct iwl_cfg *cfg, u16 lar_config; const __le16 *ch_section; - if (!cfg->ext_nvm) + if (cfg->nvm_type != IWL_NVM_EXT) data = kzalloc(sizeof(*data) + sizeof(struct ieee80211_channel) * IWL_NUM_CHANNELS, @@ -747,7 +750,7 @@ iwl_parse_nvm_data(struct iwl_trans *trans, const struct iwl_cfg *cfg, data->n_hw_addrs = iwl_get_n_hw_addrs(cfg, nvm_sw); - if (!cfg->ext_nvm) { + if (cfg->nvm_type != IWL_NVM_EXT) { /* Checking for required sections */ if (!nvm_calib) { IWL_ERR(trans, @@ -755,11 +758,15 @@ iwl_parse_nvm_data(struct iwl_trans *trans, const struct iwl_cfg *cfg, kfree(data); return NULL; } + + ch_section = cfg->nvm_type == IWL_NVM_SDP ? + ®ulatory[NVM_CHANNELS_SDP] : + &nvm_sw[NVM_CHANNELS]; + /* in family 8000 Xtal calibration values moved to OTP */ data->xtal_calib[0] = *(nvm_calib + XTAL_CALIB); data->xtal_calib[1] = *(nvm_calib + XTAL_CALIB + 1); lar_enabled = true; - ch_section = &nvm_sw[NVM_CHANNELS]; } else { u16 lar_offset = data->nvm_version < 0xE39 ? NVM_LAR_OFFSET_OLD : @@ -793,7 +800,7 @@ static u32 iwl_nvm_get_regdom_bw_flags(const u8 *nvm_chan, u32 flags = NL80211_RRF_NO_HT40; u32 last_5ghz_ht = LAST_5GHZ_HT; - if (cfg->ext_nvm) + if (cfg->nvm_type == IWL_NVM_EXT) last_5ghz_ht = LAST_5GHZ_HT_FAMILY_8000; if (ch_idx < NUM_2GHZ_CHANNELS && @@ -841,7 +848,7 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, int ch_idx; u16 ch_flags; u32 reg_rule_flags, prev_reg_rule_flags = 0; - const u8 *nvm_chan = cfg->ext_nvm ? + const u8 *nvm_chan = cfg->nvm_type == IWL_NVM_EXT ? iwl_ext_nvm_channels : iwl_nvm_channels; struct ieee80211_regdomain *regd; int size_of_regd; @@ -850,7 +857,7 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, int center_freq, prev_center_freq = 0; int valid_rules = 0; bool new_rule; - int max_num_ch = cfg->ext_nvm ? + int max_num_ch = cfg->nvm_type == IWL_NVM_EXT ? IWL_NUM_CHANNELS_EXT : IWL_NUM_CHANNELS; if (WARN_ON_ONCE(num_of_ch > NL80211_MAX_SUPP_REG_RULES)) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index d75da37a79f3..949e63418299 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -1162,7 +1162,7 @@ static inline bool iwl_mvm_is_lar_supported(struct iwl_mvm *mvm) * Enable LAR only if it is supported by the FW (TLV) && * enabled in the NVM */ - if (mvm->cfg->ext_nvm) + if (mvm->cfg->nvm_type == IWL_NVM_EXT) return nvm_lar && tlv_lar; else return tlv_lar; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c index 422aa6be9932..fb25b6f29323 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c @@ -295,18 +295,24 @@ iwl_parse_nvm_sections(struct iwl_mvm *mvm) const __be16 *hw; const __le16 *sw, *calib, *regulatory, *mac_override, *phy_sku; bool lar_enabled; + int regulatory_type; /* Checking for required sections */ - if (!mvm->trans->cfg->ext_nvm) { + if (mvm->trans->cfg->nvm_type != IWL_NVM_EXT) { if (!mvm->nvm_sections[NVM_SECTION_TYPE_SW].data || !mvm->nvm_sections[mvm->cfg->nvm_hw_section_num].data) { IWL_ERR(mvm, "Can't parse empty OTP/NVM sections\n"); return NULL; } } else { + if (mvm->trans->cfg->nvm_type == IWL_NVM_SDP) + regulatory_type = NVM_SECTION_TYPE_REGULATORY_SDP; + else + regulatory_type = NVM_SECTION_TYPE_REGULATORY; + /* SW and REGULATORY sections are mandatory */ if (!mvm->nvm_sections[NVM_SECTION_TYPE_SW].data || - !mvm->nvm_sections[NVM_SECTION_TYPE_REGULATORY].data) { + !mvm->nvm_sections[regulatory_type].data) { IWL_ERR(mvm, "Can't parse empty family 8000 OTP/NVM sections\n"); return NULL; @@ -330,11 +336,14 @@ iwl_parse_nvm_sections(struct iwl_mvm *mvm) hw = (const __be16 *)sections[mvm->cfg->nvm_hw_section_num].data; sw = (const __le16 *)sections[NVM_SECTION_TYPE_SW].data; calib = (const __le16 *)sections[NVM_SECTION_TYPE_CALIBRATION].data; - regulatory = (const __le16 *)sections[NVM_SECTION_TYPE_REGULATORY].data; mac_override = (const __le16 *)sections[NVM_SECTION_TYPE_MAC_OVERRIDE].data; phy_sku = (const __le16 *)sections[NVM_SECTION_TYPE_PHY_SKU].data; + regulatory = mvm->trans->cfg->nvm_type == IWL_NVM_SDP ? + (const __le16 *)sections[NVM_SECTION_TYPE_REGULATORY_SDP].data : + (const __le16 *)sections[NVM_SECTION_TYPE_REGULATORY].data; + lar_enabled = !iwlwifi_mod_params.lar_disable && fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_LAR_SUPPORT); @@ -394,7 +403,7 @@ int iwl_mvm_read_external_nvm(struct iwl_mvm *mvm) IWL_DEBUG_EEPROM(mvm->trans->dev, "Read from external NVM\n"); /* Maximal size depends on NVM version */ - if (!mvm->trans->cfg->ext_nvm) + if (mvm->trans->cfg->nvm_type != IWL_NVM_EXT) max_section_size = IWL_MAX_NVM_SECTION_SIZE; else max_section_size = IWL_MAX_EXT_NVM_SECTION_SIZE; @@ -465,7 +474,7 @@ int iwl_mvm_read_external_nvm(struct iwl_mvm *mvm) break; } - if (!mvm->trans->cfg->ext_nvm) { + if (mvm->trans->cfg->nvm_type != IWL_NVM_EXT) { section_size = 2 * NVM_WORD1_LEN(le16_to_cpu(file_sec->word1)); section_id = NVM_WORD2_ID(le16_to_cpu(file_sec->word2)); @@ -740,7 +749,7 @@ int iwl_mvm_init_mcc(struct iwl_mvm *mvm) struct ieee80211_regdomain *regd; char mcc[3]; - if (mvm->cfg->ext_nvm) { + if (mvm->cfg->nvm_type == IWL_NVM_EXT) { tlv_lar = fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_LAR_SUPPORT); nvm_lar = mvm->nvm_data->lar_enabled; -- cgit v1.2.3 From 9f1c2674b328a69ab5a9b5a1c52405795ee4163f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 8 Oct 2017 21:44:51 -0700 Subject: net: memcontrol: defer call to mem_cgroup_sk_alloc() Instead of calling mem_cgroup_sk_alloc() from BH context, it is better to call it from inet_csk_accept() in process context. Not only this removes code in mem_cgroup_sk_alloc(), but it also fixes a bug since listener might have been dismantled and css_get() might cause a use-after-free. Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets") Signed-off-by: Eric Dumazet Cc: Johannes Weiner Cc: Tejun Heo Signed-off-by: David S. Miller --- mm/memcontrol.c | 15 --------------- net/core/sock.c | 5 ++++- net/ipv4/inet_connection_sock.c | 1 + 3 files changed, 5 insertions(+), 16 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d5f3a62887cf..661f046ad318 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5828,21 +5828,6 @@ void mem_cgroup_sk_alloc(struct sock *sk) if (!mem_cgroup_sockets_enabled) return; - /* - * Socket cloning can throw us here with sk_memcg already - * filled. It won't however, necessarily happen from - * process context. So the test for root memcg given - * the current task's memcg won't help us in this case. - * - * Respecting the original socket's memcg is a better - * decision in this case. - */ - if (sk->sk_memcg) { - BUG_ON(mem_cgroup_is_root(sk->sk_memcg)); - css_get(&sk->sk_memcg->css); - return; - } - rcu_read_lock(); memcg = mem_cgroup_from_task(current); if (memcg == root_mem_cgroup) diff --git a/net/core/sock.c b/net/core/sock.c index 23953b741a41..70c6ccbdf49f 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1677,6 +1677,10 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_dst_pending_confirm = 0; newsk->sk_wmem_queued = 0; newsk->sk_forward_alloc = 0; + + /* sk->sk_memcg will be populated at accept() time */ + newsk->sk_memcg = NULL; + atomic_set(&newsk->sk_drops, 0); newsk->sk_send_head = NULL; newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK; @@ -1714,7 +1718,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_incoming_cpu = raw_smp_processor_id(); atomic64_set(&newsk->sk_cookie, 0); - mem_cgroup_sk_alloc(newsk); cgroup_sk_alloc(&newsk->sk_cgrp_data); /* diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index c039c937ba90..67aec7a10686 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -475,6 +475,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) } spin_unlock_bh(&queue->fastopenq.lock); } + mem_cgroup_sk_alloc(newsk); out: release_sock(sk); if (req) -- cgit v1.2.3 From fbb1fb4ad415cb31ce944f65a5ca700aaf73a227 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 8 Oct 2017 21:44:52 -0700 Subject: net: defer call to cgroup_sk_alloc() sk_clone_lock() might run while TCP/DCCP listener already vanished. In order to prevent use after free, it is better to defer cgroup_sk_alloc() to the point we know both parent and child exist, and from process context. Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets") Signed-off-by: Eric Dumazet Cc: Johannes Weiner Cc: Tejun Heo Signed-off-by: David S. Miller --- kernel/cgroup/cgroup.c | 11 ----------- net/core/sock.c | 3 +-- net/ipv4/inet_connection_sock.c | 5 +++++ 3 files changed, 6 insertions(+), 13 deletions(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 44857278eb8a..3380a3e49af5 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5709,17 +5709,6 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd) if (cgroup_sk_alloc_disabled) return; - /* Socket clone path */ - if (skcd->val) { - /* - * We might be cloning a socket which is left in an empty - * cgroup and the cgroup might have already been rmdir'd. - * Don't use cgroup_get_live(). - */ - cgroup_get(sock_cgroup_ptr(skcd)); - return; - } - rcu_read_lock(); while (true) { diff --git a/net/core/sock.c b/net/core/sock.c index 70c6ccbdf49f..4499e3153813 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1680,6 +1680,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) /* sk->sk_memcg will be populated at accept() time */ newsk->sk_memcg = NULL; + memset(&newsk->sk_cgrp_data, 0, sizeof(newsk->sk_cgrp_data)); atomic_set(&newsk->sk_drops, 0); newsk->sk_send_head = NULL; @@ -1718,8 +1719,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_incoming_cpu = raw_smp_processor_id(); atomic64_set(&newsk->sk_cookie, 0); - cgroup_sk_alloc(&newsk->sk_cgrp_data); - /* * Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.txt for details) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 67aec7a10686..d32c74507314 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -26,6 +26,8 @@ #include #include #include +#include +#include #ifdef INET_CSK_DEBUG const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; @@ -476,6 +478,9 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) spin_unlock_bh(&queue->fastopenq.lock); } mem_cgroup_sk_alloc(newsk); + cgroup_sk_alloc(&newsk->sk_cgrp_data); + sock_update_classid(&newsk->sk_cgrp_data); + sock_update_netprioidx(&newsk->sk_cgrp_data); out: release_sock(sk); if (req) -- cgit v1.2.3 From e836e3211229d7307660239cc957f2ab60e6aa00 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Wed, 6 Sep 2017 10:11:38 +0200 Subject: i40e: Fix comment about locking for __i40e_read_nvm_word() Caller needs to acquire the lock. Called functions will not. Fixes: 09f79fd49d94 ("i40e: avoid NVM acquire deadlock during NVM update") Signed-off-by: Stefano Brivio Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_nvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c index 57505b1df98d..d591b3e6bd7c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c +++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c @@ -298,7 +298,7 @@ static i40e_status i40e_read_nvm_word_aq(struct i40e_hw *hw, u16 offset, } /** - * __i40e_read_nvm_word - Reads nvm word, assumes called does the locking + * __i40e_read_nvm_word - Reads nvm word, assumes caller does the locking * @hw: pointer to the HW structure * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF) * @data: word read from the Shadow RAM -- cgit v1.2.3 From 2b9478ffc550f17c6cd8c69057234e91150f5972 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 4 Oct 2017 08:44:43 -0700 Subject: i40e: Fix memory leak related filter programming status It looks like we weren't correctly placing the pages from buffers that had been used to return a filter programming status back on the ring. As a result they were being overwritten and tracking of the pages was lost. This change works to correct that by incorporating part of i40e_put_rx_buffer into the programming status handler code. As a result we should now be correctly placing the pages for those buffers on the re-allocation list instead of letting them stay in place. Fixes: 0e626ff7ccbf ("i40e: Fix support for flow director programming status") Reported-by: Anders K. Pedersen Signed-off-by: Alexander Duyck Tested-by: Anders K Pedersen Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 63 ++++++++++++++++------------- 1 file changed, 36 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 1519dfb851d0..2756131495f0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1037,6 +1037,32 @@ reset_latency: return false; } +/** + * i40e_reuse_rx_page - page flip buffer and store it back on the ring + * @rx_ring: rx descriptor ring to store buffers on + * @old_buff: donor buffer to have page reused + * + * Synchronizes page for reuse by the adapter + **/ +static void i40e_reuse_rx_page(struct i40e_ring *rx_ring, + struct i40e_rx_buffer *old_buff) +{ + struct i40e_rx_buffer *new_buff; + u16 nta = rx_ring->next_to_alloc; + + new_buff = &rx_ring->rx_bi[nta]; + + /* update, and store next to alloc */ + nta++; + rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; + + /* transfer page from old buffer to new buffer */ + new_buff->dma = old_buff->dma; + new_buff->page = old_buff->page; + new_buff->page_offset = old_buff->page_offset; + new_buff->pagecnt_bias = old_buff->pagecnt_bias; +} + /** * i40e_rx_is_programming_status - check for programming status descriptor * @qw: qword representing status_error_len in CPU ordering @@ -1071,15 +1097,24 @@ static void i40e_clean_programming_status(struct i40e_ring *rx_ring, union i40e_rx_desc *rx_desc, u64 qw) { - u32 ntc = rx_ring->next_to_clean + 1; + struct i40e_rx_buffer *rx_buffer; + u32 ntc = rx_ring->next_to_clean; u8 id; /* fetch, update, and store next to clean */ + rx_buffer = &rx_ring->rx_bi[ntc++]; ntc = (ntc < rx_ring->count) ? ntc : 0; rx_ring->next_to_clean = ntc; prefetch(I40E_RX_DESC(rx_ring, ntc)); + /* place unused page back on the ring */ + i40e_reuse_rx_page(rx_ring, rx_buffer); + rx_ring->rx_stats.page_reuse_count++; + + /* clear contents of buffer_info */ + rx_buffer->page = NULL; + id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >> I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT; @@ -1638,32 +1673,6 @@ static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb, return false; } -/** - * i40e_reuse_rx_page - page flip buffer and store it back on the ring - * @rx_ring: rx descriptor ring to store buffers on - * @old_buff: donor buffer to have page reused - * - * Synchronizes page for reuse by the adapter - **/ -static void i40e_reuse_rx_page(struct i40e_ring *rx_ring, - struct i40e_rx_buffer *old_buff) -{ - struct i40e_rx_buffer *new_buff; - u16 nta = rx_ring->next_to_alloc; - - new_buff = &rx_ring->rx_bi[nta]; - - /* update, and store next to alloc */ - nta++; - rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; - - /* transfer page from old buffer to new buffer */ - new_buff->dma = old_buff->dma; - new_buff->page = old_buff->page; - new_buff->page_offset = old_buff->page_offset; - new_buff->pagecnt_bias = old_buff->pagecnt_bias; -} - /** * i40e_page_is_reusable - check if any reuse is possible * @page: page struct to check -- cgit v1.2.3 From 365ff9df562889501964ab5ee9fb4ce700d1a8c0 Mon Sep 17 00:00:00 2001 From: Behan Webster Date: Mon, 9 Oct 2017 12:41:53 -0700 Subject: wimax/i2400m: Remove VLAIS Convert Variable Length Array in Struct (VLAIS) to valid C by converting local struct definition to use a flexible array. The structure is only used to define a cast of a buffer so the size of the struct is not used to allocate storage. Signed-off-by: Behan Webster Signed-off-by: Mark Charebois Suggested-by: Arnd Bergmann Signed-off-by: Matthias Kaehlcke Signed-off-by: David S. Miller --- drivers/net/wimax/i2400m/fw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wimax/i2400m/fw.c b/drivers/net/wimax/i2400m/fw.c index c9c711dcd0e6..a89b5685e68b 100644 --- a/drivers/net/wimax/i2400m/fw.c +++ b/drivers/net/wimax/i2400m/fw.c @@ -652,7 +652,7 @@ static int i2400m_download_chunk(struct i2400m *i2400m, const void *chunk, struct device *dev = i2400m_dev(i2400m); struct { struct i2400m_bootrom_header cmd; - u8 cmd_payload[chunk_len]; + u8 cmd_payload[]; } __packed *buf; struct i2400m_bootrom_header ack; -- cgit v1.2.3 From c3d64ad4fea66d07e878b248b803ccd12c45e18c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 10 Oct 2017 09:16:22 -0700 Subject: nfp: fix ethtool stats gather retry The while loop fetching 64 bit ethtool statistics may have to retry multiple times, it shouldn't modify the outside state. Fixes: 4c3523623dc0 ("net: add driver for Netronome NFP4000/NFP6000 NIC VFs") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 07969f06df10..dc016dfec64d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -464,7 +464,7 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data) do { start = u64_stats_fetch_begin(&nn->r_vecs[i].rx_sync); - *data++ = nn->r_vecs[i].rx_pkts; + data[0] = nn->r_vecs[i].rx_pkts; tmp[0] = nn->r_vecs[i].hw_csum_rx_ok; tmp[1] = nn->r_vecs[i].hw_csum_rx_inner_ok; tmp[2] = nn->r_vecs[i].hw_csum_rx_error; @@ -472,14 +472,16 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data) do { start = u64_stats_fetch_begin(&nn->r_vecs[i].tx_sync); - *data++ = nn->r_vecs[i].tx_pkts; - *data++ = nn->r_vecs[i].tx_busy; + data[1] = nn->r_vecs[i].tx_pkts; + data[2] = nn->r_vecs[i].tx_busy; tmp[3] = nn->r_vecs[i].hw_csum_tx; tmp[4] = nn->r_vecs[i].hw_csum_tx_inner; tmp[5] = nn->r_vecs[i].tx_gather; tmp[6] = nn->r_vecs[i].tx_lso; } while (u64_stats_fetch_retry(&nn->r_vecs[i].tx_sync, start)); + data += 3; + for (j = 0; j < NN_ET_RVEC_GATHER_STATS; j++) gathered_stats[j] += tmp[j]; } -- cgit v1.2.3 From 5f0ca2fb71e28df146f590eebfe32b41171b737f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 10 Oct 2017 09:16:23 -0700 Subject: nfp: handle page allocation failures page_address() does not handle NULL argument gracefully, make sure we NULL-check the page pointer before passing it to page_address(). Fixes: ecd63a0217d5 ("nfp: add XDP support in the driver") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 1c0187f0af51..e118b5f23996 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1180,10 +1180,14 @@ static void *nfp_net_rx_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr) { void *frag; - if (!dp->xdp_prog) + if (!dp->xdp_prog) { frag = netdev_alloc_frag(dp->fl_bufsz); - else - frag = page_address(alloc_page(GFP_KERNEL | __GFP_COLD)); + } else { + struct page *page; + + page = alloc_page(GFP_KERNEL | __GFP_COLD); + frag = page ? page_address(page) : NULL; + } if (!frag) { nn_dp_warn(dp, "Failed to alloc receive page frag\n"); return NULL; @@ -1203,10 +1207,14 @@ static void *nfp_net_napi_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr) { void *frag; - if (!dp->xdp_prog) + if (!dp->xdp_prog) { frag = napi_alloc_frag(dp->fl_bufsz); - else - frag = page_address(alloc_page(GFP_ATOMIC | __GFP_COLD)); + } else { + struct page *page; + + page = alloc_page(GFP_ATOMIC | __GFP_COLD); + frag = page ? page_address(page) : NULL; + } if (!frag) { nn_dp_warn(dp, "Failed to alloc receive page frag\n"); return NULL; -- cgit v1.2.3 From 75cb070960ade40fba5de32138390f3c85c90941 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 10 Oct 2017 19:12:32 -0700 Subject: Revert "net: defer call to cgroup_sk_alloc()" This reverts commit fbb1fb4ad415cb31ce944f65a5ca700aaf73a227. This was not the proper fix, lets cleanly revert it, so that following patch can be carried to stable versions. sock_cgroup_ptr() callers do not expect a NULL return value. Signed-off-by: Eric Dumazet Cc: Johannes Weiner Cc: Tejun Heo Signed-off-by: David S. Miller --- kernel/cgroup/cgroup.c | 11 +++++++++++ net/core/sock.c | 3 ++- net/ipv4/inet_connection_sock.c | 5 ----- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 3380a3e49af5..44857278eb8a 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5709,6 +5709,17 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd) if (cgroup_sk_alloc_disabled) return; + /* Socket clone path */ + if (skcd->val) { + /* + * We might be cloning a socket which is left in an empty + * cgroup and the cgroup might have already been rmdir'd. + * Don't use cgroup_get_live(). + */ + cgroup_get(sock_cgroup_ptr(skcd)); + return; + } + rcu_read_lock(); while (true) { diff --git a/net/core/sock.c b/net/core/sock.c index 4499e3153813..70c6ccbdf49f 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1680,7 +1680,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) /* sk->sk_memcg will be populated at accept() time */ newsk->sk_memcg = NULL; - memset(&newsk->sk_cgrp_data, 0, sizeof(newsk->sk_cgrp_data)); atomic_set(&newsk->sk_drops, 0); newsk->sk_send_head = NULL; @@ -1719,6 +1718,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_incoming_cpu = raw_smp_processor_id(); atomic64_set(&newsk->sk_cookie, 0); + cgroup_sk_alloc(&newsk->sk_cgrp_data); + /* * Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.txt for details) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index d32c74507314..67aec7a10686 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -26,8 +26,6 @@ #include #include #include -#include -#include #ifdef INET_CSK_DEBUG const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; @@ -478,9 +476,6 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) spin_unlock_bh(&queue->fastopenq.lock); } mem_cgroup_sk_alloc(newsk); - cgroup_sk_alloc(&newsk->sk_cgrp_data); - sock_update_classid(&newsk->sk_cgrp_data); - sock_update_netprioidx(&newsk->sk_cgrp_data); out: release_sock(sk); if (req) -- cgit v1.2.3 From c0576e3975084d4699b7bfef578613fb8e1144f6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 10 Oct 2017 19:12:33 -0700 Subject: net: call cgroup_sk_alloc() earlier in sk_clone_lock() If for some reason, the newly allocated child need to be freed, we will call cgroup_put() (via sk_free_unlock_clone()) while the corresponding cgroup_get() was not yet done, and we will free memory too soon. Fixes: d979a39d7242 ("cgroup: duplicate cgroup reference when cloning sockets") Signed-off-by: Eric Dumazet Cc: Johannes Weiner Cc: Tejun Heo Signed-off-by: David S. Miller --- net/core/sock.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 70c6ccbdf49f..415f441c63b9 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1687,6 +1687,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) atomic_set(&newsk->sk_zckey, 0); sock_reset_flag(newsk, SOCK_DONE); + cgroup_sk_alloc(&newsk->sk_cgrp_data); rcu_read_lock(); filter = rcu_dereference(sk->sk_filter); @@ -1718,8 +1719,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_incoming_cpu = raw_smp_processor_id(); atomic64_set(&newsk->sk_cookie, 0); - cgroup_sk_alloc(&newsk->sk_cgrp_data); - /* * Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.txt for details) -- cgit v1.2.3 From 5aba2ba5030b66a6f8c93049b718556f9aacd7c6 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 10 Oct 2017 17:07:12 +0200 Subject: macsec: fix memory leaks when skb_to_sgvec fails Fixes: cda7ea690350 ("macsec: check return value of skb_to_sgvec always") Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- drivers/net/macsec.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 98e4deaa3a6a..5ab1b8849c30 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -742,6 +742,7 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb, sg_init_table(sg, ret); ret = skb_to_sgvec(skb, sg, 0, skb->len); if (unlikely(ret < 0)) { + aead_request_free(req); macsec_txsa_put(tx_sa); kfree_skb(skb); return ERR_PTR(ret); @@ -954,6 +955,7 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb, sg_init_table(sg, ret); ret = skb_to_sgvec(skb, sg, 0, skb->len); if (unlikely(ret < 0)) { + aead_request_free(req); kfree_skb(skb); return ERR_PTR(ret); } -- cgit v1.2.3 From bde135a672bfd1cc41d91c2bbbbd36eb25409b74 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Wed, 11 Oct 2017 12:56:52 +0800 Subject: r8169: only enable PCI wakeups when WOL is active rtl_init_one() currently enables PCI wakeups if the ethernet device is found to be WOL-capable. There is no need to do this when rtl8169_set_wol() will correctly enable or disable the same wakeup flag when WOL is activated/deactivated. This works around an ACPI DSDT bug which prevents the Acer laptop models Aspire ES1-533, Aspire ES1-732, PackardBell ENTE69AP and Gateway NE533 from entering S3 suspend - even when no ethernet cable is connected. On these platforms, the DSDT says that GPE08 is a wakeup source for ethernet, but this GPE fires as soon as the system goes into suspend, waking the system up immediately. Having the wakeup normally disabled avoids this issue in the default case. With this change, WOL will continue to be unusable on these platforms (it will instantly wake up if WOL is later enabled by the user) but we do not expect this to be a commonly used feature on these consumer laptops. We have separately determined that WOL works fine without any ACPI GPEs enabled during sleep, so a DSDT fix or override would be possible to make WOL work. Signed-off-by: Daniel Drake Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index e03fcf914690..a3c949ea7d1a 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -8491,8 +8491,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) rtl8168_driver_start(tp); } - device_set_wakeup_enable(&pdev->dev, tp->features & RTL_FEATURE_WOL); - if (pci_dev_run_wake(pdev)) pm_runtime_put_noidle(&pdev->dev); -- cgit v1.2.3 From 6e9c0075409d4ec1bc63558ee5a93916a6d7d16f Mon Sep 17 00:00:00 2001 From: Samuel Mendoza-Jonas Date: Wed, 11 Oct 2017 16:54:27 +1100 Subject: net/ncsi: Don't limit vids based on hot_channel Currently we drop any new VLAN ids if there are more than the current (or last used) channel can support. Most importantly this is a problem if no channel has been selected yet, resulting in a segfault. Secondly this does not necessarily reflect the capabilities of any other channels. Instead only drop a new VLAN id if we are already tracking the maximum allowed by the NCSI specification. Per-channel limits are already handled by ncsi_add_filter(), but add a message to set_one_vid() to make it obvious that the channel can not support any more VLAN ids. Signed-off-by: Samuel Mendoza-Jonas Signed-off-by: David S. Miller --- net/ncsi/internal.h | 1 + net/ncsi/ncsi-manage.c | 17 +++++++++-------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index af3d636534ef..d30f7bd741d0 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -286,6 +286,7 @@ struct ncsi_dev_priv { struct work_struct work; /* For channel management */ struct packet_type ptype; /* NCSI packet Rx handler */ struct list_head node; /* Form NCSI device list */ +#define NCSI_MAX_VLAN_VIDS 15 struct list_head vlan_vids; /* List of active VLAN IDs */ }; diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 3fd3c39e6278..b6a449aa9d4b 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -732,6 +732,10 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc, if (index < 0) { netdev_err(ndp->ndev.dev, "Failed to add new VLAN tag, error %d\n", index); + if (index == -ENOSPC) + netdev_err(ndp->ndev.dev, + "Channel %u already has all VLAN filters set\n", + nc->id); return -1; } @@ -1403,7 +1407,6 @@ static int ncsi_kick_channels(struct ncsi_dev_priv *ndp) int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) { - struct ncsi_channel_filter *ncf; struct ncsi_dev_priv *ndp; unsigned int n_vids = 0; struct vlan_vid *vlan; @@ -1420,7 +1423,6 @@ int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) } ndp = TO_NCSI_DEV_PRIV(nd); - ncf = ndp->hot_channel->filters[NCSI_FILTER_VLAN]; /* Add the VLAN id to our internal list */ list_for_each_entry_rcu(vlan, &ndp->vlan_vids, list) { @@ -1431,12 +1433,11 @@ int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) return 0; } } - - if (n_vids >= ncf->total) { - netdev_info(dev, - "NCSI Channel supports up to %u VLAN tags but %u are already set\n", - ncf->total, n_vids); - return -EINVAL; + if (n_vids >= NCSI_MAX_VLAN_VIDS) { + netdev_warn(dev, + "tried to add vlan id %u but NCSI max already registered (%u)\n", + vid, NCSI_MAX_VLAN_VIDS); + return -ENOSPC; } vlan = kzalloc(sizeof(*vlan), GFP_KERNEL); -- cgit v1.2.3 From 12ed3772b7e11b53a58ecbd8ce258271fb148cc6 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 11 Oct 2017 20:10:31 -0700 Subject: ip: update policy routing config help The kernel config help for policy routing was still pointing at an ancient document from 2000 that refers to Linux 2.1. Update it to point to something that is at least occasionally updated. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/Kconfig | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 91a2557942fa..f48fe6fc7e8c 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -70,11 +70,9 @@ config IP_MULTIPLE_TABLES address into account. Furthermore, the TOS (Type-Of-Service) field of the packet can be used for routing decisions as well. - If you are interested in this, please see the preliminary - documentation at - and . - You will need supporting software from - . + If you need more information, see the Linux Advanced + Routing and Traffic Control documentation at + If unsure, say N. -- cgit v1.2.3 From e7ad97938eaccb5a9ff4534167b1abafb507935c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 12 Oct 2017 11:48:31 +0200 Subject: liquidio: fix timespec64_to_ns typo While experimenting with changes to the timekeeping code, I ran into a build error in the liquidio driver: drivers/net/ethernet/cavium/liquidio/lio_main.c: In function 'liquidio_ptp_settime': drivers/net/ethernet/cavium/liquidio/lio_main.c:1850:22: error: passing argument 1 of 'timespec_to_ns' from incompatible pointer type [-Werror=incompatible-pointer-types] The driver had a type mismatch since it was first merged, but this never caused problems because it is only built on 64-bit architectures that define timespec and timespec64 to the same type. If we ever want to compile-test the driver on 32-bit or change the way that 64-bit timespec64 is defined, we need to fix it, so let's just do it now. Fixes: f21fb3ed364b ("Add support of Cavium Liquidio ethernet adapters") Signed-off-by: Arnd Bergmann Acked-by: Felix Manlunas Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/liquidio/lio_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index e7f54948173f..5b19826a7e16 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -1847,7 +1847,7 @@ static int liquidio_ptp_settime(struct ptp_clock_info *ptp, struct lio *lio = container_of(ptp, struct lio, ptp_info); struct octeon_device *oct = (struct octeon_device *)lio->oct_dev; - ns = timespec_to_ns(ts); + ns = timespec64_to_ns(ts); spin_lock_irqsave(&lio->ptp_lock, flags); lio_pci_writeq(oct, ns, CN6XXX_MIO_PTP_CLOCK_HI); -- cgit v1.2.3 From b9849860675f925da0380f4ea76c3f5041909737 Mon Sep 17 00:00:00 2001 From: Emiliano Ingrassia Date: Thu, 12 Oct 2017 11:00:47 +0200 Subject: net: stmmac: dwmac_lib: fix interchanged sleep/timeout values in DMA reset function The DMA reset timeout, used in read_poll_timeout, is ten times shorter than the sleep time. This patch fixes these values interchanging them, as it was before the read_poll_timeout introduction. Fixes: 8a70aeca80c2 ("net: stmmac: Use readl_poll_timeout") Signed-off-by: Emiliano Ingrassia Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c index 67af0bdd7f10..7516ca210855 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c @@ -34,7 +34,7 @@ int dwmac_dma_reset(void __iomem *ioaddr) err = readl_poll_timeout(ioaddr + DMA_BUS_MODE, value, !(value & DMA_BUS_MODE_SFT_RESET), - 100000, 10000); + 10000, 100000); if (err) return -EBUSY; -- cgit v1.2.3 From 09001b03f722be96827bf8df5ba4d48b7ec0cc30 Mon Sep 17 00:00:00 2001 From: Wenhua Shi Date: Sat, 14 Oct 2017 18:51:36 +0200 Subject: net: fix typo in skbuff.c Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 16982de649b9..e62476beee95 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1896,7 +1896,7 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta) } /* If we need update frag list, we are in troubles. - * Certainly, it possible to add an offset to skb data, + * Certainly, it is possible to add an offset to skb data, * but taking into account that pulling is expected to * be very rare operation, it is worth to fight against * further bloating skb head and crucify ourselves here instead. -- cgit v1.2.3 From 5903f594935a3841137c86b9d5b75143a5b7121c Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 13 Oct 2017 19:22:35 +0200 Subject: l2tp: check ps->sock before running pppol2tp_session_ioctl() When pppol2tp_session_ioctl() is called by pppol2tp_tunnel_ioctl(), the session may be unconnected. That is, it was created by pppol2tp_session_create() and hasn't been connected with pppol2tp_connect(). In this case, ps->sock is NULL, so we need to check for this case in order to avoid dereferencing a NULL pointer. Fixes: 309795f4bec2 ("l2tp: Add netlink control API for L2TP") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index bc6e8bfc5be4..f50452b919d5 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -988,6 +988,9 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session, session->name, cmd, arg); sk = ps->sock; + if (!sk) + return -EBADR; + sock_hold(sk); switch (cmd) { -- cgit v1.2.3 From 3efc93c2bc243f940beb3324f67aa14e223abdd1 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 13 Oct 2017 13:39:22 -0400 Subject: net: dsa: mv88e6060: fix switch MAC address The 88E6060 Ethernet switch always transmits the multicast bit of the switch MAC address as a zero. It re-uses the corresponding bit 8 of the register "Switch MAC Address Register Bytes 0 & 1" for "DiffAddr". If the "DiffAddr" bit is 0, then all ports transmit the same source address. If it is set to 1, then bit 2:0 are used for the port number. The mv88e6060 driver is currently wrongly shifting the MAC address byte 0 by 9. To fix this, shift it by 8 as usual and clear its bit 0. Signed-off-by: Vivien Didelot Reviewed-by: Woojung Huh Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6060.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c index dce7fa57eb55..f123ed57630d 100644 --- a/drivers/net/dsa/mv88e6060.c +++ b/drivers/net/dsa/mv88e6060.c @@ -214,8 +214,14 @@ static int mv88e6060_setup(struct dsa_switch *ds) static int mv88e6060_set_addr(struct dsa_switch *ds, u8 *addr) { - /* Use the same MAC Address as FD Pause frames for all ports */ - REG_WRITE(REG_GLOBAL, GLOBAL_MAC_01, (addr[0] << 9) | addr[1]); + u16 val = addr[0] << 8 | addr[1]; + + /* The multicast bit is always transmitted as a zero, so the switch uses + * bit 8 for "DiffAddr", where 0 means all ports transmit the same SA. + */ + val &= 0xfeff; + + REG_WRITE(REG_GLOBAL, GLOBAL_MAC_01, val); REG_WRITE(REG_GLOBAL, GLOBAL_MAC_23, (addr[2] << 8) | addr[3]); REG_WRITE(REG_GLOBAL, GLOBAL_MAC_45, (addr[4] << 8) | addr[5]); -- cgit v1.2.3 From c213eae8d3cd4c026f348ce4fd64f4754b3acf2b Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 13 Oct 2017 21:09:29 -0400 Subject: bnxt_en: Improve VF/PF link change logic. Link status query firmware messages originating from the VFs are forwarded to the PF. The driver handles these interactions in a workqueue for the VF and PF. The VF driver waits for the response from the PF in the workqueue. If the PF and VF driver are running on the same host and the work for both PF and VF are queued on the same workqueue, the VF driver may not get the response if the PF work item is queued behind it on the same workqueue. This will lead to the VF link query message timing out. To prevent this, we create a private workqueue for PFs instead of using the common workqueue. The VF query and PF response will never be on the same workqueue. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 66 +++++++++++++++++++++++++------ 1 file changed, 53 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index aacec8bc19d5..7906153c5c05 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -214,6 +214,8 @@ static const u16 bnxt_async_events_arr[] = { ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE, }; +static struct workqueue_struct *bnxt_pf_wq; + static bool bnxt_vf_pciid(enum board_idx idx) { return (idx == NETXTREME_C_VF || idx == NETXTREME_E_VF); @@ -1024,12 +1026,28 @@ static int bnxt_discard_rx(struct bnxt *bp, struct bnxt_napi *bnapi, return 0; } +static void bnxt_queue_sp_work(struct bnxt *bp) +{ + if (BNXT_PF(bp)) + queue_work(bnxt_pf_wq, &bp->sp_task); + else + schedule_work(&bp->sp_task); +} + +static void bnxt_cancel_sp_work(struct bnxt *bp) +{ + if (BNXT_PF(bp)) + flush_workqueue(bnxt_pf_wq); + else + cancel_work_sync(&bp->sp_task); +} + static void bnxt_sched_reset(struct bnxt *bp, struct bnxt_rx_ring_info *rxr) { if (!rxr->bnapi->in_reset) { rxr->bnapi->in_reset = true; set_bit(BNXT_RESET_TASK_SP_EVENT, &bp->sp_event); - schedule_work(&bp->sp_task); + bnxt_queue_sp_work(bp); } rxr->rx_next_cons = 0xffff; } @@ -1717,7 +1735,7 @@ static int bnxt_async_event_process(struct bnxt *bp, default: goto async_event_process_exit; } - schedule_work(&bp->sp_task); + bnxt_queue_sp_work(bp); async_event_process_exit: bnxt_ulp_async_events(bp, cmpl); return 0; @@ -1751,7 +1769,7 @@ static int bnxt_hwrm_handler(struct bnxt *bp, struct tx_cmp *txcmp) set_bit(vf_id - bp->pf.first_vf_id, bp->pf.vf_event_bmap); set_bit(BNXT_HWRM_EXEC_FWD_REQ_SP_EVENT, &bp->sp_event); - schedule_work(&bp->sp_task); + bnxt_queue_sp_work(bp); break; case CMPL_BASE_TYPE_HWRM_ASYNC_EVENT: @@ -6647,7 +6665,7 @@ static void bnxt_set_rx_mode(struct net_device *dev) vnic->rx_mask = mask; set_bit(BNXT_RX_MASK_SP_EVENT, &bp->sp_event); - schedule_work(&bp->sp_task); + bnxt_queue_sp_work(bp); } } @@ -6920,7 +6938,7 @@ static void bnxt_tx_timeout(struct net_device *dev) netdev_err(bp->dev, "TX timeout detected, starting reset task!\n"); set_bit(BNXT_RESET_TASK_SP_EVENT, &bp->sp_event); - schedule_work(&bp->sp_task); + bnxt_queue_sp_work(bp); } #ifdef CONFIG_NET_POLL_CONTROLLER @@ -6952,7 +6970,7 @@ static void bnxt_timer(unsigned long data) if (bp->link_info.link_up && (bp->flags & BNXT_FLAG_PORT_STATS) && bp->stats_coal_ticks) { set_bit(BNXT_PERIODIC_STATS_SP_EVENT, &bp->sp_event); - schedule_work(&bp->sp_task); + bnxt_queue_sp_work(bp); } bnxt_restart_timer: mod_timer(&bp->timer, jiffies + bp->current_interval); @@ -7433,7 +7451,7 @@ static int bnxt_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, spin_unlock_bh(&bp->ntp_fltr_lock); set_bit(BNXT_RX_NTP_FLTR_SP_EVENT, &bp->sp_event); - schedule_work(&bp->sp_task); + bnxt_queue_sp_work(bp); return new_fltr->sw_id; @@ -7516,7 +7534,7 @@ static void bnxt_udp_tunnel_add(struct net_device *dev, if (bp->vxlan_port_cnt == 1) { bp->vxlan_port = ti->port; set_bit(BNXT_VXLAN_ADD_PORT_SP_EVENT, &bp->sp_event); - schedule_work(&bp->sp_task); + bnxt_queue_sp_work(bp); } break; case UDP_TUNNEL_TYPE_GENEVE: @@ -7533,7 +7551,7 @@ static void bnxt_udp_tunnel_add(struct net_device *dev, return; } - schedule_work(&bp->sp_task); + bnxt_queue_sp_work(bp); } static void bnxt_udp_tunnel_del(struct net_device *dev, @@ -7572,7 +7590,7 @@ static void bnxt_udp_tunnel_del(struct net_device *dev, return; } - schedule_work(&bp->sp_task); + bnxt_queue_sp_work(bp); } static int bnxt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, @@ -7720,7 +7738,7 @@ static void bnxt_remove_one(struct pci_dev *pdev) pci_disable_pcie_error_reporting(pdev); unregister_netdev(dev); bnxt_shutdown_tc(bp); - cancel_work_sync(&bp->sp_task); + bnxt_cancel_sp_work(bp); bp->sp_event = 0; bnxt_clear_int_mode(bp); @@ -8138,8 +8156,17 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) else device_set_wakeup_capable(&pdev->dev, false); - if (BNXT_PF(bp)) + if (BNXT_PF(bp)) { + if (!bnxt_pf_wq) { + bnxt_pf_wq = + create_singlethread_workqueue("bnxt_pf_wq"); + if (!bnxt_pf_wq) { + dev_err(&pdev->dev, "Unable to create workqueue.\n"); + goto init_err_pci_clean; + } + } bnxt_init_tc(bp); + } rc = register_netdev(dev); if (rc) @@ -8375,4 +8402,17 @@ static struct pci_driver bnxt_pci_driver = { #endif }; -module_pci_driver(bnxt_pci_driver); +static int __init bnxt_init(void) +{ + return pci_register_driver(&bnxt_pci_driver); +} + +static void __exit bnxt_exit(void) +{ + pci_unregister_driver(&bnxt_pci_driver); + if (bnxt_pf_wq) + destroy_workqueue(bnxt_pf_wq); +} + +module_init(bnxt_init); +module_exit(bnxt_exit); -- cgit v1.2.3 From e2dc9b6e38fa3919e63d6d7905da70ca41cbf908 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 13 Oct 2017 21:09:30 -0400 Subject: bnxt_en: Don't use rtnl lock to protect link change logic in workqueue. As a further improvement to the PF/VF link change logic, use a private mutex instead of the rtnl lock to protect link change logic. With the new mutex, we don't have to take the rtnl lock in the workqueue when we have to handle link related functions. If the VF and PF drivers are running on the same host and both take the rtnl lock and one is waiting for the other, it will cause timeout. This patch fixes these timeouts. Fixes: 90c694bb7181 ("bnxt_en: Fix RTNL lock usage on bnxt_update_link().") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 25 ++++++++++++----------- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 4 ++++ drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 4 ++++ 3 files changed, 21 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 7906153c5c05..3f596de2abe3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6345,7 +6345,9 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) } if (link_re_init) { + mutex_lock(&bp->link_lock); rc = bnxt_update_phy_setting(bp); + mutex_unlock(&bp->link_lock); if (rc) netdev_warn(bp->dev, "failed to update phy settings\n"); } @@ -7043,30 +7045,28 @@ static void bnxt_sp_task(struct work_struct *work) if (test_and_clear_bit(BNXT_PERIODIC_STATS_SP_EVENT, &bp->sp_event)) bnxt_hwrm_port_qstats(bp); - /* These functions below will clear BNXT_STATE_IN_SP_TASK. They - * must be the last functions to be called before exiting. - */ if (test_and_clear_bit(BNXT_LINK_CHNG_SP_EVENT, &bp->sp_event)) { - int rc = 0; + int rc; + mutex_lock(&bp->link_lock); if (test_and_clear_bit(BNXT_LINK_SPEED_CHNG_SP_EVENT, &bp->sp_event)) bnxt_hwrm_phy_qcaps(bp); - bnxt_rtnl_lock_sp(bp); - if (test_bit(BNXT_STATE_OPEN, &bp->state)) - rc = bnxt_update_link(bp, true); - bnxt_rtnl_unlock_sp(bp); + rc = bnxt_update_link(bp, true); + mutex_unlock(&bp->link_lock); if (rc) netdev_err(bp->dev, "SP task can't update link (rc: %x)\n", rc); } if (test_and_clear_bit(BNXT_HWRM_PORT_MODULE_SP_EVENT, &bp->sp_event)) { - bnxt_rtnl_lock_sp(bp); - if (test_bit(BNXT_STATE_OPEN, &bp->state)) - bnxt_get_port_module_status(bp); - bnxt_rtnl_unlock_sp(bp); + mutex_lock(&bp->link_lock); + bnxt_get_port_module_status(bp); + mutex_unlock(&bp->link_lock); } + /* These functions below will clear BNXT_STATE_IN_SP_TASK. They + * must be the last functions to be called before exiting. + */ if (test_and_clear_bit(BNXT_RESET_TASK_SP_EVENT, &bp->sp_event)) bnxt_reset(bp, false); @@ -7766,6 +7766,7 @@ static int bnxt_probe_phy(struct bnxt *bp) rc); return rc; } + mutex_init(&bp->link_lock); rc = bnxt_update_link(bp, false); if (rc) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 7b888d4b2b55..d2925c04709a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1290,6 +1290,10 @@ struct bnxt { unsigned long *ntp_fltr_bmap; int ntp_fltr_count; + /* To protect link related settings during link changes and + * ethtool settings changes. + */ + struct mutex link_lock; struct bnxt_link_info link_info; struct ethtool_eee eee; u32 lpi_tmr_lo; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 8eff05a3e0e4..b2cbc970b497 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1052,6 +1052,7 @@ static int bnxt_get_link_ksettings(struct net_device *dev, u32 ethtool_speed; ethtool_link_ksettings_zero_link_mode(lk_ksettings, supported); + mutex_lock(&bp->link_lock); bnxt_fw_to_ethtool_support_spds(link_info, lk_ksettings); ethtool_link_ksettings_zero_link_mode(lk_ksettings, advertising); @@ -1099,6 +1100,7 @@ static int bnxt_get_link_ksettings(struct net_device *dev, base->port = PORT_FIBRE; } base->phy_address = link_info->phy_addr; + mutex_unlock(&bp->link_lock); return 0; } @@ -1190,6 +1192,7 @@ static int bnxt_set_link_ksettings(struct net_device *dev, if (!BNXT_SINGLE_PF(bp)) return -EOPNOTSUPP; + mutex_lock(&bp->link_lock); if (base->autoneg == AUTONEG_ENABLE) { BNXT_ETHTOOL_TO_FW_SPDS(fw_advertising, lk_ksettings, advertising); @@ -1234,6 +1237,7 @@ static int bnxt_set_link_ksettings(struct net_device *dev, rc = bnxt_hwrm_set_link_setting(bp, set_pause, false); set_setting_exit: + mutex_unlock(&bp->link_lock); return rc; } -- cgit v1.2.3 From 7ab0760f5178169c4c218852f51646ea90817d7c Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Fri, 13 Oct 2017 21:09:31 -0400 Subject: bnxt_en: Fix VF PCIe link speed and width logic. PCIE PCIE_EP_REG_LINK_STATUS_CONTROL register is only defined in PF config space, so we must read it from the PF. Fixes: 90c4f788f6c0 ("bnxt_en: Report PCIe link speed and width during driver load") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 3f596de2abe3..4ffa0b1e565a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7965,7 +7965,7 @@ static void bnxt_parse_log_pcie_link(struct bnxt *bp) enum pcie_link_width width = PCIE_LNK_WIDTH_UNKNOWN; enum pci_bus_speed speed = PCI_SPEED_UNKNOWN; - if (pcie_get_minimum_link(bp->pdev, &speed, &width) || + if (pcie_get_minimum_link(pci_physfn(bp->pdev), &speed, &width) || speed == PCI_SPEED_UNKNOWN || width == PCIE_LNK_WIDTH_UNKNOWN) netdev_info(bp->dev, "Failed to determine PCIe Link Info\n"); else -- cgit v1.2.3 From 021570793d8cd86cb62ac038c535f4450586b454 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 13 Oct 2017 21:09:32 -0400 Subject: bnxt_en: Fix VF resource checking. In bnxt_sriov_enable(), we calculate to see if we have enough hardware resources to enable the requested number of VFs. The logic to check for minimum completion rings and statistics contexts is missing. Add the required checks so that VF configuration won't fail. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index d37925a8a65b..5ee18660bc33 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -502,6 +502,7 @@ static int bnxt_sriov_enable(struct bnxt *bp, int *num_vfs) int rc = 0, vfs_supported; int min_rx_rings, min_tx_rings, min_rss_ctxs; int tx_ok = 0, rx_ok = 0, rss_ok = 0; + int avail_cp, avail_stat; /* Check if we can enable requested num of vf's. At a mininum * we require 1 RX 1 TX rings for each VF. In this minimum conf @@ -509,6 +510,10 @@ static int bnxt_sriov_enable(struct bnxt *bp, int *num_vfs) */ vfs_supported = *num_vfs; + avail_cp = bp->pf.max_cp_rings - bp->cp_nr_rings; + avail_stat = bp->pf.max_stat_ctxs - bp->num_stat_ctxs; + avail_cp = min_t(int, avail_cp, avail_stat); + while (vfs_supported) { min_rx_rings = vfs_supported; min_tx_rings = vfs_supported; @@ -523,10 +528,12 @@ static int bnxt_sriov_enable(struct bnxt *bp, int *num_vfs) min_rx_rings) rx_ok = 1; } - if (bp->pf.max_vnics - bp->nr_vnics < min_rx_rings) + if (bp->pf.max_vnics - bp->nr_vnics < min_rx_rings || + avail_cp < min_rx_rings) rx_ok = 0; - if (bp->pf.max_tx_rings - bp->tx_nr_rings >= min_tx_rings) + if (bp->pf.max_tx_rings - bp->tx_nr_rings >= min_tx_rings && + avail_cp >= min_tx_rings) tx_ok = 1; if (bp->pf.max_rsscos_ctxs - bp->rsscos_nr_ctxs >= min_rss_ctxs) -- cgit v1.2.3 From cc72f3b1feb4fd38d33ab7a013d5ab95041cb8ba Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 13 Oct 2017 21:09:33 -0400 Subject: bnxt_en: Fix possible corrupted NVRAM parameters from firmware response. In bnxt_find_nvram_item(), it is copying firmware response data after releasing the mutex. This can cause the firmware response data to be corrupted if the next firmware response overwrites the response buffer. The rare problem shows up when running ethtool -i repeatedly. Fix it by calling the new variant _hwrm_send_message_silent() that requires the caller to take the mutex and to release it after the response data has been copied. Fixes: 3ebf6f0a09a2 ("bnxt_en: Add installed-package version reporting via Ethtool GDRVINFO") Reported-by: Sarveswara Rao Mygapula Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 ++++++ drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 4 +++- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 4ffa0b1e565a..dc5de275352a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3466,6 +3466,12 @@ int _hwrm_send_message(struct bnxt *bp, void *msg, u32 msg_len, int timeout) return bnxt_hwrm_do_send_msg(bp, msg, msg_len, timeout, false); } +int _hwrm_send_message_silent(struct bnxt *bp, void *msg, u32 msg_len, + int timeout) +{ + return bnxt_hwrm_do_send_msg(bp, msg, msg_len, timeout, true); +} + int hwrm_send_message(struct bnxt *bp, void *msg, u32 msg_len, int timeout) { int rc; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index d2925c04709a..c911e69ff25f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1362,6 +1362,7 @@ void bnxt_set_ring_params(struct bnxt *); int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode); void bnxt_hwrm_cmd_hdr_init(struct bnxt *, void *, u16, u16, u16); int _hwrm_send_message(struct bnxt *, void *, u32, int); +int _hwrm_send_message_silent(struct bnxt *bp, void *msg, u32 len, int timeout); int hwrm_send_message(struct bnxt *, void *, u32, int); int hwrm_send_message_silent(struct bnxt *, void *, u32, int); int bnxt_hwrm_func_rgtr_async_events(struct bnxt *bp, unsigned long *bmap, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index b2cbc970b497..3cbe771b3352 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1809,7 +1809,8 @@ static int bnxt_find_nvram_item(struct net_device *dev, u16 type, u16 ordinal, req.dir_ordinal = cpu_to_le16(ordinal); req.dir_ext = cpu_to_le16(ext); req.opt_ordinal = NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_EQ; - rc = hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + mutex_lock(&bp->hwrm_cmd_lock); + rc = _hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); if (rc == 0) { if (index) *index = le16_to_cpu(output->dir_idx); @@ -1818,6 +1819,7 @@ static int bnxt_find_nvram_item(struct net_device *dev, u16 type, u16 ordinal, if (data_length) *data_length = le32_to_cpu(output->dir_data_length); } + mutex_unlock(&bp->hwrm_cmd_lock); return rc; } -- cgit v1.2.3 From 5b1e1a9ce06fd94b563d6c3dd896589231995d89 Mon Sep 17 00:00:00 2001 From: Sankar Patchineelam Date: Fri, 13 Oct 2017 21:09:34 -0400 Subject: bnxt_en: Fix possible corruption in DCB parameters from firmware. hwrm_send_message() is replaced with _hwrm_send_message(), and hwrm_cmd_lock mutex lock is grabbed for the whole period of firmware call until the firmware DCB parameters have been copied. This will prevent possible corruption of the firmware data. Fixes: 7df4ae9fe855 ("bnxt_en: Implement DCBNL to support host-based DCBX.") Signed-off-by: Sankar Patchineelam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c index aa1f3a2c7a78..fed37cd9ae1d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c @@ -50,7 +50,9 @@ static int bnxt_hwrm_queue_pri2cos_qcfg(struct bnxt *bp, struct ieee_ets *ets) bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_PRI2COS_QCFG, -1, -1); req.flags = cpu_to_le32(QUEUE_PRI2COS_QCFG_REQ_FLAGS_IVLAN); - rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + + mutex_lock(&bp->hwrm_cmd_lock); + rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); if (!rc) { u8 *pri2cos = &resp->pri0_cos_queue_id; int i, j; @@ -66,6 +68,7 @@ static int bnxt_hwrm_queue_pri2cos_qcfg(struct bnxt *bp, struct ieee_ets *ets) } } } + mutex_unlock(&bp->hwrm_cmd_lock); return rc; } @@ -119,9 +122,13 @@ static int bnxt_hwrm_queue_cos2bw_qcfg(struct bnxt *bp, struct ieee_ets *ets) int rc, i; bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_COS2BW_QCFG, -1, -1); - rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); - if (rc) + + mutex_lock(&bp->hwrm_cmd_lock); + rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (rc) { + mutex_unlock(&bp->hwrm_cmd_lock); return rc; + } data = &resp->queue_id0 + offsetof(struct bnxt_cos2bw_cfg, queue_id); for (i = 0; i < bp->max_tc; i++, data += sizeof(cos2bw) - 4) { @@ -143,6 +150,7 @@ static int bnxt_hwrm_queue_cos2bw_qcfg(struct bnxt *bp, struct ieee_ets *ets) } } } + mutex_unlock(&bp->hwrm_cmd_lock); return 0; } @@ -240,12 +248,17 @@ static int bnxt_hwrm_queue_pfc_qcfg(struct bnxt *bp, struct ieee_pfc *pfc) int rc; bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_PFCENABLE_QCFG, -1, -1); - rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); - if (rc) + + mutex_lock(&bp->hwrm_cmd_lock); + rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (rc) { + mutex_unlock(&bp->hwrm_cmd_lock); return rc; + } pri_mask = le32_to_cpu(resp->flags); pfc->pfc_en = pri_mask; + mutex_unlock(&bp->hwrm_cmd_lock); return 0; } -- cgit v1.2.3 From fdf7cb4185b60c68e1a75e61691c4afdc15dea0e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 5 Sep 2017 14:54:54 +0200 Subject: mac80211: accept key reinstall without changing anything When a key is reinstalled we can reset the replay counters etc. which can lead to nonce reuse and/or replay detection being impossible, breaking security properties, as described in the "KRACK attacks". In particular, CVE-2017-13080 applies to GTK rekeying that happened in firmware while the host is in D3, with the second part of the attack being done after the host wakes up. In this case, the wpa_supplicant mitigation isn't sufficient since wpa_supplicant doesn't know the GTK material. In case this happens, simply silently accept the new key coming from userspace but don't take any action on it since it's the same key; this keeps the PN replay counters intact. Signed-off-by: Johannes Berg --- net/mac80211/key.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/net/mac80211/key.c b/net/mac80211/key.c index a98fc2b5e0dc..ae995c8480db 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -4,7 +4,7 @@ * Copyright 2006-2007 Jiri Benc * Copyright 2007-2008 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright 2015 Intel Deutschland GmbH + * Copyright 2015-2017 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -620,9 +620,6 @@ int ieee80211_key_link(struct ieee80211_key *key, pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE; idx = key->conf.keyidx; - key->local = sdata->local; - key->sdata = sdata; - key->sta = sta; mutex_lock(&sdata->local->key_mtx); @@ -633,6 +630,21 @@ int ieee80211_key_link(struct ieee80211_key *key, else old_key = key_mtx_dereference(sdata->local, sdata->keys[idx]); + /* + * Silently accept key re-installation without really installing the + * new version of the key to avoid nonce reuse or replay issues. + */ + if (old_key && key->conf.keylen == old_key->conf.keylen && + !memcmp(key->conf.key, old_key->conf.key, key->conf.keylen)) { + ieee80211_key_free_unused(key); + ret = 0; + goto out; + } + + key->local = sdata->local; + key->sdata = sdata; + key->sta = sta; + increment_tailroom_need_count(sdata); ieee80211_key_replace(sdata, sta, pairwise, old_key, key); @@ -648,6 +660,7 @@ int ieee80211_key_link(struct ieee80211_key *key, ret = 0; } + out: mutex_unlock(&sdata->local->key_mtx); return ret; -- cgit v1.2.3 From 8a212589fe0e45f26c549dfa271a157ca8eea1ac Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 15 Oct 2017 18:13:41 +0800 Subject: rtnetlink: bring NETDEV_CHANGEMTU event process back in rtnetlink_event Commit 085e1a65f04f ("rtnetlink: Do not generate notifications for MTU events") tried to fix the redundant notifications issue when ip link set mtu by removing NETDEV_CHANGEMTU event process in rtnetlink_event. But it also resulted in no notification generated when dev's mtu is changed via other methods, like: 'ifconfig eth1 mtu 1400' or 'echo 1400 > /sys/class/net/eth1/mtu' It would cause users not to be notified by this change. This patch is to fix it by bringing NETDEV_CHANGEMTU event back into rtnetlink_event, and the redundant notifications issue will be fixed in the later patch 'rtnetlink: check DO_SETLINK_NOTIFY correctly in do_setlink'. Fixes: 085e1a65f04f ("rtnetlink: Do not generate notifications for MTU events") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d4bcdcc68e92..72053ed7c891 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4279,6 +4279,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi switch (event) { case NETDEV_REBOOT: + case NETDEV_CHANGEMTU: case NETDEV_CHANGEADDR: case NETDEV_CHANGENAME: case NETDEV_FEAT_CHANGE: -- cgit v1.2.3 From ebdcf0450b020748c2dab6bfe44a5ac3c5159fb0 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 15 Oct 2017 18:13:42 +0800 Subject: rtnetlink: bring NETDEV_CHANGE_TX_QUEUE_LEN event process back in rtnetlink_event The same fix for changing mtu in the patch 'rtnetlink: bring NETDEV_CHANGEMTU event process back in rtnetlink_event' is needed for changing tx_queue_len. Note that the redundant notifications issue for tx_queue_len will be fixed in the later patch 'rtnetlink: do not send notification for tx_queue_len in do_setlink'. Fixes: 27b3b551d8a7 ("rtnetlink: Do not generate notifications for NETDEV_CHANGE_TX_QUEUE_LEN event") Signed-off-by: Xin Long Acked-by: David Ahern Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 72053ed7c891..bf473604f33d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4287,6 +4287,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi case NETDEV_NOTIFY_PEERS: case NETDEV_RESEND_IGMP: case NETDEV_CHANGEINFODATA: + case NETDEV_CHANGE_TX_QUEUE_LEN: rtmsg_ifinfo_event(RTM_NEWLINK, dev, 0, rtnl_get_event(event), GFP_KERNEL); break; -- cgit v1.2.3 From e6e6659446c87057aede26a39d9f16b19001716f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 15 Oct 2017 18:13:43 +0800 Subject: rtnetlink: bring NETDEV_POST_TYPE_CHANGE event process back in rtnetlink_event As I said in patch 'rtnetlink: bring NETDEV_CHANGEMTU event process back in rtnetlink_event', removing NETDEV_POST_TYPE_CHANGE event was not the right fix for the redundant notifications issue. So bring this event process back to rtnetlink_event and the old redundant notifications issue would be fixed in the later patch 'rtnetlink: check DO_SETLINK_NOTIFY correctly in do_setlink'. Fixes: aef091ae58aa ("rtnetlink: Do not generate notifications for POST_TYPE_CHANGE event") Signed-off-by: Xin Long Acked-by: David Ahern Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index bf473604f33d..8e44fd597f46 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4284,6 +4284,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi case NETDEV_CHANGENAME: case NETDEV_FEAT_CHANGE: case NETDEV_BONDING_FAILOVER: + case NETDEV_POST_TYPE_CHANGE: case NETDEV_NOTIFY_PEERS: case NETDEV_RESEND_IGMP: case NETDEV_CHANGEINFODATA: -- cgit v1.2.3 From dc709f375743ebf5c9326cc9b946f6f09a34ac44 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 15 Oct 2017 18:13:44 +0800 Subject: rtnetlink: bring NETDEV_CHANGEUPPER event process back in rtnetlink_event libteam needs this event notification in userspace when dev's master dev has been changed. After this, the redundant notifications issue would be fixed in the later patch 'rtnetlink: check DO_SETLINK_NOTIFY correctly in do_setlink'. Fixes: b6b36eb23a46 ("rtnetlink: Do not generate notifications for NETDEV_CHANGEUPPER event") Signed-off-by: Xin Long Acked-by: David Ahern Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 8e44fd597f46..ab98c1c8b6f3 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4286,6 +4286,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi case NETDEV_BONDING_FAILOVER: case NETDEV_POST_TYPE_CHANGE: case NETDEV_NOTIFY_PEERS: + case NETDEV_CHANGEUPPER: case NETDEV_RESEND_IGMP: case NETDEV_CHANGEINFODATA: case NETDEV_CHANGE_TX_QUEUE_LEN: -- cgit v1.2.3 From 64ff90cc2e6f42596d7a0c37e41dc95292bb63b1 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 15 Oct 2017 18:13:45 +0800 Subject: rtnetlink: check DO_SETLINK_NOTIFY correctly in do_setlink The check 'status & DO_SETLINK_NOTIFY' in do_setlink doesn't really work after status & DO_SETLINK_MODIFIED, as: DO_SETLINK_MODIFIED 0x1 DO_SETLINK_NOTIFY 0x3 Considering that notifications are suppposed to be sent only when status have the flag DO_SETLINK_NOTIFY, the right check would be: (status & DO_SETLINK_NOTIFY) == DO_SETLINK_NOTIFY This would avoid lots of duplicated notifications when setting some properties of a link. Fixes: ba9989069f4e ("rtnl/do_setlink(): notify when a netdev is modified") Signed-off-by: Xin Long Acked-by: David Ahern Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index ab98c1c8b6f3..3e98fb557598 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2248,7 +2248,7 @@ static int do_setlink(const struct sk_buff *skb, errout: if (status & DO_SETLINK_MODIFIED) { - if (status & DO_SETLINK_NOTIFY) + if ((status & DO_SETLINK_NOTIFY) == DO_SETLINK_NOTIFY) netdev_state_change(dev); if (err < 0) -- cgit v1.2.3 From 2d7f669b42a97022c8c2b6cd86f3990be5fcd1bc Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 15 Oct 2017 18:13:46 +0800 Subject: rtnetlink: do not set notification for tx_queue_len in do_setlink NETDEV_CHANGE_TX_QUEUE_LEN event process in rtnetlink_event would send a notification for userspace and tx_queue_len's setting in do_setlink would trigger NETDEV_CHANGE_TX_QUEUE_LEN. So it shouldn't set DO_SETLINK_NOTIFY status for this change to send a notification any more. Signed-off-by: Xin Long Acked-by: David Ahern Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 3e98fb557598..a6bcf86ce471 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2093,7 +2093,7 @@ static int do_setlink(const struct sk_buff *skb, dev->tx_queue_len = orig_len; goto errout; } - status |= DO_SETLINK_NOTIFY; + status |= DO_SETLINK_MODIFIED; } } -- cgit v1.2.3 From 2459b4c635858094df78abb9ca87d99f89fe8ca5 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 11 Oct 2017 16:24:48 +0200 Subject: net: enable interface alias removal via rtnl IFLA_IFALIAS is defined as NLA_STRING. It means that the minimal length of the attribute is 1 ("\0"). However, to remove an alias, the attribute length must be 0 (see dev_set_alias()). Let's define the type to NLA_BINARY to allow 0-length string, so that the alias can be removed. Example: $ ip l s dummy0 alias foo $ ip l l dev dummy0 5: dummy0: mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 link/ether ae:20:30:4f:a7:f3 brd ff:ff:ff:ff:ff:ff alias foo Before the patch: $ ip l s dummy0 alias "" RTNETLINK answers: Numerical result out of range After the patch: $ ip l s dummy0 alias "" $ ip l l dev dummy0 5: dummy0: mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 link/ether ae:20:30:4f:a7:f3 brd ff:ff:ff:ff:ff:ff CC: Oliver Hartkopp CC: Stephen Hemminger Fixes: 96ca4a2cc145 ("net: remove ifalias on empty given alias") Reported-by: Julien FLoret Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a6bcf86ce471..5ace48926b19 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1483,7 +1483,10 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_LINKINFO] = { .type = NLA_NESTED }, [IFLA_NET_NS_PID] = { .type = NLA_U32 }, [IFLA_NET_NS_FD] = { .type = NLA_U32 }, - [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 }, + /* IFLA_IFALIAS is a string, but policy is set to NLA_BINARY to + * allow 0-length string (needed to remove an alias). + */ + [IFLA_IFALIAS] = { .type = NLA_BINARY, .len = IFALIASZ - 1 }, [IFLA_VFINFO_LIST] = {. type = NLA_NESTED }, [IFLA_VF_PORTS] = { .type = NLA_NESTED }, [IFLA_PORT_SELF] = { .type = NLA_NESTED }, -- cgit v1.2.3 From 0ad646c81b2182f7fa67ec0c8c825e0ee165696d Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 13 Oct 2017 11:58:53 -0700 Subject: tun: call dev_get_valid_name() before register_netdevice() register_netdevice() could fail early when we have an invalid dev name, in which case ->ndo_uninit() is not called. For tun device, this is a problem because a timer etc. are already initialized and it expects ->ndo_uninit() to clean them up. We could move these initializations into a ->ndo_init() so that register_netdevice() knows better, however this is still complicated due to the logic in tun_detach(). Therefore, I choose to just call dev_get_valid_name() before register_netdevice(), which is quicker and much easier to audit. And for this specific case, it is already enough. Fixes: 96442e42429e ("tuntap: choose the txq based on rxq") Reported-by: Dmitry Alexeev Cc: Jason Wang Cc: "Michael S. Tsirkin" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- drivers/net/tun.c | 3 +++ include/linux/netdevice.h | 3 +++ net/core/dev.c | 6 +++--- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 5ce580f413b9..e21bf90b819f 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -2027,6 +2027,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) if (!dev) return -ENOMEM; + err = dev_get_valid_name(net, dev, name); + if (err) + goto err_free_dev; dev_net_set(dev, net); dev->rtnl_link_ops = &tun_link_ops; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f535779d9dc1..2eaac7d75af4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3694,6 +3694,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, unsigned char name_assign_type, void (*setup)(struct net_device *), unsigned int txqs, unsigned int rxqs); +int dev_get_valid_name(struct net *net, struct net_device *dev, + const char *name); + #define alloc_netdev(sizeof_priv, name, name_assign_type, setup) \ alloc_netdev_mqs(sizeof_priv, name, name_assign_type, setup, 1, 1) diff --git a/net/core/dev.c b/net/core/dev.c index 588b473194a8..11596a302a26 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1147,9 +1147,8 @@ static int dev_alloc_name_ns(struct net *net, return ret; } -static int dev_get_valid_name(struct net *net, - struct net_device *dev, - const char *name) +int dev_get_valid_name(struct net *net, struct net_device *dev, + const char *name) { BUG_ON(!net); @@ -1165,6 +1164,7 @@ static int dev_get_valid_name(struct net *net, return 0; } +EXPORT_SYMBOL(dev_get_valid_name); /** * dev_change_name - change name of a device -- cgit v1.2.3 From c019b5166e11faaf9ed3b64316ed338eaa19de60 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Mon, 16 Oct 2017 12:19:48 +0300 Subject: net/sched: cls_flower: Set egress_dev mark when calling into the HW driver Commit 7091d8c '(net/sched: cls_flower: Add offload support using egress Hardware device') made sure (when fl_hw_replace_filter is called) to put the egress_dev mark on persisent structure instance. Hence, following calls into the HW driver for stats and deletion will note it and act accordingly. With commit de4784ca030f this property is lost and hence when called, the HW driver failes to operate (stats, delete) on the offloaded flow. Fix it by setting the egress_dev flag whenever the ingress device is different from the hw device since this is exactly the condition under which we're calling into the HW driver through the egress port net-device. Fixes: de4784ca030f ('net: sched: get rid of struct tc_to_netdev') Signed-off-by: Or Gerlitz Signed-off-by: Roi Dayan Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_flower.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index d230cb4c8094..b480d7c792ba 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -234,6 +234,7 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f) tc_cls_common_offload_init(&cls_flower.common, tp); cls_flower.command = TC_CLSFLOWER_DESTROY; cls_flower.cookie = (unsigned long) f; + cls_flower.egress_dev = f->hw_dev != tp->q->dev_queue->dev; dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, &cls_flower); } @@ -289,6 +290,7 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) cls_flower.command = TC_CLSFLOWER_STATS; cls_flower.cookie = (unsigned long) f; cls_flower.exts = &f->exts; + cls_flower.egress_dev = f->hw_dev != tp->q->dev_queue->dev; dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, &cls_flower); -- cgit v1.2.3 From 823038ca030e9f8283518b1e6a5a6879edcbe057 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 16 Oct 2017 19:43:15 +0800 Subject: dev_ioctl: add missing NETDEV_CHANGE_TX_QUEUE_LEN event notification When changing dev tx_queue_len via netlink or net-sysfs, a NETDEV_CHANGE_TX_QUEUE_LEN event notification will be called. But dev_ioctl missed this event notification, which could cause no userspace notification would be sent. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/core/dev_ioctl.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 709a4e6fb447..f9c7a88cd981 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -303,7 +303,18 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) case SIOCSIFTXQLEN: if (ifr->ifr_qlen < 0) return -EINVAL; - dev->tx_queue_len = ifr->ifr_qlen; + if (dev->tx_queue_len ^ ifr->ifr_qlen) { + unsigned int orig_len = dev->tx_queue_len; + + dev->tx_queue_len = ifr->ifr_qlen; + err = call_netdevice_notifiers( + NETDEV_CHANGE_TX_QUEUE_LEN, dev); + err = notifier_to_errno(err); + if (err) { + dev->tx_queue_len = orig_len; + return err; + } + } return 0; case SIOCSIFNAME: -- cgit v1.2.3 From d965465b60bad79d0b067f1009ba80ae76a6561a Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 16 Oct 2017 16:28:28 +0200 Subject: mlxsw: core: Fix possible deadlock When an EMAD is transmitted, a timeout work item is scheduled with a delay of 200ms, so that another EMAD will be retried until a maximum of five retries. In certain situations, it's possible for the function waiting on the EMAD to be associated with a work item that is queued on the same workqueue (`mlxsw_core`) as the timeout work item. This results in flushing a work item on the same workqueue. According to commit e159489baa71 ("workqueue: relax lockdep annotation on flush_work()") the above may lead to a deadlock in case the workqueue has only one worker active or if the system in under memory pressure and the rescue worker is in use. The latter explains the very rare and random nature of the lockdep splats we have been seeing: [ 52.730240] ============================================ [ 52.736179] WARNING: possible recursive locking detected [ 52.742119] 4.14.0-rc3jiri+ #4 Not tainted [ 52.746697] -------------------------------------------- [ 52.752635] kworker/1:3/599 is trying to acquire lock: [ 52.758378] (mlxsw_core_driver_name){+.+.}, at: [] flush_work+0x3a4/0x5e0 [ 52.767837] but task is already holding lock: [ 52.774360] (mlxsw_core_driver_name){+.+.}, at: [] process_one_work+0x7d4/0x12f0 [ 52.784495] other info that might help us debug this: [ 52.791794] Possible unsafe locking scenario: [ 52.798413] CPU0 [ 52.801144] ---- [ 52.803875] lock(mlxsw_core_driver_name); [ 52.808556] lock(mlxsw_core_driver_name); [ 52.813236] *** DEADLOCK *** [ 52.819857] May be due to missing lock nesting notation [ 52.827450] 3 locks held by kworker/1:3/599: [ 52.832221] #0: (mlxsw_core_driver_name){+.+.}, at: [] process_one_work+0x7d4/0x12f0 [ 52.842846] #1: ((&(&bridge->fdb_notify.dw)->work)){+.+.}, at: [] process_one_work+0x7d4/0x12f0 [ 52.854537] #2: (rtnl_mutex){+.+.}, at: [] rtnl_lock+0x17/0x20 [ 52.863021] stack backtrace: [ 52.867890] CPU: 1 PID: 599 Comm: kworker/1:3 Not tainted 4.14.0-rc3jiri+ #4 [ 52.875773] Hardware name: Mellanox Technologies Ltd. "MSN2100-CB2F"/"SA001017", BIOS 5.6.5 06/07/2016 [ 52.886267] Workqueue: mlxsw_core mlxsw_sp_fdb_notify_work [mlxsw_spectrum] [ 52.894060] Call Trace: [ 52.909122] __lock_acquire+0xf6f/0x2a10 [ 53.025412] lock_acquire+0x158/0x440 [ 53.047557] flush_work+0x3c4/0x5e0 [ 53.087571] __cancel_work_timer+0x3ca/0x5e0 [ 53.177051] cancel_delayed_work_sync+0x13/0x20 [ 53.182142] mlxsw_reg_trans_bulk_wait+0x12d/0x7a0 [mlxsw_core] [ 53.194571] mlxsw_core_reg_access+0x586/0x990 [mlxsw_core] [ 53.225365] mlxsw_reg_query+0x10/0x20 [mlxsw_core] [ 53.230882] mlxsw_sp_fdb_notify_work+0x2a3/0x9d0 [mlxsw_spectrum] [ 53.237801] process_one_work+0x8f1/0x12f0 [ 53.321804] worker_thread+0x1fd/0x10c0 [ 53.435158] kthread+0x28e/0x370 [ 53.448703] ret_from_fork+0x2a/0x40 [ 53.453017] mlxsw_spectrum 0000:01:00.0: EMAD retries (2/5) (tid=bf4549b100000774) [ 53.453119] mlxsw_spectrum 0000:01:00.0: EMAD retries (5/5) (tid=bf4549b100000770) [ 53.453132] mlxsw_spectrum 0000:01:00.0: EMAD reg access failed (tid=bf4549b100000770,reg_id=200b(sfn),type=query,status=0(operation performed)) [ 53.453143] mlxsw_spectrum 0000:01:00.0: Failed to get FDB notifications Fix this by creating another workqueue for EMAD timeouts, thereby preventing the situation of a work item trying to flush a work item queued on the same workqueue. Fixes: caf7297e7ab5f ("mlxsw: core: Introduce support for asynchronous EMAD register access") Signed-off-by: Ido Schimmel Reported-by: Jiri Pirko Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 9d5e7cf288be..f3315bc874ad 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -96,6 +96,7 @@ struct mlxsw_core { const struct mlxsw_bus *bus; void *bus_priv; const struct mlxsw_bus_info *bus_info; + struct workqueue_struct *emad_wq; struct list_head rx_listener_list; struct list_head event_listener_list; struct { @@ -465,7 +466,7 @@ static void mlxsw_emad_trans_timeout_schedule(struct mlxsw_reg_trans *trans) { unsigned long timeout = msecs_to_jiffies(MLXSW_EMAD_TIMEOUT_MS); - mlxsw_core_schedule_dw(&trans->timeout_dw, timeout); + queue_delayed_work(trans->core->emad_wq, &trans->timeout_dw, timeout); } static int mlxsw_emad_transmit(struct mlxsw_core *mlxsw_core, @@ -587,12 +588,18 @@ static const struct mlxsw_listener mlxsw_emad_rx_listener = static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core) { + struct workqueue_struct *emad_wq; u64 tid; int err; if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX)) return 0; + emad_wq = alloc_workqueue("mlxsw_core_emad", WQ_MEM_RECLAIM, 0); + if (!emad_wq) + return -ENOMEM; + mlxsw_core->emad_wq = emad_wq; + /* Set the upper 32 bits of the transaction ID field to a random * number. This allows us to discard EMADs addressed to other * devices. @@ -619,6 +626,7 @@ static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core) err_emad_trap_set: mlxsw_core_trap_unregister(mlxsw_core, &mlxsw_emad_rx_listener, mlxsw_core); + destroy_workqueue(mlxsw_core->emad_wq); return err; } @@ -631,6 +639,7 @@ static void mlxsw_emad_fini(struct mlxsw_core *mlxsw_core) mlxsw_core->emad.use_emad = false; mlxsw_core_trap_unregister(mlxsw_core, &mlxsw_emad_rx_listener, mlxsw_core); + destroy_workqueue(mlxsw_core->emad_wq); } static struct sk_buff *mlxsw_emad_alloc(const struct mlxsw_core *mlxsw_core, -- cgit v1.2.3 From 2de09681e4ce8b1caa79d2e4482b72d8ef41c550 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Mon, 16 Oct 2017 10:02:11 -0500 Subject: ibmvnic: Fix calculation of number of TX header descriptors This patch correctly sets the number of additional header descriptors that will be sent in an indirect SCRQ entry. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index cb8182f4fdfa..c66abd476023 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1093,11 +1093,12 @@ static int build_hdr_data(u8 hdr_field, struct sk_buff *skb, * places them in a descriptor array, scrq_arr */ -static void create_hdr_descs(u8 hdr_field, u8 *hdr_data, int len, int *hdr_len, - union sub_crq *scrq_arr) +static int create_hdr_descs(u8 hdr_field, u8 *hdr_data, int len, int *hdr_len, + union sub_crq *scrq_arr) { union sub_crq hdr_desc; int tmp_len = len; + int num_descs = 0; u8 *data, *cur; int tmp; @@ -1126,7 +1127,10 @@ static void create_hdr_descs(u8 hdr_field, u8 *hdr_data, int len, int *hdr_len, tmp_len -= tmp; *scrq_arr = hdr_desc; scrq_arr++; + num_descs++; } + + return num_descs; } /** @@ -1144,16 +1148,12 @@ static void build_hdr_descs_arr(struct ibmvnic_tx_buff *txbuff, int *num_entries, u8 hdr_field) { int hdr_len[3] = {0, 0, 0}; - int tot_len, len; + int tot_len; u8 *hdr_data = txbuff->hdr_data; tot_len = build_hdr_data(hdr_field, txbuff->skb, hdr_len, txbuff->hdr_data); - len = tot_len; - len -= 24; - if (len > 0) - num_entries += len % 29 ? len / 29 + 1 : len / 29; - create_hdr_descs(hdr_field, hdr_data, tot_len, hdr_len, + *num_entries += create_hdr_descs(hdr_field, hdr_data, tot_len, hdr_len, txbuff->indir_arr + 1); } -- cgit v1.2.3 From 48044eb490be71c203e14dd89e8bae87209eab52 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 16 Oct 2017 17:09:53 +0200 Subject: netlink: fix netlink_ack() extack race It seems that it's possible to toggle NETLINK_F_EXT_ACK through setsockopt() while another thread/CPU is building a message inside netlink_ack(), which could then trigger the WARN_ON()s I added since if it goes from being turned off to being turned on between allocating and filling the message, the skb could end up being too small. Avoid this whole situation by storing the value of this flag in a separate variable and using that throughout the function instead. Fixes: 2d4bc93368f5 ("netlink: extended ACK reporting") Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index f34750691c5c..b93148e8e9fb 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2307,6 +2307,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, size_t tlvlen = 0; struct netlink_sock *nlk = nlk_sk(NETLINK_CB(in_skb).sk); unsigned int flags = 0; + bool nlk_has_extack = nlk->flags & NETLINK_F_EXT_ACK; /* Error messages get the original request appened, unless the user * requests to cap the error message, and get extra error data if @@ -2317,7 +2318,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, payload += nlmsg_len(nlh); else flags |= NLM_F_CAPPED; - if (nlk->flags & NETLINK_F_EXT_ACK && extack) { + if (nlk_has_extack && extack) { if (extack->_msg) tlvlen += nla_total_size(strlen(extack->_msg) + 1); if (extack->bad_attr) @@ -2326,8 +2327,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, } else { flags |= NLM_F_CAPPED; - if (nlk->flags & NETLINK_F_EXT_ACK && - extack && extack->cookie_len) + if (nlk_has_extack && extack && extack->cookie_len) tlvlen += nla_total_size(extack->cookie_len); } @@ -2355,7 +2355,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, errmsg->error = err; memcpy(&errmsg->msg, nlh, payload > sizeof(*errmsg) ? nlh->nlmsg_len : sizeof(*nlh)); - if (nlk->flags & NETLINK_F_EXT_ACK && extack) { + if (nlk_has_extack && extack) { if (err) { if (extack->_msg) WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG, -- cgit v1.2.3 From 28e33f9d78eefe98ea86673ab31e988b37a9a738 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 16 Oct 2017 11:16:55 -0700 Subject: bpf: disallow arithmetic operations on context pointer Commit f1174f77b50c ("bpf/verifier: rework value tracking") removed the crafty selection of which pointer types are allowed to be modified. This is OK for most pointer types since adjust_ptr_min_max_vals() will catch operations on immutable pointers. One exception is PTR_TO_CTX which is now allowed to be offseted freely. The intent of aforementioned commit was to allow context access via modified registers. The offset passed to ->is_valid_access() verifier callback has been adjusted by the value of the variable offset. What is missing, however, is taking the variable offset into account when the context register is used. Or in terms of the code adding the offset to the value passed to the ->convert_ctx_access() callback. This leads to the following eBPF user code: r1 += 68 r0 = *(u32 *)(r1 + 8) exit being translated to this in kernel space: 0: (07) r1 += 68 1: (61) r0 = *(u32 *)(r1 +180) 2: (95) exit Offset 8 is corresponding to 180 in the kernel, but offset 76 is valid too. Verifier will "accept" access to offset 68+8=76 but then "convert" access to offset 8 as 180. Effective access to offset 248 is beyond the kernel context. (This is a __sk_buff example on a debug-heavy kernel - packet mark is 8 -> 180, 76 would be data.) Dereferencing the modified context pointer is not as easy as dereferencing other types, because we have to translate the access to reading a field in kernel structures which is usually at a different offset and often of a different size. To allow modifying the pointer we would have to make sure that given eBPF instruction will always access the same field or the fields accessed are "compatible" in terms of offset and size... Disallow dereferencing modified context pointers and add to selftests the test case described here. Fixes: f1174f77b50c ("bpf/verifier: rework value tracking") Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Edward Cree Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 8 ++++++-- tools/testing/selftests/bpf/test_verifier.c | 14 ++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8b8d6ba39e23..20f3889c006e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1116,7 +1116,12 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn /* ctx accesses must be at a fixed offset, so that we can * determine what type of data were returned. */ - if (!tnum_is_const(reg->var_off)) { + if (reg->off) { + verbose("dereference of modified ctx ptr R%d off=%d+%d, ctx+const is allowed, ctx+const+const is not\n", + regno, reg->off, off - reg->off); + return -EACCES; + } + if (!tnum_is_const(reg->var_off) || reg->var_off.value) { char tn_buf[48]; tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); @@ -1124,7 +1129,6 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn tn_buf, off, size); return -EACCES; } - off += reg->var_off.value; err = check_ctx_access(env, insn_idx, off, size, t, ®_type); if (!err && t == BPF_READ && value_regno >= 0) { /* ctx access returns either a scalar, or a diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 26f3250bdcd2..3c7d3a45a3c5 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -6645,6 +6645,20 @@ static struct bpf_test tests[] = { .errstr = "BPF_END uses reserved fields", .result = REJECT, }, + { + "arithmetic ops make PTR_TO_CTX unusable", + .insns = { + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, + offsetof(struct __sk_buff, data) - + offsetof(struct __sk_buff, mark)), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .errstr = "dereference of modified ctx ptr R1 off=68+8, ctx+const is allowed, ctx+const+const is not", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, }; static int probe_filter_length(const struct bpf_insn *fp) -- cgit v1.2.3 From ad2302345d59d29232cb668baaae9e840925d153 Mon Sep 17 00:00:00 2001 From: "ZHU Yi (ST-FIR/ENG1-Zhu)" Date: Fri, 15 Sep 2017 06:59:15 +0000 Subject: can: flexcan: fix state transition regression Update state upon any interrupt to report correct state transitions in case the flexcan core enabled the broken error state quirk fix. Signed-off-by: Zhu Yi Signed-off-by: Mark Jonas Acked-by: Wolfgang Grandegger Cc: linux-stable # >= v4.11 Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 13f0f219d8aa..df4bfb83024c 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -765,8 +765,9 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id) flexcan_write(reg_esr & FLEXCAN_ESR_ALL_INT, ®s->esr); } - /* state change interrupt */ - if (reg_esr & FLEXCAN_ESR_ERR_STATE) + /* state change interrupt or broken error state quirk fix is enabled */ + if ((reg_esr & FLEXCAN_ESR_ERR_STATE) || + (priv->devtype_data->quirks & FLEXCAN_QUIRK_BROKEN_ERR_STATE)) flexcan_irq_state(dev, reg_esr); /* bus error IRQ - handle if bus error reporting is activated */ -- cgit v1.2.3 From 2f8639b24b4f4f9dd6cf7c1f2aea90e2fcbcc451 Mon Sep 17 00:00:00 2001 From: "ZHU Yi (ST-FIR/ENG1-Zhu)" Date: Fri, 15 Sep 2017 07:01:23 +0000 Subject: can: flexcan: rename legacy error state quirk Rename FLEXCAN_QUIRK_BROKEN_ERR_STATE to FLEXCAN_QUIRK_BROKEN_WERR_STATE for better description of the missing [TR]WRN_INT quirk. Signed-off-by: Zhu Yi Signed-off-by: Mark Jonas Acked-by: Wolfgang Grandegger Cc: linux-stable # >= v4.11 Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index df4bfb83024c..e163c55e737b 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -193,7 +193,7 @@ * * Some SOCs do not have the RX_WARN & TX_WARN interrupt line connected. */ -#define FLEXCAN_QUIRK_BROKEN_ERR_STATE BIT(1) /* [TR]WRN_INT not connected */ +#define FLEXCAN_QUIRK_BROKEN_WERR_STATE BIT(1) /* [TR]WRN_INT not connected */ #define FLEXCAN_QUIRK_DISABLE_RXFG BIT(2) /* Disable RX FIFO Global mask */ #define FLEXCAN_QUIRK_ENABLE_EACEN_RRS BIT(3) /* Enable EACEN and RRS bit in ctrl2 */ #define FLEXCAN_QUIRK_DISABLE_MECR BIT(4) /* Disable Memory error detection */ @@ -281,7 +281,7 @@ struct flexcan_priv { }; static const struct flexcan_devtype_data fsl_p1010_devtype_data = { - .quirks = FLEXCAN_QUIRK_BROKEN_ERR_STATE, + .quirks = FLEXCAN_QUIRK_BROKEN_WERR_STATE, }; static const struct flexcan_devtype_data fsl_imx28_devtype_data; @@ -767,7 +767,7 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id) /* state change interrupt or broken error state quirk fix is enabled */ if ((reg_esr & FLEXCAN_ESR_ERR_STATE) || - (priv->devtype_data->quirks & FLEXCAN_QUIRK_BROKEN_ERR_STATE)) + (priv->devtype_data->quirks & FLEXCAN_QUIRK_BROKEN_WERR_STATE)) flexcan_irq_state(dev, reg_esr); /* bus error IRQ - handle if bus error reporting is activated */ @@ -888,7 +888,7 @@ static int flexcan_chip_start(struct net_device *dev) * on most Flexcan cores, too. Otherwise we don't get * any error warning or passive interrupts. */ - if (priv->devtype_data->quirks & FLEXCAN_QUIRK_BROKEN_ERR_STATE || + if (priv->devtype_data->quirks & FLEXCAN_QUIRK_BROKEN_WERR_STATE || priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING) reg_ctrl |= FLEXCAN_CTRL_ERR_MSK; else -- cgit v1.2.3 From da49a8075c00b4d98ef069a0ee201177a8b79ead Mon Sep 17 00:00:00 2001 From: "ZHU Yi (ST-FIR/ENG1-Zhu)" Date: Fri, 15 Sep 2017 07:03:58 +0000 Subject: can: flexcan: implement error passive state quirk Add FLEXCAN_QUIRK_BROKEN_PERR_STATE for better description of the missing error passive interrupt quirk. Error interrupt flooding may happen if the broken error state quirk fix is enabled. For example, in case there is singled out node on the bus and the node sends a frame, then error interrupt flooding happens and will not stop because the node cannot go to bus off. The flooding will stop after another node connected to the bus again. If high bitrate configured on the low end system, then the flooding may causes performance issue, hence, this patch mitigates this by: 1. disable error interrupt upon error passive state transition 2. re-enable error interrupt upon error warning state transition 3. disable/enable error interrupt upon error active state transition depends on FLEXCAN_QUIRK_BROKEN_WERR_STATE In this way, the driver is still able to report correct state transitions without additional latency. When there are bus problems, flooding of error interrupts is limited to the number of frames required to change state from error warning to error passive if the core has [TR]WRN_INT connected (FLEXCAN_QUIRK_BROKEN_WERR_STATE is not enabled), otherwise, the flooding is limited to the number of frames required to change state from error active to error passive. Signed-off-by: Zhu Yi Signed-off-by: Mark Jonas Acked-by: Wolfgang Grandegger Cc: linux-stable # >= v4.11 Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 75 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 66 insertions(+), 9 deletions(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index e163c55e737b..c83a09fa4166 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -182,14 +182,14 @@ /* FLEXCAN hardware feature flags * * Below is some version info we got: - * SOC Version IP-Version Glitch- [TR]WRN_INT Memory err RTR re- - * Filter? connected? detection ception in MB - * MX25 FlexCAN2 03.00.00.00 no no no no - * MX28 FlexCAN2 03.00.04.00 yes yes no no - * MX35 FlexCAN2 03.00.00.00 no no no no - * MX53 FlexCAN2 03.00.00.00 yes no no no - * MX6s FlexCAN3 10.00.12.00 yes yes no yes - * VF610 FlexCAN3 ? no yes yes yes? + * SOC Version IP-Version Glitch- [TR]WRN_INT IRQ Err Memory err RTR re- + * Filter? connected? Passive detection ception in MB + * MX25 FlexCAN2 03.00.00.00 no no ? no no + * MX28 FlexCAN2 03.00.04.00 yes yes no no no + * MX35 FlexCAN2 03.00.00.00 no no ? no no + * MX53 FlexCAN2 03.00.00.00 yes no no no no + * MX6s FlexCAN3 10.00.12.00 yes yes no no yes + * VF610 FlexCAN3 ? no yes ? yes yes? * * Some SOCs do not have the RX_WARN & TX_WARN interrupt line connected. */ @@ -198,6 +198,7 @@ #define FLEXCAN_QUIRK_ENABLE_EACEN_RRS BIT(3) /* Enable EACEN and RRS bit in ctrl2 */ #define FLEXCAN_QUIRK_DISABLE_MECR BIT(4) /* Disable Memory error detection */ #define FLEXCAN_QUIRK_USE_OFF_TIMESTAMP BIT(5) /* Use timestamp based offloading */ +#define FLEXCAN_QUIRK_BROKEN_PERR_STATE BIT(6) /* No interrupt for error passive */ /* Structure of the message buffer */ struct flexcan_mb { @@ -335,6 +336,22 @@ static inline void flexcan_write(u32 val, void __iomem *addr) } #endif +static inline void flexcan_error_irq_enable(const struct flexcan_priv *priv) +{ + struct flexcan_regs __iomem *regs = priv->regs; + u32 reg_ctrl = (priv->reg_ctrl_default | FLEXCAN_CTRL_ERR_MSK); + + flexcan_write(reg_ctrl, ®s->ctrl); +} + +static inline void flexcan_error_irq_disable(const struct flexcan_priv *priv) +{ + struct flexcan_regs __iomem *regs = priv->regs; + u32 reg_ctrl = (priv->reg_ctrl_default & ~FLEXCAN_CTRL_ERR_MSK); + + flexcan_write(reg_ctrl, ®s->ctrl); +} + static inline int flexcan_transceiver_enable(const struct flexcan_priv *priv) { if (!priv->reg_xceiver) @@ -713,6 +730,7 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id) struct flexcan_regs __iomem *regs = priv->regs; irqreturn_t handled = IRQ_NONE; u32 reg_iflag1, reg_esr; + enum can_state last_state = priv->can.state; reg_iflag1 = flexcan_read(®s->iflag1); @@ -767,7 +785,8 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id) /* state change interrupt or broken error state quirk fix is enabled */ if ((reg_esr & FLEXCAN_ESR_ERR_STATE) || - (priv->devtype_data->quirks & FLEXCAN_QUIRK_BROKEN_WERR_STATE)) + (priv->devtype_data->quirks & (FLEXCAN_QUIRK_BROKEN_WERR_STATE | + FLEXCAN_QUIRK_BROKEN_PERR_STATE))) flexcan_irq_state(dev, reg_esr); /* bus error IRQ - handle if bus error reporting is activated */ @@ -775,6 +794,44 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id) (priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING)) flexcan_irq_bus_err(dev, reg_esr); + /* availability of error interrupt among state transitions in case + * bus error reporting is de-activated and + * FLEXCAN_QUIRK_BROKEN_PERR_STATE is enabled: + * +--------------------------------------------------------------+ + * | +----------------------------------------------+ [stopped / | + * | | | sleeping] -+ + * +-+-> active <-> warning <-> passive -> bus off -+ + * ___________^^^^^^^^^^^^_______________________________ + * disabled(1) enabled disabled + * + * (1): enabled if FLEXCAN_QUIRK_BROKEN_WERR_STATE is enabled + */ + if ((last_state != priv->can.state) && + (priv->devtype_data->quirks & FLEXCAN_QUIRK_BROKEN_PERR_STATE) && + !(priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING)) { + switch (priv->can.state) { + case CAN_STATE_ERROR_ACTIVE: + if (priv->devtype_data->quirks & + FLEXCAN_QUIRK_BROKEN_WERR_STATE) + flexcan_error_irq_enable(priv); + else + flexcan_error_irq_disable(priv); + break; + + case CAN_STATE_ERROR_WARNING: + flexcan_error_irq_enable(priv); + break; + + case CAN_STATE_ERROR_PASSIVE: + case CAN_STATE_BUS_OFF: + flexcan_error_irq_disable(priv); + break; + + default: + break; + } + } + return handled; } -- cgit v1.2.3 From cf9c04677f2bf599b44511963039ec6e25583feb Mon Sep 17 00:00:00 2001 From: "ZHU Yi (ST-FIR/ENG1-Zhu)" Date: Fri, 15 Sep 2017 07:05:50 +0000 Subject: can: flexcan: fix i.MX6 state transition issue Enable FLEXCAN_QUIRK_BROKEN_PERR_STATE for i.MX6 to report correct state transitions. Signed-off-by: Zhu Yi Signed-off-by: Mark Jonas Acked-by: Wolfgang Grandegger Cc: linux-stable # >= v4.11 Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index c83a09fa4166..d6ad12744ff1 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -289,7 +289,7 @@ static const struct flexcan_devtype_data fsl_imx28_devtype_data; static const struct flexcan_devtype_data fsl_imx6q_devtype_data = { .quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS | - FLEXCAN_QUIRK_USE_OFF_TIMESTAMP, + FLEXCAN_QUIRK_USE_OFF_TIMESTAMP | FLEXCAN_QUIRK_BROKEN_PERR_STATE, }; static const struct flexcan_devtype_data fsl_vf610_devtype_data = { -- cgit v1.2.3 From 083c5571290a2d4308b75f1a59cf376b6e907808 Mon Sep 17 00:00:00 2001 From: "ZHU Yi (ST-FIR/ENG1-Zhu)" Date: Fri, 15 Sep 2017 07:08:23 +0000 Subject: can: flexcan: fix i.MX28 state transition issue Enable FLEXCAN_QUIRK_BROKEN_PERR_STATE for i.MX28 to report correct state transitions, especially to error passive. Signed-off-by: Wolfgang Grandegger Signed-off-by: Zhu Yi Signed-off-by: Mark Jonas Cc: linux-stable # >= v4.11 Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index d6ad12744ff1..ed544c44848f 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -285,7 +285,9 @@ static const struct flexcan_devtype_data fsl_p1010_devtype_data = { .quirks = FLEXCAN_QUIRK_BROKEN_WERR_STATE, }; -static const struct flexcan_devtype_data fsl_imx28_devtype_data; +static const struct flexcan_devtype_data fsl_imx28_devtype_data = { + .quirks = FLEXCAN_QUIRK_BROKEN_PERR_STATE, +}; static const struct flexcan_devtype_data fsl_imx6q_devtype_data = { .quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS | -- cgit v1.2.3 From fb5b91d61bebc24686ffc379138fd67808b1a1e6 Mon Sep 17 00:00:00 2001 From: "ZHU Yi (ST-FIR/ENG1-Zhu)" Date: Fri, 15 Sep 2017 07:09:37 +0000 Subject: can: flexcan: fix p1010 state transition issue Enable FLEXCAN_QUIRK_BROKEN_WERR_STATE and FLEXCAN_QUIRK_BROKEN_PERR_STATE for p1010 to report correct state transitions. Signed-off-by: Zhu Yi Signed-off-by: Mark Jonas Acked-by: Wolfgang Grandegger Cc: linux-stable # >= v4.11 Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index ed544c44848f..a13a4896a8bd 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -282,7 +282,8 @@ struct flexcan_priv { }; static const struct flexcan_devtype_data fsl_p1010_devtype_data = { - .quirks = FLEXCAN_QUIRK_BROKEN_WERR_STATE, + .quirks = FLEXCAN_QUIRK_BROKEN_WERR_STATE | + FLEXCAN_QUIRK_BROKEN_PERR_STATE, }; static const struct flexcan_devtype_data fsl_imx28_devtype_data = { -- cgit v1.2.3 From 62c04647c6f44fa3d5d0c077133da0aa1cbbc34c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 8 Sep 2017 16:02:35 +0100 Subject: can: bcm: check for null sk before deferencing it via the call to sock_net The assignment of net via call sock_net will dereference sk. This is performed before a sanity null check on sk, so there could be a potential null dereference on the sock_net call if sk is null. Fix this by assigning net after the sk null check. Also replace the sk == NULL with the more usual !sk idiom. Detected by CoverityScan CID#1431862 ("Dereference before null check") Fixes: 384317ef4187 ("can: network namespace support for CAN_BCM protocol") Signed-off-by: Colin Ian King Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- net/can/bcm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/can/bcm.c b/net/can/bcm.c index 47a8748d953a..13690334efa3 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1493,13 +1493,14 @@ static int bcm_init(struct sock *sk) static int bcm_release(struct socket *sock) { struct sock *sk = sock->sk; - struct net *net = sock_net(sk); + struct net *net; struct bcm_sock *bo; struct bcm_op *op, *next; - if (sk == NULL) + if (!sk) return 0; + net = sock_net(sk); bo = bcm_sk(sk); /* remove bcm_ops, timer, rx_unregister(), etc. */ -- cgit v1.2.3 From cae1d5b78fb4874086170ad07921bca59ea2e893 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 17 Oct 2017 07:18:35 +0200 Subject: can: af_can: do not access proto_tab directly use rcu_access_pointer instead "proto_tab" is a RCU protected array, when directly accessing the array, sparse throws these warnings: CHECK /srv/work/frogger/socketcan/linux/net/can/af_can.c net/can/af_can.c:115:14: error: incompatible types in comparison expression (different address spaces) net/can/af_can.c:795:17: error: incompatible types in comparison expression (different address spaces) net/can/af_can.c:816:9: error: incompatible types in comparison expression (different address spaces) This patch fixes the problem by using rcu_access_pointer() and annotating "proto_tab" array as __rcu. Signed-off-by: Marc Kleine-Budde --- net/can/af_can.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/can/af_can.c b/net/can/af_can.c index 88edac0f3e36..eb1ad74b40f4 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -78,7 +78,7 @@ MODULE_PARM_DESC(stats_timer, "enable timer for statistics (default:on)"); static struct kmem_cache *rcv_cache __read_mostly; /* table of registered CAN protocols */ -static const struct can_proto *proto_tab[CAN_NPROTO] __read_mostly; +static const struct can_proto __rcu *proto_tab[CAN_NPROTO] __read_mostly; static DEFINE_MUTEX(proto_tab_lock); static atomic_t skbcounter = ATOMIC_INIT(0); @@ -788,7 +788,7 @@ int can_proto_register(const struct can_proto *cp) mutex_lock(&proto_tab_lock); - if (proto_tab[proto]) { + if (rcu_access_pointer(proto_tab[proto])) { pr_err("can: protocol %d already registered\n", proto); err = -EBUSY; } else @@ -812,7 +812,7 @@ void can_proto_unregister(const struct can_proto *cp) int proto = cp->protocol; mutex_lock(&proto_tab_lock); - BUG_ON(proto_tab[proto] != cp); + BUG_ON(rcu_access_pointer(proto_tab[proto]) != cp); RCU_INIT_POINTER(proto_tab[proto], NULL); mutex_unlock(&proto_tab_lock); -- cgit v1.2.3 From 5a606223c6b5b7560da253ed52e62c67fa18e29b Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sat, 29 Jul 2017 11:51:01 +0200 Subject: can: af_can: can_pernet_init(): add missing error handling for kzalloc returning NULL This patch adds the missing check and error handling for out-of-memory situations, when kzalloc cannot allocate memory. Fixes: cb5635a36776 ("can: complete initial namespace support") Acked-by: Oliver Hartkopp Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- net/can/af_can.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/net/can/af_can.c b/net/can/af_can.c index eb1ad74b40f4..ecd5c703d11e 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -875,9 +875,14 @@ static int can_pernet_init(struct net *net) spin_lock_init(&net->can.can_rcvlists_lock); net->can.can_rx_alldev_list = kzalloc(sizeof(struct dev_rcv_lists), GFP_KERNEL); - + if (!net->can.can_rx_alldev_list) + goto out; net->can.can_stats = kzalloc(sizeof(struct s_stats), GFP_KERNEL); + if (!net->can.can_stats) + goto out_free_alldev_list; net->can.can_pstats = kzalloc(sizeof(struct s_pstats), GFP_KERNEL); + if (!net->can.can_pstats) + goto out_free_can_stats; if (IS_ENABLED(CONFIG_PROC_FS)) { /* the statistics are updated every second (timer triggered) */ @@ -892,6 +897,13 @@ static int can_pernet_init(struct net *net) } return 0; + + out_free_can_stats: + kfree(net->can.can_stats); + out_free_alldev_list: + kfree(net->can.can_rx_alldev_list); + out: + return -ENOMEM; } static void can_pernet_exit(struct net *net) -- cgit v1.2.3 From 72d92e865d1560723e1957ee3f393688c49ca5bf Mon Sep 17 00:00:00 2001 From: Stefan Mätje Date: Wed, 18 Oct 2017 13:25:17 +0200 Subject: can: esd_usb2: Fix can_dlc value for received RTR, frames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dlc member of the struct rx_msg contains also the ESD_RTR flag to mark received RTR frames. Without the fix the can_dlc value for received RTR frames would always be set to 8 by get_can_dlc() instead of the received value. Fixes: 96d8e90382dc ("can: Add driver for esd CAN-USB/2 device") Signed-off-by: Stefan Mätje Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/esd_usb2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c index be928ce62d32..9fdb0f0bfa06 100644 --- a/drivers/net/can/usb/esd_usb2.c +++ b/drivers/net/can/usb/esd_usb2.c @@ -333,7 +333,7 @@ static void esd_usb2_rx_can_msg(struct esd_usb2_net_priv *priv, } cf->can_id = id & ESD_IDMASK; - cf->can_dlc = get_can_dlc(msg->msg.rx.dlc); + cf->can_dlc = get_can_dlc(msg->msg.rx.dlc & ~ESD_RTR); if (id & ESD_EXTID) cf->can_id |= CAN_EFF_FLAG; -- cgit v1.2.3 From 97819f943063b622eca44d3644067c190dc75039 Mon Sep 17 00:00:00 2001 From: Wolfgang Grandegger Date: Thu, 14 Sep 2017 18:37:14 +0200 Subject: can: gs_usb: fix busy loop if no more TX context is available If sending messages with no cable connected, it quickly happens that there is no more TX context available. Then "gs_can_start_xmit()" returns with "NETDEV_TX_BUSY" and the upper layer does retry immediately keeping the CPU busy. To fix that issue, I moved "atomic_dec(&dev->active_tx_urbs)" from "gs_usb_xmit_callback()" to the TX done handling in "gs_usb_receive_bulk_callback()". Renaming "active_tx_urbs" to "active_tx_contexts" and moving it into "gs_[alloc|free]_tx_context()" would also make sense. Signed-off-by: Wolfgang Grandegger Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/gs_usb.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index afcc1312dbaf..68ac3e88a8ce 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -375,6 +375,8 @@ static void gs_usb_receive_bulk_callback(struct urb *urb) gs_free_tx_context(txc); + atomic_dec(&dev->active_tx_urbs); + netif_wake_queue(netdev); } @@ -463,14 +465,6 @@ static void gs_usb_xmit_callback(struct urb *urb) urb->transfer_buffer_length, urb->transfer_buffer, urb->transfer_dma); - - atomic_dec(&dev->active_tx_urbs); - - if (!netif_device_present(netdev)) - return; - - if (netif_queue_stopped(netdev)) - netif_wake_queue(netdev); } static netdev_tx_t gs_can_start_xmit(struct sk_buff *skb, -- cgit v1.2.3 From cd7aea1875c54c69a54a333b75e9d8732503f273 Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Tue, 17 Oct 2017 07:33:03 +0000 Subject: net: ena: reduce the severity of some printouts Decrease log level of checksum errors as these messages can be triggered remotely by bad packets. Signed-off-by: Netanel Belgazal Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index f7dc22f65d9f..7040b9052747 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -966,7 +966,7 @@ static inline void ena_rx_checksum(struct ena_ring *rx_ring, u64_stats_update_begin(&rx_ring->syncp); rx_ring->rx_stats.bad_csum++; u64_stats_update_end(&rx_ring->syncp); - netif_err(rx_ring->adapter, rx_err, rx_ring->netdev, + netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev, "RX IPv4 header checksum error\n"); return; } @@ -979,7 +979,7 @@ static inline void ena_rx_checksum(struct ena_ring *rx_ring, u64_stats_update_begin(&rx_ring->syncp); rx_ring->rx_stats.bad_csum++; u64_stats_update_end(&rx_ring->syncp); - netif_err(rx_ring->adapter, rx_err, rx_ring->netdev, + netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev, "RX L4 checksum error\n"); skb->ip_summed = CHECKSUM_NONE; return; -- cgit v1.2.3 From 411838e7b41c52cf4afa51929cec54c2162472ff Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Tue, 17 Oct 2017 07:33:04 +0000 Subject: net: ena: fix rare kernel crash when bar memory remap fails This failure is rare and only found on testing where deliberately fail devm_ioremap() [ 451.170464] ena 0000:04:00.0: failed to remap regs bar 451.170549] Workqueue: pciehp-1 pciehp_power_thread [ 451.170551] task: ffff88085a5f2d00 task.stack: ffffc9000756c000 [ 451.170552] RIP: 0010:devm_iounmap+0x2d/0x40 [ 451.170553] RSP: 0018:ffffc9000756fac0 EFLAGS: 00010282 [ 451.170554] RAX: 00000000fffffffe RBX: 0000000000000000 RCX: 0000000000000000 [ 451.170555] RDX: ffffffff813a7e00 RSI: 0000000000000282 RDI: 0000000000000282 [ 451.170556] RBP: ffffc9000756fac8 R08: 00000000fffffffe R09: 00000000000009b7 [ 451.170557] R10: 0000000000000005 R11: 00000000000009b6 R12: ffff880856c9d0a0 [ 451.170558] R13: ffffc9000f5c90c0 R14: ffff880856c9d0a0 R15: 0000000000000028 [ 451.170559] FS: 0000000000000000(0000) GS:ffff88085f400000(0000) knlGS:0000000000000000 [ 451.170560] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 451.170561] CR2: 00007f169038b000 CR3: 0000000001c09000 CR4: 00000000003406f0 [ 451.170562] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 451.170562] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 451.170563] Call Trace: [ 451.170572] ena_release_bars.isra.48+0x34/0x60 [ena] [ 451.170574] ena_probe+0x144/0xd90 [ena] [ 451.170579] ? ida_simple_get+0x98/0x100 [ 451.170585] ? kernfs_next_descendant_post+0x40/0x50 [ 451.170591] local_pci_probe+0x45/0xa0 [ 451.170592] pci_device_probe+0x157/0x180 [ 451.170599] driver_probe_device+0x2a8/0x460 [ 451.170600] __device_attach_driver+0x7e/0xe0 [ 451.170602] ? driver_allows_async_probing+0x30/0x30 [ 451.170603] bus_for_each_drv+0x68/0xb0 [ 451.170605] __device_attach+0xdd/0x160 [ 451.170607] device_attach+0x10/0x20 [ 451.170610] pci_bus_add_device+0x4f/0xa0 [ 451.170611] pci_bus_add_devices+0x39/0x70 [ 451.170613] pciehp_configure_device+0x96/0x120 [ 451.170614] pciehp_enable_slot+0x1b3/0x290 [ 451.170616] pciehp_power_thread+0x3b/0xb0 [ 451.170622] process_one_work+0x149/0x360 [ 451.170623] worker_thread+0x4d/0x3c0 [ 451.170626] kthread+0x109/0x140 [ 451.170627] ? rescuer_thread+0x380/0x380 [ 451.170628] ? kthread_park+0x60/0x60 [ 451.170632] ret_from_fork+0x25/0x30 Signed-off-by: Netanel Belgazal Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 7040b9052747..c6bd5e24005d 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -3064,7 +3064,8 @@ static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev) if (ena_dev->mem_bar) devm_iounmap(&pdev->dev, ena_dev->mem_bar); - devm_iounmap(&pdev->dev, ena_dev->reg_bar); + if (ena_dev->reg_bar) + devm_iounmap(&pdev->dev, ena_dev->reg_bar); release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK; pci_release_selected_regions(pdev, release_bars); -- cgit v1.2.3 From a59df396768a7e37c6ddafeb9666a30c8ac07854 Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Tue, 17 Oct 2017 07:33:05 +0000 Subject: net: ena: fix wrong max Tx/Rx queues on ethtool ethtool ena_get_channels() expose the max number of queues as the max number of queues ENA supports (128 queues) and not the actual number of created queues. Signed-off-by: Netanel Belgazal Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index b1212debc2e1..967020fb26ee 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -742,8 +742,8 @@ static void ena_get_channels(struct net_device *netdev, { struct ena_adapter *adapter = netdev_priv(netdev); - channels->max_rx = ENA_MAX_NUM_IO_QUEUES; - channels->max_tx = ENA_MAX_NUM_IO_QUEUES; + channels->max_rx = adapter->num_queues; + channels->max_tx = adapter->num_queues; channels->max_other = 0; channels->max_combined = 0; channels->rx_count = adapter->num_queues; -- cgit v1.2.3 From 0ea7eeec24be5f04ae80d68f5b1ea3a11f49de2f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 17 Oct 2017 16:55:52 +0200 Subject: mm, percpu: add support for __GFP_NOWARN flag Add an option for pcpu_alloc() to support __GFP_NOWARN flag. Currently, we always throw a warning when size or alignment is unsupported (and also dump stack on failed allocation requests). The warning itself is harmless since we return NULL anyway for any failed request, which callers are required to handle anyway. However, it becomes harmful when panic_on_warn is set. The rationale for the WARN() in pcpu_alloc() is that it can be tracked when larger than supported allocation requests are made such that allocations limits can be tweaked if warranted. This makes sense for in-kernel users, however, there are users of pcpu allocator where allocation size is derived from user space requests, e.g. when creating BPF maps. In these cases, the requests should fail gracefully without throwing a splat. The current work-around was to check allocation size against the upper limit of PCPU_MIN_UNIT_SIZE from call-sites for bailing out prior to a call to pcpu_alloc() in order to avoid throwing the WARN(). This is bad in multiple ways since PCPU_MIN_UNIT_SIZE is an implementation detail, and having the checks on call-sites only complicates the code for no good reason. Thus, lets fix it generically by supporting the __GFP_NOWARN flag that users can then use with calling the __alloc_percpu_gfp() helper instead. Signed-off-by: Daniel Borkmann Cc: Tejun Heo Cc: Mark Rutland Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- mm/percpu.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/mm/percpu.c b/mm/percpu.c index aa121cef76de..a0e0c82c1e4c 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1329,7 +1329,9 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) * @gfp: allocation flags * * Allocate percpu area of @size bytes aligned at @align. If @gfp doesn't - * contain %GFP_KERNEL, the allocation is atomic. + * contain %GFP_KERNEL, the allocation is atomic. If @gfp has __GFP_NOWARN + * then no warning will be triggered on invalid or failed allocation + * requests. * * RETURNS: * Percpu pointer to the allocated area on success, NULL on failure. @@ -1337,10 +1339,11 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, gfp_t gfp) { + bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL; + bool do_warn = !(gfp & __GFP_NOWARN); static int warn_limit = 10; struct pcpu_chunk *chunk; const char *err; - bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL; int slot, off, cpu, ret; unsigned long flags; void __percpu *ptr; @@ -1361,7 +1364,7 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE || !is_power_of_2(align))) { - WARN(true, "illegal size (%zu) or align (%zu) for percpu allocation\n", + WARN(do_warn, "illegal size (%zu) or align (%zu) for percpu allocation\n", size, align); return NULL; } @@ -1482,7 +1485,7 @@ fail_unlock: fail: trace_percpu_alloc_percpu_fail(reserved, is_atomic, size, align); - if (!is_atomic && warn_limit) { + if (!is_atomic && do_warn && warn_limit) { pr_warn("allocation failed, size=%zu align=%zu atomic=%d, %s\n", size, align, is_atomic, err); dump_stack(); @@ -1507,7 +1510,9 @@ fail: * * Allocate zero-filled percpu area of @size bytes aligned at @align. If * @gfp doesn't contain %GFP_KERNEL, the allocation doesn't block and can - * be called from any context but is a lot more likely to fail. + * be called from any context but is a lot more likely to fail. If @gfp + * has __GFP_NOWARN then no warning will be triggered on invalid or failed + * allocation requests. * * RETURNS: * Percpu pointer to the allocated area on success, NULL on failure. -- cgit v1.2.3 From 82f8dd28bd3abe181b7a66ea4ea132134d37a400 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 17 Oct 2017 16:55:53 +0200 Subject: bpf: fix splat for illegal devmap percpu allocation It was reported that syzkaller was able to trigger a splat on devmap percpu allocation due to illegal/unsupported allocation request size passed to __alloc_percpu(): [ 70.094249] illegal size (32776) or align (8) for percpu allocation [ 70.094256] ------------[ cut here ]------------ [ 70.094259] WARNING: CPU: 3 PID: 3451 at mm/percpu.c:1365 pcpu_alloc+0x96/0x630 [...] [ 70.094325] Call Trace: [ 70.094328] __alloc_percpu_gfp+0x12/0x20 [ 70.094330] dev_map_alloc+0x134/0x1e0 [ 70.094331] SyS_bpf+0x9bc/0x1610 [ 70.094333] ? selinux_task_setrlimit+0x5a/0x60 [ 70.094334] ? security_task_setrlimit+0x43/0x60 [ 70.094336] entry_SYSCALL_64_fastpath+0x1a/0xa5 This was due to too large max_entries for the map such that we surpassed the upper limit of PCPU_MIN_UNIT_SIZE. It's fine to fail naturally here, so switch to __alloc_percpu_gfp() and pass __GFP_NOWARN instead. Fixes: 11393cc9b9be ("xdp: Add batching support to redirect map") Reported-by: Mark Rutland Reported-by: Shankara Pailoor Reported-by: Richard Weinberger Signed-off-by: Daniel Borkmann Cc: John Fastabend Acked-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: David S. Miller --- kernel/bpf/devmap.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index e093d9a2c4dd..920428d84da2 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -111,8 +111,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) err = -ENOMEM; /* A per cpu bitfield with a bit per possible net device */ - dtab->flush_needed = __alloc_percpu(dev_map_bitmap_size(attr), - __alignof__(unsigned long)); + dtab->flush_needed = __alloc_percpu_gfp(dev_map_bitmap_size(attr), + __alignof__(unsigned long), + GFP_KERNEL | __GFP_NOWARN); if (!dtab->flush_needed) goto free_dtab; -- cgit v1.2.3 From bc6d5031b43a2291de638ab9304320b4cae61689 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 17 Oct 2017 16:55:54 +0200 Subject: bpf: do not test for PCPU_MIN_UNIT_SIZE before percpu allocations PCPU_MIN_UNIT_SIZE is an implementation detail of the percpu allocator. Given we support __GFP_NOWARN now, lets just let the allocation request fail naturally instead. The two call sites from BPF mistakenly assumed __GFP_NOWARN would work, so no changes needed to their actual __alloc_percpu_gfp() calls which use the flag already. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: David S. Miller --- kernel/bpf/arraymap.c | 2 +- kernel/bpf/hashtab.c | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 98c0f00c3f5e..e2636737b69b 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -98,7 +98,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) array_size += (u64) attr->max_entries * elem_size * num_possible_cpus(); if (array_size >= U32_MAX - PAGE_SIZE || - elem_size > PCPU_MIN_UNIT_SIZE || bpf_array_alloc_percpu(array)) { + bpf_array_alloc_percpu(array)) { bpf_map_area_free(array); return ERR_PTR(-ENOMEM); } diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 431126f31ea3..6533f08d1238 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -317,10 +317,6 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) */ goto free_htab; - if (percpu && round_up(htab->map.value_size, 8) > PCPU_MIN_UNIT_SIZE) - /* make sure the size for pcpu_alloc() is reasonable */ - goto free_htab; - htab->elem_size = sizeof(struct htab_elem) + round_up(htab->map.key_size, 8); if (percpu) -- cgit v1.2.3 From df80cd9b28b9ebaa284a41df611dbf3a2d05ca74 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 17 Oct 2017 23:26:10 +0800 Subject: sctp: do not peel off an assoc from one netns to another one Now when peeling off an association to the sock in another netns, all transports in this assoc are not to be rehashed and keep use the old key in hashtable. As a transport uses sk->net as the hash key to insert into hashtable, it would miss removing these transports from hashtable due to the new netns when closing the sock and all transports are being freeed, then later an use-after-free issue could be caused when looking up an asoc and dereferencing those transports. This is a very old issue since very beginning, ChunYu found it with syzkaller fuzz testing with this series: socket$inet6_sctp() bind$inet6() sendto$inet6() unshare(0x40000000) getsockopt$inet_sctp6_SCTP_GET_ASSOC_ID_LIST() getsockopt$inet_sctp6_SCTP_SOCKOPT_PEELOFF() This patch is to block this call when peeling one assoc off from one netns to another one, so that the netns of all transport would not go out-sync with the key in hashtable. Note that this patch didn't fix it by rehashing transports, as it's difficult to handle the situation when the tuple is already in use in the new netns. Besides, no one would like to peel off one assoc to another netns, considering ipaddrs, ifaces, etc. are usually different. Reported-by: ChunYu Wang Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/socket.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d4730ada7f32..17841ab30798 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4906,6 +4906,10 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp) struct socket *sock; int err = 0; + /* Do not peel off from one netns to another one. */ + if (!net_eq(current->nsproxy->net_ns, sock_net(sk))) + return -EINVAL; + if (!asoc) return -EINVAL; -- cgit v1.2.3 From 1cc276cec9ec574d41cf47dfc0f51406b6f26ab4 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 18 Oct 2017 21:37:49 +0800 Subject: sctp: add the missing sock_owned_by_user check in sctp_icmp_redirect Now sctp processes icmp redirect packet in sctp_icmp_redirect where it calls sctp_transport_dst_check in which tp->dst can be released. The problem is before calling sctp_transport_dst_check, it doesn't check sock_owned_by_user, which means tp->dst could be freed while a process is accessing it with owning the socket. An use-after-free issue could be triggered by this. This patch is to fix it by checking sock_owned_by_user before calling sctp_transport_dst_check in sctp_icmp_redirect, so that it would not release tp->dst if users still hold sock lock. Besides, the same issue fixed in commit 45caeaa5ac0b ("dccp/tcp: fix routing redirect race") on sctp also needs this check. Fixes: 55be7a9c6074 ("ipv4: Add redirect support to all protocol icmp error handlers") Reported-by: Eric Dumazet Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/input.c b/net/sctp/input.c index 92a07141fd07..34f10e75f3b9 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -421,7 +421,7 @@ void sctp_icmp_redirect(struct sock *sk, struct sctp_transport *t, { struct dst_entry *dst; - if (!t) + if (sock_owned_by_user(sk) || !t) return; dst = sctp_transport_dst_check(t); if (dst) -- cgit v1.2.3 From 435bf0d3f99a164df7e8c30428cef266b91d1d3b Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 18 Oct 2017 07:10:15 -0700 Subject: bpf: enforce TCP only support for sockmap Only TCP sockets have been tested and at the moment the state change callback only handles TCP sockets. This adds a check to ensure that sockets actually being added are TCP sockets. For net-next we can consider UDP support. Signed-off-by: John Fastabend Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/sockmap.c | 6 ++++++ tools/testing/selftests/bpf/test_maps.c | 12 +++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 6424ce0e4969..c68899d5b246 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -840,6 +840,12 @@ static int sock_map_update_elem(struct bpf_map *map, return -EINVAL; } + if (skops.sk->sk_type != SOCK_STREAM || + skops.sk->sk_protocol != IPPROTO_TCP) { + fput(socket->file); + return -EOPNOTSUPP; + } + err = sock_map_ctx_update_elem(&skops, map, key, flags); fput(socket->file); return err; diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index fe3a443a1102..50ce52d2013d 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -466,7 +466,7 @@ static void test_sockmap(int tasks, void *data) int one = 1, map_fd_rx, map_fd_tx, map_fd_break, s, sc, rc; struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_break; int ports[] = {50200, 50201, 50202, 50204}; - int err, i, fd, sfd[6] = {0xdeadbeef}; + int err, i, fd, udp, sfd[6] = {0xdeadbeef}; u8 buf[20] = {0x0, 0x5, 0x3, 0x2, 0x1, 0x0}; int parse_prog, verdict_prog; struct sockaddr_in addr; @@ -548,6 +548,16 @@ static void test_sockmap(int tasks, void *data) goto out_sockmap; } + /* Test update with unsupported UDP socket */ + udp = socket(AF_INET, SOCK_DGRAM, 0); + i = 0; + err = bpf_map_update_elem(fd, &i, &udp, BPF_ANY); + if (!err) { + printf("Failed socket SOCK_DGRAM allowed '%i:%i'\n", + i, udp); + goto out_sockmap; + } + /* Test update without programs */ for (i = 0; i < 6; i++) { err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY); -- cgit v1.2.3 From 34f79502bbcfab659b8729da68b5e387f96eb4c1 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 18 Oct 2017 07:10:36 -0700 Subject: bpf: avoid preempt enable/disable in sockmap using tcp_skb_cb region SK_SKB BPF programs are run from the socket/tcp context but early in the stack before much of the TCP metadata is needed in tcp_skb_cb. So we can use some unused fields to place BPF metadata needed for SK_SKB programs when implementing the redirect function. This allows us to drop the preempt disable logic. It does however require an API change so sk_redirect_map() has been updated to additionally provide ctx_ptr to skb. Note, we do however continue to disable/enable preemption around actual BPF program running to account for map updates. Signed-off-by: John Fastabend Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 2 +- include/net/tcp.h | 5 ++++ kernel/bpf/sockmap.c | 19 +++++++------- net/core/filter.c | 29 +++++++++++----------- samples/sockmap/sockmap_kern.c | 2 +- tools/include/uapi/linux/bpf.h | 3 ++- tools/testing/selftests/bpf/bpf_helpers.h | 2 +- tools/testing/selftests/bpf/sockmap_verdict_prog.c | 4 +-- 8 files changed, 36 insertions(+), 30 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index d29e58fde364..818a0b26249e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -728,7 +728,7 @@ void xdp_do_flush_map(void); void bpf_warn_invalid_xdp_action(u32 act); void bpf_warn_invalid_xdp_redirect(u32 ifindex); -struct sock *do_sk_redirect_map(void); +struct sock *do_sk_redirect_map(struct sk_buff *skb); #ifdef CONFIG_BPF_JIT extern int bpf_jit_enable; diff --git a/include/net/tcp.h b/include/net/tcp.h index 89974c5286d8..b1ef98ebce53 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -840,6 +840,11 @@ struct tcp_skb_cb { struct inet6_skb_parm h6; #endif } header; /* For incoming skbs */ + struct { + __u32 key; + __u32 flags; + struct bpf_map *map; + } bpf; }; }; diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index c68899d5b246..beaabb21c3a3 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -39,6 +39,7 @@ #include #include #include +#include struct bpf_stab { struct bpf_map map; @@ -101,9 +102,16 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb) return SK_DROP; skb_orphan(skb); + /* We need to ensure that BPF metadata for maps is also cleared + * when we orphan the skb so that we don't have the possibility + * to reference a stale map. + */ + TCP_SKB_CB(skb)->bpf.map = NULL; skb->sk = psock->sock; bpf_compute_data_end(skb); + preempt_disable(); rc = (*prog->bpf_func)(skb, prog->insnsi); + preempt_enable(); skb->sk = NULL; return rc; @@ -114,17 +122,10 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb) struct sock *sk; int rc; - /* Because we use per cpu values to feed input from sock redirect - * in BPF program to do_sk_redirect_map() call we need to ensure we - * are not preempted. RCU read lock is not sufficient in this case - * with CONFIG_PREEMPT_RCU enabled so we must be explicit here. - */ - preempt_disable(); rc = smap_verdict_func(psock, skb); switch (rc) { case SK_REDIRECT: - sk = do_sk_redirect_map(); - preempt_enable(); + sk = do_sk_redirect_map(skb); if (likely(sk)) { struct smap_psock *peer = smap_psock_sk(sk); @@ -141,8 +142,6 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb) /* Fall through and free skb otherwise */ case SK_DROP: default: - if (rc != SK_REDIRECT) - preempt_enable(); kfree_skb(skb); } } diff --git a/net/core/filter.c b/net/core/filter.c index 74b8c91fb5f4..ca1ba0bbfbc2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1839,31 +1839,31 @@ static const struct bpf_func_proto bpf_redirect_proto = { .arg2_type = ARG_ANYTHING, }; -BPF_CALL_3(bpf_sk_redirect_map, struct bpf_map *, map, u32, key, u64, flags) +BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb, + struct bpf_map *, map, u32, key, u64, flags) { - struct redirect_info *ri = this_cpu_ptr(&redirect_info); + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); if (unlikely(flags)) return SK_ABORTED; - ri->ifindex = key; - ri->flags = flags; - ri->map = map; + tcb->bpf.key = key; + tcb->bpf.flags = flags; + tcb->bpf.map = map; return SK_REDIRECT; } -struct sock *do_sk_redirect_map(void) +struct sock *do_sk_redirect_map(struct sk_buff *skb) { - struct redirect_info *ri = this_cpu_ptr(&redirect_info); + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); struct sock *sk = NULL; - if (ri->map) { - sk = __sock_map_lookup_elem(ri->map, ri->ifindex); + if (tcb->bpf.map) { + sk = __sock_map_lookup_elem(tcb->bpf.map, tcb->bpf.key); - ri->ifindex = 0; - ri->map = NULL; - /* we do not clear flags for future lookup */ + tcb->bpf.key = 0; + tcb->bpf.map = NULL; } return sk; @@ -1873,9 +1873,10 @@ static const struct bpf_func_proto bpf_sk_redirect_map_proto = { .func = bpf_sk_redirect_map, .gpl_only = false, .ret_type = RET_INTEGER, - .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_ANYTHING, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_ANYTHING, }; BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb) diff --git a/samples/sockmap/sockmap_kern.c b/samples/sockmap/sockmap_kern.c index f9b38ef82dc2..52b0053274f4 100644 --- a/samples/sockmap/sockmap_kern.c +++ b/samples/sockmap/sockmap_kern.c @@ -62,7 +62,7 @@ int bpf_prog2(struct __sk_buff *skb) ret = 1; bpf_printk("sockmap: %d -> %d @ %d\n", lport, bpf_ntohl(rport), ret); - return bpf_sk_redirect_map(&sock_map, ret, 0); + return bpf_sk_redirect_map(skb, &sock_map, ret, 0); } SEC("sockops") diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 43ab5c402f98..be9a631a69f7 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -569,9 +569,10 @@ union bpf_attr { * @flags: reserved for future use * Return: 0 on success or negative error code * - * int bpf_sk_redirect_map(map, key, flags) + * int bpf_sk_redirect_map(skb, map, key, flags) * Redirect skb to a sock in map using key as a lookup key for the * sock in map. + * @skb: pointer to skb * @map: pointer to sockmap * @key: key to lookup sock in map * @flags: reserved for future use diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 36fb9161b34a..b2e02bdcd098 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -65,7 +65,7 @@ static int (*bpf_xdp_adjust_head)(void *ctx, int offset) = static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval, int optlen) = (void *) BPF_FUNC_setsockopt; -static int (*bpf_sk_redirect_map)(void *map, int key, int flags) = +static int (*bpf_sk_redirect_map)(void *ctx, void *map, int key, int flags) = (void *) BPF_FUNC_sk_redirect_map; static int (*bpf_sock_map_update)(void *map, void *key, void *value, unsigned long long flags) = diff --git a/tools/testing/selftests/bpf/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/sockmap_verdict_prog.c index 9b99bd10807d..2cd2d552938b 100644 --- a/tools/testing/selftests/bpf/sockmap_verdict_prog.c +++ b/tools/testing/selftests/bpf/sockmap_verdict_prog.c @@ -61,8 +61,8 @@ int bpf_prog2(struct __sk_buff *skb) bpf_printk("verdict: data[0] = redir(%u:%u)\n", map, sk); if (!map) - return bpf_sk_redirect_map(&sock_map_rx, sk, 0); - return bpf_sk_redirect_map(&sock_map_tx, sk, 0); + return bpf_sk_redirect_map(skb, &sock_map_rx, sk, 0); + return bpf_sk_redirect_map(skb, &sock_map_tx, sk, 0); } char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From f7e9cb1ecb6d922584abff16db07930162c57155 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 18 Oct 2017 07:10:58 -0700 Subject: bpf: remove mark access for SK_SKB program types The skb->mark field is a union with reserved_tailroom which is used in the TCP code paths from stream memory allocation. Allowing SK_SKB programs to set this field creates a conflict with future code optimizations, such as "gifting" the skb to the egress path instead of creating a new skb and doing a memcpy. Because we do not have a released version of SK_SKB yet lets just remove it for now. A more appropriate scratch pad to use at the socket layer is dev_scratch, but lets add that in future kernels when needed. Signed-off-by: John Fastabend Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/filter.c | 2 +- tools/testing/selftests/bpf/test_verifier.c | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index ca1ba0bbfbc2..aa0265997f93 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3684,7 +3684,6 @@ static bool sk_skb_is_valid_access(int off, int size, { if (type == BPF_WRITE) { switch (off) { - case bpf_ctx_range(struct __sk_buff, mark): case bpf_ctx_range(struct __sk_buff, tc_index): case bpf_ctx_range(struct __sk_buff, priority): break; @@ -3694,6 +3693,7 @@ static bool sk_skb_is_valid_access(int off, int size, } switch (off) { + case bpf_ctx_range(struct __sk_buff, mark): case bpf_ctx_range(struct __sk_buff, tc_classid): return false; case bpf_ctx_range(struct __sk_buff, data): diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 3c7d3a45a3c5..50e15cedbb7f 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1130,15 +1130,27 @@ static struct bpf_test tests[] = { .errstr = "invalid bpf_context access", }, { - "check skb->mark is writeable by SK_SKB", + "invalid access of skb->mark for SK_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_SKB, + .errstr = "invalid bpf_context access", + }, + { + "check skb->mark is not writeable by SK_SKB", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, offsetof(struct __sk_buff, mark)), BPF_EXIT_INSN(), }, - .result = ACCEPT, + .result = REJECT, .prog_type = BPF_PROG_TYPE_SK_SKB, + .errstr = "invalid bpf_context access", }, { "check skb->tc_index is writeable by SK_SKB", -- cgit v1.2.3 From fb50df8d32283cd95932a182a46a10070c4a8832 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 18 Oct 2017 07:11:22 -0700 Subject: bpf: require CAP_NET_ADMIN when using sockmap maps Restrict sockmap to CAP_NET_ADMIN. Signed-off-by: John Fastabend Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/sockmap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index beaabb21c3a3..2b6eb35ae5d3 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -486,6 +486,9 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr) int err = -EINVAL; u64 cost; + if (!capable(CAP_NET_ADMIN)) + return ERR_PTR(-EPERM); + /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 4 || attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE) -- cgit v1.2.3 From 9ef2a8cd5c0dcb8e1f1534615c56eb13b630c363 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 18 Oct 2017 07:11:44 -0700 Subject: bpf: require CAP_NET_ADMIN when using devmap Devmap is used with XDP which requires CAP_NET_ADMIN so lets also make CAP_NET_ADMIN required to use the map. Signed-off-by: John Fastabend Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/devmap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 920428d84da2..52e0548ba548 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -78,6 +78,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) int err = -EINVAL; u64 cost; + if (!capable(CAP_NET_ADMIN)) + return ERR_PTR(-EPERM); + /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 4 || attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE) -- cgit v1.2.3 From c92e8c02fe664155ac4234516e32544bec0f113d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Oct 2017 09:04:13 -0700 Subject: tcp/dccp: fix ireq->opt races syzkaller found another bug in DCCP/TCP stacks [1] For the reasons explained in commit ce1050089c96 ("tcp/dccp: fix ireq->pktopts race"), we need to make sure we do not access ireq->opt unless we own the request sock. Note the opt field is renamed to ireq_opt to ease grep games. [1] BUG: KASAN: use-after-free in ip_queue_xmit+0x1687/0x18e0 net/ipv4/ip_output.c:474 Read of size 1 at addr ffff8801c951039c by task syz-executor5/3295 CPU: 1 PID: 3295 Comm: syz-executor5 Not tainted 4.14.0-rc4+ #80 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:52 print_address_description+0x73/0x250 mm/kasan/report.c:252 kasan_report_error mm/kasan/report.c:351 [inline] kasan_report+0x25b/0x340 mm/kasan/report.c:409 __asan_report_load1_noabort+0x14/0x20 mm/kasan/report.c:427 ip_queue_xmit+0x1687/0x18e0 net/ipv4/ip_output.c:474 tcp_transmit_skb+0x1ab7/0x3840 net/ipv4/tcp_output.c:1135 tcp_send_ack.part.37+0x3bb/0x650 net/ipv4/tcp_output.c:3587 tcp_send_ack+0x49/0x60 net/ipv4/tcp_output.c:3557 __tcp_ack_snd_check+0x2c6/0x4b0 net/ipv4/tcp_input.c:5072 tcp_ack_snd_check net/ipv4/tcp_input.c:5085 [inline] tcp_rcv_state_process+0x2eff/0x4850 net/ipv4/tcp_input.c:6071 tcp_child_process+0x342/0x990 net/ipv4/tcp_minisocks.c:816 tcp_v4_rcv+0x1827/0x2f80 net/ipv4/tcp_ipv4.c:1682 ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216 NF_HOOK include/linux/netfilter.h:249 [inline] ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257 dst_input include/net/dst.h:464 [inline] ip_rcv_finish+0x887/0x19a0 net/ipv4/ip_input.c:397 NF_HOOK include/linux/netfilter.h:249 [inline] ip_rcv+0xc3f/0x1820 net/ipv4/ip_input.c:493 __netif_receive_skb_core+0x1a3e/0x34b0 net/core/dev.c:4476 __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4514 netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4587 netif_receive_skb+0xae/0x390 net/core/dev.c:4611 tun_rx_batched.isra.50+0x5ed/0x860 drivers/net/tun.c:1372 tun_get_user+0x249c/0x36d0 drivers/net/tun.c:1766 tun_chr_write_iter+0xbf/0x160 drivers/net/tun.c:1792 call_write_iter include/linux/fs.h:1770 [inline] new_sync_write fs/read_write.c:468 [inline] __vfs_write+0x68a/0x970 fs/read_write.c:481 vfs_write+0x18f/0x510 fs/read_write.c:543 SYSC_write fs/read_write.c:588 [inline] SyS_write+0xef/0x220 fs/read_write.c:580 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x40c341 RSP: 002b:00007f469523ec10 EFLAGS: 00000293 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000718000 RCX: 000000000040c341 RDX: 0000000000000037 RSI: 0000000020004000 RDI: 0000000000000015 RBP: 0000000000000086 R08: 0000000000000000 R09: 0000000000000000 R10: 00000000000f4240 R11: 0000000000000293 R12: 00000000004b7fd1 R13: 00000000ffffffff R14: 0000000020000000 R15: 0000000000025000 Allocated by task 3295: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 save_stack+0x43/0xd0 mm/kasan/kasan.c:447 set_track mm/kasan/kasan.c:459 [inline] kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551 __do_kmalloc mm/slab.c:3725 [inline] __kmalloc+0x162/0x760 mm/slab.c:3734 kmalloc include/linux/slab.h:498 [inline] tcp_v4_save_options include/net/tcp.h:1962 [inline] tcp_v4_init_req+0x2d3/0x3e0 net/ipv4/tcp_ipv4.c:1271 tcp_conn_request+0xf6d/0x3410 net/ipv4/tcp_input.c:6283 tcp_v4_conn_request+0x157/0x210 net/ipv4/tcp_ipv4.c:1313 tcp_rcv_state_process+0x8ea/0x4850 net/ipv4/tcp_input.c:5857 tcp_v4_do_rcv+0x55c/0x7d0 net/ipv4/tcp_ipv4.c:1482 tcp_v4_rcv+0x2d10/0x2f80 net/ipv4/tcp_ipv4.c:1711 ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216 NF_HOOK include/linux/netfilter.h:249 [inline] ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257 dst_input include/net/dst.h:464 [inline] ip_rcv_finish+0x887/0x19a0 net/ipv4/ip_input.c:397 NF_HOOK include/linux/netfilter.h:249 [inline] ip_rcv+0xc3f/0x1820 net/ipv4/ip_input.c:493 __netif_receive_skb_core+0x1a3e/0x34b0 net/core/dev.c:4476 __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4514 netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4587 netif_receive_skb+0xae/0x390 net/core/dev.c:4611 tun_rx_batched.isra.50+0x5ed/0x860 drivers/net/tun.c:1372 tun_get_user+0x249c/0x36d0 drivers/net/tun.c:1766 tun_chr_write_iter+0xbf/0x160 drivers/net/tun.c:1792 call_write_iter include/linux/fs.h:1770 [inline] new_sync_write fs/read_write.c:468 [inline] __vfs_write+0x68a/0x970 fs/read_write.c:481 vfs_write+0x18f/0x510 fs/read_write.c:543 SYSC_write fs/read_write.c:588 [inline] SyS_write+0xef/0x220 fs/read_write.c:580 entry_SYSCALL_64_fastpath+0x1f/0xbe Freed by task 3306: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 save_stack+0x43/0xd0 mm/kasan/kasan.c:447 set_track mm/kasan/kasan.c:459 [inline] kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524 __cache_free mm/slab.c:3503 [inline] kfree+0xca/0x250 mm/slab.c:3820 inet_sock_destruct+0x59d/0x950 net/ipv4/af_inet.c:157 __sk_destruct+0xfd/0x910 net/core/sock.c:1560 sk_destruct+0x47/0x80 net/core/sock.c:1595 __sk_free+0x57/0x230 net/core/sock.c:1603 sk_free+0x2a/0x40 net/core/sock.c:1614 sock_put include/net/sock.h:1652 [inline] inet_csk_complete_hashdance+0xd5/0xf0 net/ipv4/inet_connection_sock.c:959 tcp_check_req+0xf4d/0x1620 net/ipv4/tcp_minisocks.c:765 tcp_v4_rcv+0x17f6/0x2f80 net/ipv4/tcp_ipv4.c:1675 ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216 NF_HOOK include/linux/netfilter.h:249 [inline] ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257 dst_input include/net/dst.h:464 [inline] ip_rcv_finish+0x887/0x19a0 net/ipv4/ip_input.c:397 NF_HOOK include/linux/netfilter.h:249 [inline] ip_rcv+0xc3f/0x1820 net/ipv4/ip_input.c:493 __netif_receive_skb_core+0x1a3e/0x34b0 net/core/dev.c:4476 __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4514 netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4587 netif_receive_skb+0xae/0x390 net/core/dev.c:4611 tun_rx_batched.isra.50+0x5ed/0x860 drivers/net/tun.c:1372 tun_get_user+0x249c/0x36d0 drivers/net/tun.c:1766 tun_chr_write_iter+0xbf/0x160 drivers/net/tun.c:1792 call_write_iter include/linux/fs.h:1770 [inline] new_sync_write fs/read_write.c:468 [inline] __vfs_write+0x68a/0x970 fs/read_write.c:481 vfs_write+0x18f/0x510 fs/read_write.c:543 SYSC_write fs/read_write.c:588 [inline] SyS_write+0xef/0x220 fs/read_write.c:580 entry_SYSCALL_64_fastpath+0x1f/0xbe Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets") Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_sock.h | 2 +- net/dccp/ipv4.c | 13 ++++++++----- net/ipv4/cipso_ipv4.c | 24 +++++++----------------- net/ipv4/inet_connection_sock.c | 8 +++----- net/ipv4/syncookies.c | 2 +- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_ipv4.c | 22 +++++++++++++--------- 7 files changed, 34 insertions(+), 39 deletions(-) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index aa95053dfc78..425752f768d2 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -96,7 +96,7 @@ struct inet_request_sock { kmemcheck_bitfield_end(flags); u32 ir_mark; union { - struct ip_options_rcu *opt; + struct ip_options_rcu __rcu *ireq_opt; #if IS_ENABLED(CONFIG_IPV6) struct { struct ipv6_txoptions *ipv6_opt; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 001c08696334..0490916864f9 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -414,8 +414,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk, sk_daddr_set(newsk, ireq->ir_rmt_addr); sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); newinet->inet_saddr = ireq->ir_loc_addr; - newinet->inet_opt = ireq->opt; - ireq->opt = NULL; + RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt)); newinet->mc_index = inet_iif(skb); newinet->mc_ttl = ip_hdr(skb)->ttl; newinet->inet_id = jiffies; @@ -430,7 +429,10 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk, if (__inet_inherit_port(sk, newsk) < 0) goto put_and_exit; *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); - + if (*own_req) + ireq->ireq_opt = NULL; + else + newinet->inet_opt = NULL; return newsk; exit_overflow: @@ -441,6 +443,7 @@ exit: __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS); return NULL; put_and_exit: + newinet->inet_opt = NULL; inet_csk_prepare_forced_close(newsk); dccp_done(newsk); goto exit; @@ -492,7 +495,7 @@ static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req ireq->ir_rmt_addr); err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, ireq->ir_rmt_addr, - ireq->opt); + rcu_dereference(ireq->ireq_opt)); err = net_xmit_eval(err); } @@ -548,7 +551,7 @@ out: static void dccp_v4_reqsk_destructor(struct request_sock *req) { dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg); - kfree(inet_rsk(req)->opt); + kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1)); } void dccp_syn_ack_timeout(const struct request_sock *req) diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 2ae8f54cb321..82178cc69c96 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1951,7 +1951,7 @@ int cipso_v4_req_setattr(struct request_sock *req, buf = NULL; req_inet = inet_rsk(req); - opt = xchg(&req_inet->opt, opt); + opt = xchg((__force struct ip_options_rcu **)&req_inet->ireq_opt, opt); if (opt) kfree_rcu(opt, rcu); @@ -1973,11 +1973,13 @@ req_setattr_failure: * values on failure. * */ -static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr) +static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr) { + struct ip_options_rcu *opt = rcu_dereference_protected(*opt_ptr, 1); int hdr_delta = 0; - struct ip_options_rcu *opt = *opt_ptr; + if (!opt || opt->opt.cipso == 0) + return 0; if (opt->opt.srr || opt->opt.rr || opt->opt.ts || opt->opt.router_alert) { u8 cipso_len; u8 cipso_off; @@ -2039,14 +2041,10 @@ static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr) */ void cipso_v4_sock_delattr(struct sock *sk) { - int hdr_delta; - struct ip_options_rcu *opt; struct inet_sock *sk_inet; + int hdr_delta; sk_inet = inet_sk(sk); - opt = rcu_dereference_protected(sk_inet->inet_opt, 1); - if (!opt || opt->opt.cipso == 0) - return; hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt); if (sk_inet->is_icsk && hdr_delta > 0) { @@ -2066,15 +2064,7 @@ void cipso_v4_sock_delattr(struct sock *sk) */ void cipso_v4_req_delattr(struct request_sock *req) { - struct ip_options_rcu *opt; - struct inet_request_sock *req_inet; - - req_inet = inet_rsk(req); - opt = req_inet->opt; - if (!opt || opt->opt.cipso == 0) - return; - - cipso_v4_delopt(&req_inet->opt); + cipso_v4_delopt(&inet_rsk(req)->ireq_opt); } /** diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 67aec7a10686..5ec9136a7c36 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -540,9 +540,10 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, { const struct inet_request_sock *ireq = inet_rsk(req); struct net *net = read_pnet(&ireq->ireq_net); - struct ip_options_rcu *opt = ireq->opt; + struct ip_options_rcu *opt; struct rtable *rt; + opt = rcu_dereference(ireq->ireq_opt); flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), @@ -576,10 +577,9 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, struct flowi4 *fl4; struct rtable *rt; + opt = rcu_dereference(ireq->ireq_opt); fl4 = &newinet->cork.fl.u.ip4; - rcu_read_lock(); - opt = rcu_dereference(newinet->inet_opt); flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), @@ -592,13 +592,11 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, goto no_route; if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) goto route_err; - rcu_read_unlock(); return &rt->dst; route_err: ip_rt_put(rt); no_route: - rcu_read_unlock(); __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index b1bb1b3a1082..77cf32a80952 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -355,7 +355,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) */ - ireq->opt = tcp_v4_save_options(sock_net(sk), skb); + RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(sock_net(sk), skb)); if (security_inet_conn_request(sk, skb, req)) { reqsk_free(req); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c5d7656beeee..7eec3383702b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6196,7 +6196,7 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, struct inet_request_sock *ireq = inet_rsk(req); kmemcheck_annotate_bitfield(ireq, flags); - ireq->opt = NULL; + ireq->ireq_opt = NULL; #if IS_ENABLED(CONFIG_IPV6) ireq->pktopts = NULL; #endif diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 85164d4d3e53..4c43365c374c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -877,7 +877,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, ireq->ir_rmt_addr, - ireq->opt); + rcu_dereference(ireq->ireq_opt)); err = net_xmit_eval(err); } @@ -889,7 +889,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, */ static void tcp_v4_reqsk_destructor(struct request_sock *req) { - kfree(inet_rsk(req)->opt); + kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1)); } #ifdef CONFIG_TCP_MD5SIG @@ -1265,10 +1265,11 @@ static void tcp_v4_init_req(struct request_sock *req, struct sk_buff *skb) { struct inet_request_sock *ireq = inet_rsk(req); + struct net *net = sock_net(sk_listener); sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); - ireq->opt = tcp_v4_save_options(sock_net(sk_listener), skb); + RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb)); } static struct dst_entry *tcp_v4_route_req(const struct sock *sk, @@ -1355,10 +1356,9 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, sk_daddr_set(newsk, ireq->ir_rmt_addr); sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); newsk->sk_bound_dev_if = ireq->ir_iif; - newinet->inet_saddr = ireq->ir_loc_addr; - inet_opt = ireq->opt; - rcu_assign_pointer(newinet->inet_opt, inet_opt); - ireq->opt = NULL; + newinet->inet_saddr = ireq->ir_loc_addr; + inet_opt = rcu_dereference(ireq->ireq_opt); + RCU_INIT_POINTER(newinet->inet_opt, inet_opt); newinet->mc_index = inet_iif(skb); newinet->mc_ttl = ip_hdr(skb)->ttl; newinet->rcv_tos = ip_hdr(skb)->tos; @@ -1403,9 +1403,12 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, if (__inet_inherit_port(sk, newsk) < 0) goto put_and_exit; *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); - if (*own_req) + if (likely(*own_req)) { tcp_move_syn(newtp, req); - + ireq->ireq_opt = NULL; + } else { + newinet->inet_opt = NULL; + } return newsk; exit_overflow: @@ -1416,6 +1419,7 @@ exit: tcp_listendrop(sk); return NULL; put_and_exit: + newinet->inet_opt = NULL; inet_csk_prepare_forced_close(newsk); tcp_done(newsk); goto exit; -- cgit v1.2.3 From 509c7a1ecc8601f94ffba8a00889fefb239c00c6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Oct 2017 16:14:52 -0700 Subject: packet: avoid panic in packet_getsockopt() syzkaller got crashes in packet_getsockopt() processing PACKET_ROLLOVER_STATS command while another thread was managing to change po->rollover Using RCU will fix this bug. We might later add proper RCU annotations for sparse sake. In v2: I replaced kfree(rollover) in fanout_add() to kfree_rcu() variant, as spotted by John. Fixes: a9b6391814d5 ("packet: rollover statistics") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Cc: John Sperbeck Signed-off-by: David S. Miller --- net/packet/af_packet.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index bec01a3daf5b..2986941164b1 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1769,7 +1769,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) out: if (err && rollover) { - kfree(rollover); + kfree_rcu(rollover, rcu); po->rollover = NULL; } mutex_unlock(&fanout_mutex); @@ -1796,8 +1796,10 @@ static struct packet_fanout *fanout_release(struct sock *sk) else f = NULL; - if (po->rollover) + if (po->rollover) { kfree_rcu(po->rollover, rcu); + po->rollover = NULL; + } } mutex_unlock(&fanout_mutex); @@ -3851,6 +3853,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, void *data = &val; union tpacket_stats_u st; struct tpacket_rollover_stats rstats; + struct packet_rollover *rollover; if (level != SOL_PACKET) return -ENOPROTOOPT; @@ -3929,13 +3932,18 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, 0); break; case PACKET_ROLLOVER_STATS: - if (!po->rollover) + rcu_read_lock(); + rollover = rcu_dereference(po->rollover); + if (rollover) { + rstats.tp_all = atomic_long_read(&rollover->num); + rstats.tp_huge = atomic_long_read(&rollover->num_huge); + rstats.tp_failed = atomic_long_read(&rollover->num_failed); + data = &rstats; + lv = sizeof(rstats); + } + rcu_read_unlock(); + if (!rollover) return -EINVAL; - rstats.tp_all = atomic_long_read(&po->rollover->num); - rstats.tp_huge = atomic_long_read(&po->rollover->num_huge); - rstats.tp_failed = atomic_long_read(&po->rollover->num_failed); - data = &rstats; - lv = sizeof(rstats); break; case PACKET_TX_HAS_OFF: val = po->tp_tx_has_off; -- cgit v1.2.3 From 6850d0f8b2542112629061808ed950b35eb982e4 Mon Sep 17 00:00:00 2001 From: Samuel Mendoza-Jonas Date: Thu, 19 Oct 2017 13:43:05 +1100 Subject: net/ncsi: Fix AEN HNCDSC packet length Correct the value of the HNCDSC AEN packet. Fixes: 7a82ecf4cfb85 "net/ncsi: NCSI AEN packet handler" Signed-off-by: Samuel Mendoza-Jonas Signed-off-by: David S. Miller --- net/ncsi/ncsi-aen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c index 6898e7229285..f135938bf781 100644 --- a/net/ncsi/ncsi-aen.c +++ b/net/ncsi/ncsi-aen.c @@ -187,7 +187,7 @@ static struct ncsi_aen_handler { } ncsi_aen_handlers[] = { { NCSI_PKT_AEN_LSC, 12, ncsi_aen_handler_lsc }, { NCSI_PKT_AEN_CR, 4, ncsi_aen_handler_cr }, - { NCSI_PKT_AEN_HNCDSC, 4, ncsi_aen_handler_hncdsc } + { NCSI_PKT_AEN_HNCDSC, 8, ncsi_aen_handler_hncdsc } }; int ncsi_aen_handler(struct ncsi_dev_priv *ndp, struct sk_buff *skb) -- cgit v1.2.3 From 0795fb2021f07969949f523ea33c39785bfae9d6 Mon Sep 17 00:00:00 2001 From: Samuel Mendoza-Jonas Date: Thu, 19 Oct 2017 13:43:06 +1100 Subject: net/ncsi: Stop monitor if channel times out or is inactive ncsi_channel_monitor() misses stopping the channel monitor in several places that it should, causing a WARN_ON_ONCE() to trigger when the monitor is re-started later, eg: [ 459.040000] WARNING: CPU: 0 PID: 1093 at net/ncsi/ncsi-manage.c:269 ncsi_start_channel_monitor+0x7c/0x90 [ 459.040000] CPU: 0 PID: 1093 Comm: kworker/0:3 Not tainted 4.10.17-gaca2fdd #140 [ 459.040000] Hardware name: ASpeed SoC [ 459.040000] Workqueue: events ncsi_dev_work [ 459.040000] [<80010094>] (unwind_backtrace) from [<8000d950>] (show_stack+0x20/0x24) [ 459.040000] [<8000d950>] (show_stack) from [<801dbf70>] (dump_stack+0x20/0x28) [ 459.040000] [<801dbf70>] (dump_stack) from [<80018d7c>] (__warn+0xe0/0x108) [ 459.040000] [<80018d7c>] (__warn) from [<80018e70>] (warn_slowpath_null+0x30/0x38) [ 459.040000] [<80018e70>] (warn_slowpath_null) from [<803f6a08>] (ncsi_start_channel_monitor+0x7c/0x90) [ 459.040000] [<803f6a08>] (ncsi_start_channel_monitor) from [<803f7664>] (ncsi_configure_channel+0xdc/0x5fc) [ 459.040000] [<803f7664>] (ncsi_configure_channel) from [<803f8160>] (ncsi_dev_work+0xac/0x474) [ 459.040000] [<803f8160>] (ncsi_dev_work) from [<8002d244>] (process_one_work+0x1e0/0x450) [ 459.040000] [<8002d244>] (process_one_work) from [<8002d510>] (worker_thread+0x5c/0x570) [ 459.040000] [<8002d510>] (worker_thread) from [<80033614>] (kthread+0x124/0x164) [ 459.040000] [<80033614>] (kthread) from [<8000a5e8>] (ret_from_fork+0x14/0x2c) This also updates the monitor instead of just returning if ncsi_xmit_cmd() fails to send the get-link-status command so that the monitor properly times out. Fixes: e6f44ed6d04d3 "net/ncsi: Package and channel management" Signed-off-by: Samuel Mendoza-Jonas Signed-off-by: David S. Miller --- net/ncsi/ncsi-manage.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index b6a449aa9d4b..b022deb39d31 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -202,11 +202,15 @@ static void ncsi_channel_monitor(unsigned long data) monitor_state = nc->monitor.state; spin_unlock_irqrestore(&nc->lock, flags); - if (!enabled || chained) + if (!enabled || chained) { + ncsi_stop_channel_monitor(nc); return; + } if (state != NCSI_CHANNEL_INACTIVE && - state != NCSI_CHANNEL_ACTIVE) + state != NCSI_CHANNEL_ACTIVE) { + ncsi_stop_channel_monitor(nc); return; + } switch (monitor_state) { case NCSI_CHANNEL_MONITOR_START: @@ -217,12 +221,9 @@ static void ncsi_channel_monitor(unsigned long data) nca.type = NCSI_PKT_CMD_GLS; nca.req_flags = 0; ret = ncsi_xmit_cmd(&nca); - if (ret) { + if (ret) netdev_err(ndp->ndev.dev, "Error %d sending GLS\n", ret); - return; - } - break; case NCSI_CHANNEL_MONITOR_WAIT ... NCSI_CHANNEL_MONITOR_WAIT_MAX: break; @@ -233,6 +234,8 @@ static void ncsi_channel_monitor(unsigned long data) ndp->flags |= NCSI_DEV_RESHUFFLE; } + ncsi_stop_channel_monitor(nc); + spin_lock_irqsave(&nc->lock, flags); nc->state = NCSI_CHANNEL_INVISIBLE; spin_unlock_irqrestore(&nc->lock, flags); -- cgit v1.2.3 From 100ef01f3ea4badbee6479290a41f74abd0e523f Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 19 Oct 2017 13:43:07 +1100 Subject: net/ncsi: Disable HWA mode when no channels are found When there are no NCSI channels probed, HWA (Hardware Arbitration) mode is enabled. It's not correct because HWA depends on the fact: NCSI channels exist and all of them support HWA mode. This disables HWA when no channels are probed. Signed-off-by: Gavin Shan Signed-off-by: Samuel Mendoza-Jonas Signed-off-by: David S. Miller --- net/ncsi/ncsi-manage.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index b022deb39d31..0966eff48ce7 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -1005,12 +1005,15 @@ static bool ncsi_check_hwa(struct ncsi_dev_priv *ndp) struct ncsi_package *np; struct ncsi_channel *nc; unsigned int cap; + bool has_channel = false; /* The hardware arbitration is disabled if any one channel * doesn't support explicitly. */ NCSI_FOR_EACH_PACKAGE(ndp, np) { NCSI_FOR_EACH_CHANNEL(np, nc) { + has_channel = true; + cap = nc->caps[NCSI_CAP_GENERIC].cap; if (!(cap & NCSI_CAP_GENERIC_HWA) || (cap & NCSI_CAP_GENERIC_HWA_MASK) != @@ -1021,8 +1024,13 @@ static bool ncsi_check_hwa(struct ncsi_dev_priv *ndp) } } - ndp->flags |= NCSI_DEV_HWA; - return true; + if (has_channel) { + ndp->flags |= NCSI_DEV_HWA; + return true; + } + + ndp->flags &= ~NCSI_DEV_HWA; + return false; } static int ncsi_enable_hwa(struct ncsi_dev_priv *ndp) -- cgit v1.2.3 From 52b4c8627f9f0d882e969967a207a27a80c9c753 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 19 Oct 2017 13:43:08 +1100 Subject: net/ncsi: Enforce failover on link monitor timeout The NCSI channel has been configured to provide service if its link monitor timer is enabled, regardless of its state (inactive or active). So the timeout event on the link monitor indicates the out-of-service on that channel, for which a failover is needed. This sets NCSI_DEV_RESHUFFLE flag to enforce failover on link monitor timeout, regardless the channel's original state (inactive or active). Also, the link is put into "down" state to give the failing channel lowest priority when selecting for the active channel. The state of failing channel should be set to active in order for deinitialization and failover to be done. Signed-off-by: Gavin Shan Signed-off-by: Samuel Mendoza-Jonas Signed-off-by: David S. Miller --- net/ncsi/ncsi-manage.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 0966eff48ce7..28c42b22b748 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -189,6 +189,7 @@ static void ncsi_channel_monitor(unsigned long data) struct ncsi_channel *nc = (struct ncsi_channel *)data; struct ncsi_package *np = nc->package; struct ncsi_dev_priv *ndp = np->ndp; + struct ncsi_channel_mode *ncm; struct ncsi_cmd_arg nca; bool enabled, chained; unsigned int monitor_state; @@ -228,20 +229,21 @@ static void ncsi_channel_monitor(unsigned long data) case NCSI_CHANNEL_MONITOR_WAIT ... NCSI_CHANNEL_MONITOR_WAIT_MAX: break; default: - if (!(ndp->flags & NCSI_DEV_HWA) && - state == NCSI_CHANNEL_ACTIVE) { + if (!(ndp->flags & NCSI_DEV_HWA)) { ncsi_report_link(ndp, true); ndp->flags |= NCSI_DEV_RESHUFFLE; } ncsi_stop_channel_monitor(nc); + ncm = &nc->modes[NCSI_MODE_LINK]; spin_lock_irqsave(&nc->lock, flags); nc->state = NCSI_CHANNEL_INVISIBLE; + ncm->data[2] &= ~0x1; spin_unlock_irqrestore(&nc->lock, flags); spin_lock_irqsave(&ndp->lock, flags); - nc->state = NCSI_CHANNEL_INACTIVE; + nc->state = NCSI_CHANNEL_ACTIVE; list_add_tail_rcu(&nc->link, &ndp->channel_queue); spin_unlock_irqrestore(&ndp->lock, flags); ncsi_process_next_channel(ndp); -- cgit v1.2.3 From 0a90e251988ceedc528c8db98f25b051cf190f44 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 19 Oct 2017 13:43:09 +1100 Subject: net/ncsi: Fix length of GVI response packet The length of GVI (GetVersionInfo) response packet should be 40 instead of 36. This issue was found from /sys/kernel/debug/ncsi/eth0/stats. # ethtool --ncsi eth0 swstats : RESPONSE OK TIMEOUT ERROR ======================================= GVI 0 0 2 With this applied, no error reported on GVI response packets: # ethtool --ncsi eth0 swstats : RESPONSE OK TIMEOUT ERROR ======================================= GVI 2 0 0 Signed-off-by: Gavin Shan Signed-off-by: Samuel Mendoza-Jonas Signed-off-by: David S. Miller --- net/ncsi/ncsi-rsp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 265b9a892d41..927dad4759d1 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -959,7 +959,7 @@ static struct ncsi_rsp_handler { { NCSI_PKT_RSP_EGMF, 4, ncsi_rsp_handler_egmf }, { NCSI_PKT_RSP_DGMF, 4, ncsi_rsp_handler_dgmf }, { NCSI_PKT_RSP_SNFC, 4, ncsi_rsp_handler_snfc }, - { NCSI_PKT_RSP_GVI, 36, ncsi_rsp_handler_gvi }, + { NCSI_PKT_RSP_GVI, 40, ncsi_rsp_handler_gvi }, { NCSI_PKT_RSP_GC, 32, ncsi_rsp_handler_gc }, { NCSI_PKT_RSP_GP, -1, ncsi_rsp_handler_gp }, { NCSI_PKT_RSP_GCPS, 172, ncsi_rsp_handler_gcps }, -- cgit v1.2.3 From b4562ca7925a3bedada87a3dd072dd5bad043288 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 19 Oct 2017 03:33:14 +0000 Subject: hv_sock: add locking in the open/close/release code paths Without the patch, when hvs_open_connection() hasn't completely established a connection (e.g. it has changed sk->sk_state to SS_CONNECTED, but hasn't inserted the sock into the connected queue), vsock_stream_connect() may see the sk_state change and return the connection to the userspace, and next when the userspace closes the connection quickly, hvs_release() may not see the connection in the connected queue; finally hvs_open_connection() inserts the connection into the queue, but we won't be able to purge the connection for ever. Signed-off-by: Dexuan Cui Cc: K. Y. Srinivasan Cc: Haiyang Zhang Cc: Stephen Hemminger Cc: Vitaly Kuznetsov Cc: Cathy Avery Cc: Rolf Neugebauer Cc: Marcelo Cerri Signed-off-by: David S. Miller --- net/vmw_vsock/hyperv_transport.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index 14ed5a344cdf..e21991fe883a 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -310,11 +310,15 @@ static void hvs_close_connection(struct vmbus_channel *chan) struct sock *sk = get_per_channel_state(chan); struct vsock_sock *vsk = vsock_sk(sk); + lock_sock(sk); + sk->sk_state = SS_UNCONNECTED; sock_set_flag(sk, SOCK_DONE); vsk->peer_shutdown |= SEND_SHUTDOWN | RCV_SHUTDOWN; sk->sk_state_change(sk); + + release_sock(sk); } static void hvs_open_connection(struct vmbus_channel *chan) @@ -344,6 +348,8 @@ static void hvs_open_connection(struct vmbus_channel *chan) if (!sk) return; + lock_sock(sk); + if ((conn_from_host && sk->sk_state != VSOCK_SS_LISTEN) || (!conn_from_host && sk->sk_state != SS_CONNECTING)) goto out; @@ -395,9 +401,7 @@ static void hvs_open_connection(struct vmbus_channel *chan) vsock_insert_connected(vnew); - lock_sock(sk); vsock_enqueue_accept(sk, new); - release_sock(sk); } else { sk->sk_state = SS_CONNECTED; sk->sk_socket->state = SS_CONNECTED; @@ -410,6 +414,8 @@ static void hvs_open_connection(struct vmbus_channel *chan) out: /* Release refcnt obtained when we called vsock_find_bound_socket() */ sock_put(sk); + + release_sock(sk); } static u32 hvs_get_local_cid(void) @@ -476,13 +482,21 @@ out: static void hvs_release(struct vsock_sock *vsk) { + struct sock *sk = sk_vsock(vsk); struct hvsock *hvs = vsk->trans; - struct vmbus_channel *chan = hvs->chan; + struct vmbus_channel *chan; + lock_sock(sk); + + sk->sk_state = SS_DISCONNECTING; + vsock_remove_sock(vsk); + + release_sock(sk); + + chan = hvs->chan; if (chan) hvs_shutdown(vsk, RCV_SHUTDOWN | SEND_SHUTDOWN); - vsock_remove_sock(vsk); } static void hvs_destruct(struct vsock_sock *vsk) -- cgit v1.2.3 From 772e97b57a4aa00170ad505a40ffad31d987ce1d Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 19 Oct 2017 13:31:28 +0200 Subject: geneve: Fix function matching VNI and tunnel ID on big-endian On big-endian machines, functions converting between tunnel ID and VNI use the three LSBs of tunnel ID storage to map VNI. The comparison function eq_tun_id_and_vni(), on the other hand, attempted to map the VNI from the three MSBs. Fix it by using the same check implemented on LE, which maps VNI from the three LSBs of tunnel ID. Fixes: 2e0b26e10352 ("geneve: Optimize geneve device lookup.") Signed-off-by: Stefano Brivio Reviewed-by: Jakub Sitnicki Signed-off-by: David S. Miller --- drivers/net/geneve.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index f6404074b7b0..ed51018a813e 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -113,13 +113,7 @@ static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni) static bool eq_tun_id_and_vni(u8 *tun_id, u8 *vni) { -#ifdef __BIG_ENDIAN - return (vni[0] == tun_id[2]) && - (vni[1] == tun_id[1]) && - (vni[2] == tun_id[0]); -#else return !memcmp(vni, &tun_id[5], 3); -#endif } static sa_family_t geneve_get_sk_family(struct geneve_sock *gs) -- cgit v1.2.3 From 197df02cb3d3e969fb1d6fc11f5a634b7bfc2124 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Thu, 19 Oct 2017 14:22:17 +0200 Subject: udp: make some messages more descriptive In the UDP code there are two leftover error messages with very few meaning. Replace them with a more descriptive error message as some users reported them as "strange network error". Signed-off-by: Matteo Croce Signed-off-by: David S. Miller --- net/ipv4/udp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e45177ceb0ee..806b298a3bdd 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1061,7 +1061,7 @@ back_from_confirm: /* ... which is an evident application bug. --ANK */ release_sock(sk); - net_dbg_ratelimited("cork app bug 2\n"); + net_dbg_ratelimited("socket already corked\n"); err = -EINVAL; goto out; } @@ -1144,7 +1144,7 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset, if (unlikely(!up->pending)) { release_sock(sk); - net_dbg_ratelimited("udp cork app bug 3\n"); + net_dbg_ratelimited("cork failed\n"); return -EINVAL; } -- cgit v1.2.3 From 65e665e68d097edfe667372f13d54f3e4edcb69c Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Thu, 19 Oct 2017 18:23:53 +0300 Subject: net: aquantia: Reset nic statistics on interface up/down Internal statistics system on chip never gets reset until hardware reboot. This is quite inconvenient in terms of ethtool statistics usage. This patch implements incremental statistics update inside of service callback. Upon nic initialization, first request is done to fetch initial stat data, current collected stat data gets cleared. Internal statistics mailbox readout is improved to save space and increase readability Signed-off-by: Pavel Belous Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_hw.h | 2 + drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 3 + .../ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c | 1 + .../ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 1 + .../aquantia/atlantic/hw_atl/hw_atl_utils.c | 69 +++++++++++++++++----- .../aquantia/atlantic/hw_atl/hw_atl_utils.h | 16 ++++- 6 files changed, 75 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h index bf9b3f020e10..3a8baaef053c 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h @@ -163,6 +163,8 @@ struct aq_hw_ops { int (*hw_get_regs)(struct aq_hw_s *self, struct aq_hw_caps_s *aq_hw_caps, u32 *regs_buff); + int (*hw_update_stats)(struct aq_hw_s *self); + int (*hw_get_hw_stats)(struct aq_hw_s *self, u64 *data, unsigned int *p_count); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 0a5bb4114eb4..6b49dd658012 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -167,6 +167,9 @@ static void aq_nic_service_timer_cb(unsigned long param) self->aq_hw_ops.hw_interrupt_moderation_set(self->aq_hw, self->aq_nic_cfg.is_interrupt_moderation); + if (self->aq_hw_ops.hw_update_stats) + self->aq_hw_ops.hw_update_stats(self->aq_hw); + memset(&stats_rx, 0U, sizeof(struct aq_ring_stats_rx_s)); memset(&stats_tx, 0U, sizeof(struct aq_ring_stats_tx_s)); for (i = AQ_DIMOF(self->aq_vec); i--;) { diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c index c5a02df7a48b..b0747b2486b2 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c @@ -885,6 +885,7 @@ static struct aq_hw_ops hw_atl_ops_ = { .hw_rss_set = hw_atl_a0_hw_rss_set, .hw_rss_hash_set = hw_atl_a0_hw_rss_hash_set, .hw_get_regs = hw_atl_utils_hw_get_regs, + .hw_update_stats = hw_atl_utils_update_stats, .hw_get_hw_stats = hw_atl_utils_get_hw_stats, .hw_get_fw_version = hw_atl_utils_get_fw_version, }; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 21784cc39dab..6f6e70aa1047 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -939,6 +939,7 @@ static struct aq_hw_ops hw_atl_ops_ = { .hw_rss_set = hw_atl_b0_hw_rss_set, .hw_rss_hash_set = hw_atl_b0_hw_rss_hash_set, .hw_get_regs = hw_atl_utils_hw_get_regs, + .hw_update_stats = hw_atl_utils_update_stats, .hw_get_hw_stats = hw_atl_utils_get_hw_stats, .hw_get_fw_version = hw_atl_utils_get_fw_version, }; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c index bf734b32e44b..1fe016fc4bc7 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c @@ -255,6 +255,15 @@ err_exit: return err; } +int hw_atl_utils_mpi_read_mbox(struct aq_hw_s *self, + struct hw_aq_atl_utils_mbox_header *pmbox) +{ + return hw_atl_utils_fw_downld_dwords(self, + PHAL_ATLANTIC->mbox_addr, + (u32 *)(void *)pmbox, + sizeof(*pmbox) / sizeof(u32)); +} + void hw_atl_utils_mpi_read_stats(struct aq_hw_s *self, struct hw_aq_atl_utils_mbox *pmbox) { @@ -267,9 +276,6 @@ void hw_atl_utils_mpi_read_stats(struct aq_hw_s *self, if (err < 0) goto err_exit; - if (pmbox != &PHAL_ATLANTIC->mbox) - memcpy(pmbox, &PHAL_ATLANTIC->mbox, sizeof(*pmbox)); - if (IS_CHIP_FEATURE(REVISION_A0)) { unsigned int mtu = self->aq_nic_cfg ? self->aq_nic_cfg->mtu : 1514U; @@ -299,17 +305,17 @@ void hw_atl_utils_mpi_set(struct aq_hw_s *self, { int err = 0; u32 transaction_id = 0; + struct hw_aq_atl_utils_mbox_header mbox; if (state == MPI_RESET) { - hw_atl_utils_mpi_read_stats(self, &PHAL_ATLANTIC->mbox); + hw_atl_utils_mpi_read_mbox(self, &mbox); - transaction_id = PHAL_ATLANTIC->mbox.transaction_id; + transaction_id = mbox.transaction_id; AQ_HW_WAIT_FOR(transaction_id != - (hw_atl_utils_mpi_read_stats - (self, &PHAL_ATLANTIC->mbox), - PHAL_ATLANTIC->mbox.transaction_id), - 1000U, 100U); + (hw_atl_utils_mpi_read_mbox(self, &mbox), + mbox.transaction_id), + 1000U, 100U); if (err < 0) goto err_exit; } @@ -492,16 +498,51 @@ int hw_atl_utils_hw_set_power(struct aq_hw_s *self, return 0; } +int hw_atl_utils_update_stats(struct aq_hw_s *self) +{ + struct hw_atl_s *hw_self = PHAL_ATLANTIC; + struct hw_aq_atl_utils_mbox mbox; + + if (!self->aq_link_status.mbps) + return 0; + + hw_atl_utils_mpi_read_stats(self, &mbox); + +#define AQ_SDELTA(_N_) (hw_self->curr_stats._N_ += \ + mbox.stats._N_ - hw_self->last_stats._N_) + + AQ_SDELTA(uprc); + AQ_SDELTA(mprc); + AQ_SDELTA(bprc); + AQ_SDELTA(erpt); + + AQ_SDELTA(uptc); + AQ_SDELTA(mptc); + AQ_SDELTA(bptc); + AQ_SDELTA(erpr); + + AQ_SDELTA(ubrc); + AQ_SDELTA(ubtc); + AQ_SDELTA(mbrc); + AQ_SDELTA(mbtc); + AQ_SDELTA(bbrc); + AQ_SDELTA(bbtc); + AQ_SDELTA(dpc); + +#undef AQ_SDELTA + + memcpy(&hw_self->last_stats, &mbox.stats, sizeof(mbox.stats)); + + return 0; +} + int hw_atl_utils_get_hw_stats(struct aq_hw_s *self, u64 *data, unsigned int *p_count) { - struct hw_atl_stats_s *stats = NULL; + struct hw_atl_s *hw_self = PHAL_ATLANTIC; + struct hw_atl_stats_s *stats = &hw_self->curr_stats; int i = 0; - hw_atl_utils_mpi_read_stats(self, &PHAL_ATLANTIC->mbox); - - stats = &PHAL_ATLANTIC->mbox.stats; - data[i] = stats->uprc + stats->mprc + stats->bprc; data[++i] = stats->uprc; data[++i] = stats->mprc; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h index e0360a6b2202..2218bdb605a7 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h @@ -115,16 +115,21 @@ struct __packed hw_aq_atl_utils_fw_rpc { }; }; -struct __packed hw_aq_atl_utils_mbox { +struct __packed hw_aq_atl_utils_mbox_header { u32 version; u32 transaction_id; - int error; + u32 error; +}; + +struct __packed hw_aq_atl_utils_mbox { + struct hw_aq_atl_utils_mbox_header header; struct hw_atl_stats_s stats; }; struct __packed hw_atl_s { struct aq_hw_s base; - struct hw_aq_atl_utils_mbox mbox; + struct hw_atl_stats_s last_stats; + struct hw_atl_stats_s curr_stats; u64 speed; u32 itr_tx; u32 itr_rx; @@ -170,6 +175,9 @@ enum hal_atl_utils_fw_state_e { void hw_atl_utils_hw_chip_features_init(struct aq_hw_s *self, u32 *p); +int hw_atl_utils_mpi_read_mbox(struct aq_hw_s *self, + struct hw_aq_atl_utils_mbox_header *pmbox); + void hw_atl_utils_mpi_read_stats(struct aq_hw_s *self, struct hw_aq_atl_utils_mbox *pmbox); @@ -199,6 +207,8 @@ int hw_atl_utils_hw_deinit(struct aq_hw_s *self); int hw_atl_utils_get_fw_version(struct aq_hw_s *self, u32 *fw_version); +int hw_atl_utils_update_stats(struct aq_hw_s *self); + int hw_atl_utils_get_hw_stats(struct aq_hw_s *self, u64 *data, unsigned int *p_count); -- cgit v1.2.3 From 5d8d84e91d7432cd206b27ad791a11220689ac53 Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Thu, 19 Oct 2017 18:23:54 +0300 Subject: net: aquantia: Add queue restarts stats counter Queue stat strings are cleaned up, duplicate stat name strings removed, queue restarts counter added Signed-off-by: Pavel Belous Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- .../net/ethernet/aquantia/atlantic/aq_ethtool.c | 92 ++++++++-------------- drivers/net/ethernet/aquantia/atlantic/aq_vec.c | 3 + 2 files changed, 37 insertions(+), 58 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c index a761e91471df..3eab4089e91a 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c @@ -56,10 +56,6 @@ aq_ethtool_set_link_ksettings(struct net_device *ndev, return aq_nic_set_link_ksettings(aq_nic, cmd); } -/* there "5U" is number of queue[#] stats lines (InPackets+...+InErrors) */ -static const unsigned int aq_ethtool_stat_queue_lines = 5U; -static const unsigned int aq_ethtool_stat_queue_chars = - 5U * ETH_GSTRING_LEN; static const char aq_ethtool_stat_names[][ETH_GSTRING_LEN] = { "InPackets", "InUCast", @@ -83,56 +79,26 @@ static const char aq_ethtool_stat_names[][ETH_GSTRING_LEN] = { "InOctetsDma", "OutOctetsDma", "InDroppedDma", - "Queue[0] InPackets", - "Queue[0] OutPackets", - "Queue[0] InJumboPackets", - "Queue[0] InLroPackets", - "Queue[0] InErrors", - "Queue[1] InPackets", - "Queue[1] OutPackets", - "Queue[1] InJumboPackets", - "Queue[1] InLroPackets", - "Queue[1] InErrors", - "Queue[2] InPackets", - "Queue[2] OutPackets", - "Queue[2] InJumboPackets", - "Queue[2] InLroPackets", - "Queue[2] InErrors", - "Queue[3] InPackets", - "Queue[3] OutPackets", - "Queue[3] InJumboPackets", - "Queue[3] InLroPackets", - "Queue[3] InErrors", - "Queue[4] InPackets", - "Queue[4] OutPackets", - "Queue[4] InJumboPackets", - "Queue[4] InLroPackets", - "Queue[4] InErrors", - "Queue[5] InPackets", - "Queue[5] OutPackets", - "Queue[5] InJumboPackets", - "Queue[5] InLroPackets", - "Queue[5] InErrors", - "Queue[6] InPackets", - "Queue[6] OutPackets", - "Queue[6] InJumboPackets", - "Queue[6] InLroPackets", - "Queue[6] InErrors", - "Queue[7] InPackets", - "Queue[7] OutPackets", - "Queue[7] InJumboPackets", - "Queue[7] InLroPackets", - "Queue[7] InErrors", +}; + +static const char aq_ethtool_queue_stat_names[][ETH_GSTRING_LEN] = { + "Queue[%d] InPackets", + "Queue[%d] OutPackets", + "Queue[%d] Restarts", + "Queue[%d] InJumboPackets", + "Queue[%d] InLroPackets", + "Queue[%d] InErrors", }; static void aq_ethtool_stats(struct net_device *ndev, struct ethtool_stats *stats, u64 *data) { struct aq_nic_s *aq_nic = netdev_priv(ndev); + struct aq_nic_cfg_s *cfg = aq_nic_get_cfg(aq_nic); -/* ASSERT: Need add lines to aq_ethtool_stat_names if AQ_CFG_VECS_MAX > 8 */ - BUILD_BUG_ON(AQ_CFG_VECS_MAX > 8); - memset(data, 0, ARRAY_SIZE(aq_ethtool_stat_names) * sizeof(u64)); + memset(data, 0, (ARRAY_SIZE(aq_ethtool_stat_names) + + ARRAY_SIZE(aq_ethtool_queue_stat_names) * + cfg->vecs) * sizeof(u64)); aq_nic_get_stats(aq_nic, data); } @@ -154,8 +120,8 @@ static void aq_ethtool_get_drvinfo(struct net_device *ndev, strlcpy(drvinfo->bus_info, pdev ? pci_name(pdev) : "", sizeof(drvinfo->bus_info)); - drvinfo->n_stats = ARRAY_SIZE(aq_ethtool_stat_names) - - (AQ_CFG_VECS_MAX - cfg->vecs) * aq_ethtool_stat_queue_lines; + drvinfo->n_stats = ARRAY_SIZE(aq_ethtool_stat_names) + + cfg->vecs * ARRAY_SIZE(aq_ethtool_queue_stat_names); drvinfo->testinfo_len = 0; drvinfo->regdump_len = regs_count; drvinfo->eedump_len = 0; @@ -164,14 +130,25 @@ static void aq_ethtool_get_drvinfo(struct net_device *ndev, static void aq_ethtool_get_strings(struct net_device *ndev, u32 stringset, u8 *data) { + int i, si; struct aq_nic_s *aq_nic = netdev_priv(ndev); struct aq_nic_cfg_s *cfg = aq_nic_get_cfg(aq_nic); - - if (stringset == ETH_SS_STATS) - memcpy(data, *aq_ethtool_stat_names, - sizeof(aq_ethtool_stat_names) - - (AQ_CFG_VECS_MAX - cfg->vecs) * - aq_ethtool_stat_queue_chars); + u8 *p = data; + + if (stringset == ETH_SS_STATS) { + memcpy(p, *aq_ethtool_stat_names, + sizeof(aq_ethtool_stat_names)); + p = p + sizeof(aq_ethtool_stat_names); + for (i = 0; i < cfg->vecs; i++) { + for (si = 0; + si < ARRAY_SIZE(aq_ethtool_queue_stat_names); + si++) { + snprintf(p, ETH_GSTRING_LEN, + aq_ethtool_queue_stat_names[si], i); + p += ETH_GSTRING_LEN; + } + } + } } static int aq_ethtool_get_sset_count(struct net_device *ndev, int stringset) @@ -182,9 +159,8 @@ static int aq_ethtool_get_sset_count(struct net_device *ndev, int stringset) switch (stringset) { case ETH_SS_STATS: - ret = ARRAY_SIZE(aq_ethtool_stat_names) - - (AQ_CFG_VECS_MAX - cfg->vecs) * - aq_ethtool_stat_queue_lines; + ret = ARRAY_SIZE(aq_ethtool_stat_names) + + cfg->vecs * ARRAY_SIZE(aq_ethtool_queue_stat_names); break; default: ret = -EOPNOTSUPP; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c index 305ff8ffac2c..5fecc9a099ef 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c @@ -373,8 +373,11 @@ int aq_vec_get_sw_stats(struct aq_vec_s *self, u64 *data, unsigned int *p_count) memset(&stats_tx, 0U, sizeof(struct aq_ring_stats_tx_s)); aq_vec_add_stats(self, &stats_rx, &stats_tx); + /* This data should mimic aq_ethtool_queue_stat_names structure + */ data[count] += stats_rx.packets; data[++count] += stats_tx.packets; + data[++count] += stats_tx.queue_restarts; data[++count] += stats_rx.jumbo_packets; data[++count] += stats_rx.lro_packets; data[++count] += stats_rx.errors; -- cgit v1.2.3 From 93d87b8fbe6cf17f0ad9552a934b5a6623ccd7d1 Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Thu, 19 Oct 2017 18:23:55 +0300 Subject: net: aquantia: Fixed transient link up/down/up notification When doing ifconfig down/up, driver did not reported carrier_off neither in nic_stop nor in nic_start. That caused link to be visible as "up" during couple of seconds immediately after "ifconfig up". Signed-off-by: Pavel Belous Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 6b49dd658012..9378b4877783 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -337,6 +337,7 @@ struct aq_nic_s *aq_nic_alloc_hot(struct net_device *ndev) } if (netif_running(ndev)) netif_tx_disable(ndev); + netif_carrier_off(self->ndev); for (self->aq_vecs = 0; self->aq_vecs < self->aq_nic_cfg.vecs; self->aq_vecs++) { @@ -902,6 +903,7 @@ int aq_nic_stop(struct aq_nic_s *self) unsigned int i = 0U; netif_tx_disable(self->ndev); + netif_carrier_off(self->ndev); del_timer_sync(&self->service_timer); -- cgit v1.2.3 From 4c8bb609d304df72858aa2e5e74abab5246bd24b Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Thu, 19 Oct 2017 18:23:56 +0300 Subject: net: aquantia: Limit number of MSIX irqs to the number of cpus There is no much practical use from having MSIX vectors more that number of cpus, thus cap this first with preconfigured limit, then with number of cpus online. Signed-off-by: Pavel Belous Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index 4c6c882c6a1c..727f0a446ef1 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -85,6 +85,7 @@ int aq_pci_func_init(struct aq_pci_func_s *self) int err = 0; unsigned int bar = 0U; unsigned int port = 0U; + unsigned int numvecs = 0U; err = pci_enable_device(self->pdev); if (err < 0) @@ -142,10 +143,12 @@ int aq_pci_func_init(struct aq_pci_func_s *self) } } - /*enable interrupts */ + numvecs = min((u8)AQ_CFG_VECS_DEF, self->aq_hw_caps.msix_irqs); + numvecs = min(numvecs, num_online_cpus()); + + /* enable interrupts */ #if !AQ_CFG_FORCE_LEGACY_INT - err = pci_alloc_irq_vectors(self->pdev, self->aq_hw_caps.msix_irqs, - self->aq_hw_caps.msix_irqs, PCI_IRQ_MSIX); + err = pci_alloc_irq_vectors(self->pdev, numvecs, numvecs, PCI_IRQ_MSIX); if (err < 0) { err = pci_alloc_irq_vectors(self->pdev, 1, 1, @@ -153,7 +156,7 @@ int aq_pci_func_init(struct aq_pci_func_s *self) if (err < 0) goto err_exit; } -#endif +#endif /* AQ_CFG_FORCE_LEGACY_INT */ /* net device init */ for (port = 0; port < self->ports; ++port) { -- cgit v1.2.3 From 6849540adc0bcc8c648d7c11be169d2ca267fbca Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Thu, 19 Oct 2017 18:23:57 +0300 Subject: net: aquantia: mmio unmap was not performed on driver removal That may lead to mmio resource leakage. Signed-off-by: Pavel Belous Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index 727f0a446ef1..cadaa646c89f 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -268,6 +268,9 @@ void aq_pci_func_free(struct aq_pci_func_s *self) aq_nic_ndev_free(self->port[port]); } + if (self->mmio) + iounmap(self->mmio); + kfree(self); err_exit:; -- cgit v1.2.3 From b82ee71a86b0ea66da79a91959d800ffb696a5cb Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Thu, 19 Oct 2017 18:23:58 +0300 Subject: net: aquantia: Enable coalescing management via ethtool interface Aquantia NIC allows both TX and RX interrupt throttle rate (ITR) management, but this was used in a very limited way via predefined values. This patch allows to setup ITR default values via module command line arguments and via standard ethtool coalescing settings. Signed-off-by: Pavel Belous Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_cfg.h | 8 ++- .../net/ethernet/aquantia/atlantic/aq_ethtool.c | 65 ++++++++++++++++++ drivers/net/ethernet/aquantia/atlantic/aq_hw.h | 3 +- drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 36 +++++++--- drivers/net/ethernet/aquantia/atlantic/aq_nic.h | 4 +- .../ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c | 20 +++--- .../ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 76 ++++++++++++---------- .../aquantia/atlantic/hw_atl/hw_atl_b0_internal.h | 3 + .../aquantia/atlantic/hw_atl/hw_atl_utils.h | 2 - 9 files changed, 155 insertions(+), 62 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h index 0fdaaa643073..57e796870595 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h @@ -22,8 +22,12 @@ #define AQ_CFG_FORCE_LEGACY_INT 0U -#define AQ_CFG_IS_INTERRUPT_MODERATION_DEF 1U -#define AQ_CFG_INTERRUPT_MODERATION_RATE_DEF 0xFFFFU +#define AQ_CFG_INTERRUPT_MODERATION_OFF 0 +#define AQ_CFG_INTERRUPT_MODERATION_ON 1 +#define AQ_CFG_INTERRUPT_MODERATION_AUTO 0xFFFFU + +#define AQ_CFG_INTERRUPT_MODERATION_USEC_MAX (0x1FF * 2) + #define AQ_CFG_IRQ_MASK 0x1FFU #define AQ_CFG_VECS_MAX 8U diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c index 3eab4089e91a..d5e99b468870 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c @@ -221,6 +221,69 @@ static int aq_ethtool_get_rxnfc(struct net_device *ndev, return err; } +int aq_ethtool_get_coalesce(struct net_device *ndev, + struct ethtool_coalesce *coal) +{ + struct aq_nic_s *aq_nic = netdev_priv(ndev); + struct aq_nic_cfg_s *cfg = aq_nic_get_cfg(aq_nic); + + if (cfg->itr == AQ_CFG_INTERRUPT_MODERATION_ON || + cfg->itr == AQ_CFG_INTERRUPT_MODERATION_AUTO) { + coal->rx_coalesce_usecs = cfg->rx_itr; + coal->tx_coalesce_usecs = cfg->tx_itr; + coal->rx_max_coalesced_frames = 0; + coal->tx_max_coalesced_frames = 0; + } else { + coal->rx_coalesce_usecs = 0; + coal->tx_coalesce_usecs = 0; + coal->rx_max_coalesced_frames = 1; + coal->tx_max_coalesced_frames = 1; + } + return 0; +} + +int aq_ethtool_set_coalesce(struct net_device *ndev, + struct ethtool_coalesce *coal) +{ + struct aq_nic_s *aq_nic = netdev_priv(ndev); + struct aq_nic_cfg_s *cfg = aq_nic_get_cfg(aq_nic); + + /* This is not yet supported + */ + if (coal->use_adaptive_rx_coalesce || coal->use_adaptive_tx_coalesce) + return -EOPNOTSUPP; + + /* Atlantic only supports timing based coalescing + */ + if (coal->rx_max_coalesced_frames > 1 || + coal->rx_coalesce_usecs_irq || + coal->rx_max_coalesced_frames_irq) + return -EOPNOTSUPP; + + if (coal->tx_max_coalesced_frames > 1 || + coal->tx_coalesce_usecs_irq || + coal->tx_max_coalesced_frames_irq) + return -EOPNOTSUPP; + + /* We do not support frame counting. Check this + */ + if (!(coal->rx_max_coalesced_frames == !coal->rx_coalesce_usecs)) + return -EOPNOTSUPP; + if (!(coal->tx_max_coalesced_frames == !coal->tx_coalesce_usecs)) + return -EOPNOTSUPP; + + if (coal->rx_coalesce_usecs > AQ_CFG_INTERRUPT_MODERATION_USEC_MAX || + coal->tx_coalesce_usecs > AQ_CFG_INTERRUPT_MODERATION_USEC_MAX) + return -EINVAL; + + cfg->itr = AQ_CFG_INTERRUPT_MODERATION_ON; + + cfg->rx_itr = coal->rx_coalesce_usecs; + cfg->tx_itr = coal->tx_coalesce_usecs; + + return aq_nic_update_interrupt_moderation_settings(aq_nic); +} + const struct ethtool_ops aq_ethtool_ops = { .get_link = aq_ethtool_get_link, .get_regs_len = aq_ethtool_get_regs_len, @@ -235,4 +298,6 @@ const struct ethtool_ops aq_ethtool_ops = { .get_ethtool_stats = aq_ethtool_stats, .get_link_ksettings = aq_ethtool_get_link_ksettings, .set_link_ksettings = aq_ethtool_set_link_ksettings, + .get_coalesce = aq_ethtool_get_coalesce, + .set_coalesce = aq_ethtool_set_coalesce, }; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h index 3a8baaef053c..0207927dc8a6 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h @@ -151,8 +151,7 @@ struct aq_hw_ops { [ETH_ALEN], u32 count); - int (*hw_interrupt_moderation_set)(struct aq_hw_s *self, - bool itr_enabled); + int (*hw_interrupt_moderation_set)(struct aq_hw_s *self); int (*hw_rss_set)(struct aq_hw_s *self, struct aq_rss_parameters *rss_params); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 9378b4877783..483e97691eea 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -16,6 +16,7 @@ #include "aq_pci_func.h" #include "aq_nic_internal.h" +#include #include #include #include @@ -24,6 +25,18 @@ #include #include +static unsigned int aq_itr = AQ_CFG_INTERRUPT_MODERATION_AUTO; +module_param_named(aq_itr, aq_itr, uint, 0644); +MODULE_PARM_DESC(aq_itr, "Interrupt throttling mode"); + +static unsigned int aq_itr_tx; +module_param_named(aq_itr_tx, aq_itr_tx, uint, 0644); +MODULE_PARM_DESC(aq_itr_tx, "TX interrupt throttle rate"); + +static unsigned int aq_itr_rx; +module_param_named(aq_itr_rx, aq_itr_rx, uint, 0644); +MODULE_PARM_DESC(aq_itr_rx, "RX interrupt throttle rate"); + static void aq_nic_rss_init(struct aq_nic_s *self, unsigned int num_rss_queues) { struct aq_nic_cfg_s *cfg = &self->aq_nic_cfg; @@ -61,9 +74,9 @@ static void aq_nic_cfg_init_defaults(struct aq_nic_s *self) cfg->is_polling = AQ_CFG_IS_POLLING_DEF; - cfg->is_interrupt_moderation = AQ_CFG_IS_INTERRUPT_MODERATION_DEF; - cfg->itr = cfg->is_interrupt_moderation ? - AQ_CFG_INTERRUPT_MODERATION_RATE_DEF : 0U; + cfg->itr = aq_itr; + cfg->tx_itr = aq_itr_tx; + cfg->rx_itr = aq_itr_rx; cfg->is_rss = AQ_CFG_IS_RSS_DEF; cfg->num_rss_queues = AQ_CFG_NUM_RSS_QUEUES_DEF; @@ -126,10 +139,12 @@ static int aq_nic_update_link_status(struct aq_nic_s *self) if (err) return err; - if (self->link_status.mbps != self->aq_hw->aq_link_status.mbps) + if (self->link_status.mbps != self->aq_hw->aq_link_status.mbps) { pr_info("%s: link change old %d new %d\n", AQ_CFG_DRV_NAME, self->link_status.mbps, self->aq_hw->aq_link_status.mbps); + aq_nic_update_interrupt_moderation_settings(self); + } self->link_status = self->aq_hw->aq_link_status; if (!netif_carrier_ok(self->ndev) && self->link_status.mbps) { @@ -164,9 +179,6 @@ static void aq_nic_service_timer_cb(unsigned long param) if (err) goto err_exit; - self->aq_hw_ops.hw_interrupt_moderation_set(self->aq_hw, - self->aq_nic_cfg.is_interrupt_moderation); - if (self->aq_hw_ops.hw_update_stats) self->aq_hw_ops.hw_update_stats(self->aq_hw); @@ -425,9 +437,8 @@ int aq_nic_start(struct aq_nic_s *self) if (err < 0) goto err_exit; - err = self->aq_hw_ops.hw_interrupt_moderation_set(self->aq_hw, - self->aq_nic_cfg.is_interrupt_moderation); - if (err < 0) + err = aq_nic_update_interrupt_moderation_settings(self); + if (err) goto err_exit; setup_timer(&self->service_timer, &aq_nic_service_timer_cb, (unsigned long)self); @@ -649,6 +660,11 @@ err_exit: return err; } +int aq_nic_update_interrupt_moderation_settings(struct aq_nic_s *self) +{ + return self->aq_hw_ops.hw_interrupt_moderation_set(self->aq_hw); +} + int aq_nic_set_packet_filter(struct aq_nic_s *self, unsigned int flags) { int err = 0; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h index 0ddd556ff901..4309983acdd6 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h @@ -40,6 +40,8 @@ struct aq_nic_cfg_s { u32 vecs; /* vecs==allocated irqs */ u32 irq_type; u32 itr; + u16 rx_itr; + u16 tx_itr; u32 num_rss_queues; u32 mtu; u32 ucp_0x364; @@ -49,7 +51,6 @@ struct aq_nic_cfg_s { u16 is_mc_list_enabled; u16 mc_list_count; bool is_autoneg; - bool is_interrupt_moderation; bool is_polling; bool is_rss; bool is_lro; @@ -104,5 +105,6 @@ int aq_nic_set_link_ksettings(struct aq_nic_s *self, struct aq_nic_cfg_s *aq_nic_get_cfg(struct aq_nic_s *self); u32 aq_nic_get_fw_version(struct aq_nic_s *self); int aq_nic_change_pm_state(struct aq_nic_s *self, pm_message_t *pm_msg); +int aq_nic_update_interrupt_moderation_settings(struct aq_nic_s *self); #endif /* AQ_NIC_H */ diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c index b0747b2486b2..07b3c49a16a4 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c @@ -765,24 +765,23 @@ err_exit: return err; } -static int hw_atl_a0_hw_interrupt_moderation_set(struct aq_hw_s *self, - bool itr_enabled) +static int hw_atl_a0_hw_interrupt_moderation_set(struct aq_hw_s *self) { unsigned int i = 0U; + u32 itr_rx; - if (itr_enabled && self->aq_nic_cfg->itr) { - if (self->aq_nic_cfg->itr != 0xFFFFU) { + if (self->aq_nic_cfg->itr) { + if (self->aq_nic_cfg->itr != AQ_CFG_INTERRUPT_MODERATION_AUTO) { u32 itr_ = (self->aq_nic_cfg->itr >> 1); itr_ = min(AQ_CFG_IRQ_MASK, itr_); - PHAL_ATLANTIC_A0->itr_rx = 0x80000000U | - (itr_ << 0x10); + itr_rx = 0x80000000U | (itr_ << 0x10); } else { u32 n = 0xFFFFU & aq_hw_read_reg(self, 0x00002A00U); if (n < self->aq_link_status.mbps) { - PHAL_ATLANTIC_A0->itr_rx = 0U; + itr_rx = 0U; } else { static unsigned int hw_timers_tbl_[] = { 0x01CU, /* 10Gbit */ @@ -797,8 +796,7 @@ static int hw_atl_a0_hw_interrupt_moderation_set(struct aq_hw_s *self, hw_atl_utils_mbps_2_speed_index( self->aq_link_status.mbps); - PHAL_ATLANTIC_A0->itr_rx = - 0x80000000U | + itr_rx = 0x80000000U | (hw_timers_tbl_[speed_index] << 0x10U); } @@ -806,11 +804,11 @@ static int hw_atl_a0_hw_interrupt_moderation_set(struct aq_hw_s *self, aq_hw_write_reg(self, 0x00002A00U, 0x8D000000U); } } else { - PHAL_ATLANTIC_A0->itr_rx = 0U; + itr_rx = 0U; } for (i = HW_ATL_A0_RINGS_MAX; i--;) - reg_irq_thr_set(self, PHAL_ATLANTIC_A0->itr_rx, i); + reg_irq_thr_set(self, itr_rx, i); return aq_hw_err_from_flags(self); } diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 6f6e70aa1047..11f7e71bf448 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -788,31 +788,37 @@ err_exit: return err; } -static int hw_atl_b0_hw_interrupt_moderation_set(struct aq_hw_s *self, - bool itr_enabled) +static int hw_atl_b0_hw_interrupt_moderation_set(struct aq_hw_s *self) { unsigned int i = 0U; + u32 itr_tx = 2U; + u32 itr_rx = 2U; - if (itr_enabled && self->aq_nic_cfg->itr) { + switch (self->aq_nic_cfg->itr) { + case AQ_CFG_INTERRUPT_MODERATION_ON: + case AQ_CFG_INTERRUPT_MODERATION_AUTO: tdm_tx_desc_wr_wb_irq_en_set(self, 0U); tdm_tdm_intr_moder_en_set(self, 1U); rdm_rx_desc_wr_wb_irq_en_set(self, 0U); rdm_rdm_intr_moder_en_set(self, 1U); - PHAL_ATLANTIC_B0->itr_tx = 2U; - PHAL_ATLANTIC_B0->itr_rx = 2U; + if (self->aq_nic_cfg->itr == AQ_CFG_INTERRUPT_MODERATION_ON) { + /* HW timers are in 2us units */ + int tx_max_timer = self->aq_nic_cfg->tx_itr / 2; + int tx_min_timer = tx_max_timer / 2; - if (self->aq_nic_cfg->itr != 0xFFFFU) { - unsigned int max_timer = self->aq_nic_cfg->itr / 2U; - unsigned int min_timer = self->aq_nic_cfg->itr / 32U; + int rx_max_timer = self->aq_nic_cfg->rx_itr / 2; + int rx_min_timer = rx_max_timer / 2; - max_timer = min(0x1FFU, max_timer); - min_timer = min(0xFFU, min_timer); + tx_max_timer = min(HW_ATL_INTR_MODER_MAX, tx_max_timer); + tx_min_timer = min(HW_ATL_INTR_MODER_MIN, tx_min_timer); + rx_max_timer = min(HW_ATL_INTR_MODER_MAX, rx_max_timer); + rx_min_timer = min(HW_ATL_INTR_MODER_MIN, rx_min_timer); - PHAL_ATLANTIC_B0->itr_tx |= min_timer << 0x8U; - PHAL_ATLANTIC_B0->itr_tx |= max_timer << 0x10U; - PHAL_ATLANTIC_B0->itr_rx |= min_timer << 0x8U; - PHAL_ATLANTIC_B0->itr_rx |= max_timer << 0x10U; + itr_tx |= tx_min_timer << 0x8U; + itr_tx |= tx_max_timer << 0x10U; + itr_rx |= rx_min_timer << 0x8U; + itr_rx |= rx_max_timer << 0x10U; } else { static unsigned int hw_atl_b0_timers_table_tx_[][2] = { {0xffU, 0xffU}, /* 10Gbit */ @@ -836,34 +842,36 @@ static int hw_atl_b0_hw_interrupt_moderation_set(struct aq_hw_s *self, hw_atl_utils_mbps_2_speed_index( self->aq_link_status.mbps); - PHAL_ATLANTIC_B0->itr_tx |= - hw_atl_b0_timers_table_tx_[speed_index] - [0] << 0x8U; /* set min timer value */ - PHAL_ATLANTIC_B0->itr_tx |= - hw_atl_b0_timers_table_tx_[speed_index] - [1] << 0x10U; /* set max timer value */ - - PHAL_ATLANTIC_B0->itr_rx |= - hw_atl_b0_timers_table_rx_[speed_index] - [0] << 0x8U; /* set min timer value */ - PHAL_ATLANTIC_B0->itr_rx |= - hw_atl_b0_timers_table_rx_[speed_index] - [1] << 0x10U; /* set max timer value */ + /* Update user visible ITR settings */ + self->aq_nic_cfg->tx_itr = hw_atl_b0_timers_table_tx_ + [speed_index][1] * 2; + self->aq_nic_cfg->rx_itr = hw_atl_b0_timers_table_rx_ + [speed_index][1] * 2; + + itr_tx |= hw_atl_b0_timers_table_tx_ + [speed_index][0] << 0x8U; + itr_tx |= hw_atl_b0_timers_table_tx_ + [speed_index][1] << 0x10U; + + itr_rx |= hw_atl_b0_timers_table_rx_ + [speed_index][0] << 0x8U; + itr_rx |= hw_atl_b0_timers_table_rx_ + [speed_index][1] << 0x10U; } - } else { + break; + case AQ_CFG_INTERRUPT_MODERATION_OFF: tdm_tx_desc_wr_wb_irq_en_set(self, 1U); tdm_tdm_intr_moder_en_set(self, 0U); rdm_rx_desc_wr_wb_irq_en_set(self, 1U); rdm_rdm_intr_moder_en_set(self, 0U); - PHAL_ATLANTIC_B0->itr_tx = 0U; - PHAL_ATLANTIC_B0->itr_rx = 0U; + itr_tx = 0U; + itr_rx = 0U; + break; } for (i = HW_ATL_B0_RINGS_MAX; i--;) { - reg_tx_intr_moder_ctrl_set(self, - PHAL_ATLANTIC_B0->itr_tx, i); - reg_rx_intr_moder_ctrl_set(self, - PHAL_ATLANTIC_B0->itr_rx, i); + reg_tx_intr_moder_ctrl_set(self, itr_tx, i); + reg_rx_intr_moder_ctrl_set(self, itr_rx, i); } return aq_hw_err_from_flags(self); diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h index fcf89e25a773..9aa2c6edfca2 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h @@ -139,6 +139,9 @@ #define HW_ATL_B0_FW_VER_EXPECTED 0x01050006U +#define HW_ATL_INTR_MODER_MAX 0x1FF +#define HW_ATL_INTR_MODER_MIN 0xFF + /* Hardware tx descriptor */ struct __packed hw_atl_txd_s { u64 buf_addr; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h index 2218bdb605a7..c99cc690e425 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h @@ -131,8 +131,6 @@ struct __packed hw_atl_s { struct hw_atl_stats_s last_stats; struct hw_atl_stats_s curr_stats; u64 speed; - u32 itr_tx; - u32 itr_rx; unsigned int chip_features; u32 fw_ver_actual; atomic_t dpc; -- cgit v1.2.3 From 417a3ae4b14909439bb49790f90201f450399845 Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Thu, 19 Oct 2017 18:23:59 +0300 Subject: net: aquantia: Bad udp rate on default interrupt coalescing Default Tx rates cause very long ISR delays on Tx. 0xff is 510us delay, giving only ~ 2000 interrupts per seconds for Tx rings cleanup. With these settings udp tx rate was never higher than ~800Mbps on a single stream. Changing min delay to 0xF makes it way better with ~6Gbps TCP stream performance is almost unaffected by this change, since LSO optimizations play important role. CPU load is affected insignificantly by this change. Signed-off-by: Pavel Belous Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 11f7e71bf448..ec68c20efcbd 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -821,12 +821,12 @@ static int hw_atl_b0_hw_interrupt_moderation_set(struct aq_hw_s *self) itr_rx |= rx_max_timer << 0x10U; } else { static unsigned int hw_atl_b0_timers_table_tx_[][2] = { - {0xffU, 0xffU}, /* 10Gbit */ - {0xffU, 0x1ffU}, /* 5Gbit */ - {0xffU, 0x1ffU}, /* 5Gbit 5GS */ - {0xffU, 0x1ffU}, /* 2.5Gbit */ - {0xffU, 0x1ffU}, /* 1Gbit */ - {0xffU, 0x1ffU}, /* 100Mbit */ + {0xfU, 0xffU}, /* 10Gbit */ + {0xfU, 0x1ffU}, /* 5Gbit */ + {0xfU, 0x1ffU}, /* 5Gbit 5GS */ + {0xfU, 0x1ffU}, /* 2.5Gbit */ + {0xfU, 0x1ffU}, /* 1Gbit */ + {0xfU, 0x1ffU}, /* 100Mbit */ }; static unsigned int hw_atl_b0_timers_table_rx_[][2] = { -- cgit v1.2.3 From 8695a5395661fbb4a4f26c97f801f3800ae4754e Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 19 Oct 2017 09:03:52 -0700 Subject: bpf: devmap fix arithmetic overflow in bitmap_size calculation An integer overflow is possible in dev_map_bitmap_size() when calculating the BITS_TO_LONG logic which becomes, after macro replacement, (((n) + (d) - 1)/ (d)) where 'n' is a __u32 and 'd' is (8 * sizeof(long)). To avoid overflow cast to u64 before arithmetic. Reported-by: Richard Weinberger Acked-by: Daniel Borkmann Signed-off-by: John Fastabend Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/devmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 52e0548ba548..e745d6a88224 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -69,7 +69,7 @@ static LIST_HEAD(dev_map_list); static u64 dev_map_bitmap_size(const union bpf_attr *attr) { - return BITS_TO_LONGS(attr->max_entries) * sizeof(unsigned long); + return BITS_TO_LONGS((u64) attr->max_entries) * sizeof(unsigned long); } static struct bpf_map *dev_map_alloc(union bpf_attr *attr) -- cgit v1.2.3 From fb2a311a31d3457fe8c3ee16f5609877e2ead9f7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 21 Oct 2017 02:34:21 +0200 Subject: bpf: fix off by one for range markings with L{T, E} patterns During review I noticed that the current logic for direct packet access marking in check_cond_jmp_op() has an off by one for the upper right range border when marking in find_good_pkt_pointers() with BPF_JLT and BPF_JLE. It's not really harmful given access up to pkt_end is always safe, but we should nevertheless correct the range marking before it becomes ABI. If pkt_data' denotes a pkt_data derived pointer (pkt_data + X), then for pkt_data' < pkt_end in the true branch as well as for pkt_end <= pkt_data' in the false branch we mark the range with X although it should really be X - 1 in these cases. For example, X could be pkt_end - pkt_data, then when testing for pkt_data' < pkt_end the verifier simulation cannot deduce that a byte load of pkt_data' - 1 would succeed in this branch. Fixes: b4e432f1000a ("bpf: enable BPF_J{LT, LE, SLT, SLE} opcodes in verifier") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 20f3889c006e..49cb5ad14746 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2430,12 +2430,15 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) } static void find_good_pkt_pointers(struct bpf_verifier_state *state, - struct bpf_reg_state *dst_reg) + struct bpf_reg_state *dst_reg, + bool range_right_open) { struct bpf_reg_state *regs = state->regs, *reg; + u16 new_range; int i; - if (dst_reg->off < 0) + if (dst_reg->off < 0 || + (dst_reg->off == 0 && range_right_open)) /* This doesn't give us any range */ return; @@ -2446,9 +2449,13 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state, */ return; - /* LLVM can generate four kind of checks: + new_range = dst_reg->off; + if (range_right_open) + new_range--; + + /* Examples for register markings: * - * Type 1/2: + * pkt_data in dst register: * * r2 = r3; * r2 += 8; @@ -2465,7 +2472,7 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state, * r2=pkt(id=n,off=8,r=0) * r3=pkt(id=n,off=0,r=0) * - * Type 3/4: + * pkt_data in src register: * * r2 = r3; * r2 += 8; @@ -2483,7 +2490,9 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state, * r3=pkt(id=n,off=0,r=0) * * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8) - * so that range of bytes [r3, r3 + 8) is safe to access. + * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8) + * and [r3, r3 + 8-1) respectively is safe to access depending on + * the check. */ /* If our ids match, then we must have the same max_value. And we @@ -2494,14 +2503,14 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state, for (i = 0; i < MAX_BPF_REG; i++) if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id) /* keep the maximum range already checked */ - regs[i].range = max_t(u16, regs[i].range, dst_reg->off); + regs[i].range = max(regs[i].range, new_range); for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { if (state->stack_slot_type[i] != STACK_SPILL) continue; reg = &state->spilled_regs[i / BPF_REG_SIZE]; if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id) - reg->range = max_t(u16, reg->range, dst_reg->off); + reg->range = max(reg->range, new_range); } } @@ -2865,19 +2874,19 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT && dst_reg->type == PTR_TO_PACKET && regs[insn->src_reg].type == PTR_TO_PACKET_END) { - find_good_pkt_pointers(this_branch, dst_reg); + find_good_pkt_pointers(this_branch, dst_reg, false); } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT && dst_reg->type == PTR_TO_PACKET && regs[insn->src_reg].type == PTR_TO_PACKET_END) { - find_good_pkt_pointers(other_branch, dst_reg); + find_good_pkt_pointers(other_branch, dst_reg, true); } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE && dst_reg->type == PTR_TO_PACKET_END && regs[insn->src_reg].type == PTR_TO_PACKET) { - find_good_pkt_pointers(other_branch, ®s[insn->src_reg]); + find_good_pkt_pointers(other_branch, ®s[insn->src_reg], false); } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE && dst_reg->type == PTR_TO_PACKET_END && regs[insn->src_reg].type == PTR_TO_PACKET) { - find_good_pkt_pointers(this_branch, ®s[insn->src_reg]); + find_good_pkt_pointers(this_branch, ®s[insn->src_reg], true); } else if (is_pointer_value(env, insn->dst_reg)) { verbose("R%d pointer comparison prohibited\n", insn->dst_reg); return -EACCES; -- cgit v1.2.3 From 0fd4759c5515b7f2297d7fee5c45e5d9dd733001 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 21 Oct 2017 02:34:22 +0200 Subject: bpf: fix pattern matches for direct packet access Alexander had a test program with direct packet access, where the access test was in the form of data + X > data_end. In an unrelated change to the program LLVM decided to swap the branches and emitted code for the test in form of data + X <= data_end. We hadn't seen these being generated previously, thus verifier would reject the program. Therefore, fix up the verifier to detect all test cases, so we don't run into such issues in the future. Fixes: b4e432f1000a ("bpf: enable BPF_J{LT, LE, SLT, SLE} opcodes in verifier") Reported-by: Alexander Alemayhu Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 49cb5ad14746..c48ca2a34b5e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2874,18 +2874,42 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT && dst_reg->type == PTR_TO_PACKET && regs[insn->src_reg].type == PTR_TO_PACKET_END) { + /* pkt_data' > pkt_end */ find_good_pkt_pointers(this_branch, dst_reg, false); + } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT && + dst_reg->type == PTR_TO_PACKET_END && + regs[insn->src_reg].type == PTR_TO_PACKET) { + /* pkt_end > pkt_data' */ + find_good_pkt_pointers(other_branch, ®s[insn->src_reg], true); } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT && dst_reg->type == PTR_TO_PACKET && regs[insn->src_reg].type == PTR_TO_PACKET_END) { + /* pkt_data' < pkt_end */ find_good_pkt_pointers(other_branch, dst_reg, true); + } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT && + dst_reg->type == PTR_TO_PACKET_END && + regs[insn->src_reg].type == PTR_TO_PACKET) { + /* pkt_end < pkt_data' */ + find_good_pkt_pointers(this_branch, ®s[insn->src_reg], false); + } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE && + dst_reg->type == PTR_TO_PACKET && + regs[insn->src_reg].type == PTR_TO_PACKET_END) { + /* pkt_data' >= pkt_end */ + find_good_pkt_pointers(this_branch, dst_reg, true); } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE && dst_reg->type == PTR_TO_PACKET_END && regs[insn->src_reg].type == PTR_TO_PACKET) { + /* pkt_end >= pkt_data' */ find_good_pkt_pointers(other_branch, ®s[insn->src_reg], false); + } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE && + dst_reg->type == PTR_TO_PACKET && + regs[insn->src_reg].type == PTR_TO_PACKET_END) { + /* pkt_data' <= pkt_end */ + find_good_pkt_pointers(other_branch, dst_reg, false); } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE && dst_reg->type == PTR_TO_PACKET_END && regs[insn->src_reg].type == PTR_TO_PACKET) { + /* pkt_end <= pkt_data' */ find_good_pkt_pointers(this_branch, ®s[insn->src_reg], true); } else if (is_pointer_value(env, insn->dst_reg)) { verbose("R%d pointer comparison prohibited\n", insn->dst_reg); -- cgit v1.2.3 From b37242c773b21edcd566e3bf995fb91d06b9537a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 21 Oct 2017 02:34:23 +0200 Subject: bpf: add test cases to bpf selftests to cover all access tests Lets add test cases to cover really all possible direct packet access tests for good/bad access cases so we keep tracking them. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/test_verifier.c | 480 ++++++++++++++++++++++++++++ 1 file changed, 480 insertions(+) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 50e15cedbb7f..64ae21f64489 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -6671,6 +6671,486 @@ static struct bpf_test tests[] = { .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, + { + "XDP pkt read, pkt_end mangling, bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_end mangling, bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_3, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data' > pkt_end, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data' > pkt_end, bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_data' > pkt_end, bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_end > pkt_data', good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_end > pkt_data', bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_end > pkt_data', bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data' < pkt_end, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_data' < pkt_end, bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data' < pkt_end, bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_end < pkt_data', good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_end < pkt_data', bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_end < pkt_data', bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data' >= pkt_end, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_data' >= pkt_end, bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data' >= pkt_end, bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_end >= pkt_data', good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_end >= pkt_data', bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_end >= pkt_data', bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data' <= pkt_end, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data' <= pkt_end, bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_data' <= pkt_end, bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_end <= pkt_data', good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_end <= pkt_data', bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_end <= pkt_data', bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, }; static int probe_filter_length(const struct bpf_insn *fp) -- cgit v1.2.3 From 54d431176429e9cf064461589e5174349a9f73da Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 19 Oct 2017 12:40:39 -0400 Subject: sock: correct sk_wmem_queued accounting on efault in tcp zerocopy Syzkaller hits WARN_ON(sk->sk_wmem_queued) in sk_stream_kill_queues after triggering an EFAULT in __zerocopy_sg_from_iter. On this error, skb_zerocopy_stream_iter resets the skb to its state before the operation with __pskb_trim. It cannot kfree_skb like datagram callers, as the skb may have data from a previous send call. __pskb_trim calls skb_condense for unowned skbs, which adjusts their truesize. These tcp skbuffs are owned and their truesize must add up to sk_wmem_queued. But they match because their skb->sk is NULL until tcp_transmit_skb. Temporarily set skb->sk when calling __pskb_trim to signal that the skbuffs are owned and avoid the skb_condense path. Fixes: 52267790ef52 ("sock: add MSG_ZEROCOPY") Signed-off-by: Willem de Bruijn Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/skbuff.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e62476beee95..24656076906d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1124,9 +1124,13 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, err = __zerocopy_sg_from_iter(sk, skb, &msg->msg_iter, len); if (err == -EFAULT || (err == -EMSGSIZE && skb->len == orig_len)) { + struct sock *save_sk = skb->sk; + /* Streams do not free skb on error. Reset to prev state. */ msg->msg_iter = orig_iter; + skb->sk = sk; ___pskb_trim(skb, orig_len); + skb->sk = save_sk; return err; } -- cgit v1.2.3 From 66c54517540cedf5a22911c6b7f5c7d8b5d1e1be Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 19 Oct 2017 20:17:32 +0300 Subject: net: bridge: fix returning of vlan range op errors When vlan tunnels were introduced, vlan range errors got silently dropped and instead 0 was returned always. Restore the previous behaviour and return errors to user-space. Fixes: efa5356b0d97 ("bridge: per vlan dst_metadata netlink support") Signed-off-by: Nikolay Aleksandrov Acked-by: Roopa Prabhu Signed-off-by: David S. Miller --- net/bridge/br_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 3bc890716c89..de2152730809 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -573,7 +573,7 @@ static int br_process_vlan_info(struct net_bridge *br, } *vinfo_last = NULL; - return 0; + return err; } return br_vlan_info(br, p, cmd, vinfo_curr); -- cgit v1.2.3 From 1b5f962e71bfad6284574655c406597535c3ea7a Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Thu, 19 Oct 2017 15:00:29 -0400 Subject: soreuseport: fix initialization race Syzkaller stumbled upon a way to trigger WARNING: CPU: 1 PID: 13881 at net/core/sock_reuseport.c:41 reuseport_alloc+0x306/0x3b0 net/core/sock_reuseport.c:39 There are two initialization paths for the sock_reuseport structure in a socket: Through the udp/tcp bind paths of SO_REUSEPORT sockets or through SO_ATTACH_REUSEPORT_[CE]BPF before bind. The existing implementation assumedthat the socket lock protected both of these paths when it actually only protects the SO_ATTACH_REUSEPORT path. Syzkaller triggered this double allocation by running these paths concurrently. This patch moves the check for double allocation into the reuseport_alloc function which is protected by a global spin lock. Fixes: e32ea7e74727 ("soreuseport: fast reuseport UDP socket selection") Fixes: c125e80b8868 ("soreuseport: fast reuseport TCP socket selection") Signed-off-by: Craig Gallek Signed-off-by: David S. Miller --- net/core/sock_reuseport.c | 12 +++++++++--- net/ipv4/inet_hashtables.c | 5 +---- net/ipv4/udp.c | 5 +---- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index eed1ebf7f29d..b1e0dbea1e8c 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -36,9 +36,14 @@ int reuseport_alloc(struct sock *sk) * soft irq of receive path or setsockopt from process context */ spin_lock_bh(&reuseport_lock); - WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb, - lockdep_is_held(&reuseport_lock)), - "multiple allocations for the same socket"); + + /* Allocation attempts can occur concurrently via the setsockopt path + * and the bind/hash path. Nothing to do when we lose the race. + */ + if (rcu_dereference_protected(sk->sk_reuseport_cb, + lockdep_is_held(&reuseport_lock))) + goto out; + reuse = __reuseport_alloc(INIT_SOCKS); if (!reuse) { spin_unlock_bh(&reuseport_lock); @@ -49,6 +54,7 @@ int reuseport_alloc(struct sock *sk) reuse->num_socks = 1; rcu_assign_pointer(sk->sk_reuseport_cb, reuse); +out: spin_unlock_bh(&reuseport_lock); return 0; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 597bb4cfe805..e7d15fb0d94d 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -456,10 +456,7 @@ static int inet_reuseport_add_sock(struct sock *sk, return reuseport_add_sock(sk, sk2); } - /* Initial allocation may have already happened via setsockopt */ - if (!rcu_access_pointer(sk->sk_reuseport_cb)) - return reuseport_alloc(sk); - return 0; + return reuseport_alloc(sk); } int __inet_hash(struct sock *sk, struct sock *osk) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 806b298a3bdd..ebfbccae62fd 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -231,10 +231,7 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot) } } - /* Initial allocation may have already happened via setsockopt */ - if (!rcu_access_pointer(sk->sk_reuseport_cb)) - return reuseport_alloc(sk); - return 0; + return reuseport_alloc(sk); } /** -- cgit v1.2.3 From 95491e3cf37840c518d81e1a3a6a8ef554e03c54 Mon Sep 17 00:00:00 2001 From: Niklas Söderlund Date: Fri, 20 Oct 2017 01:32:08 +0200 Subject: net: ethtool: remove error check for legacy setting transceiver type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 9cab88726929605 ("net: ethtool: Add back transceiver type") restores the transceiver type to struct ethtool_link_settings and convert_link_ksettings_to_legacy_settings() but forgets to remove the error check for the same in convert_legacy_settings_to_link_ksettings(). This prevents older versions of ethtool to change link settings. # ethtool --version ethtool version 3.16 # ethtool -s eth0 autoneg on speed 100 duplex full Cannot set new settings: Invalid argument not setting speed not setting duplex not setting autoneg While newer versions of ethtool works. # ethtool --version ethtool version 4.10 # ethtool -s eth0 autoneg on speed 100 duplex full [ 57.703268] sh-eth ee700000.ethernet eth0: Link is Down [ 59.618227] sh-eth ee700000.ethernet eth0: Link is Up - 100Mbps/Full - flow control rx/tx Fixes: 19cab88726929605 ("net: ethtool: Add back transceiver type") Signed-off-by: Niklas Söderlund Reported-by: Renjith R V Tested-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- net/core/ethtool.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 3228411ada0f..9a9a3d77e327 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -436,7 +436,7 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, EXPORT_SYMBOL(ethtool_convert_link_mode_to_legacy_u32); /* return false if legacy contained non-0 deprecated fields - * transceiver/maxtxpkt/maxrxpkt. rest of ksettings always updated + * maxtxpkt/maxrxpkt. rest of ksettings always updated */ static bool convert_legacy_settings_to_link_ksettings( @@ -451,8 +451,7 @@ convert_legacy_settings_to_link_ksettings( * deprecated legacy fields, and they should not use * %ETHTOOL_GLINKSETTINGS/%ETHTOOL_SLINKSETTINGS */ - if (legacy_settings->transceiver || - legacy_settings->maxtxpkt || + if (legacy_settings->maxtxpkt || legacy_settings->maxrxpkt) retval = false; -- cgit v1.2.3 From 14aefd9011f14ecf1f821fcd1754f009f4ab3df9 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 20 Oct 2017 09:16:15 +0200 Subject: mlxsw: reg: Add Tunneling IPinIP General Configuration Register The TIGCR register is used for setting up the IPinIP Tunnel configuration. Fixes: ee954d1a91b2 ("mlxsw: spectrum_router: Support GRE tunnels") Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index cc27c5de5a1d..4afc8486eb9a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -6401,6 +6401,36 @@ static inline void mlxsw_reg_mgpc_pack(char *payload, u32 counter_index, mlxsw_reg_mgpc_opcode_set(payload, opcode); } +/* TIGCR - Tunneling IPinIP General Configuration Register + * ------------------------------------------------------- + * The TIGCR register is used for setting up the IPinIP Tunnel configuration. + */ +#define MLXSW_REG_TIGCR_ID 0xA801 +#define MLXSW_REG_TIGCR_LEN 0x10 + +MLXSW_REG_DEFINE(tigcr, MLXSW_REG_TIGCR_ID, MLXSW_REG_TIGCR_LEN); + +/* reg_tigcr_ipip_ttlc + * For IPinIP Tunnel encapsulation: whether to copy the ttl from the packet + * header. + * Access: RW + */ +MLXSW_ITEM32(reg, tigcr, ttlc, 0x04, 8, 1); + +/* reg_tigcr_ipip_ttl_uc + * The TTL for IPinIP Tunnel encapsulation of unicast packets if + * reg_tigcr_ipip_ttlc is unset. + * Access: RW + */ +MLXSW_ITEM32(reg, tigcr, ttl_uc, 0x04, 0, 8); + +static inline void mlxsw_reg_tigcr_pack(char *payload, bool ttlc, u8 ttl_uc) +{ + MLXSW_REG_ZERO(tigcr, payload); + mlxsw_reg_tigcr_ttlc_set(payload, ttlc); + mlxsw_reg_tigcr_ttl_uc_set(payload, ttl_uc); +} + /* SBPR - Shared Buffer Pools Register * ----------------------------------- * The SBPR configures and retrieves the shared buffer pools and configuration. @@ -6881,6 +6911,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(mcc), MLXSW_REG(mcda), MLXSW_REG(mgpc), + MLXSW_REG(tigcr), MLXSW_REG(sbpr), MLXSW_REG(sbcm), MLXSW_REG(sbpm), -- cgit v1.2.3 From dcbda2820ff91a692338fed2c99bb9b1af37a05a Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 20 Oct 2017 09:16:16 +0200 Subject: mlxsw: spectrum_router: Configure TIGCR on init Spectrum tunnels do not default to ttl of "inherit" like the Linux ones do. Configure TIGCR on router init so that the TTL of tunnel packets is copied from the overlay packets. Fixes: ee954d1a91b2 ("mlxsw: spectrum_router: Support GRE tunnels") Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index c16718d296d3..5189022a1c8c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -5896,11 +5896,20 @@ static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp) kfree(mlxsw_sp->router->rifs); } +static int +mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp) +{ + char tigcr_pl[MLXSW_REG_TIGCR_LEN]; + + mlxsw_reg_tigcr_pack(tigcr_pl, true, 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl); +} + static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp) { mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr; INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list); - return 0; + return mlxsw_sp_ipip_config_tigcr(mlxsw_sp); } static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp) -- cgit v1.2.3 From 9c8080d068b861a80d430ba0b42d8c9b07366b66 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Fri, 20 Oct 2017 14:37:34 +0100 Subject: net: stmmac: Add missing call to dev_kfree_skb() When RX HW timestamp is enabled and a frame is discarded we are not freeing the skb but instead only setting to NULL the entry. Add a call to dev_kfree_skb_any() so that skb entry is correctly freed. Signed-off-by: Jose Abreu Cc: David S. Miller Cc: Joao Pinto Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 1763e48c84e2..d67638c7078e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3333,6 +3333,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) * them in stmmac_rx_refill() function so that * device can reuse it. */ + dev_kfree_skb_any(rx_q->rx_skbuff[entry]); rx_q->rx_skbuff[entry] = NULL; dma_unmap_single(priv->device, rx_q->rx_skbuff_dma[entry], -- cgit v1.2.3 From 98870943a561c64aca22d10820a881aa4fa728e4 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Fri, 20 Oct 2017 14:37:35 +0100 Subject: net: stmmac: Fix stmmac_get_rx_hwtstamp() When using GMAC4 the valid timestamp is from CTX next desc but we are passing the previous desc to get_rx_timestamp_status() callback. Fix this and while at it rework a little bit the function logic. Signed-off-by: Jose Abreu Cc: David S. Miller Cc: Joao Pinto Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index d67638c7078e..284c10720daf 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -473,19 +473,18 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p, struct dma_desc *np, struct sk_buff *skb) { struct skb_shared_hwtstamps *shhwtstamp = NULL; + struct dma_desc *desc = p; u64 ns; if (!priv->hwts_rx_en) return; + /* For GMAC4, the valid timestamp is from CTX next desc. */ + if (priv->plat->has_gmac4) + desc = np; /* Check if timestamp is available */ - if (priv->hw->desc->get_rx_timestamp_status(p, priv->adv_ts)) { - /* For GMAC4, the valid timestamp is from CTX next desc. */ - if (priv->plat->has_gmac4) - ns = priv->hw->desc->get_timestamp(np, priv->adv_ts); - else - ns = priv->hw->desc->get_timestamp(p, priv->adv_ts); - + if (priv->hw->desc->get_rx_timestamp_status(desc, priv->adv_ts)) { + ns = priv->hw->desc->get_timestamp(desc, priv->adv_ts); netdev_dbg(priv->dev, "get valid RX hw timestamp %llu\n", ns); shhwtstamp = skb_hwtstamps(skb); memset(shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps)); -- cgit v1.2.3 From 9454360dec1c96800576693955b92a2792b74def Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Fri, 20 Oct 2017 14:37:36 +0100 Subject: net: stmmac: Prevent infinite loop in get_rx_timestamp_status() Prevent infinite loop by correctly setting the loop condition to break when i == 10. Signed-off-by: Jose Abreu Cc: David S. Miller Cc: Joao Pinto Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index e0ef02f9503b..4b286e27c4ca 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -275,7 +275,7 @@ static int dwmac4_wrback_get_rx_timestamp_status(void *desc, u32 ats) goto exit; i++; - } while ((ret == 1) || (i < 10)); + } while ((ret == 1) && (i < 10)); if (i == 10) ret = -EBUSY; -- cgit v1.2.3 From 6cb3ece9685f78f9b288dd2afea58c35784e40b8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 20 Oct 2017 17:01:22 +0100 Subject: rxrpc: Don't release call mutex on error pointer Don't release call mutex at the end of rxrpc_kernel_begin_call() if the call pointer actually holds an error value. Fixes: 540b1c48c37a ("rxrpc: Fix deadlock between call creation and sendmsg/recvmsg") Reported-by: Marc Dionne Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/af_rxrpc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index fb17552fd292..4b0a8288c98a 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -308,10 +308,11 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, tx_total_len, gfp); /* The socket has been unlocked. */ - if (!IS_ERR(call)) + if (!IS_ERR(call)) { call->notify_rx = notify_rx; + mutex_unlock(&call->user_mutex); + } - mutex_unlock(&call->user_mutex); _leave(" = %p", call); return call; } -- cgit v1.2.3 From 7433a8d6fa60a2f6910206fa10f3550c8f11f45f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 20 Oct 2017 12:15:52 -0700 Subject: textsearch: fix typos in library helpers Fix spellos (typos) in textsearch library helpers. Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- lib/ts_fsm.c | 2 +- lib/ts_kmp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/ts_fsm.c b/lib/ts_fsm.c index 5696a35184e4..69557c74ef9f 100644 --- a/lib/ts_fsm.c +++ b/lib/ts_fsm.c @@ -11,7 +11,7 @@ * ========================================================================== * * A finite state machine consists of n states (struct ts_fsm_token) - * representing the pattern as a finite automation. The data is read + * representing the pattern as a finite automaton. The data is read * sequentially on an octet basis. Every state token specifies the number * of recurrences and the type of value accepted which can be either a * specific character or ctype based set of characters. The available diff --git a/lib/ts_kmp.c b/lib/ts_kmp.c index 632f783e65f1..ffbe66cbb0ed 100644 --- a/lib/ts_kmp.c +++ b/lib/ts_kmp.c @@ -27,7 +27,7 @@ * * [1] Cormen, Leiserson, Rivest, Stein * Introdcution to Algorithms, 2nd Edition, MIT Press - * [2] See finite automation theory + * [2] See finite automaton theory */ #include -- cgit v1.2.3 From 66bdede495c71da9c5ce18542976fae53642880b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 18 Oct 2017 13:54:03 +0200 Subject: of_mdio: Fix broken PHY IRQ in case of probe deferral If an Ethernet PHY is initialized before the interrupt controller it is connected to, a message like the following is printed: irq: no irq domain found for /interrupt-controller@e61c0000 ! However, the actual error is ignored, leading to a non-functional (POLL) PHY interrupt later: Micrel KSZ8041RNLI ee700000.ethernet-ffffffff:01: attached PHY driver [Micrel KSZ8041RNLI] (mii_bus:phy_addr=ee700000.ethernet-ffffffff:01, irq=POLL) Depending on whether the PHY driver will fall back to polling, Ethernet may or may not work. To fix this: 1. Switch of_mdiobus_register_phy() from irq_of_parse_and_map() to of_irq_get(). Unlike the former, the latter returns -EPROBE_DEFER if the interrupt controller is not yet available, so this condition can be detected. Other errors are handled the same as before, i.e. use the passed mdio->irq[addr] as interrupt. 2. Propagate and handle errors from of_mdiobus_register_phy() and of_mdiobus_register_device(). Signed-off-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- drivers/of/of_mdio.c | 39 +++++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index d94dd8b77abd..98258583abb0 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -44,7 +44,7 @@ static int of_get_phy_id(struct device_node *device, u32 *phy_id) return -EINVAL; } -static void of_mdiobus_register_phy(struct mii_bus *mdio, +static int of_mdiobus_register_phy(struct mii_bus *mdio, struct device_node *child, u32 addr) { struct phy_device *phy; @@ -60,9 +60,13 @@ static void of_mdiobus_register_phy(struct mii_bus *mdio, else phy = get_phy_device(mdio, addr, is_c45); if (IS_ERR(phy)) - return; + return PTR_ERR(phy); - rc = irq_of_parse_and_map(child, 0); + rc = of_irq_get(child, 0); + if (rc == -EPROBE_DEFER) { + phy_device_free(phy); + return rc; + } if (rc > 0) { phy->irq = rc; mdio->irq[addr] = rc; @@ -84,22 +88,23 @@ static void of_mdiobus_register_phy(struct mii_bus *mdio, if (rc) { phy_device_free(phy); of_node_put(child); - return; + return rc; } dev_dbg(&mdio->dev, "registered phy %s at address %i\n", child->name, addr); + return 0; } -static void of_mdiobus_register_device(struct mii_bus *mdio, - struct device_node *child, u32 addr) +static int of_mdiobus_register_device(struct mii_bus *mdio, + struct device_node *child, u32 addr) { struct mdio_device *mdiodev; int rc; mdiodev = mdio_device_create(mdio, addr); if (IS_ERR(mdiodev)) - return; + return PTR_ERR(mdiodev); /* Associate the OF node with the device structure so it * can be looked up later. @@ -112,11 +117,12 @@ static void of_mdiobus_register_device(struct mii_bus *mdio, if (rc) { mdio_device_free(mdiodev); of_node_put(child); - return; + return rc; } dev_dbg(&mdio->dev, "registered mdio device %s at address %i\n", child->name, addr); + return 0; } /* The following is a list of PHY compatible strings which appear in @@ -219,9 +225,11 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) } if (of_mdiobus_child_is_phy(child)) - of_mdiobus_register_phy(mdio, child, addr); + rc = of_mdiobus_register_phy(mdio, child, addr); else - of_mdiobus_register_device(mdio, child, addr); + rc = of_mdiobus_register_device(mdio, child, addr); + if (rc) + goto unregister; } if (!scanphys) @@ -242,12 +250,19 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) dev_info(&mdio->dev, "scan phy %s at address %i\n", child->name, addr); - if (of_mdiobus_child_is_phy(child)) - of_mdiobus_register_phy(mdio, child, addr); + if (of_mdiobus_child_is_phy(child)) { + rc = of_mdiobus_register_phy(mdio, child, addr); + if (rc) + goto unregister; + } } } return 0; + +unregister: + mdiobus_unregister(mdio); + return rc; } EXPORT_SYMBOL(of_mdiobus_register); -- cgit v1.2.3 From 864e2a1f8aac05effac6063ce316b480facb46ff Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 21 Oct 2017 12:26:23 -0700 Subject: ipv6: flowlabel: do not leave opt->tot_len with garbage When syzkaller team brought us a C repro for the crash [1] that had been reported many times in the past, I finally could find the root cause. If FlowLabel info is merged by fl6_merge_options(), we leave part of the opt_space storage provided by udp/raw/l2tp with random value in opt_space.tot_len, unless a control message was provided at sendmsg() time. Then ip6_setup_cork() would use this random value to perform a kzalloc() call. Undefined behavior and crashes. Fix is to properly set tot_len in fl6_merge_options() At the same time, we can also avoid consuming memory and cpu cycles to clear it, if every option is copied via a kmemdup(). This is the change in ip6_setup_cork(). [1] kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 0 PID: 6613 Comm: syz-executor0 Not tainted 4.14.0-rc4+ #127 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 task: ffff8801cb64a100 task.stack: ffff8801cc350000 RIP: 0010:ip6_setup_cork+0x274/0x15c0 net/ipv6/ip6_output.c:1168 RSP: 0018:ffff8801cc357550 EFLAGS: 00010203 RAX: dffffc0000000000 RBX: ffff8801cc357748 RCX: 0000000000000010 RDX: 0000000000000002 RSI: ffffffff842bd1d9 RDI: 0000000000000014 RBP: ffff8801cc357620 R08: ffff8801cb17f380 R09: ffff8801cc357b10 R10: ffff8801cb64a100 R11: 0000000000000000 R12: ffff8801cc357ab0 R13: ffff8801cc357b10 R14: 0000000000000000 R15: ffff8801c3bbf0c0 FS: 00007f9c5c459700(0000) GS:ffff8801db200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020324000 CR3: 00000001d1cf2000 CR4: 00000000001406f0 DR0: 0000000020001010 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000600 Call Trace: ip6_make_skb+0x282/0x530 net/ipv6/ip6_output.c:1729 udpv6_sendmsg+0x2769/0x3380 net/ipv6/udp.c:1340 inet_sendmsg+0x11f/0x5e0 net/ipv4/af_inet.c:762 sock_sendmsg_nosec net/socket.c:633 [inline] sock_sendmsg+0xca/0x110 net/socket.c:643 SYSC_sendto+0x358/0x5a0 net/socket.c:1750 SyS_sendto+0x40/0x50 net/socket.c:1718 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x4520a9 RSP: 002b:00007f9c5c458c08 EFLAGS: 00000216 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000718000 RCX: 00000000004520a9 RDX: 0000000000000001 RSI: 0000000020fd1000 RDI: 0000000000000016 RBP: 0000000000000086 R08: 0000000020e0afe4 R09: 000000000000001c R10: 0000000000000000 R11: 0000000000000216 R12: 00000000004bb1ee R13: 00000000ffffffff R14: 0000000000000016 R15: 0000000000000029 Code: e0 07 83 c0 03 38 d0 7c 08 84 d2 0f 85 ea 0f 00 00 48 8d 79 04 48 b8 00 00 00 00 00 fc ff df 45 8b 74 24 04 48 89 fa 48 c1 ea 03 <0f> b6 14 02 48 89 f8 83 e0 07 83 c0 03 38 d0 7c 08 84 d2 0f 85 RIP: ip6_setup_cork+0x274/0x15c0 net/ipv6/ip6_output.c:1168 RSP: ffff8801cc357550 Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Signed-off-by: David S. Miller --- net/ipv6/ip6_flowlabel.c | 1 + net/ipv6/ip6_output.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 8081bafe441b..15535ee327c5 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -315,6 +315,7 @@ struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space, } opt_space->dst1opt = fopt->dst1opt; opt_space->opt_flen = fopt->opt_flen; + opt_space->tot_len = fopt->tot_len; return opt_space; } EXPORT_SYMBOL_GPL(fl6_merge_options); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 43ca864327c7..5110a418cc4d 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1161,11 +1161,11 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, if (WARN_ON(v6_cork->opt)) return -EINVAL; - v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation); + v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation); if (unlikely(!v6_cork->opt)) return -ENOBUFS; - v6_cork->opt->tot_len = opt->tot_len; + v6_cork->opt->tot_len = sizeof(*opt); v6_cork->opt->opt_flen = opt->opt_flen; v6_cork->opt->opt_nflen = opt->opt_nflen; -- cgit v1.2.3 From 8d5f4b07174976c55a5f5d6967777373c6826944 Mon Sep 17 00:00:00 2001 From: Bernd Edlinger Date: Sat, 21 Oct 2017 06:51:30 +0000 Subject: stmmac: Don't access tx_q->dirty_tx before netif_tx_lock This is the possible reason for different hard to reproduce problems on my ARMv7-SMP test system. The symptoms are in recent kernels imprecise external aborts, and in older kernels various kinds of network stalls and unexpected page allocation failures. My testing indicates that the trouble started between v4.5 and v4.6 and prevails up to v4.14. Using the dirty_tx before acquiring the spin lock is clearly wrong and was first introduced with v4.6. Fixes: e3ad57c96715 ("stmmac: review RX/TX ring management") Signed-off-by: Bernd Edlinger Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 284c10720daf..16bd50929084 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1799,12 +1799,13 @@ static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue) { struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; unsigned int bytes_compl = 0, pkts_compl = 0; - unsigned int entry = tx_q->dirty_tx; + unsigned int entry; netif_tx_lock(priv->dev); priv->xstats.tx_clean++; + entry = tx_q->dirty_tx; while (entry != tx_q->cur_tx) { struct sk_buff *skb = tx_q->tx_skbuff[entry]; struct dma_desc *p; -- cgit v1.2.3