diff options
Diffstat (limited to 'drivers/net/ethernet/hisilicon/hns3/hns3_enet.c')
-rw-r--r-- | drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 1225 |
1 files changed, 960 insertions, 265 deletions
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 026558f8e04b..cdb5f14fb6bc 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -53,6 +53,19 @@ static int debug = -1; module_param(debug, int, 0); MODULE_PARM_DESC(debug, " Network interface message level setting"); +static unsigned int tx_spare_buf_size; +module_param(tx_spare_buf_size, uint, 0400); +MODULE_PARM_DESC(tx_spare_buf_size, "Size used to allocate tx spare buffer"); + +static unsigned int tx_sgl = 1; +module_param(tx_sgl, uint, 0600); +MODULE_PARM_DESC(tx_sgl, "Minimum number of frags when using dma_map_sg() to optimize the IOMMU mapping"); + +#define HNS3_SGL_SIZE(nfrag) (sizeof(struct scatterlist) * (nfrag) + \ + sizeof(struct sg_table)) +#define HNS3_MAX_SGL_SIZE ALIGN(HNS3_SGL_SIZE(HNS3_MAX_TSO_BD_NUM),\ + dma_get_cache_alignment()) + #define DEFAULT_MSG_LEVEL (NETIF_MSG_PROBE | NETIF_MSG_LINK | \ NETIF_MSG_IFDOWN | NETIF_MSG_IFUP) @@ -91,11 +104,284 @@ static const struct pci_device_id hns3_pci_tbl[] = { }; MODULE_DEVICE_TABLE(pci, hns3_pci_tbl); +#define HNS3_RX_PTYPE_ENTRY(ptype, l, s, t) \ + { ptype, \ + l, \ + CHECKSUM_##s, \ + HNS3_L3_TYPE_##t, \ + 1 } + +#define HNS3_RX_PTYPE_UNUSED_ENTRY(ptype) \ + { ptype, 0, CHECKSUM_NONE, HNS3_L3_TYPE_PARSE_FAIL, 0 } + +static const struct hns3_rx_ptype hns3_rx_ptype_tbl[] = { + HNS3_RX_PTYPE_UNUSED_ENTRY(0), + HNS3_RX_PTYPE_ENTRY(1, 0, COMPLETE, ARP), + HNS3_RX_PTYPE_ENTRY(2, 0, COMPLETE, RARP), + HNS3_RX_PTYPE_ENTRY(3, 0, COMPLETE, LLDP), + HNS3_RX_PTYPE_ENTRY(4, 0, COMPLETE, PARSE_FAIL), + HNS3_RX_PTYPE_ENTRY(5, 0, COMPLETE, PARSE_FAIL), + HNS3_RX_PTYPE_ENTRY(6, 0, COMPLETE, PARSE_FAIL), + HNS3_RX_PTYPE_ENTRY(7, 0, COMPLETE, CNM), + HNS3_RX_PTYPE_ENTRY(8, 0, NONE, PARSE_FAIL), + HNS3_RX_PTYPE_UNUSED_ENTRY(9), + HNS3_RX_PTYPE_UNUSED_ENTRY(10), + HNS3_RX_PTYPE_UNUSED_ENTRY(11), + HNS3_RX_PTYPE_UNUSED_ENTRY(12), + HNS3_RX_PTYPE_UNUSED_ENTRY(13), + HNS3_RX_PTYPE_UNUSED_ENTRY(14), + HNS3_RX_PTYPE_UNUSED_ENTRY(15), + HNS3_RX_PTYPE_ENTRY(16, 0, COMPLETE, PARSE_FAIL), + HNS3_RX_PTYPE_ENTRY(17, 0, COMPLETE, IPV4), + HNS3_RX_PTYPE_ENTRY(18, 0, COMPLETE, IPV4), + HNS3_RX_PTYPE_ENTRY(19, 0, UNNECESSARY, IPV4), + HNS3_RX_PTYPE_ENTRY(20, 0, UNNECESSARY, IPV4), + HNS3_RX_PTYPE_ENTRY(21, 0, NONE, IPV4), + HNS3_RX_PTYPE_ENTRY(22, 0, UNNECESSARY, IPV4), + HNS3_RX_PTYPE_ENTRY(23, 0, NONE, IPV4), + HNS3_RX_PTYPE_ENTRY(24, 0, NONE, IPV4), + HNS3_RX_PTYPE_ENTRY(25, 0, UNNECESSARY, IPV4), + HNS3_RX_PTYPE_UNUSED_ENTRY(26), + HNS3_RX_PTYPE_UNUSED_ENTRY(27), + HNS3_RX_PTYPE_UNUSED_ENTRY(28), + HNS3_RX_PTYPE_ENTRY(29, 0, COMPLETE, PARSE_FAIL), + HNS3_RX_PTYPE_ENTRY(30, 0, COMPLETE, PARSE_FAIL), + HNS3_RX_PTYPE_ENTRY(31, 0, COMPLETE, IPV4), + HNS3_RX_PTYPE_ENTRY(32, 0, COMPLETE, IPV4), + HNS3_RX_PTYPE_ENTRY(33, 1, UNNECESSARY, IPV4), + HNS3_RX_PTYPE_ENTRY(34, 1, UNNECESSARY, IPV4), + HNS3_RX_PTYPE_ENTRY(35, 1, UNNECESSARY, IPV4), + HNS3_RX_PTYPE_ENTRY(36, 0, COMPLETE, IPV4), + HNS3_RX_PTYPE_ENTRY(37, 0, COMPLETE, IPV4), + HNS3_RX_PTYPE_UNUSED_ENTRY(38), + HNS3_RX_PTYPE_ENTRY(39, 0, COMPLETE, IPV6), + HNS3_RX_PTYPE_ENTRY(40, 0, COMPLETE, IPV6), + HNS3_RX_PTYPE_ENTRY(41, 1, UNNECESSARY, IPV6), + HNS3_RX_PTYPE_ENTRY(42, 1, UNNECESSARY, IPV6), + HNS3_RX_PTYPE_ENTRY(43, 1, UNNECESSARY, IPV6), + HNS3_RX_PTYPE_ENTRY(44, 0, COMPLETE, IPV6), + HNS3_RX_PTYPE_ENTRY(45, 0, COMPLETE, IPV6), + HNS3_RX_PTYPE_UNUSED_ENTRY(46), + HNS3_RX_PTYPE_UNUSED_ENTRY(47), + HNS3_RX_PTYPE_UNUSED_ENTRY(48), + HNS3_RX_PTYPE_UNUSED_ENTRY(49), + HNS3_RX_PTYPE_UNUSED_ENTRY(50), + HNS3_RX_PTYPE_UNUSED_ENTRY(51), + HNS3_RX_PTYPE_UNUSED_ENTRY(52), + HNS3_RX_PTYPE_UNUSED_ENTRY(53), + HNS3_RX_PTYPE_UNUSED_ENTRY(54), + HNS3_RX_PTYPE_UNUSED_ENTRY(55), + HNS3_RX_PTYPE_UNUSED_ENTRY(56), + HNS3_RX_PTYPE_UNUSED_ENTRY(57), + HNS3_RX_PTYPE_UNUSED_ENTRY(58), + HNS3_RX_PTYPE_UNUSED_ENTRY(59), + HNS3_RX_PTYPE_UNUSED_ENTRY(60), + HNS3_RX_PTYPE_UNUSED_ENTRY(61), + HNS3_RX_PTYPE_UNUSED_ENTRY(62), + HNS3_RX_PTYPE_UNUSED_ENTRY(63), + HNS3_RX_PTYPE_UNUSED_ENTRY(64), + HNS3_RX_PTYPE_UNUSED_ENTRY(65), + HNS3_RX_PTYPE_UNUSED_ENTRY(66), + HNS3_RX_PTYPE_UNUSED_ENTRY(67), + HNS3_RX_PTYPE_UNUSED_ENTRY(68), + HNS3_RX_PTYPE_UNUSED_ENTRY(69), + HNS3_RX_PTYPE_UNUSED_ENTRY(70), + HNS3_RX_PTYPE_UNUSED_ENTRY(71), + HNS3_RX_PTYPE_UNUSED_ENTRY(72), + HNS3_RX_PTYPE_UNUSED_ENTRY(73), + HNS3_RX_PTYPE_UNUSED_ENTRY(74), + HNS3_RX_PTYPE_UNUSED_ENTRY(75), + HNS3_RX_PTYPE_UNUSED_ENTRY(76), + HNS3_RX_PTYPE_UNUSED_ENTRY(77), + HNS3_RX_PTYPE_UNUSED_ENTRY(78), + HNS3_RX_PTYPE_UNUSED_ENTRY(79), + HNS3_RX_PTYPE_UNUSED_ENTRY(80), + HNS3_RX_PTYPE_UNUSED_ENTRY(81), + HNS3_RX_PTYPE_UNUSED_ENTRY(82), + HNS3_RX_PTYPE_UNUSED_ENTRY(83), + HNS3_RX_PTYPE_UNUSED_ENTRY(84), + HNS3_RX_PTYPE_UNUSED_ENTRY(85), + HNS3_RX_PTYPE_UNUSED_ENTRY(86), + HNS3_RX_PTYPE_UNUSED_ENTRY(87), + HNS3_RX_PTYPE_UNUSED_ENTRY(88), + HNS3_RX_PTYPE_UNUSED_ENTRY(89), + HNS3_RX_PTYPE_UNUSED_ENTRY(90), + HNS3_RX_PTYPE_UNUSED_ENTRY(91), + HNS3_RX_PTYPE_UNUSED_ENTRY(92), + HNS3_RX_PTYPE_UNUSED_ENTRY(93), + HNS3_RX_PTYPE_UNUSED_ENTRY(94), + HNS3_RX_PTYPE_UNUSED_ENTRY(95), + HNS3_RX_PTYPE_UNUSED_ENTRY(96), + HNS3_RX_PTYPE_UNUSED_ENTRY(97), + HNS3_RX_PTYPE_UNUSED_ENTRY(98), + HNS3_RX_PTYPE_UNUSED_ENTRY(99), + HNS3_RX_PTYPE_UNUSED_ENTRY(100), + HNS3_RX_PTYPE_UNUSED_ENTRY(101), + HNS3_RX_PTYPE_UNUSED_ENTRY(102), + HNS3_RX_PTYPE_UNUSED_ENTRY(103), + HNS3_RX_PTYPE_UNUSED_ENTRY(104), + HNS3_RX_PTYPE_UNUSED_ENTRY(105), + HNS3_RX_PTYPE_UNUSED_ENTRY(106), + HNS3_RX_PTYPE_UNUSED_ENTRY(107), + HNS3_RX_PTYPE_UNUSED_ENTRY(108), + HNS3_RX_PTYPE_UNUSED_ENTRY(109), + HNS3_RX_PTYPE_UNUSED_ENTRY(110), + HNS3_RX_PTYPE_ENTRY(111, 0, COMPLETE, IPV6), + HNS3_RX_PTYPE_ENTRY(112, 0, COMPLETE, IPV6), + HNS3_RX_PTYPE_ENTRY(113, 0, UNNECESSARY, IPV6), + HNS3_RX_PTYPE_ENTRY(114, 0, UNNECESSARY, IPV6), + HNS3_RX_PTYPE_ENTRY(115, 0, NONE, IPV6), + HNS3_RX_PTYPE_ENTRY(116, 0, UNNECESSARY, IPV6), + HNS3_RX_PTYPE_ENTRY(117, 0, NONE, IPV6), + HNS3_RX_PTYPE_ENTRY(118, 0, NONE, IPV6), + HNS3_RX_PTYPE_ENTRY(119, 0, UNNECESSARY, IPV6), + HNS3_RX_PTYPE_UNUSED_ENTRY(120), + HNS3_RX_PTYPE_UNUSED_ENTRY(121), + HNS3_RX_PTYPE_UNUSED_ENTRY(122), + HNS3_RX_PTYPE_ENTRY(123, 0, COMPLETE, PARSE_FAIL), + HNS3_RX_PTYPE_ENTRY(124, 0, COMPLETE, PARSE_FAIL), + HNS3_RX_PTYPE_ENTRY(125, 0, COMPLETE, IPV4), + HNS3_RX_PTYPE_ENTRY(126, 0, COMPLETE, IPV4), + HNS3_RX_PTYPE_ENTRY(127, 1, UNNECESSARY, IPV4), + HNS3_RX_PTYPE_ENTRY(128, 1, UNNECESSARY, IPV4), + HNS3_RX_PTYPE_ENTRY(129, 1, UNNECESSARY, IPV4), + HNS3_RX_PTYPE_ENTRY(130, 0, COMPLETE, IPV4), + HNS3_RX_PTYPE_ENTRY(131, 0, COMPLETE, IPV4), + HNS3_RX_PTYPE_UNUSED_ENTRY(132), + HNS3_RX_PTYPE_ENTRY(133, 0, COMPLETE, IPV6), + HNS3_RX_PTYPE_ENTRY(134, 0, COMPLETE, IPV6), + HNS3_RX_PTYPE_ENTRY(135, 1, UNNECESSARY, IPV6), + HNS3_RX_PTYPE_ENTRY(136, 1, UNNECESSARY, IPV6), + HNS3_RX_PTYPE_ENTRY(137, 1, UNNECESSARY, IPV6), + HNS3_RX_PTYPE_ENTRY(138, 0, COMPLETE, IPV6), + HNS3_RX_PTYPE_ENTRY(139, 0, COMPLETE, IPV6), + HNS3_RX_PTYPE_UNUSED_ENTRY(140), + HNS3_RX_PTYPE_UNUSED_ENTRY(141), + HNS3_RX_PTYPE_UNUSED_ENTRY(142), + HNS3_RX_PTYPE_UNUSED_ENTRY(143), + HNS3_RX_PTYPE_UNUSED_ENTRY(144), + HNS3_RX_PTYPE_UNUSED_ENTRY(145), + HNS3_RX_PTYPE_UNUSED_ENTRY(146), + HNS3_RX_PTYPE_UNUSED_ENTRY(147), + HNS3_RX_PTYPE_UNUSED_ENTRY(148), + HNS3_RX_PTYPE_UNUSED_ENTRY(149), + HNS3_RX_PTYPE_UNUSED_ENTRY(150), + HNS3_RX_PTYPE_UNUSED_ENTRY(151), + HNS3_RX_PTYPE_UNUSED_ENTRY(152), + HNS3_RX_PTYPE_UNUSED_ENTRY(153), + HNS3_RX_PTYPE_UNUSED_ENTRY(154), + HNS3_RX_PTYPE_UNUSED_ENTRY(155), + HNS3_RX_PTYPE_UNUSED_ENTRY(156), + HNS3_RX_PTYPE_UNUSED_ENTRY(157), + HNS3_RX_PTYPE_UNUSED_ENTRY(158), + HNS3_RX_PTYPE_UNUSED_ENTRY(159), + HNS3_RX_PTYPE_UNUSED_ENTRY(160), + HNS3_RX_PTYPE_UNUSED_ENTRY(161), + HNS3_RX_PTYPE_UNUSED_ENTRY(162), + HNS3_RX_PTYPE_UNUSED_ENTRY(163), + HNS3_RX_PTYPE_UNUSED_ENTRY(164), + HNS3_RX_PTYPE_UNUSED_ENTRY(165), + HNS3_RX_PTYPE_UNUSED_ENTRY(166), + HNS3_RX_PTYPE_UNUSED_ENTRY(167), + HNS3_RX_PTYPE_UNUSED_ENTRY(168), + HNS3_RX_PTYPE_UNUSED_ENTRY(169), + HNS3_RX_PTYPE_UNUSED_ENTRY(170), + HNS3_RX_PTYPE_UNUSED_ENTRY(171), + HNS3_RX_PTYPE_UNUSED_ENTRY(172), + HNS3_RX_PTYPE_UNUSED_ENTRY(173), + HNS3_RX_PTYPE_UNUSED_ENTRY(174), + HNS3_RX_PTYPE_UNUSED_ENTRY(175), + HNS3_RX_PTYPE_UNUSED_ENTRY(176), + HNS3_RX_PTYPE_UNUSED_ENTRY(177), + HNS3_RX_PTYPE_UNUSED_ENTRY(178), + HNS3_RX_PTYPE_UNUSED_ENTRY(179), + HNS3_RX_PTYPE_UNUSED_ENTRY(180), + HNS3_RX_PTYPE_UNUSED_ENTRY(181), + HNS3_RX_PTYPE_UNUSED_ENTRY(182), + HNS3_RX_PTYPE_UNUSED_ENTRY(183), + HNS3_RX_PTYPE_UNUSED_ENTRY(184), + HNS3_RX_PTYPE_UNUSED_ENTRY(185), + HNS3_RX_PTYPE_UNUSED_ENTRY(186), + HNS3_RX_PTYPE_UNUSED_ENTRY(187), + HNS3_RX_PTYPE_UNUSED_ENTRY(188), + HNS3_RX_PTYPE_UNUSED_ENTRY(189), + HNS3_RX_PTYPE_UNUSED_ENTRY(190), + HNS3_RX_PTYPE_UNUSED_ENTRY(191), + HNS3_RX_PTYPE_UNUSED_ENTRY(192), + HNS3_RX_PTYPE_UNUSED_ENTRY(193), + HNS3_RX_PTYPE_UNUSED_ENTRY(194), + HNS3_RX_PTYPE_UNUSED_ENTRY(195), + HNS3_RX_PTYPE_UNUSED_ENTRY(196), + HNS3_RX_PTYPE_UNUSED_ENTRY(197), + HNS3_RX_PTYPE_UNUSED_ENTRY(198), + HNS3_RX_PTYPE_UNUSED_ENTRY(199), + HNS3_RX_PTYPE_UNUSED_ENTRY(200), + HNS3_RX_PTYPE_UNUSED_ENTRY(201), + HNS3_RX_PTYPE_UNUSED_ENTRY(202), + HNS3_RX_PTYPE_UNUSED_ENTRY(203), + HNS3_RX_PTYPE_UNUSED_ENTRY(204), + HNS3_RX_PTYPE_UNUSED_ENTRY(205), + HNS3_RX_PTYPE_UNUSED_ENTRY(206), + HNS3_RX_PTYPE_UNUSED_ENTRY(207), + HNS3_RX_PTYPE_UNUSED_ENTRY(208), + HNS3_RX_PTYPE_UNUSED_ENTRY(209), + HNS3_RX_PTYPE_UNUSED_ENTRY(210), + HNS3_RX_PTYPE_UNUSED_ENTRY(211), + HNS3_RX_PTYPE_UNUSED_ENTRY(212), + HNS3_RX_PTYPE_UNUSED_ENTRY(213), + HNS3_RX_PTYPE_UNUSED_ENTRY(214), + HNS3_RX_PTYPE_UNUSED_ENTRY(215), + HNS3_RX_PTYPE_UNUSED_ENTRY(216), + HNS3_RX_PTYPE_UNUSED_ENTRY(217), + HNS3_RX_PTYPE_UNUSED_ENTRY(218), + HNS3_RX_PTYPE_UNUSED_ENTRY(219), + HNS3_RX_PTYPE_UNUSED_ENTRY(220), + HNS3_RX_PTYPE_UNUSED_ENTRY(221), + HNS3_RX_PTYPE_UNUSED_ENTRY(222), + HNS3_RX_PTYPE_UNUSED_ENTRY(223), + HNS3_RX_PTYPE_UNUSED_ENTRY(224), + HNS3_RX_PTYPE_UNUSED_ENTRY(225), + HNS3_RX_PTYPE_UNUSED_ENTRY(226), + HNS3_RX_PTYPE_UNUSED_ENTRY(227), + HNS3_RX_PTYPE_UNUSED_ENTRY(228), + HNS3_RX_PTYPE_UNUSED_ENTRY(229), + HNS3_RX_PTYPE_UNUSED_ENTRY(230), + HNS3_RX_PTYPE_UNUSED_ENTRY(231), + HNS3_RX_PTYPE_UNUSED_ENTRY(232), + HNS3_RX_PTYPE_UNUSED_ENTRY(233), + HNS3_RX_PTYPE_UNUSED_ENTRY(234), + HNS3_RX_PTYPE_UNUSED_ENTRY(235), + HNS3_RX_PTYPE_UNUSED_ENTRY(236), + HNS3_RX_PTYPE_UNUSED_ENTRY(237), + HNS3_RX_PTYPE_UNUSED_ENTRY(238), + HNS3_RX_PTYPE_UNUSED_ENTRY(239), + HNS3_RX_PTYPE_UNUSED_ENTRY(240), + HNS3_RX_PTYPE_UNUSED_ENTRY(241), + HNS3_RX_PTYPE_UNUSED_ENTRY(242), + HNS3_RX_PTYPE_UNUSED_ENTRY(243), + HNS3_RX_PTYPE_UNUSED_ENTRY(244), + HNS3_RX_PTYPE_UNUSED_ENTRY(245), + HNS3_RX_PTYPE_UNUSED_ENTRY(246), + HNS3_RX_PTYPE_UNUSED_ENTRY(247), + HNS3_RX_PTYPE_UNUSED_ENTRY(248), + HNS3_RX_PTYPE_UNUSED_ENTRY(249), + HNS3_RX_PTYPE_UNUSED_ENTRY(250), + HNS3_RX_PTYPE_UNUSED_ENTRY(251), + HNS3_RX_PTYPE_UNUSED_ENTRY(252), + HNS3_RX_PTYPE_UNUSED_ENTRY(253), + HNS3_RX_PTYPE_UNUSED_ENTRY(254), + HNS3_RX_PTYPE_UNUSED_ENTRY(255), +}; + +#define HNS3_INVALID_PTYPE \ + ARRAY_SIZE(hns3_rx_ptype_tbl) + static irqreturn_t hns3_irq_handle(int irq, void *vector) { struct hns3_enet_tqp_vector *tqp_vector = vector; napi_schedule_irqoff(&tqp_vector->napi); + tqp_vector->event_cnt++; return IRQ_HANDLED; } @@ -199,6 +485,8 @@ static void hns3_vector_disable(struct hns3_enet_tqp_vector *tqp_vector) disable_irq(tqp_vector->vector_irq); napi_disable(&tqp_vector->napi); + cancel_work_sync(&tqp_vector->rx_group.dim.work); + cancel_work_sync(&tqp_vector->tx_group.dim.work); } void hns3_set_vector_coalesce_rl(struct hns3_enet_tqp_vector *tqp_vector, @@ -357,7 +645,7 @@ static int hns3_nic_set_real_num_queue(struct net_device *netdev) return 0; } -static u16 hns3_get_max_available_channels(struct hnae3_handle *h) +u16 hns3_get_max_available_channels(struct hnae3_handle *h) { u16 alloc_tqps, max_rss_size, rss_size; @@ -633,13 +921,10 @@ static u8 hns3_get_netdev_flags(struct net_device *netdev) { u8 flags = 0; - if (netdev->flags & IFF_PROMISC) { + if (netdev->flags & IFF_PROMISC) flags = HNAE3_USER_UPE | HNAE3_USER_MPE | HNAE3_BPE; - } else { - flags |= HNAE3_VLAN_FLTR; - if (netdev->flags & IFF_ALLMULTI) - flags |= HNAE3_USER_MPE; - } + else if (netdev->flags & IFF_ALLMULTI) + flags = HNAE3_USER_MPE; return flags; } @@ -669,23 +954,202 @@ void hns3_request_update_promisc_mode(struct hnae3_handle *handle) ops->request_update_promisc_mode(handle); } -void hns3_enable_vlan_filter(struct net_device *netdev, bool enable) +static u32 hns3_tx_spare_space(struct hns3_enet_ring *ring) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; - struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev); - bool last_state; + struct hns3_tx_spare *tx_spare = ring->tx_spare; + u32 ntc, ntu; - if (ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2 && - h->ae_algo->ops->enable_vlan_filter) { - last_state = h->netdev_flags & HNAE3_VLAN_FLTR ? true : false; - if (enable != last_state) { - netdev_info(netdev, - "%s vlan filter\n", - enable ? "enable" : "disable"); - h->ae_algo->ops->enable_vlan_filter(h, enable); + /* This smp_load_acquire() pairs with smp_store_release() in + * hns3_tx_spare_update() called in tx desc cleaning process. + */ + ntc = smp_load_acquire(&tx_spare->last_to_clean); + ntu = tx_spare->next_to_use; + + if (ntc > ntu) + return ntc - ntu - 1; + + /* The free tx buffer is divided into two part, so pick the + * larger one. + */ + return (ntc > (tx_spare->len - ntu) ? ntc : + (tx_spare->len - ntu)) - 1; +} + +static void hns3_tx_spare_update(struct hns3_enet_ring *ring) +{ + struct hns3_tx_spare *tx_spare = ring->tx_spare; + + if (!tx_spare || + tx_spare->last_to_clean == tx_spare->next_to_clean) + return; + + /* This smp_store_release() pairs with smp_load_acquire() in + * hns3_tx_spare_space() called in xmit process. + */ + smp_store_release(&tx_spare->last_to_clean, + tx_spare->next_to_clean); +} + +static bool hns3_can_use_tx_bounce(struct hns3_enet_ring *ring, + struct sk_buff *skb, + u32 space) +{ + u32 len = skb->len <= ring->tx_copybreak ? skb->len : + skb_headlen(skb); + + if (len > ring->tx_copybreak) + return false; + + if (ALIGN(len, dma_get_cache_alignment()) > space) { + u64_stats_update_begin(&ring->syncp); + ring->stats.tx_spare_full++; + u64_stats_update_end(&ring->syncp); + return false; + } + + return true; +} + +static bool hns3_can_use_tx_sgl(struct hns3_enet_ring *ring, + struct sk_buff *skb, + u32 space) +{ + if (skb->len <= ring->tx_copybreak || !tx_sgl || + (!skb_has_frag_list(skb) && + skb_shinfo(skb)->nr_frags < tx_sgl)) + return false; + + if (space < HNS3_MAX_SGL_SIZE) { + u64_stats_update_begin(&ring->syncp); + ring->stats.tx_spare_full++; + u64_stats_update_end(&ring->syncp); + return false; + } + + return true; +} + +static void hns3_init_tx_spare_buffer(struct hns3_enet_ring *ring) +{ + struct hns3_tx_spare *tx_spare; + struct page *page; + u32 alloc_size; + dma_addr_t dma; + int order; + + alloc_size = tx_spare_buf_size ? tx_spare_buf_size : + ring->tqp->handle->kinfo.tx_spare_buf_size; + if (!alloc_size) + return; + + order = get_order(alloc_size); + tx_spare = devm_kzalloc(ring_to_dev(ring), sizeof(*tx_spare), + GFP_KERNEL); + if (!tx_spare) { + /* The driver still work without the tx spare buffer */ + dev_warn(ring_to_dev(ring), "failed to allocate hns3_tx_spare\n"); + return; + } + + page = alloc_pages_node(dev_to_node(ring_to_dev(ring)), + GFP_KERNEL, order); + if (!page) { + dev_warn(ring_to_dev(ring), "failed to allocate tx spare pages\n"); + devm_kfree(ring_to_dev(ring), tx_spare); + return; + } + + dma = dma_map_page(ring_to_dev(ring), page, 0, + PAGE_SIZE << order, DMA_TO_DEVICE); + if (dma_mapping_error(ring_to_dev(ring), dma)) { + dev_warn(ring_to_dev(ring), "failed to map pages for tx spare\n"); + put_page(page); + devm_kfree(ring_to_dev(ring), tx_spare); + return; + } + + tx_spare->dma = dma; + tx_spare->buf = page_address(page); + tx_spare->len = PAGE_SIZE << order; + ring->tx_spare = tx_spare; +} + +/* Use hns3_tx_spare_space() to make sure there is enough buffer + * before calling below function to allocate tx buffer. + */ +static void *hns3_tx_spare_alloc(struct hns3_enet_ring *ring, + unsigned int size, dma_addr_t *dma, + u32 *cb_len) +{ + struct hns3_tx_spare *tx_spare = ring->tx_spare; + u32 ntu = tx_spare->next_to_use; + + size = ALIGN(size, dma_get_cache_alignment()); + *cb_len = size; + + /* Tx spare buffer wraps back here because the end of + * freed tx buffer is not enough. + */ + if (ntu + size > tx_spare->len) { + *cb_len += (tx_spare->len - ntu); + ntu = 0; + } + + tx_spare->next_to_use = ntu + size; + if (tx_spare->next_to_use == tx_spare->len) + tx_spare->next_to_use = 0; + + *dma = tx_spare->dma + ntu; + + return tx_spare->buf + ntu; +} + +static void hns3_tx_spare_rollback(struct hns3_enet_ring *ring, u32 len) +{ + struct hns3_tx_spare *tx_spare = ring->tx_spare; + + if (len > tx_spare->next_to_use) { + len -= tx_spare->next_to_use; + tx_spare->next_to_use = tx_spare->len - len; + } else { + tx_spare->next_to_use -= len; + } +} + +static void hns3_tx_spare_reclaim_cb(struct hns3_enet_ring *ring, + struct hns3_desc_cb *cb) +{ + struct hns3_tx_spare *tx_spare = ring->tx_spare; + u32 ntc = tx_spare->next_to_clean; + u32 len = cb->length; + + tx_spare->next_to_clean += len; + + if (tx_spare->next_to_clean >= tx_spare->len) { + tx_spare->next_to_clean -= tx_spare->len; + + if (tx_spare->next_to_clean) { + ntc = 0; + len = tx_spare->next_to_clean; } } + + /* This tx spare buffer is only really reclaimed after calling + * hns3_tx_spare_update(), so it is still safe to use the info in + * the tx buffer to do the dma sync or sg unmapping after + * tx_spare->next_to_clean is moved forword. + */ + if (cb->type & (DESC_TYPE_BOUNCE_HEAD | DESC_TYPE_BOUNCE_ALL)) { + dma_addr_t dma = tx_spare->dma + ntc; + + dma_sync_single_for_cpu(ring_to_dev(ring), dma, len, + DMA_TO_DEVICE); + } else { + struct sg_table *sgt = tx_spare->buf + ntc; + + dma_unmap_sg(ring_to_dev(ring), sgt->sgl, sgt->orig_nents, + DMA_TO_DEVICE); + } } static int hns3_set_tso(struct sk_buff *skb, u32 *paylen_fdop_ol4cs, @@ -1159,40 +1623,14 @@ out_hw_tx_csum: return 0; } -static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, - unsigned int size, enum hns_desc_type type) +static int hns3_fill_desc(struct hns3_enet_ring *ring, dma_addr_t dma, + unsigned int size) { #define HNS3_LIKELY_BD_NUM 1 - struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use]; struct hns3_desc *desc = &ring->desc[ring->next_to_use]; - struct device *dev = ring_to_dev(ring); - skb_frag_t *frag; unsigned int frag_buf_num; int k, sizeoflast; - dma_addr_t dma; - - if (type == DESC_TYPE_FRAGLIST_SKB || - type == DESC_TYPE_SKB) { - struct sk_buff *skb = (struct sk_buff *)priv; - - dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE); - } else { - frag = (skb_frag_t *)priv; - dma = skb_frag_dma_map(dev, frag, 0, size, DMA_TO_DEVICE); - } - - if (unlikely(dma_mapping_error(dev, dma))) { - u64_stats_update_begin(&ring->syncp); - ring->stats.sw_err_cnt++; - u64_stats_update_end(&ring->syncp); - return -ENOMEM; - } - - desc_cb->priv = priv; - desc_cb->length = size; - desc_cb->dma = dma; - desc_cb->type = type; if (likely(size <= HNS3_MAX_BD_SIZE)) { desc->addr = cpu_to_le64(dma); @@ -1228,6 +1666,52 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, return frag_buf_num; } +static int hns3_map_and_fill_desc(struct hns3_enet_ring *ring, void *priv, + unsigned int type) +{ + struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use]; + struct device *dev = ring_to_dev(ring); + unsigned int size; + dma_addr_t dma; + + if (type & (DESC_TYPE_FRAGLIST_SKB | DESC_TYPE_SKB)) { + struct sk_buff *skb = (struct sk_buff *)priv; + + size = skb_headlen(skb); + if (!size) + return 0; + + dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE); + } else if (type & DESC_TYPE_BOUNCE_HEAD) { + /* Head data has been filled in hns3_handle_tx_bounce(), + * just return 0 here. + */ + return 0; + } else { + skb_frag_t *frag = (skb_frag_t *)priv; + + size = skb_frag_size(frag); + if (!size) + return 0; + + dma = skb_frag_dma_map(dev, frag, 0, size, DMA_TO_DEVICE); + } + + if (unlikely(dma_mapping_error(dev, dma))) { + u64_stats_update_begin(&ring->syncp); + ring->stats.sw_err_cnt++; + u64_stats_update_end(&ring->syncp); + return -ENOMEM; + } + + desc_cb->priv = priv; + desc_cb->length = size; + desc_cb->dma = dma; + desc_cb->type = type; + + return hns3_fill_desc(ring, dma, size); +} + static unsigned int hns3_skb_bd_num(struct sk_buff *skb, unsigned int *bd_size, unsigned int bd_num) { @@ -1451,6 +1935,7 @@ static void hns3_clear_desc(struct hns3_enet_ring *ring, int next_to_use_orig) for (i = 0; i < ring->desc_num; i++) { struct hns3_desc *desc = &ring->desc[ring->next_to_use]; + struct hns3_desc_cb *desc_cb; memset(desc, 0, sizeof(*desc)); @@ -1461,52 +1946,44 @@ static void hns3_clear_desc(struct hns3_enet_ring *ring, int next_to_use_orig) /* rollback one */ ring_ptr_move_bw(ring, next_to_use); - if (!ring->desc_cb[ring->next_to_use].dma) + desc_cb = &ring->desc_cb[ring->next_to_use]; + + if (!desc_cb->dma) continue; /* unmap the descriptor dma address */ - if (ring->desc_cb[ring->next_to_use].type == DESC_TYPE_SKB || - ring->desc_cb[ring->next_to_use].type == - DESC_TYPE_FRAGLIST_SKB) - dma_unmap_single(dev, - ring->desc_cb[ring->next_to_use].dma, - ring->desc_cb[ring->next_to_use].length, - DMA_TO_DEVICE); - else if (ring->desc_cb[ring->next_to_use].length) - dma_unmap_page(dev, - ring->desc_cb[ring->next_to_use].dma, - ring->desc_cb[ring->next_to_use].length, + if (desc_cb->type & (DESC_TYPE_SKB | DESC_TYPE_FRAGLIST_SKB)) + dma_unmap_single(dev, desc_cb->dma, desc_cb->length, + DMA_TO_DEVICE); + else if (desc_cb->type & + (DESC_TYPE_BOUNCE_HEAD | DESC_TYPE_BOUNCE_ALL)) + hns3_tx_spare_rollback(ring, desc_cb->length); + else if (desc_cb->length) + dma_unmap_page(dev, desc_cb->dma, desc_cb->length, DMA_TO_DEVICE); - ring->desc_cb[ring->next_to_use].length = 0; - ring->desc_cb[ring->next_to_use].dma = 0; - ring->desc_cb[ring->next_to_use].type = DESC_TYPE_UNKNOWN; + desc_cb->length = 0; + desc_cb->dma = 0; + desc_cb->type = DESC_TYPE_UNKNOWN; } } static int hns3_fill_skb_to_desc(struct hns3_enet_ring *ring, - struct sk_buff *skb, enum hns_desc_type type) + struct sk_buff *skb, unsigned int type) { - unsigned int size = skb_headlen(skb); struct sk_buff *frag_skb; int i, ret, bd_num = 0; - if (size) { - ret = hns3_fill_desc(ring, skb, size, type); - if (unlikely(ret < 0)) - return ret; + ret = hns3_map_and_fill_desc(ring, skb, type); + if (unlikely(ret < 0)) + return ret; - bd_num += ret; - } + bd_num += ret; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - size = skb_frag_size(frag); - if (!size) - continue; - - ret = hns3_fill_desc(ring, frag, size, DESC_TYPE_PAGE); + ret = hns3_map_and_fill_desc(ring, frag, DESC_TYPE_PAGE); if (unlikely(ret < 0)) return ret; @@ -1546,6 +2023,153 @@ static void hns3_tx_doorbell(struct hns3_enet_ring *ring, int num, WRITE_ONCE(ring->last_to_use, ring->next_to_use); } +static void hns3_tsyn(struct net_device *netdev, struct sk_buff *skb, + struct hns3_desc *desc) +{ + struct hnae3_handle *h = hns3_get_handle(netdev); + + if (!(h->ae_algo->ops->set_tx_hwts_info && + h->ae_algo->ops->set_tx_hwts_info(h, skb))) + return; + + desc->tx.bdtp_fe_sc_vld_ra_ri |= cpu_to_le16(BIT(HNS3_TXD_TSYN_B)); +} + +static int hns3_handle_tx_bounce(struct hns3_enet_ring *ring, + struct sk_buff *skb) +{ + struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use]; + unsigned int type = DESC_TYPE_BOUNCE_HEAD; + unsigned int size = skb_headlen(skb); + dma_addr_t dma; + int bd_num = 0; + u32 cb_len; + void *buf; + int ret; + + if (skb->len <= ring->tx_copybreak) { + size = skb->len; + type = DESC_TYPE_BOUNCE_ALL; + } + + /* hns3_can_use_tx_bounce() is called to ensure the below + * function can always return the tx buffer. + */ + buf = hns3_tx_spare_alloc(ring, size, &dma, &cb_len); + + ret = skb_copy_bits(skb, 0, buf, size); + if (unlikely(ret < 0)) { + hns3_tx_spare_rollback(ring, cb_len); + u64_stats_update_begin(&ring->syncp); + ring->stats.copy_bits_err++; + u64_stats_update_end(&ring->syncp); + return ret; + } + + desc_cb->priv = skb; + desc_cb->length = cb_len; + desc_cb->dma = dma; + desc_cb->type = type; + + bd_num += hns3_fill_desc(ring, dma, size); + + if (type == DESC_TYPE_BOUNCE_HEAD) { + ret = hns3_fill_skb_to_desc(ring, skb, + DESC_TYPE_BOUNCE_HEAD); + if (unlikely(ret < 0)) + return ret; + + bd_num += ret; + } + + dma_sync_single_for_device(ring_to_dev(ring), dma, size, + DMA_TO_DEVICE); + + u64_stats_update_begin(&ring->syncp); + ring->stats.tx_bounce++; + u64_stats_update_end(&ring->syncp); + return bd_num; +} + +static int hns3_handle_tx_sgl(struct hns3_enet_ring *ring, + struct sk_buff *skb) +{ + struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use]; + u32 nfrag = skb_shinfo(skb)->nr_frags + 1; + struct sg_table *sgt; + int i, bd_num = 0; + dma_addr_t dma; + u32 cb_len; + int nents; + + if (skb_has_frag_list(skb)) + nfrag = HNS3_MAX_TSO_BD_NUM; + + /* hns3_can_use_tx_sgl() is called to ensure the below + * function can always return the tx buffer. + */ + sgt = hns3_tx_spare_alloc(ring, HNS3_SGL_SIZE(nfrag), + &dma, &cb_len); + + /* scatterlist follows by the sg table */ + sgt->sgl = (struct scatterlist *)(sgt + 1); + sg_init_table(sgt->sgl, nfrag); + nents = skb_to_sgvec(skb, sgt->sgl, 0, skb->len); + if (unlikely(nents < 0)) { + hns3_tx_spare_rollback(ring, cb_len); + u64_stats_update_begin(&ring->syncp); + ring->stats.skb2sgl_err++; + u64_stats_update_end(&ring->syncp); + return -ENOMEM; + } + + sgt->orig_nents = nents; + sgt->nents = dma_map_sg(ring_to_dev(ring), sgt->sgl, sgt->orig_nents, + DMA_TO_DEVICE); + if (unlikely(!sgt->nents)) { + hns3_tx_spare_rollback(ring, cb_len); + u64_stats_update_begin(&ring->syncp); + ring->stats.map_sg_err++; + u64_stats_update_end(&ring->syncp); + return -ENOMEM; + } + + desc_cb->priv = skb; + desc_cb->length = cb_len; + desc_cb->dma = dma; + desc_cb->type = DESC_TYPE_SGL_SKB; + + for (i = 0; i < sgt->nents; i++) + bd_num += hns3_fill_desc(ring, sg_dma_address(sgt->sgl + i), + sg_dma_len(sgt->sgl + i)); + + u64_stats_update_begin(&ring->syncp); + ring->stats.tx_sgl++; + u64_stats_update_end(&ring->syncp); + + return bd_num; +} + +static int hns3_handle_desc_filling(struct hns3_enet_ring *ring, + struct sk_buff *skb) +{ + u32 space; + + if (!ring->tx_spare) + goto out; + + space = hns3_tx_spare_space(ring); + + if (hns3_can_use_tx_sgl(ring, skb, space)) + return hns3_handle_tx_sgl(ring, skb); + + if (hns3_can_use_tx_bounce(ring, skb, space)) + return hns3_handle_tx_bounce(ring, skb); + +out: + return hns3_fill_skb_to_desc(ring, skb, DESC_TYPE_SKB); +} + netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev) { struct hns3_nic_priv *priv = netdev_priv(netdev); @@ -1592,16 +2216,22 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev) * zero, which is unlikely, and 'ret > 0' means how many tx desc * need to be notified to the hw. */ - ret = hns3_fill_skb_to_desc(ring, skb, DESC_TYPE_SKB); + ret = hns3_handle_desc_filling(ring, skb); if (unlikely(ret <= 0)) goto fill_err; pre_ntu = ring->next_to_use ? (ring->next_to_use - 1) : (ring->desc_num - 1); + + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) + hns3_tsyn(netdev, skb, &ring->desc[pre_ntu]); + ring->desc[pre_ntu].tx.bdtp_fe_sc_vld_ra_ri |= cpu_to_le16(BIT(HNS3_TXD_FE_B)); trace_hns3_tx_desc(ring, pre_ntu); + skb_tx_timestamp(skb); + /* Complete translate all packets */ dev_queue = netdev_get_tx_queue(netdev, ring->queue_index); doorbell = __netdev_tx_sent_queue(dev_queue, desc_cb->send_bytes, @@ -1705,6 +2335,14 @@ static int hns3_nic_set_features(struct net_device *netdev, return -EINVAL; } + if ((changed & NETIF_F_HW_VLAN_CTAG_FILTER) && + h->ae_algo->ops->enable_vlan_filter) { + enable = !!(features & NETIF_F_HW_VLAN_CTAG_FILTER); + ret = h->ae_algo->ops->enable_vlan_filter(h, enable); + if (ret) + return ret; + } + netdev->features = features; return 0; } @@ -1780,6 +2418,9 @@ static void hns3_nic_get_stats64(struct net_device *netdev, tx_drop += ring->stats.tx_tso_err; tx_drop += ring->stats.over_max_recursion; tx_drop += ring->stats.hw_limitation; + tx_drop += ring->stats.copy_bits_err; + tx_drop += ring->stats.skb2sgl_err; + tx_drop += ring->stats.map_sg_err; tx_errors += ring->stats.sw_err_cnt; tx_errors += ring->stats.tx_vlan_err; tx_errors += ring->stats.tx_l4_proto_err; @@ -1787,6 +2428,9 @@ static void hns3_nic_get_stats64(struct net_device *netdev, tx_errors += ring->stats.tx_tso_err; tx_errors += ring->stats.over_max_recursion; tx_errors += ring->stats.hw_limitation; + tx_errors += ring->stats.copy_bits_err; + tx_errors += ring->stats.skb2sgl_err; + tx_errors += ring->stats.map_sg_err; } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); /* fetch the rx stats */ @@ -2550,6 +3194,9 @@ static void hns3_set_default_feature(struct net_device *netdev) netdev->hw_features |= NETIF_F_HW_TC; netdev->features |= NETIF_F_HW_TC; } + + if (test_bit(HNAE3_DEV_SUPPORT_VLAN_FLTR_MDF_B, ae_dev->caps)) + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; } static int hns3_alloc_buffer(struct hns3_enet_ring *ring, @@ -2577,7 +3224,8 @@ static int hns3_alloc_buffer(struct hns3_enet_ring *ring, static void hns3_free_buffer(struct hns3_enet_ring *ring, struct hns3_desc_cb *cb, int budget) { - if (cb->type == DESC_TYPE_SKB) + if (cb->type & (DESC_TYPE_SKB | DESC_TYPE_BOUNCE_HEAD | + DESC_TYPE_BOUNCE_ALL | DESC_TYPE_SGL_SKB)) napi_consume_skb(cb->priv, budget); else if (!HNAE3_IS_TX_RING(ring) && cb->pagecnt_bias) __page_frag_cache_drain(cb->priv, cb->pagecnt_bias); @@ -2598,12 +3246,15 @@ static int hns3_map_buffer(struct hns3_enet_ring *ring, struct hns3_desc_cb *cb) static void hns3_unmap_buffer(struct hns3_enet_ring *ring, struct hns3_desc_cb *cb) { - if (cb->type == DESC_TYPE_SKB || cb->type == DESC_TYPE_FRAGLIST_SKB) + if (cb->type & (DESC_TYPE_SKB | DESC_TYPE_FRAGLIST_SKB)) dma_unmap_single(ring_to_dev(ring), cb->dma, cb->length, ring_to_dma_dir(ring)); - else if (cb->length) + else if ((cb->type & DESC_TYPE_PAGE) && cb->length) dma_unmap_page(ring_to_dev(ring), cb->dma, cb->length, ring_to_dma_dir(ring)); + else if (cb->type & (DESC_TYPE_BOUNCE_ALL | DESC_TYPE_BOUNCE_HEAD | + DESC_TYPE_SGL_SKB)) + hns3_tx_spare_reclaim_cb(ring, cb); } static void hns3_buffer_detach(struct hns3_enet_ring *ring, int i) @@ -2755,7 +3406,9 @@ static bool hns3_nic_reclaim_desc(struct hns3_enet_ring *ring, desc_cb = &ring->desc_cb[ntc]; - if (desc_cb->type == DESC_TYPE_SKB) { + if (desc_cb->type & (DESC_TYPE_SKB | DESC_TYPE_BOUNCE_ALL | + DESC_TYPE_BOUNCE_HEAD | + DESC_TYPE_SGL_SKB)) { (*pkts)++; (*bytes) += desc_cb->send_bytes; } @@ -2778,6 +3431,9 @@ static bool hns3_nic_reclaim_desc(struct hns3_enet_ring *ring, * ring_space called by hns3_nic_net_xmit. */ smp_store_release(&ring->next_to_clean, ntc); + + hns3_tx_spare_update(ring); + return true; } @@ -2869,7 +3525,7 @@ static void hns3_nic_alloc_rx_buffers(struct hns3_enet_ring *ring, static bool hns3_can_reuse_page(struct hns3_desc_cb *cb) { - return (page_count(cb->priv) - cb->pagecnt_bias) == 1; + return page_count(cb->priv) == cb->pagecnt_bias; } static void hns3_nic_reuse_page(struct sk_buff *skb, int i, @@ -2877,40 +3533,74 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i, struct hns3_desc_cb *desc_cb) { struct hns3_desc *desc = &ring->desc[ring->next_to_clean]; + u32 frag_offset = desc_cb->page_offset + pull_len; int size = le16_to_cpu(desc->rx.size); u32 truesize = hns3_buf_size(ring); + u32 frag_size = size - pull_len; + bool reused; - desc_cb->pagecnt_bias--; - skb_add_rx_frag(skb, i, desc_cb->priv, desc_cb->page_offset + pull_len, - size - pull_len, truesize); + /* Avoid re-using remote or pfmem page */ + if (unlikely(!dev_page_is_reusable(desc_cb->priv))) + goto out; - /* Avoid re-using remote and pfmemalloc pages, or the stack is still - * using the page when page_offset rollback to zero, flag default - * unreuse + reused = hns3_can_reuse_page(desc_cb); + + /* Rx page can be reused when: + * 1. Rx page is only owned by the driver when page_offset + * is zero, which means 0 @ truesize will be used by + * stack after skb_add_rx_frag() is called, and the rest + * of rx page can be reused by driver. + * Or + * 2. Rx page is only owned by the driver when page_offset + * is non-zero, which means page_offset @ truesize will + * be used by stack after skb_add_rx_frag() is called, + * and 0 @ truesize can be reused by driver. */ - if (!dev_page_is_reusable(desc_cb->priv) || - (!desc_cb->page_offset && !hns3_can_reuse_page(desc_cb))) { - __page_frag_cache_drain(desc_cb->priv, desc_cb->pagecnt_bias); - return; - } + if ((!desc_cb->page_offset && reused) || + ((desc_cb->page_offset + truesize + truesize) <= + hns3_page_size(ring) && desc_cb->page_offset)) { + desc_cb->page_offset += truesize; + desc_cb->reuse_flag = 1; + } else if (desc_cb->page_offset && reused) { + desc_cb->page_offset = 0; + desc_cb->reuse_flag = 1; + } else if (frag_size <= ring->rx_copybreak) { + void *frag = napi_alloc_frag(frag_size); + + if (unlikely(!frag)) { + u64_stats_update_begin(&ring->syncp); + ring->stats.frag_alloc_err++; + u64_stats_update_end(&ring->syncp); - /* Move offset up to the next cache line */ - desc_cb->page_offset += truesize; + hns3_rl_err(ring_to_netdev(ring), + "failed to allocate rx frag\n"); + goto out; + } - if (desc_cb->page_offset + truesize <= hns3_page_size(ring)) { - desc_cb->reuse_flag = 1; - } else if (hns3_can_reuse_page(desc_cb)) { desc_cb->reuse_flag = 1; - desc_cb->page_offset = 0; - } else if (desc_cb->pagecnt_bias) { - __page_frag_cache_drain(desc_cb->priv, desc_cb->pagecnt_bias); + memcpy(frag, desc_cb->buf + frag_offset, frag_size); + skb_add_rx_frag(skb, i, virt_to_page(frag), + offset_in_page(frag), frag_size, frag_size); + + u64_stats_update_begin(&ring->syncp); + ring->stats.frag_alloc++; + u64_stats_update_end(&ring->syncp); return; } +out: + desc_cb->pagecnt_bias--; + if (unlikely(!desc_cb->pagecnt_bias)) { page_ref_add(desc_cb->priv, USHRT_MAX); desc_cb->pagecnt_bias = USHRT_MAX; } + + skb_add_rx_frag(skb, i, desc_cb->priv, frag_offset, + frag_size, truesize); + + if (unlikely(!desc_cb->reuse_flag)) + __page_frag_cache_drain(desc_cb->priv, desc_cb->pagecnt_bias); } static int hns3_gro_complete(struct sk_buff *skb, u32 l234info) @@ -2971,51 +3661,31 @@ static int hns3_gro_complete(struct sk_buff *skb, u32 l234info) return 0; } -static void hns3_checksum_complete(struct hns3_enet_ring *ring, - struct sk_buff *skb, u32 l234info) +static bool hns3_checksum_complete(struct hns3_enet_ring *ring, + struct sk_buff *skb, u32 ptype, u16 csum) { - u32 lo, hi; + if (ptype == HNS3_INVALID_PTYPE || + hns3_rx_ptype_tbl[ptype].ip_summed != CHECKSUM_COMPLETE) + return false; u64_stats_update_begin(&ring->syncp); ring->stats.csum_complete++; u64_stats_update_end(&ring->syncp); skb->ip_summed = CHECKSUM_COMPLETE; - lo = hnae3_get_field(l234info, HNS3_RXD_L2_CSUM_L_M, - HNS3_RXD_L2_CSUM_L_S); - hi = hnae3_get_field(l234info, HNS3_RXD_L2_CSUM_H_M, - HNS3_RXD_L2_CSUM_H_S); - skb->csum = csum_unfold((__force __sum16)(lo | hi << 8)); + skb->csum = csum_unfold((__force __sum16)csum); + + return true; } -static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb, - u32 l234info, u32 bd_base_info, u32 ol_info) +static void hns3_rx_handle_csum(struct sk_buff *skb, u32 l234info, + u32 ol_info, u32 ptype) { - struct net_device *netdev = ring_to_netdev(ring); int l3_type, l4_type; int ol4_type; - skb->ip_summed = CHECKSUM_NONE; - - skb_checksum_none_assert(skb); - - if (!(netdev->features & NETIF_F_RXCSUM)) - return; - - if (l234info & BIT(HNS3_RXD_L2_CSUM_B)) { - hns3_checksum_complete(ring, skb, l234info); - return; - } - - /* check if hardware has done checksum */ - if (!(bd_base_info & BIT(HNS3_RXD_L3L4P_B))) - return; - - if (unlikely(l234info & (BIT(HNS3_RXD_L3E_B) | BIT(HNS3_RXD_L4E_B) | - BIT(HNS3_RXD_OL3E_B) | - BIT(HNS3_RXD_OL4E_B)))) { - u64_stats_update_begin(&ring->syncp); - ring->stats.l3l4_csum_err++; - u64_stats_update_end(&ring->syncp); + if (ptype != HNS3_INVALID_PTYPE) { + skb->csum_level = hns3_rx_ptype_tbl[ptype].csum_level; + skb->ip_summed = hns3_rx_ptype_tbl[ptype].ip_summed; return; } @@ -3045,6 +3715,45 @@ static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb, } } +static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb, + u32 l234info, u32 bd_base_info, u32 ol_info, + u16 csum) +{ + struct net_device *netdev = ring_to_netdev(ring); + struct hns3_nic_priv *priv = netdev_priv(netdev); + u32 ptype = HNS3_INVALID_PTYPE; + + skb->ip_summed = CHECKSUM_NONE; + + skb_checksum_none_assert(skb); + + if (!(netdev->features & NETIF_F_RXCSUM)) + return; + + if (test_bit(HNS3_NIC_STATE_RXD_ADV_LAYOUT_ENABLE, &priv->state)) + ptype = hnae3_get_field(ol_info, HNS3_RXD_PTYPE_M, + HNS3_RXD_PTYPE_S); + + if (hns3_checksum_complete(ring, skb, ptype, csum)) + return; + + /* check if hardware has done checksum */ + if (!(bd_base_info & BIT(HNS3_RXD_L3L4P_B))) + return; + + if (unlikely(l234info & (BIT(HNS3_RXD_L3E_B) | BIT(HNS3_RXD_L4E_B) | + BIT(HNS3_RXD_OL3E_B) | + BIT(HNS3_RXD_OL4E_B)))) { + u64_stats_update_begin(&ring->syncp); + ring->stats.l3l4_csum_err++; + u64_stats_update_end(&ring->syncp); + + return; + } + + hns3_rx_handle_csum(skb, l234info, ol_info, ptype); +} + static void hns3_rx_skb(struct hns3_enet_ring *ring, struct sk_buff *skb) { if (skb_has_frag_list(skb)) @@ -3226,8 +3935,10 @@ static int hns3_add_frag(struct hns3_enet_ring *ring) static int hns3_set_gro_and_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb, u32 l234info, - u32 bd_base_info, u32 ol_info) + u32 bd_base_info, u32 ol_info, u16 csum) { + struct net_device *netdev = ring_to_netdev(ring); + struct hns3_nic_priv *priv = netdev_priv(netdev); u32 l3_type; skb_shinfo(skb)->gso_size = hnae3_get_field(bd_base_info, @@ -3235,7 +3946,8 @@ static int hns3_set_gro_and_checksum(struct hns3_enet_ring *ring, HNS3_RXD_GRO_SIZE_S); /* if there is no HW GRO, do not set gro params */ if (!skb_shinfo(skb)->gso_size) { - hns3_rx_checksum(ring, skb, l234info, bd_base_info, ol_info); + hns3_rx_checksum(ring, skb, l234info, bd_base_info, ol_info, + csum); return 0; } @@ -3243,7 +3955,16 @@ static int hns3_set_gro_and_checksum(struct hns3_enet_ring *ring, HNS3_RXD_GRO_COUNT_M, HNS3_RXD_GRO_COUNT_S); - l3_type = hnae3_get_field(l234info, HNS3_RXD_L3ID_M, HNS3_RXD_L3ID_S); + if (test_bit(HNS3_NIC_STATE_RXD_ADV_LAYOUT_ENABLE, &priv->state)) { + u32 ptype = hnae3_get_field(ol_info, HNS3_RXD_PTYPE_M, + HNS3_RXD_PTYPE_S); + + l3_type = hns3_rx_ptype_tbl[ptype].l3_type; + } else { + l3_type = hnae3_get_field(l234info, HNS3_RXD_L3ID_M, + HNS3_RXD_L3ID_S); + } + if (l3_type == HNS3_L3_TYPE_IPV4) skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; else if (l3_type == HNS3_L3_TYPE_IPV6) @@ -3276,6 +3997,7 @@ static int hns3_handle_bdinfo(struct hns3_enet_ring *ring, struct sk_buff *skb) struct hns3_desc *desc; unsigned int len; int pre_ntc, ret; + u16 csum; /* bdinfo handled below is only valid on the last BD of the * current packet, and ring->next_to_clean indicates the first @@ -3287,6 +4009,16 @@ static int hns3_handle_bdinfo(struct hns3_enet_ring *ring, struct sk_buff *skb) bd_base_info = le32_to_cpu(desc->rx.bd_base_info); l234info = le32_to_cpu(desc->rx.l234_info); ol_info = le32_to_cpu(desc->rx.ol_info); + csum = le16_to_cpu(desc->csum); + + if (unlikely(bd_base_info & BIT(HNS3_RXD_TS_VLD_B))) { + struct hnae3_handle *h = hns3_get_handle(netdev); + u32 nsec = le32_to_cpu(desc->ts_nsec); + u32 sec = le32_to_cpu(desc->ts_sec); + + if (h->ae_algo->ops->get_rx_hwts) + h->ae_algo->ops->get_rx_hwts(h, skb, nsec, sec); + } /* Based on hw strategy, the tag offloaded will be stored at * ot_vlan_tag in two layer tag case, and stored at vlan_tag @@ -3319,7 +4051,7 @@ static int hns3_handle_bdinfo(struct hns3_enet_ring *ring, struct sk_buff *skb) /* This is needed in order to enable forwarding support */ ret = hns3_set_gro_and_checksum(ring, skb, l234info, - bd_base_info, ol_info); + bd_base_info, ol_info, csum); if (unlikely(ret)) { u64_stats_update_begin(&ring->syncp); ring->stats.rx_err_cnt++; @@ -3458,139 +4190,30 @@ out: return recv_pkts; } -static bool hns3_get_new_flow_lvl(struct hns3_enet_ring_group *ring_group) -{ -#define HNS3_RX_LOW_BYTE_RATE 10000 -#define HNS3_RX_MID_BYTE_RATE 20000 -#define HNS3_RX_ULTRA_PACKET_RATE 40 - - enum hns3_flow_level_range new_flow_level; - struct hns3_enet_tqp_vector *tqp_vector; - int packets_per_msecs, bytes_per_msecs; - u32 time_passed_ms; - - tqp_vector = ring_group->ring->tqp_vector; - time_passed_ms = - jiffies_to_msecs(jiffies - tqp_vector->last_jiffies); - if (!time_passed_ms) - return false; - - do_div(ring_group->total_packets, time_passed_ms); - packets_per_msecs = ring_group->total_packets; - - do_div(ring_group->total_bytes, time_passed_ms); - bytes_per_msecs = ring_group->total_bytes; - - new_flow_level = ring_group->coal.flow_level; - - /* Simple throttlerate management - * 0-10MB/s lower (50000 ints/s) - * 10-20MB/s middle (20000 ints/s) - * 20-1249MB/s high (18000 ints/s) - * > 40000pps ultra (8000 ints/s) - */ - switch (new_flow_level) { - case HNS3_FLOW_LOW: - if (bytes_per_msecs > HNS3_RX_LOW_BYTE_RATE) - new_flow_level = HNS3_FLOW_MID; - break; - case HNS3_FLOW_MID: - if (bytes_per_msecs > HNS3_RX_MID_BYTE_RATE) - new_flow_level = HNS3_FLOW_HIGH; - else if (bytes_per_msecs <= HNS3_RX_LOW_BYTE_RATE) - new_flow_level = HNS3_FLOW_LOW; - break; - case HNS3_FLOW_HIGH: - case HNS3_FLOW_ULTRA: - default: - if (bytes_per_msecs <= HNS3_RX_MID_BYTE_RATE) - new_flow_level = HNS3_FLOW_MID; - break; - } - - if (packets_per_msecs > HNS3_RX_ULTRA_PACKET_RATE && - &tqp_vector->rx_group == ring_group) - new_flow_level = HNS3_FLOW_ULTRA; - - ring_group->total_bytes = 0; - ring_group->total_packets = 0; - ring_group->coal.flow_level = new_flow_level; - - return true; -} - -static bool hns3_get_new_int_gl(struct hns3_enet_ring_group *ring_group) +static void hns3_update_rx_int_coalesce(struct hns3_enet_tqp_vector *tqp_vector) { - struct hns3_enet_tqp_vector *tqp_vector; - u16 new_int_gl; - - if (!ring_group->ring) - return false; - - tqp_vector = ring_group->ring->tqp_vector; - if (!tqp_vector->last_jiffies) - return false; - - if (ring_group->total_packets == 0) { - ring_group->coal.int_gl = HNS3_INT_GL_50K; - ring_group->coal.flow_level = HNS3_FLOW_LOW; - return true; - } - - if (!hns3_get_new_flow_lvl(ring_group)) - return false; + struct hns3_enet_ring_group *rx_group = &tqp_vector->rx_group; + struct dim_sample sample = {}; - new_int_gl = ring_group->coal.int_gl; - switch (ring_group->coal.flow_level) { - case HNS3_FLOW_LOW: - new_int_gl = HNS3_INT_GL_50K; - break; - case HNS3_FLOW_MID: - new_int_gl = HNS3_INT_GL_20K; - break; - case HNS3_FLOW_HIGH: - new_int_gl = HNS3_INT_GL_18K; - break; - case HNS3_FLOW_ULTRA: - new_int_gl = HNS3_INT_GL_8K; - break; - default: - break; - } + if (!rx_group->coal.adapt_enable) + return; - if (new_int_gl != ring_group->coal.int_gl) { - ring_group->coal.int_gl = new_int_gl; - return true; - } - return false; + dim_update_sample(tqp_vector->event_cnt, rx_group->total_packets, + rx_group->total_bytes, &sample); + net_dim(&rx_group->dim, sample); } -static void hns3_update_new_int_gl(struct hns3_enet_tqp_vector *tqp_vector) +static void hns3_update_tx_int_coalesce(struct hns3_enet_tqp_vector *tqp_vector) { - struct hns3_enet_ring_group *rx_group = &tqp_vector->rx_group; struct hns3_enet_ring_group *tx_group = &tqp_vector->tx_group; - bool rx_update, tx_update; + struct dim_sample sample = {}; - /* update param every 1000ms */ - if (time_before(jiffies, - tqp_vector->last_jiffies + msecs_to_jiffies(1000))) + if (!tx_group->coal.adapt_enable) return; - if (rx_group->coal.adapt_enable) { - rx_update = hns3_get_new_int_gl(rx_group); - if (rx_update) - hns3_set_vector_coalesce_rx_gl(tqp_vector, - rx_group->coal.int_gl); - } - - if (tx_group->coal.adapt_enable) { - tx_update = hns3_get_new_int_gl(tx_group); - if (tx_update) - hns3_set_vector_coalesce_tx_gl(tqp_vector, - tx_group->coal.int_gl); - } - - tqp_vector->last_jiffies = jiffies; + dim_update_sample(tqp_vector->event_cnt, tx_group->total_packets, + tx_group->total_bytes, &sample); + net_dim(&tx_group->dim, sample); } static int hns3_nic_common_poll(struct napi_struct *napi, int budget) @@ -3635,7 +4258,9 @@ static int hns3_nic_common_poll(struct napi_struct *napi, int budget) if (napi_complete(napi) && likely(!test_bit(HNS3_NIC_STATE_DOWN, &priv->state))) { - hns3_update_new_int_gl(tqp_vector); + hns3_update_rx_int_coalesce(tqp_vector); + hns3_update_tx_int_coalesce(tqp_vector); + hns3_mask_vector_irq(tqp_vector, 1); } @@ -3766,6 +4391,54 @@ static void hns3_nic_set_cpumask(struct hns3_nic_priv *priv) } } +static void hns3_rx_dim_work(struct work_struct *work) +{ + struct dim *dim = container_of(work, struct dim, work); + struct hns3_enet_ring_group *group = container_of(dim, + struct hns3_enet_ring_group, dim); + struct hns3_enet_tqp_vector *tqp_vector = group->ring->tqp_vector; + struct dim_cq_moder cur_moder = + net_dim_get_rx_moderation(dim->mode, dim->profile_ix); + + hns3_set_vector_coalesce_rx_gl(group->ring->tqp_vector, cur_moder.usec); + tqp_vector->rx_group.coal.int_gl = cur_moder.usec; + + if (cur_moder.pkts < tqp_vector->rx_group.coal.int_ql_max) { + hns3_set_vector_coalesce_rx_ql(tqp_vector, cur_moder.pkts); + tqp_vector->rx_group.coal.int_ql = cur_moder.pkts; + } + + dim->state = DIM_START_MEASURE; +} + +static void hns3_tx_dim_work(struct work_struct *work) +{ + struct dim *dim = container_of(work, struct dim, work); + struct hns3_enet_ring_group *group = container_of(dim, + struct hns3_enet_ring_group, dim); + struct hns3_enet_tqp_vector *tqp_vector = group->ring->tqp_vector; + struct dim_cq_moder cur_moder = + net_dim_get_tx_moderation(dim->mode, dim->profile_ix); + + hns3_set_vector_coalesce_tx_gl(tqp_vector, cur_moder.usec); + tqp_vector->tx_group.coal.int_gl = cur_moder.usec; + + if (cur_moder.pkts < tqp_vector->tx_group.coal.int_ql_max) { + hns3_set_vector_coalesce_tx_ql(tqp_vector, cur_moder.pkts); + tqp_vector->tx_group.coal.int_ql = cur_moder.pkts; + } + + dim->state = DIM_START_MEASURE; +} + +static void hns3_nic_init_dim(struct hns3_enet_tqp_vector *tqp_vector) +{ + INIT_WORK(&tqp_vector->rx_group.dim.work, hns3_rx_dim_work); + tqp_vector->rx_group.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + INIT_WORK(&tqp_vector->tx_group.dim.work, hns3_tx_dim_work); + tqp_vector->tx_group.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; +} + static int hns3_nic_init_vector_data(struct hns3_nic_priv *priv) { struct hnae3_handle *h = priv->ae_handle; @@ -3779,6 +4452,7 @@ static int hns3_nic_init_vector_data(struct hns3_nic_priv *priv) tqp_vector = &priv->tqp_vector[i]; hns3_vector_coalesce_init_hw(tqp_vector, priv); tqp_vector->num_tqps = 0; + hns3_nic_init_dim(tqp_vector); } for (i = 0; i < h->kinfo.num_tqps; i++) { @@ -3974,10 +4648,13 @@ static void hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv, ring = &priv->ring[q->tqp_index]; desc_num = priv->ae_handle->kinfo.num_tx_desc; ring->queue_index = q->tqp_index; + ring->tx_copybreak = priv->tx_copybreak; + ring->last_to_use = 0; } else { ring = &priv->ring[q->tqp_index + queue_num]; desc_num = priv->ae_handle->kinfo.num_rx_desc; ring->queue_index = q->tqp_index; + ring->rx_copybreak = priv->rx_copybreak; } hnae3_set_bit(ring->flag, HNAE3_RING_TYPE_B, ring_type); @@ -3991,7 +4668,6 @@ static void hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv, ring->desc_num = desc_num; ring->next_to_use = 0; ring->next_to_clean = 0; - ring->last_to_use = 0; } static void hns3_queue_to_ring(struct hnae3_queue *tqp, @@ -4051,6 +4727,8 @@ static int hns3_alloc_ring_memory(struct hns3_enet_ring *ring) ret = hns3_alloc_ring_buffers(ring); if (ret) goto out_with_desc; + } else { + hns3_init_tx_spare_buffer(ring); } return 0; @@ -4073,9 +4751,18 @@ void hns3_fini_ring(struct hns3_enet_ring *ring) ring->next_to_use = 0; ring->last_to_use = 0; ring->pending_buf = 0; - if (ring->skb) { + if (!HNAE3_IS_TX_RING(ring) && ring->skb) { dev_kfree_skb_any(ring->skb); ring->skb = NULL; + } else if (HNAE3_IS_TX_RING(ring) && ring->tx_spare) { + struct hns3_tx_spare *tx_spare = ring->tx_spare; + + dma_unmap_page(ring_to_dev(ring), tx_spare->dma, tx_spare->len, + DMA_TO_DEVICE); + free_pages((unsigned long)tx_spare->buf, + get_order(tx_spare->len)); + devm_kfree(ring_to_dev(ring), tx_spare); + ring->tx_spare = NULL; } } @@ -4358,13 +5045,21 @@ static int hns3_client_init(struct hnae3_handle *handle) hns3_dcbnl_setup(handle); - hns3_dbg_init(handle); + ret = hns3_dbg_init(handle); + if (ret) { + dev_err(priv->dev, "failed to init debugfs, ret = %d\n", + ret); + goto out_client_start; + } netdev->max_mtu = HNS3_MAX_MTU(ae_dev->dev_specs.max_frm_size); if (test_bit(HNAE3_DEV_SUPPORT_HW_TX_CSUM_B, ae_dev->caps)) set_bit(HNS3_NIC_STATE_HW_TX_CSUM_ENABLE, &priv->state); + if (hnae3_ae_dev_rxd_adv_layout_supported(ae_dev)) + set_bit(HNS3_NIC_STATE_RXD_ADV_LAYOUT_ENABLE, &priv->state); + set_bit(HNS3_NIC_STATE_INITED, &priv->state); if (ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3) |